diff --git a/src/hotspot/cpu/aarch64/c1_Defs_aarch64.hpp b/src/hotspot/cpu/aarch64/c1_Defs_aarch64.hpp index 9470caae9fe5a..62826a2bc8aea 100644 --- a/src/hotspot/cpu/aarch64/c1_Defs_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/c1_Defs_aarch64.hpp @@ -32,11 +32,6 @@ enum { pd_hi_word_offset_in_bytes = BytesPerWord }; -// explicit rounding operations are required to implement the strictFP mode -enum { - pd_strict_fp_requires_explicit_rounding = false -}; - // FIXME: There are no callee-saved // registers diff --git a/src/hotspot/cpu/aarch64/c1_FpuStackSim_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_FpuStackSim_aarch64.cpp deleted file mode 100644 index c50da1c8bebf3..0000000000000 --- a/src/hotspot/cpu/aarch64/c1_FpuStackSim_aarch64.cpp +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- * - */ - -//-------------------------------------------------------- -// FpuStackSim -//-------------------------------------------------------- - -// No FPU stack on AARCH64 -#include "precompiled.hpp" diff --git a/src/hotspot/cpu/aarch64/c1_FpuStackSim_aarch64.hpp b/src/hotspot/cpu/aarch64/c1_FpuStackSim_aarch64.hpp deleted file mode 100644 index e1cfcf3de6c99..0000000000000 --- a/src/hotspot/cpu/aarch64/c1_FpuStackSim_aarch64.hpp +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- * - */ - -#ifndef CPU_AARCH64_C1_FPUSTACKSIM_AARCH64_HPP -#define CPU_AARCH64_C1_FPUSTACKSIM_AARCH64_HPP - -// No FPU stack on AARCH64 -class FpuStackSim; - -#endif // CPU_AARCH64_C1_FPUSTACKSIM_AARCH64_HPP diff --git a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp index 4ae2da6680263..41175baa456ff 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp @@ -408,7 +408,7 @@ void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { arithmetic_op_fpu(x->op(), reg, left.result(), right.result()); - set_result(x, round_item(reg)); + set_result(x, reg); } // for _ladd, _lmul, _lsub, _ldiv, _lrem diff --git a/src/hotspot/cpu/aarch64/c1_LinearScan_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LinearScan_aarch64.cpp deleted file mode 100644 index 4b426694cd7e3..0000000000000 --- a/src/hotspot/cpu/aarch64/c1_LinearScan_aarch64.cpp +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
- * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "precompiled.hpp" -#include "c1/c1_Instruction.hpp" -#include "c1/c1_LinearScan.hpp" -#include "utilities/bitMap.inline.hpp" - -void LinearScan::allocate_fpu_stack() { - // No FPU stack on AArch64 -} diff --git a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp index 836caa86cb0af..cb1627a7ed169 100644 --- a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp @@ -1402,9 +1402,6 @@ void InterpreterMacroAssembler::_interp_verify_oop(Register reg, TosState state, } } -void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ; } - - void InterpreterMacroAssembler::notify_method_entry() { // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to // track stack depth. If it is possible to enter interp_only_mode we add diff --git a/src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp b/src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp index bc2070d283d7d..059d79c3cb93c 100644 --- a/src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/interp_masm_aarch64.hpp @@ -303,8 +303,6 @@ class InterpreterMacroAssembler: public MacroAssembler { // only if +VerifyOops && state == atos #define interp_verify_oop(reg, state) _interp_verify_oop(reg, state, __FILE__, __LINE__); void _interp_verify_oop(Register reg, TosState state, const char* file, int line); - // only if +VerifyFPU && (state == ftos || state == dtos) - void verify_FPU(int stack_depth, TosState state = ftos); typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; diff --git a/src/hotspot/cpu/aarch64/matcher_aarch64.hpp b/src/hotspot/cpu/aarch64/matcher_aarch64.hpp index 447c5f57a8aa5..a6cd055775870 100644 --- a/src/hotspot/cpu/aarch64/matcher_aarch64.hpp +++ 
b/src/hotspot/cpu/aarch64/matcher_aarch64.hpp @@ -115,9 +115,6 @@ // C code as the Java calling convention forces doubles to be aligned. static const bool misaligned_doubles_ok = true; - // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. - static const bool strict_fp_requires_explicit_rounding = false; - // Are floats converted to double when stored to stack during // deoptimization? static constexpr bool float_in_double() { return false; } diff --git a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp index f70450b722223..e6df12a14fa80 100644 --- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp @@ -1801,12 +1801,6 @@ address TemplateInterpreterGenerator::generate_currentThread() { return entry_point; } -// Not supported -address TemplateInterpreterGenerator::generate_Float_intBitsToFloat_entry() { return nullptr; } -address TemplateInterpreterGenerator::generate_Float_floatToRawIntBits_entry() { return nullptr; } -address TemplateInterpreterGenerator::generate_Double_longBitsToDouble_entry() { return nullptr; } -address TemplateInterpreterGenerator::generate_Double_doubleToRawLongBits_entry() { return nullptr; } - //----------------------------------------------------------------------------- // Exceptions diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad index a3db5c0619ced..386e04fb7ce63 100644 --- a/src/hotspot/cpu/arm/arm.ad +++ b/src/hotspot/cpu/arm/arm.ad @@ -4510,18 +4510,6 @@ instruct unnecessary_membar_volatile() %{ %} //----------Register Move Instructions----------------------------------------- -// instruct roundDouble_nop(regD dst) %{ -// match(Set dst (RoundDouble dst)); -// ins_pipe(empty); -// %} - - -// instruct roundFloat_nop(regF dst) %{ -// match(Set dst (RoundFloat dst)); -// ins_pipe(empty); -// %} - - // Cast 
Index to Pointer for unsafe natives instruct castX2P(iRegX src, iRegP dst) %{ diff --git a/src/hotspot/cpu/arm/c1_Defs_arm.hpp b/src/hotspot/cpu/arm/c1_Defs_arm.hpp index 32e0b02964879..5145efd011a8d 100644 --- a/src/hotspot/cpu/arm/c1_Defs_arm.hpp +++ b/src/hotspot/cpu/arm/c1_Defs_arm.hpp @@ -31,11 +31,6 @@ enum { pd_hi_word_offset_in_bytes = BytesPerWord }; -// explicit rounding operations are required to implement the strictFP mode -enum { - pd_strict_fp_requires_explicit_rounding = false -}; - #ifdef __SOFTFP__ #define SOFT(n) n #define VFP(n) diff --git a/src/hotspot/cpu/arm/c1_FpuStackSim_arm.cpp b/src/hotspot/cpu/arm/c1_FpuStackSim_arm.cpp deleted file mode 100644 index 287f4e412d74b..0000000000000 --- a/src/hotspot/cpu/arm/c1_FpuStackSim_arm.cpp +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (c) 2008, 2017, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- * - */ - -// Nothing needed here diff --git a/src/hotspot/cpu/arm/c1_FpuStackSim_arm.hpp b/src/hotspot/cpu/arm/c1_FpuStackSim_arm.hpp deleted file mode 100644 index 74e5ebc81b70b..0000000000000 --- a/src/hotspot/cpu/arm/c1_FpuStackSim_arm.hpp +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- * - */ - -#ifndef CPU_ARM_C1_FPUSTACKSIM_ARM_HPP -#define CPU_ARM_C1_FPUSTACKSIM_ARM_HPP - -// Nothing needed here - -#endif // CPU_ARM_C1_FPUSTACKSIM_ARM_HPP diff --git a/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp b/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp index a70bf2cbda953..4b47a589746f7 100644 --- a/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp +++ b/src/hotspot/cpu/arm/c1_LIRGenerator_arm.cpp @@ -925,7 +925,7 @@ void LIRGenerator::do_Convert(Convert* x) { LIRItem value(x->value(), this); value.load_item(); LIR_Opr reg = rlock_result(x); - __ convert(x->op(), value.result(), reg, nullptr); + __ convert(x->op(), value.result(), reg); return; } } diff --git a/src/hotspot/cpu/arm/c1_LinearScan_arm.cpp b/src/hotspot/cpu/arm/c1_LinearScan_arm.cpp deleted file mode 100644 index 21030b9a23f95..0000000000000 --- a/src/hotspot/cpu/arm/c1_LinearScan_arm.cpp +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2008, 2011, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- * - */ - -#include "precompiled.hpp" -#include "c1/c1_Instruction.hpp" -#include "c1/c1_LinearScan.hpp" -#include "utilities/bitMap.inline.hpp" - -void LinearScan::allocate_fpu_stack() { - // No FPU stack on ARM -} diff --git a/src/hotspot/cpu/arm/interp_masm_arm.hpp b/src/hotspot/cpu/arm/interp_masm_arm.hpp index 58eeda6fbbbf2..578e191719e18 100644 --- a/src/hotspot/cpu/arm/interp_masm_arm.hpp +++ b/src/hotspot/cpu/arm/interp_masm_arm.hpp @@ -198,10 +198,6 @@ class InterpreterMacroAssembler: public MacroAssembler { // Debugging void interp_verify_oop(Register reg, TosState state, const char* file, int line); // only if +VerifyOops && state == atos - void verify_FPU(int stack_depth, TosState state = ftos) { - // No VFP state verification is required for ARM - } - // Object locking void lock_object (Register lock_reg); void unlock_object(Register lock_reg); diff --git a/src/hotspot/cpu/arm/matcher_arm.hpp b/src/hotspot/cpu/arm/matcher_arm.hpp index a4436b7eab410..66fe8ac330eb5 100644 --- a/src/hotspot/cpu/arm/matcher_arm.hpp +++ b/src/hotspot/cpu/arm/matcher_arm.hpp @@ -101,9 +101,6 @@ // Java calling convention forces doubles to be aligned. static const bool misaligned_doubles_ok = false; - // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. - static const bool strict_fp_requires_explicit_rounding = false; - // Are floats converted to double when stored to stack during deoptimization? // ARM does not handle callee-save floats. 
static constexpr bool float_in_double() { diff --git a/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp b/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp index 9df7a455eeb84..c547b7c4975aa 100644 --- a/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp +++ b/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp @@ -795,10 +795,6 @@ address TemplateInterpreterGenerator::generate_currentThread() { return nullptr; address TemplateInterpreterGenerator::generate_CRC32_update_entry() { return nullptr; } address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return nullptr; } address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return nullptr; } -address TemplateInterpreterGenerator::generate_Float_intBitsToFloat_entry() { return nullptr; } -address TemplateInterpreterGenerator::generate_Float_floatToRawIntBits_entry() { return nullptr; } -address TemplateInterpreterGenerator::generate_Double_longBitsToDouble_entry() { return nullptr; } -address TemplateInterpreterGenerator::generate_Double_doubleToRawLongBits_entry() { return nullptr; } address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() { return nullptr; } address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() { return nullptr; } diff --git a/src/hotspot/cpu/ppc/c1_Defs_ppc.hpp b/src/hotspot/cpu/ppc/c1_Defs_ppc.hpp index 9044b9edd2653..f322e548e2161 100644 --- a/src/hotspot/cpu/ppc/c1_Defs_ppc.hpp +++ b/src/hotspot/cpu/ppc/c1_Defs_ppc.hpp @@ -38,12 +38,6 @@ enum { }; -// Explicit rounding operations are not required to implement the strictFP mode. -enum { - pd_strict_fp_requires_explicit_rounding = false -}; - - // registers enum { pd_nof_cpu_regs_frame_map = 32, // Number of registers used during code emission. 
diff --git a/src/hotspot/cpu/ppc/c1_FpuStackSim_ppc.hpp b/src/hotspot/cpu/ppc/c1_FpuStackSim_ppc.hpp deleted file mode 100644 index 84d180a9b0380..0000000000000 --- a/src/hotspot/cpu/ppc/c1_FpuStackSim_ppc.hpp +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2015 SAP SE. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#ifndef CPU_PPC_C1_FPUSTACKSIM_PPC_HPP -#define CPU_PPC_C1_FPUSTACKSIM_PPC_HPP - -// No FPU stack on PPC. -class FpuStackSim; - -#endif // CPU_PPC_C1_FPUSTACKSIM_PPC_HPP diff --git a/src/hotspot/cpu/ppc/c1_LinearScan_ppc.cpp b/src/hotspot/cpu/ppc/c1_LinearScan_ppc.cpp deleted file mode 100644 index 026540f25b213..0000000000000 --- a/src/hotspot/cpu/ppc/c1_LinearScan_ppc.cpp +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2012, 2015 SAP SE. All rights reserved. 
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "precompiled.hpp" -#include "c1/c1_Instruction.hpp" -#include "c1/c1_LinearScan.hpp" -#include "utilities/bitMap.inline.hpp" - -void LinearScan::allocate_fpu_stack() { - Unimplemented(); - // No FPU stack on PPC -} diff --git a/src/hotspot/cpu/ppc/interp_masm_ppc.hpp b/src/hotspot/cpu/ppc/interp_masm_ppc.hpp index 7277ac3bc1a0f..99ac037e4b7cb 100644 --- a/src/hotspot/cpu/ppc/interp_masm_ppc.hpp +++ b/src/hotspot/cpu/ppc/interp_masm_ppc.hpp @@ -265,7 +265,6 @@ class InterpreterMacroAssembler: public MacroAssembler { // Debugging void verify_oop(Register reg, TosState state = atos); // only if +VerifyOops && state == atos void verify_oop_or_return_address(Register reg, Register rtmp); // for astore - void verify_FPU(int stack_depth, TosState state = ftos); typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; diff --git a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp index 67b9bdc04142f..8b9f9828c9987 100644 --- 
a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp +++ b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp @@ -2382,12 +2382,6 @@ static bool verify_return_address(Method* m, int bci) { return false; } -void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { - if (VerifyFPU) { - unimplemented("verfiyFPU"); - } -} - void InterpreterMacroAssembler::verify_oop_or_return_address(Register reg, Register Rtmp) { if (!VerifyOops) return; diff --git a/src/hotspot/cpu/ppc/matcher_ppc.hpp b/src/hotspot/cpu/ppc/matcher_ppc.hpp index aaac79325c421..c5b22be86404d 100644 --- a/src/hotspot/cpu/ppc/matcher_ppc.hpp +++ b/src/hotspot/cpu/ppc/matcher_ppc.hpp @@ -113,9 +113,6 @@ // Java calling convention forces doubles to be aligned. static const bool misaligned_doubles_ok = true; - // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. - static const bool strict_fp_requires_explicit_rounding = false; - // Do floats take an entire double register or just half? // // A float occupies a ppc64 double register. For the allocator, a diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad index 808dd02273895..80b424e099bf7 100644 --- a/src/hotspot/cpu/ppc/ppc.ad +++ b/src/hotspot/cpu/ppc/ppc.ad @@ -9532,28 +9532,6 @@ instruct sqrtF_reg(regF dst, regF src) %{ ins_pipe(pipe_class_default); %} -instruct roundDouble_nop(regD dst) %{ - match(Set dst (RoundDouble dst)); - ins_cost(0); - - format %{ " -- \t// RoundDouble not needed - empty" %} - size(0); - // PPC results are already "rounded" (i.e., normal-format IEEE). - ins_encode( /*empty*/ ); - ins_pipe(pipe_class_default); -%} - -instruct roundFloat_nop(regF dst) %{ - match(Set dst (RoundFloat dst)); - ins_cost(0); - - format %{ " -- \t// RoundFloat not needed - empty" %} - size(0); - // PPC results are already "rounded" (i.e., normal-format IEEE). 
- ins_encode( /*empty*/ ); - ins_pipe(pipe_class_default); -%} - // Multiply-Accumulate // src1 * src2 + src3 diff --git a/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp b/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp index 9147dfc1677ab..d4a02333bffd3 100644 --- a/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp +++ b/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp @@ -1999,10 +1999,6 @@ address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(Abstract // Not supported address TemplateInterpreterGenerator::generate_currentThread() { return nullptr; } -address TemplateInterpreterGenerator::generate_Float_intBitsToFloat_entry() { return nullptr; } -address TemplateInterpreterGenerator::generate_Float_floatToRawIntBits_entry() { return nullptr; } -address TemplateInterpreterGenerator::generate_Double_longBitsToDouble_entry() { return nullptr; } -address TemplateInterpreterGenerator::generate_Double_doubleToRawLongBits_entry() { return nullptr; } // ============================================================================= // Exceptions diff --git a/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp b/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp index bce243802980b..651136566013a 100644 --- a/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp +++ b/src/hotspot/cpu/riscv/c1_Defs_riscv.hpp @@ -32,11 +32,6 @@ enum { pd_hi_word_offset_in_bytes = BytesPerWord }; -// explicit rounding operations are required to implement the strictFP mode -enum { - pd_strict_fp_requires_explicit_rounding = false -}; - // registers enum { pd_nof_cpu_regs_frame_map = Register::number_of_registers, // number of registers used during code emission diff --git a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp deleted file mode 100644 index e3a2606c5323b..0000000000000 --- a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.cpp +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2005, 2017, Oracle and/or its affiliates. 
All rights reserved. - * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -//-------------------------------------------------------- -// FpuStackSim -//-------------------------------------------------------- - -// No FPU stack on RISCV diff --git a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp b/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp deleted file mode 100644 index 7bc3d3115018e..0000000000000 --- a/src/hotspot/cpu/riscv/c1_FpuStackSim_riscv.hpp +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. 
- * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#ifndef CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP -#define CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP - -// No FPU stack on RISCV -class FpuStackSim; - -#endif // CPU_RISCV_C1_FPUSTACKSIM_RISCV_HPP diff --git a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp index b328d457192ba..8259d056ac3b6 100644 --- a/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp +++ b/src/hotspot/cpu/riscv/c1_LIRGenerator_riscv.cpp @@ -356,7 +356,7 @@ void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { LIR_Opr reg = rlock(x); arithmetic_op_fpu(x->op(), reg, left.result(), right.result()); - set_result(x, round_item(reg)); + set_result(x, reg); } // for _ladd, _lmul, _lsub, _ldiv, _lrem diff --git a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp b/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp deleted file mode 100644 index 78a61128bdd5d..0000000000000 --- a/src/hotspot/cpu/riscv/c1_LinearScan_riscv.cpp +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "precompiled.hpp" -#include "c1/c1_Instruction.hpp" -#include "c1/c1_LinearScan.hpp" -#include "utilities/bitMap.inline.hpp" - -void LinearScan::allocate_fpu_stack() { - // No FPU stack on RISCV -} diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp index e17a3765b50ec..a27dc75ca3402 100644 --- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp +++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp @@ -1432,8 +1432,6 @@ void InterpreterMacroAssembler::profile_switch_case(Register index, } } -void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ; } - void InterpreterMacroAssembler::notify_method_entry() { // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to // track stack depth. 
If it is possible to enter interp_only_mode we add diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp index f52f6ebc11e33..b94140ea9904f 100644 --- a/src/hotspot/cpu/riscv/interp_masm_riscv.hpp +++ b/src/hotspot/cpu/riscv/interp_masm_riscv.hpp @@ -284,10 +284,6 @@ class InterpreterMacroAssembler: public MacroAssembler { void profile_return_type(Register mdp, Register ret, Register tmp); void profile_parameters_type(Register mdp, Register tmp1, Register tmp2, Register tmp3); - // Debugging - // only if +VerifyFPU && (state == ftos || state == dtos) - void verify_FPU(int stack_depth, TosState state = ftos); - typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; // support for jvmti/dtrace diff --git a/src/hotspot/cpu/riscv/matcher_riscv.hpp b/src/hotspot/cpu/riscv/matcher_riscv.hpp index ed1519ec1503a..1b490a07f92a6 100644 --- a/src/hotspot/cpu/riscv/matcher_riscv.hpp +++ b/src/hotspot/cpu/riscv/matcher_riscv.hpp @@ -114,9 +114,6 @@ // C code as the Java calling convention forces doubles to be aligned. static const bool misaligned_doubles_ok = true; - // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. - static const bool strict_fp_requires_explicit_rounding = false; - // Are floats converted to double when stored to stack during // deoptimization? 
static constexpr bool float_in_double() { return false; } diff --git a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp index bc67de54c4bc6..69dd46b4703be 100644 --- a/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp +++ b/src/hotspot/cpu/riscv/templateInterpreterGenerator_riscv.cpp @@ -939,10 +939,6 @@ address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(Abstract } // Not supported -address TemplateInterpreterGenerator::generate_Float_intBitsToFloat_entry() { return nullptr; } -address TemplateInterpreterGenerator::generate_Float_floatToRawIntBits_entry() { return nullptr; } -address TemplateInterpreterGenerator::generate_Double_longBitsToDouble_entry() { return nullptr; } -address TemplateInterpreterGenerator::generate_Double_doubleToRawLongBits_entry() { return nullptr; } address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() { return nullptr; } address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() { return nullptr; } diff --git a/src/hotspot/cpu/s390/c1_Defs_s390.hpp b/src/hotspot/cpu/s390/c1_Defs_s390.hpp index 6343a40bb06c7..31e0f85efcc2a 100644 --- a/src/hotspot/cpu/s390/c1_Defs_s390.hpp +++ b/src/hotspot/cpu/s390/c1_Defs_s390.hpp @@ -32,11 +32,6 @@ enum { pd_hi_word_offset_in_bytes = 0 }; -// Explicit rounding operations are not required to implement the strictFP mode. -enum { - pd_strict_fp_requires_explicit_rounding = false -}; - // registers enum { pd_nof_cpu_regs_frame_map = 16, // Number of registers used during code emission. diff --git a/src/hotspot/cpu/s390/c1_FpuStackSim_s390.hpp b/src/hotspot/cpu/s390/c1_FpuStackSim_s390.hpp deleted file mode 100644 index 9a13f6ec70c50..0000000000000 --- a/src/hotspot/cpu/s390/c1_FpuStackSim_s390.hpp +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2016 SAP SE. All rights reserved. 
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#ifndef CPU_S390_C1_FPUSTACKSIM_S390_HPP -#define CPU_S390_C1_FPUSTACKSIM_S390_HPP - -// No FPU stack on ZARCH_64 -class FpuStackSim; - -#endif // CPU_S390_C1_FPUSTACKSIM_S390_HPP diff --git a/src/hotspot/cpu/s390/c1_LinearScan_s390.cpp b/src/hotspot/cpu/s390/c1_LinearScan_s390.cpp deleted file mode 100644 index f48496f34d3ee..0000000000000 --- a/src/hotspot/cpu/s390/c1_LinearScan_s390.cpp +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2016 SAP SE. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. 
- * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "precompiled.hpp" -#include "c1/c1_LinearScan.hpp" -#include "utilities/debug.hpp" - -void LinearScan::allocate_fpu_stack() { - // No FPU stack on ZARCH_64. - ShouldNotCallThis(); -} diff --git a/src/hotspot/cpu/s390/interp_masm_s390.cpp b/src/hotspot/cpu/s390/interp_masm_s390.cpp index 5e80817aaba7b..696ee230d5c49 100644 --- a/src/hotspot/cpu/s390/interp_masm_s390.cpp +++ b/src/hotspot/cpu/s390/interp_masm_s390.cpp @@ -94,8 +94,6 @@ void InterpreterMacroAssembler::dispatch_next(TosState state, int bcp_incr, bool // Dispatch value in Lbyte_code and increment Lbcp. 
void InterpreterMacroAssembler::dispatch_base(TosState state, address* table, bool generate_poll) { - verify_FPU(1, state); - #ifdef ASSERT address reentry = nullptr; { Label OK; @@ -2190,9 +2188,3 @@ void InterpreterMacroAssembler::pop_interpreter_frame(Register return_pc, Regist z_stg(Z_ARG3, _z_parent_ijava_frame_abi(return_pc), Z_SP); #endif } - -void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { - if (VerifyFPU) { - unimplemented("verifyFPU"); - } -} diff --git a/src/hotspot/cpu/s390/interp_masm_s390.hpp b/src/hotspot/cpu/s390/interp_masm_s390.hpp index f94473b1700b7..2473463219ca5 100644 --- a/src/hotspot/cpu/s390/interp_masm_s390.hpp +++ b/src/hotspot/cpu/s390/interp_masm_s390.hpp @@ -313,7 +313,6 @@ class InterpreterMacroAssembler: public MacroAssembler { // Debugging void verify_oop(Register reg, TosState state = atos); // Only if +VerifyOops && state == atos. void verify_oop_or_return_address(Register reg, Register rtmp); // for astore - void verify_FPU(int stack_depth, TosState state = ftos); // JVMTI helpers void skip_if_jvmti_mode(Label &Lskip, Register Rscratch = Z_R0); diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad index d6ab89a61d2c9..fc7de7e70e909 100644 --- a/src/hotspot/cpu/s390/s390.ad +++ b/src/hotspot/cpu/s390/s390.ad @@ -5317,23 +5317,6 @@ instruct membar_storestore() %{ //----------Register Move Instructions----------------------------------------- -instruct roundDouble_nop(regD dst) %{ - match(Set dst (RoundDouble dst)); - ins_cost(0); - // TODO: s390 port size(FIXED_SIZE); - // z/Architecture results are already "rounded" (i.e., normal-format IEEE). - ins_encode(); - ins_pipe(pipe_class_dummy); -%} - -instruct roundFloat_nop(regF dst) %{ - match(Set dst (RoundFloat dst)); - ins_cost(0); - // TODO: s390 port size(FIXED_SIZE); - // z/Architecture results are already "rounded" (i.e., normal-format IEEE). 
- ins_encode(); - ins_pipe(pipe_class_dummy); -%} // Cast Long to Pointer for unsafe natives. instruct castX2P(iRegP dst, iRegL src) %{ diff --git a/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp b/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp index 1c4089d5beb07..42be79b9f7a68 100644 --- a/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp +++ b/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp @@ -2007,10 +2007,6 @@ address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(Abstract // Not supported address TemplateInterpreterGenerator::generate_currentThread() { return nullptr; } -address TemplateInterpreterGenerator::generate_Float_intBitsToFloat_entry() { return nullptr; } -address TemplateInterpreterGenerator::generate_Float_floatToRawIntBits_entry() { return nullptr; } -address TemplateInterpreterGenerator::generate_Double_longBitsToDouble_entry() { return nullptr; } -address TemplateInterpreterGenerator::generate_Double_doubleToRawLongBits_entry() { return nullptr; } address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() { return nullptr; } address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() { return nullptr; } diff --git a/src/hotspot/cpu/x86/abstractInterpreter_x86.cpp b/src/hotspot/cpu/x86/abstractInterpreter_x86.cpp index fef137257b020..9a996deb184dc 100644 --- a/src/hotspot/cpu/x86/abstractInterpreter_x86.cpp +++ b/src/hotspot/cpu/x86/abstractInterpreter_x86.cpp @@ -118,27 +118,6 @@ void AbstractInterpreter::layout_activation(Method* method, method->method_holder()->java_mirror(); } -#ifndef _LP64 -int AbstractInterpreter::BasicType_as_index(BasicType type) { - int i = 0; - switch (type) { - case T_BOOLEAN: i = 0; break; - case T_CHAR : i = 1; break; - case T_BYTE : i = 2; break; - case T_SHORT : i = 3; break; - case T_INT : // fall through - case T_LONG : // fall through - case T_VOID : i = 4; break; - case T_FLOAT : i = 5; break; // have to treat float 
and double separately for SSE - case T_DOUBLE : i = 6; break; - case T_OBJECT : // fall through - case T_ARRAY : i = 7; break; - default : ShouldNotReachHere(); - } - assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, "index out of bounds"); - return i; -} -#else int AbstractInterpreter::BasicType_as_index(BasicType type) { int i = 0; switch (type) { @@ -159,7 +138,6 @@ int AbstractInterpreter::BasicType_as_index(BasicType type) { "index out of bounds"); return i; } -#endif // _LP64 // How much stack a method activation needs in words. int AbstractInterpreter::size_top_interpreter_activation(Method* method) { @@ -171,11 +149,7 @@ int AbstractInterpreter::size_top_interpreter_activation(Method* method) { const int overhead_size = -(frame::interpreter_frame_initial_sp_offset) + entry_size; -#ifndef _LP64 - const int stub_code = 4; // see generate_call_stub -#else const int stub_code = frame::entry_frame_after_call_words; -#endif const int method_stack = (method->max_locals() + method->max_stack()) * Interpreter::stackElementWords; diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp index c2fcbcea71e24..f72099982609f 100644 --- a/src/hotspot/cpu/x86/assembler_x86.cpp +++ b/src/hotspot/cpu/x86/assembler_x86.cpp @@ -25,6 +25,8 @@ #include "precompiled.hpp" #include "asm/assembler.hpp" #include "asm/assembler.inline.hpp" +#include "asm/codeBuffer.hpp" +#include "code/codeCache.hpp" #include "gc/shared/cardTableBarrierSet.hpp" #include "interpreter/interpreter.hpp" #include "memory/resourceArea.hpp" @@ -120,8 +122,6 @@ AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { // Implementation of Address -#ifdef _LP64 - Address Address::make_array(ArrayAddress adr) { // Not implementable on 64bit machines // Should have been handled higher up the call chain. 
@@ -158,30 +158,6 @@ Address::Address(int disp, address loc, relocInfo::relocType rtype) { ShouldNotReachHere(); } } -#else // LP64 - -Address Address::make_array(ArrayAddress adr) { - AddressLiteral base = adr.base(); - Address index = adr.index(); - assert(index._disp == 0, "must not have disp"); // maybe it can? - Address array(index._base, index._index, index._scale, (intptr_t) base.target()); - array._rspec = base._rspec; - return array; -} - -// exceedingly dangerous constructor -Address::Address(address loc, RelocationHolder spec) { - _base = noreg; - _index = noreg; - _scale = no_scale; - _disp = (intptr_t) loc; - _rspec = spec; - _xmmindex = xnoreg; - _isxmmindex = false; -} - -#endif // _LP64 - // Convert the raw encoding form into the form expected by the constructor for @@ -215,7 +191,6 @@ void Assembler::init_attributes(void) { _legacy_mode_dq = (VM_Version::supports_avx512dq() == false); _legacy_mode_vl = (VM_Version::supports_avx512vl() == false); _legacy_mode_vlbw = (VM_Version::supports_avx512vlbw() == false); - NOT_LP64(_is_managed = false;) _attributes = nullptr; } @@ -731,7 +706,7 @@ void Assembler::emit_operand_helper(int reg_enc, int base_enc, int index_enc, address next_ip = pc() + sizeof(int32_t) + post_addr_length; int64_t adjusted = disp; // Do rip-rel adjustment for 64bit - LP64_ONLY(adjusted -= (next_ip - inst_mark())); + adjusted -= (next_ip - inst_mark()); assert(is_simm32(adjusted), "must be 32bit offset (RIP relative address)"); emit_data((int32_t) adjusted, rspec, disp32_operand); @@ -832,7 +807,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) { case FS_segment: case GS_segment: // Seems dubious - LP64_ONLY(assert(false, "shouldn't have that prefix")); + assert(false, "shouldn't have that prefix"); assert(ip == inst+1, "only one prefix allowed"); goto again_after_prefix; @@ -845,11 +820,9 @@ address Assembler::locate_operand(address inst, WhichOperand which) { case REX_RB: case REX_RX: case REX_RXB: - 
NOT_LP64(assert(false, "64bit prefixes")); goto again_after_prefix; case REX2: - NOT_LP64(assert(false, "64bit prefixes")); if ((0xFF & *ip++) & REX2BIT_W) { is_64bit = true; } @@ -863,7 +836,6 @@ address Assembler::locate_operand(address inst, WhichOperand which) { case REX_WRB: case REX_WRX: case REX_WRXB: - NOT_LP64(assert(false, "64bit prefixes")); is_64bit = true; goto again_after_prefix; @@ -902,11 +874,9 @@ address Assembler::locate_operand(address inst, WhichOperand which) { case REX_WRB: case REX_WRX: case REX_WRXB: - NOT_LP64(assert(false, "64bit prefix found")); goto again_after_size_prefix2; case REX2: - NOT_LP64(assert(false, "64bit prefix found")); if ((0xFF & *ip++) & REX2BIT_W) { is_64bit = true; } @@ -931,14 +901,9 @@ address Assembler::locate_operand(address inst, WhichOperand which) { case REP8(0xB8): // movl/q r, #32/#64(oop?) if (which == end_pc_operand) return ip + (is_64bit ? 8 : 4); // these asserts are somewhat nonsensical -#ifndef _LP64 - assert(which == imm_operand || which == disp32_operand, - "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip)); -#else assert(((which == call32_operand || which == imm_operand) && is_64bit) || (which == narrow_oop_operand && !is_64bit), "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip)); -#endif // _LP64 return ip; case 0x69: // imul r, a, #32 @@ -1098,9 +1063,6 @@ address Assembler::locate_operand(address inst, WhichOperand which) { // Fortunately C2 doesn't generate these instructions so we don't need // to check for them in product version. 
- // Check second byte - NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions")); - int vex_opcode; // First byte if ((0xFF & *inst) == VEX_3bytes) { @@ -1202,7 +1164,6 @@ address Assembler::locate_operand(address inst, WhichOperand which) { case REX_WRX: case REX_WRXB: case REX2: - NOT_LP64(assert(false, "found 64bit prefix")); ip++; default: ip++; @@ -1218,12 +1179,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) { } assert(which != call32_operand, "instruction is not a call, jmp, or jcc"); -#ifdef _LP64 assert(which != imm_operand, "instruction is not a movq reg, imm64"); -#else - // assert(which != imm_operand || has_imm32, "instruction has no imm32 field"); - assert(which != imm_operand || has_disp32, "instruction has no imm32 field"); -#endif // LP64 assert(which != disp32_operand || has_disp32, "instruction has no disp32 field"); // parse the output of emit_operand @@ -1278,11 +1234,7 @@ address Assembler::locate_operand(address inst, WhichOperand which) { return ip + tail_size; } -#ifdef _LP64 assert(which == narrow_oop_operand && !is_64bit, "instruction is not a movl adr, imm32"); -#else - assert(which == imm_operand, "instruction has only an imm field"); -#endif // LP64 return ip; } @@ -1305,8 +1257,7 @@ void Assembler::check_relocation(RelocationHolder const& rspec, int format) { // assert(format == imm32_operand, "cannot specify a nonzero format"); opnd = locate_operand(inst, call32_operand); } else if (r->is_data()) { - assert(format == imm_operand || format == disp32_operand - LP64_ONLY(|| format == narrow_oop_operand), "format ok"); + assert(format == imm_operand || format == disp32_operand || format == narrow_oop_operand, "format ok"); opnd = locate_operand(inst, (WhichOperand)format); } else { assert(format == imm_operand, "cannot specify a format"); @@ -1535,7 +1486,6 @@ void Assembler::addr_nop_8() { } void Assembler::addsd(XMMRegister dst, XMMRegister src) { - 
NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_rex_vex_w_reverted(); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); @@ -1543,7 +1493,6 @@ void Assembler::addsd(XMMRegister dst, XMMRegister src) { } void Assembler::addsd(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); @@ -1554,14 +1503,12 @@ void Assembler::addsd(XMMRegister dst, Address src) { } void Assembler::addss(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int16(0x58, (0xC0 | encode)); } void Assembler::addss(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); @@ -1820,9 +1767,6 @@ void Assembler::blsrl(Register dst, Address src) { } void Assembler::call(Label& L, relocInfo::relocType rtype) { - // suspect disp32 is always good - int operand = LP64_ONLY(disp32_operand) NOT_LP64(imm_operand); - if (L.is_bound()) { const int long_size = 5; int offs = (int)( target(L) - pc() ); @@ -1830,14 +1774,14 @@ void Assembler::call(Label& L, 
relocInfo::relocType rtype) { InstructionMark im(this); // 1110 1000 #32-bit disp emit_int8((unsigned char)0xE8); - emit_data(offs - long_size, rtype, operand); + emit_data(offs - long_size, rtype, disp32_operand); } else { InstructionMark im(this); // 1110 1000 #32-bit disp L.add_patch_at(code(), locator()); emit_int8((unsigned char)0xE8); - emit_data(int(0), rtype, operand); + emit_data(int(0), rtype, disp32_operand); } } @@ -1864,8 +1808,7 @@ void Assembler::call_literal(address entry, RelocationHolder const& rspec) { // Technically, should use call32_operand, but this format is // implied by the fact that we're emitting a call instruction. - int operand = LP64_ONLY(disp32_operand) NOT_LP64(call32_operand); - emit_data((int) disp, rspec, operand); + emit_data((int) disp, rspec, disp32_operand); } void Assembler::cdql() { @@ -1877,7 +1820,6 @@ void Assembler::cld() { } void Assembler::cmovl(Condition cc, Register dst, Register src) { - NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction")); int encode = prefix_and_encode(dst->encoding(), src->encoding(), true /* is_map1 */); emit_opcode_prefix_and_encoding(0x40 | cc, 0xC0, encode); } @@ -1890,7 +1832,6 @@ void Assembler::ecmovl(Condition cc, Register dst, Register src1, Register src2) void Assembler::cmovl(Condition cc, Register dst, Address src) { InstructionMark im(this); - NOT_LP64(guarantee(VM_Version::supports_cmov(), "illegal instruction")); prefix(src, dst, false, true /* is_map1 */); emit_int8((0x40 | cc)); emit_operand(dst, src, 0); @@ -2018,7 +1959,6 @@ void Assembler::cmpxchgb(Register reg, Address adr) { // cmpxchg void Assembler::comisd(XMMRegister dst, Address src) { // NOTE: dbx seems to decode this as comiss even though the // 0x66 is there. 
Strangely ucomisd comes out correct - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);; attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); @@ -2029,7 +1969,6 @@ void Assembler::comisd(XMMRegister dst, Address src) { } void Assembler::comisd(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_rex_vex_w_reverted(); int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -2037,7 +1976,6 @@ void Assembler::comisd(XMMRegister dst, XMMRegister src) { } void Assembler::comiss(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); @@ -2047,7 +1985,6 @@ void Assembler::comiss(XMMRegister dst, Address src) { } void Assembler::comiss(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); emit_int16(0x2F, (0xC0 | encode)); @@ -2085,7 +2022,7 @@ void Assembler::crc32(Register crc, Register v, int8_t sizeInBytes) { case 2: case 4: break; - LP64_ONLY(case 8:) + case 8: // This instruction is not valid in 32 bits // Note: // 
http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf @@ -2105,7 +2042,7 @@ void Assembler::crc32(Register crc, Register v, int8_t sizeInBytes) { assert(0, "Unsupported value for a sizeInBytes argument"); break; } - LP64_ONLY(prefix(crc, v, p);) + prefix(crc, v, p); emit_int32(0x0F, 0x38, 0xF0 | w, @@ -2134,7 +2071,7 @@ void Assembler::crc32(Register crc, Address adr, int8_t sizeInBytes) { case 2: case 4: break; - LP64_ONLY(case 8:) + case 8: // This instruction is not valid in 32 bits p = REX_W; break; @@ -2142,14 +2079,13 @@ void Assembler::crc32(Register crc, Address adr, int8_t sizeInBytes) { assert(0, "Unsupported value for a sizeInBytes argument"); break; } - LP64_ONLY(prefix(crc, adr, p);) + prefix(crc, adr, p); emit_int24(0x0F, 0x38, (0xF0 | w)); emit_operand(crc, adr, 0); } } void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int16((unsigned char)0xE6, (0xC0 | encode)); @@ -2212,7 +2148,6 @@ void Assembler::vcvtph2ps(XMMRegister dst, Address src, int vector_len) { } void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); emit_int16(0x5B, (0xC0 | encode)); @@ -2226,7 +2161,6 @@ void Assembler::vcvtdq2ps(XMMRegister dst, XMMRegister src, int vector_len) { } void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr 
attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_rex_vex_w_reverted(); int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); @@ -2234,7 +2168,6 @@ void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) { } void Assembler::cvtsd2ss(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); @@ -2245,14 +2178,12 @@ void Assembler::cvtsd2ss(XMMRegister dst, Address src) { } void Assembler::cvtsi2sdl(XMMRegister dst, Register src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes, true); emit_int16(0x2A, (0xC0 | encode)); } void Assembler::cvtsi2sdl(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); @@ -2262,14 +2193,12 @@ void Assembler::cvtsi2sdl(XMMRegister dst, Address src) { } void Assembler::cvtsi2ssl(XMMRegister dst, Register src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), 
VEX_SIMD_F3, VEX_OPCODE_0F, &attributes, true); emit_int16(0x2A, (0xC0 | encode)); } void Assembler::cvtsi2ssl(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); @@ -2279,21 +2208,18 @@ void Assembler::cvtsi2ssl(XMMRegister dst, Address src) { } void Assembler::cvtsi2ssq(XMMRegister dst, Register src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes, true); emit_int16(0x2A, (0xC0 | encode)); } void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int16(0x5A, (0xC0 | encode)); } void Assembler::cvtss2sd(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); @@ -2304,28 +2230,24 @@ void Assembler::cvtss2sd(XMMRegister dst, Address src) { void Assembler::cvttsd2sil(Register dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); int 
encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); emit_int16(0x2C, (0xC0 | encode)); } void Assembler::cvtss2sil(Register dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int16(0x2D, (0xC0 | encode)); } void Assembler::cvttss2sil(Register dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int16(0x2C, (0xC0 | encode)); } void Assembler::cvttpd2dq(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); int vector_len = VM_Version::supports_avx512novl() ? 
AVX_512bit : AVX_128bit; InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -2538,7 +2460,6 @@ void Assembler::edecl(Register dst, Address src, bool no_flags) { } void Assembler::divsd(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); @@ -2549,7 +2470,6 @@ void Assembler::divsd(XMMRegister dst, Address src) { } void Assembler::divsd(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_rex_vex_w_reverted(); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); @@ -2557,7 +2477,6 @@ void Assembler::divsd(XMMRegister dst, XMMRegister src) { } void Assembler::divss(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); @@ -2567,7 +2486,6 @@ void Assembler::divss(XMMRegister dst, Address src) { } void Assembler::divss(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(dst, dst, 
src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int16(0x5E, (0xC0 | encode)); @@ -2839,7 +2757,6 @@ void Assembler::ldmxcsr( Address src) { emit_int8((unsigned char)0xAE); emit_operand(as_Register(2), src, 0); } else { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); prefix(src, true /* is_map1 */); emit_int8((unsigned char)0xAE); @@ -2854,7 +2771,6 @@ void Assembler::leal(Register dst, Address src) { emit_operand(dst, src, 0); } -#ifdef _LP64 void Assembler::lea(Register dst, Label& L) { emit_prefix_and_int8(get_prefixq(Address(), dst), (unsigned char)0x8D); if (!L.is_bound()) { @@ -2872,7 +2788,6 @@ void Assembler::lea(Register dst, Label& L) { emit_int32(disp); } } -#endif void Assembler::lfence() { emit_int24(0x0F, (unsigned char)0xAE, (unsigned char)0xE8); @@ -2921,22 +2836,19 @@ void Assembler::elzcntl(Register dst, Address src, bool no_flags) { // Emit mfence instruction void Assembler::mfence() { - NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");) emit_int24(0x0F, (unsigned char)0xAE, (unsigned char)0xF0); } // Emit sfence instruction void Assembler::sfence() { - NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");) emit_int24(0x0F, (unsigned char)0xAE, (unsigned char)0xF8); } void Assembler::mov(Register dst, Register src) { - LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); + movq(dst, src); } void Assembler::movapd(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); int vector_len = VM_Version::supports_avx512novl() ? 
AVX_512bit : AVX_128bit; InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_rex_vex_w_reverted(); @@ -2945,7 +2857,6 @@ void Assembler::movapd(XMMRegister dst, XMMRegister src) { } void Assembler::movaps(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit; InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); @@ -2953,14 +2864,12 @@ void Assembler::movaps(XMMRegister dst, XMMRegister src) { } void Assembler::movlhps(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); emit_int16(0x16, (0xC0 | encode)); } void Assembler::movb(Register dst, Address src) { - NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); InstructionMark im(this); prefix(src, dst, true); emit_int8((unsigned char)0x8A); @@ -3389,14 +3298,12 @@ void Assembler::movb(Address dst, Register src) { } void Assembler::movdl(XMMRegister dst, Register src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes, true); emit_int16(0x6E, (0xC0 | encode)); } void Assembler::movdl(Register dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, 
/* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); // swap src/dst to get correct prefix int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes, true); @@ -3404,7 +3311,6 @@ void Assembler::movdl(Register dst, XMMRegister src) { } void Assembler::movdl(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); @@ -3414,7 +3320,6 @@ void Assembler::movdl(XMMRegister dst, Address src) { } void Assembler::movdl(Address dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); @@ -3424,14 +3329,12 @@ void Assembler::movdl(Address dst, XMMRegister src) { } void Assembler::movdqa(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16(0x6F, (0xC0 | encode)); } void Assembler::movdqa(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); @@ -3441,7 +3344,6 @@ void 
Assembler::movdqa(XMMRegister dst, Address src) { } void Assembler::movdqu(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); @@ -3451,14 +3353,12 @@ void Assembler::movdqu(XMMRegister dst, Address src) { } void Assembler::movdqu(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int16(0x6F, (0xC0 | encode)); } void Assembler::movdqu(Address dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); @@ -3842,7 +3742,6 @@ void Assembler::movl(Address dst, Register src) { // when loading from memory. But for old Opteron use movlpd instead of movsd. // The selection is done in MacroAssembler::movdbl() and movflt(). 
void Assembler::movlpd(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); @@ -3853,7 +3752,6 @@ void Assembler::movlpd(XMMRegister dst, Address src) { } void Assembler::movq(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); @@ -3864,7 +3762,6 @@ void Assembler::movq(XMMRegister dst, Address src) { } void Assembler::movq(Address dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); @@ -3875,7 +3772,6 @@ void Assembler::movq(Address dst, XMMRegister src) { } void Assembler::movq(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_rex_vex_w_reverted(); int encode = simd_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -3883,7 +3779,6 @@ void Assembler::movq(XMMRegister dst, XMMRegister src) { } void Assembler::movq(Register dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr 
attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); // swap src/dst to get correct prefix int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes, true); @@ -3891,7 +3786,6 @@ void Assembler::movq(Register dst, XMMRegister src) { } void Assembler::movq(XMMRegister dst, Register src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes, true); emit_int16(0x6E, (0xC0 | encode)); @@ -3905,13 +3799,11 @@ void Assembler::movsbl(Register dst, Address src) { // movsxb } void Assembler::movsbl(Register dst, Register src) { // movsxb - NOT_LP64(assert(src->has_byte_register(), "must have byte register")); int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true, true /* is_map1 */); emit_opcode_prefix_and_encoding((unsigned char)0xBE, 0xC0, encode); } void Assembler::movsd(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_rex_vex_w_reverted(); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); @@ -3919,7 +3811,6 @@ void Assembler::movsd(XMMRegister dst, XMMRegister src) { } void Assembler::movsd(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ 
EVEX_64bit); @@ -3930,7 +3821,6 @@ void Assembler::movsd(XMMRegister dst, Address src) { } void Assembler::movsd(Address dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); @@ -3950,14 +3840,12 @@ void Assembler::vmovsd(XMMRegister dst, XMMRegister src, XMMRegister src2) { } void Assembler::movss(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int16(0x10, (0xC0 | encode)); } void Assembler::movss(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); @@ -3967,7 +3855,6 @@ void Assembler::movss(XMMRegister dst, Address src) { } void Assembler::movss(Address dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); @@ -3990,7 +3877,6 @@ void Assembler::movswl(Register dst, Register src) { // movsxw } void Assembler::movups(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); InstructionAttr 
attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); @@ -4010,7 +3896,6 @@ void Assembler::vmovups(XMMRegister dst, Address src, int vector_len) { } void Assembler::movups(Address dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); @@ -4063,7 +3948,6 @@ void Assembler::movzbl(Register dst, Address src) { // movzxb } void Assembler::movzbl(Register dst, Register src) { // movzxb - NOT_LP64(assert(src->has_byte_register(), "must have byte register")); int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true, true /* is_map1 */); emit_opcode_prefix_and_encoding((unsigned char)0xB6, 0xC0, encode); } @@ -4108,7 +3992,6 @@ void Assembler::emull(Register src, bool no_flags) { } void Assembler::mulsd(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); @@ -4119,7 +4002,6 @@ void Assembler::mulsd(XMMRegister dst, Address src) { } void Assembler::mulsd(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_rex_vex_w_reverted(); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); @@ -4127,7 
+4009,6 @@ void Assembler::mulsd(XMMRegister dst, XMMRegister src) { } void Assembler::mulss(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); @@ -4137,7 +4018,6 @@ void Assembler::mulss(XMMRegister dst, Address src) { } void Assembler::mulss(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int16(0x59, (0xC0 | encode)); @@ -4600,7 +4480,6 @@ void Assembler::eorb(Register dst, Address src1, Register src2, bool no_flags) { } void Assembler::packsswb(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16(0x63, (0xC0 | encode)); @@ -4614,7 +4493,6 @@ void Assembler::vpacksswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int } void Assembler::packssdw(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "");) InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16(0x6B, (0xC0 | encode)); @@ -4628,7 +4506,6 @@ void Assembler::vpackssdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int } void Assembler::packuswb(XMMRegister dst, 
Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); @@ -4639,7 +4516,6 @@ void Assembler::packuswb(XMMRegister dst, Address src) { } void Assembler::packuswb(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16(0x67, (0xC0 | encode)); @@ -4820,7 +4696,6 @@ void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst void Assembler::pcmpeqb(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "");) InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16(0x74, (0xC0 | encode)); @@ -4968,7 +4843,6 @@ void Assembler::evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Addre // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "");) InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16(0x75, (0xC0 | encode)); @@ -5017,7 +4891,6 @@ void Assembler::evpcmpeqw(KRegister kdst, 
XMMRegister nds, Address src, int vect // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "");) InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16(0x76, (0xC0 | encode)); @@ -5122,7 +4995,6 @@ void Assembler::pcmpgtq(XMMRegister dst, XMMRegister src) { } void Assembler::pmovmskb(Register dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "");) InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16((unsigned char)0xD7, (0xC0 | encode)); @@ -5188,7 +5060,6 @@ void Assembler::pextrq(Address dst, XMMRegister src, int imm8) { } void Assembler::pextrw(Register dst, XMMRegister src, int imm8) { - NOT_LP64(assert(VM_Version::supports_sse2(), "");) InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int24((unsigned char)0xC5, (0xC0 | encode), imm8); @@ -5274,14 +5145,12 @@ void Assembler::vpinsrq(XMMRegister dst, XMMRegister nds, Register src, int imm8 } void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) { - NOT_LP64(assert(VM_Version::supports_sse2(), "");) InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), 
VEX_SIMD_66, VEX_OPCODE_0F, &attributes, true); emit_int24((unsigned char)0xC4, (0xC0 | encode), imm8); } void Assembler::pinsrw(XMMRegister dst, Address src, int imm8) { - NOT_LP64(assert(VM_Version::supports_sse2(), "");) InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit); @@ -5591,7 +5460,6 @@ void Assembler::vpmovzxwq(XMMRegister dst, XMMRegister src, int vector_len) { } void Assembler::pmaddwd(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16((unsigned char)0xF5, (0xC0 | encode)); @@ -5807,18 +5675,7 @@ void Assembler::popf() { emit_int8((unsigned char)0x9D); } -#ifndef _LP64 // no 32bit push/pop on amd64 -void Assembler::popl(Address dst) { - // NOTE: this will adjust stack by 8byte on 64bits - InstructionMark im(this); - prefix(dst); - emit_int8((unsigned char)0x8F); - emit_operand(rax, dst, 0); -} -#endif - void Assembler::prefetchnta(Address src) { - NOT_LP64(assert(VM_Version::supports_sse(), "must support")); InstructionMark im(this); prefix(src, true /* is_map1 */); emit_int8(0x18); @@ -5834,7 +5691,6 @@ void Assembler::prefetchr(Address src) { } void Assembler::prefetcht0(Address src) { - NOT_LP64(assert(VM_Version::supports_sse(), "must support")); InstructionMark im(this); prefix(src, true /* is_map1 */); emit_int8(0x18); @@ -5842,7 +5698,6 @@ void Assembler::prefetcht0(Address src) { } void Assembler::prefetcht1(Address src) { - NOT_LP64(assert(VM_Version::supports_sse(), "must support")); InstructionMark im(this); prefix(src, true /* is_map1 */); emit_int8(0x18); 
@@ -5850,7 +5705,6 @@ void Assembler::prefetcht1(Address src) { } void Assembler::prefetcht2(Address src) { - NOT_LP64(assert(VM_Version::supports_sse(), "must support")); InstructionMark im(this); prefix(src, true /* is_map1 */); emit_int8(0x18); @@ -5927,7 +5781,6 @@ void Assembler::pshufb(XMMRegister dst, Address src) { void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) { assert(isByte(mode), "invalid value"); - NOT_LP64(assert(VM_Version::supports_sse2(), "")); int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit; InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -5938,7 +5791,6 @@ void Assembler::vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_l assert(vector_len == AVX_128bit? VM_Version::supports_avx() : (vector_len == AVX_256bit? VM_Version::supports_avx2() : (vector_len == AVX_512bit? 
VM_Version::supports_evex() : 0)), ""); - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int24(0x70, (0xC0 | encode), mode & 0xFF); @@ -5946,7 +5798,6 @@ void Assembler::vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_l void Assembler::pshufd(XMMRegister dst, Address src, int mode) { assert(isByte(mode), "invalid value"); - NOT_LP64(assert(VM_Version::supports_sse2(), "")); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); @@ -5959,7 +5810,6 @@ void Assembler::pshufd(XMMRegister dst, Address src, int mode) { void Assembler::pshufhw(XMMRegister dst, XMMRegister src, int mode) { assert(isByte(mode), "invalid value"); - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int24(0x70, (0xC0 | encode), mode & 0xFF); @@ -5969,7 +5819,6 @@ void Assembler::vpshufhw(XMMRegister dst, XMMRegister src, int mode, int vector_ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : (vector_len == AVX_256bit ? VM_Version::supports_avx2() : (vector_len == AVX_512bit ? 
VM_Version::supports_avx512bw() : false)), ""); - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int24(0x70, (0xC0 | encode), mode & 0xFF); @@ -5977,7 +5826,6 @@ void Assembler::vpshufhw(XMMRegister dst, XMMRegister src, int mode, int vector_ void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { assert(isByte(mode), "invalid value"); - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); emit_int24(0x70, (0xC0 | encode), mode & 0xFF); @@ -5985,7 +5833,6 @@ void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { void Assembler::pshuflw(XMMRegister dst, Address src, int mode) { assert(isByte(mode), "invalid value"); - NOT_LP64(assert(VM_Version::supports_sse2(), "")); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); @@ -6000,7 +5847,6 @@ void Assembler::vpshuflw(XMMRegister dst, XMMRegister src, int mode, int vector_ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : (vector_len == AVX_256bit ? VM_Version::supports_avx2() : (vector_len == AVX_512bit ? 
VM_Version::supports_avx512bw() : false)), ""); - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); emit_int24(0x70, (0xC0 | encode), mode & 0xFF); @@ -6017,7 +5863,6 @@ void Assembler::evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, i void Assembler::shufpd(XMMRegister dst, XMMRegister src, int imm8) { assert(isByte(imm8), "invalid value"); - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int24((unsigned char)0xC6, (0xC0 | encode), imm8 & 0xFF); @@ -6032,7 +5877,6 @@ void Assembler::vshufpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int i void Assembler::shufps(XMMRegister dst, XMMRegister src, int imm8) { assert(isByte(imm8), "invalid value"); - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); emit_int24((unsigned char)0xC6, (0xC0 | encode), imm8 & 0xFF); @@ -6046,7 +5890,6 @@ void Assembler::vshufps(XMMRegister dst, XMMRegister nds, XMMRegister src, int i void Assembler::psrldq(XMMRegister dst, int shift) { // Shift left 128 bit value in dst XMMRegister by shift number of bytes. 
- NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int24(0x73, (0xC0 | encode), shift); @@ -6063,7 +5906,6 @@ void Assembler::vpsrldq(XMMRegister dst, XMMRegister src, int shift, int vector_ void Assembler::pslldq(XMMRegister dst, int shift) { // Shift left 128 bit value in dst XMMRegister by shift number of bytes. - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); // XMM7 is for /7 encoding: 66 0F 73 /7 ib int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -6159,7 +6001,6 @@ void Assembler::evptestnmd(KRegister dst, XMMRegister nds, XMMRegister src, int } void Assembler::punpcklbw(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ true, /* uses_vl */ true); @@ -6170,14 +6011,12 @@ void Assembler::punpcklbw(XMMRegister dst, Address src) { } void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_vlbw, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16(0x60, (0xC0 | encode)); } void Assembler::punpckldq(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); 
InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); @@ -6188,14 +6027,12 @@ void Assembler::punpckldq(XMMRegister dst, Address src) { } void Assembler::punpckldq(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16(0x62, (0xC0 | encode)); } void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_rex_vex_w_reverted(); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -6238,7 +6075,6 @@ void Assembler::evpunpckhqdq(XMMRegister dst, KRegister mask, XMMRegister src1, emit_int16(0x6D, (0xC0 | encode)); } -#ifdef _LP64 void Assembler::push2(Register src1, Register src2, bool with_ppx) { assert(VM_Version::supports_apx_f(), "requires APX"); InstructionAttr attributes(0, /* rex_w */ with_ppx, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); @@ -6300,8 +6136,6 @@ void Assembler::popp(Register dst) { int encode = prefixq_and_encode_rex2(dst->encoding()); emit_int8((unsigned char)0x58 | encode); } -#endif //_LP64 - void Assembler::push(int32_t imm32) { // in 64bits we push 64bits onto the stack but only @@ -6319,16 +6153,6 @@ void Assembler::pushf() { emit_int8((unsigned char)0x9C); } -#ifndef _LP64 // no 32bit push/pop on amd64 -void Assembler::pushl(Address src) { - // Note this will push 64bit on 64bit - InstructionMark im(this); - prefix(src); - emit_int8((unsigned char)0xFF); - emit_operand(rsi, src, 0); -} -#endif - 
void Assembler::rcll(Register dst, int imm8) { assert(isShiftCount(imm8), "illegal shift count"); int encode = prefix_and_encode(dst->encoding()); @@ -6351,14 +6175,12 @@ void Assembler::ercll(Register dst, Register src, int imm8) { } void Assembler::rcpps(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); emit_int16(0x53, (0xC0 | encode)); } void Assembler::rcpss(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int16(0x53, (0xC0 | encode)); @@ -6373,43 +6195,37 @@ void Assembler::rdtsc() { void Assembler::rep_mov() { // REP // MOVSQ - LP64_ONLY(emit_int24((unsigned char)0xF3, REX_W, (unsigned char)0xA5);) - NOT_LP64( emit_int16((unsigned char)0xF3, (unsigned char)0xA5);) + emit_int24((unsigned char)0xF3, REX_W, (unsigned char)0xA5); } // sets rcx bytes with rax, value at [edi] void Assembler::rep_stosb() { // REP // STOSB - LP64_ONLY(emit_int24((unsigned char)0xF3, REX_W, (unsigned char)0xAA);) - NOT_LP64( emit_int16((unsigned char)0xF3, (unsigned char)0xAA);) + emit_int24((unsigned char)0xF3, REX_W, (unsigned char)0xAA); } // sets rcx pointer sized words with rax, value at [edi] // generic void Assembler::rep_stos() { // REP - // LP64:STOSQ, LP32:STOSD - LP64_ONLY(emit_int24((unsigned char)0xF3, REX_W, (unsigned char)0xAB);) - NOT_LP64( emit_int16((unsigned char)0xF3, (unsigned char)0xAB);) + // STOSQ + emit_int24((unsigned char)0xF3, REX_W, (unsigned char)0xAB); } // scans rcx pointer sized words at [edi] for occurrence of rax, // generic void 
Assembler::repne_scan() { // repne_scan // SCASQ - LP64_ONLY(emit_int24((unsigned char)0xF2, REX_W, (unsigned char)0xAF);) - NOT_LP64( emit_int16((unsigned char)0xF2, (unsigned char)0xAF);) + emit_int24((unsigned char)0xF2, REX_W, (unsigned char)0xAF); } -#ifdef _LP64 // scans rcx 4 byte words at [edi] for occurrence of rax, // generic void Assembler::repne_scanl() { // repne_scan // SCASL emit_int16((unsigned char)0xF2, (unsigned char)0xAF); } -#endif void Assembler::ret(int imm16) { if (imm16 == 0) { @@ -6484,7 +6300,6 @@ void Assembler::erorl(Register dst, Register src, bool no_flags) { emit_int16((unsigned char)0xD3, (0xC8 | encode)); } -#ifdef _LP64 void Assembler::rorq(Register dst) { int encode = prefixq_and_encode(dst->encoding()); emit_int16((unsigned char)0xD3, (0xC8 | encode)); @@ -6548,13 +6363,8 @@ void Assembler::erolq(Register dst, Register src, int imm8, bool no_flags) { emit_int24((unsigned char)0xC1, (0xc0 | encode), imm8); } } -#endif void Assembler::sahf() { -#ifdef _LP64 - // Not supported in 64bit mode - ShouldNotReachHere(); -#endif emit_int8((unsigned char)0x9E); } @@ -7008,7 +6818,6 @@ void Assembler::eshrdl(Register dst, Register src1, Register src2, int8_t imm8, emit_int24(0x2C, (0xC0 | encode), imm8); } -#ifdef _LP64 void Assembler::shldq(Register dst, Register src, int8_t imm8) { int encode = prefixq_and_encode(src->encoding(), dst->encoding(), true /* is_map1 */); emit_opcode_prefix_and_encoding((unsigned char)0xA4, 0xC0, encode, imm8); @@ -7034,7 +6843,6 @@ void Assembler::eshrdq(Register dst, Register src1, Register src2, int8_t imm8, int encode = evex_prefix_and_encode_ndd(src2->encoding(), dst->encoding(), src1->encoding(), VEX_SIMD_NONE, /* MAP4 */VEX_OPCODE_0F_3C, &attributes, no_flags); emit_int24(0x2C, (0xC0 | encode), imm8); } -#endif // copies a single word from [esi] to [edi] void Assembler::smovl() { @@ -7059,7 +6867,6 @@ void Assembler::roundsd(XMMRegister dst, Address src, int32_t rmode) { } void 
Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_rex_vex_w_reverted(); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); @@ -7067,7 +6874,6 @@ void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) { } void Assembler::sqrtsd(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); @@ -7078,7 +6884,6 @@ void Assembler::sqrtsd(XMMRegister dst, Address src) { } void Assembler::sqrtss(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int16(0x51, (0xC0 | encode)); @@ -7089,7 +6894,6 @@ void Assembler::std() { } void Assembler::sqrtss(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); @@ -7110,7 +6914,6 @@ void Assembler::stmxcsr(Address dst) { emit_int8((unsigned char)0xAE); emit_operand(as_Register(3), dst, 0); } else { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); prefix(dst, true /* is_map1 */); emit_int8((unsigned char)0xAE); @@ 
-7201,7 +7004,6 @@ void Assembler::esubl(Register dst, Register src1, Register src2, bool no_flags) } void Assembler::subsd(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_rex_vex_w_reverted(); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); @@ -7209,7 +7011,6 @@ void Assembler::subsd(XMMRegister dst, XMMRegister src) { } void Assembler::subsd(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); @@ -7220,14 +7021,12 @@ void Assembler::subsd(XMMRegister dst, Address src) { } void Assembler::subss(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true , /* uses_vl */ false); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int16(0x5C, (0xC0 | encode)); } void Assembler::subss(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); @@ -7237,7 +7036,6 @@ void Assembler::subss(XMMRegister dst, Address src) { } void Assembler::testb(Register dst, int imm8, bool use_ral) { - NOT_LP64(assert(dst->has_byte_register(), "must have byte register")); if (dst == rax) { 
if (use_ral) { emit_int8((unsigned char)0xA8); @@ -7363,7 +7161,6 @@ void Assembler::etzcntq(Register dst, Address src, bool no_flags) { } void Assembler::ucomisd(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); @@ -7374,7 +7171,6 @@ void Assembler::ucomisd(XMMRegister dst, Address src) { } void Assembler::ucomisd(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_rex_vex_w_reverted(); int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -7382,7 +7178,6 @@ void Assembler::ucomisd(XMMRegister dst, XMMRegister src) { } void Assembler::ucomiss(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); @@ -7392,7 +7187,6 @@ void Assembler::ucomiss(XMMRegister dst, Address src) { } void Assembler::ucomiss(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); emit_int16(0x2E, (0xC0 | encode)); @@ -7784,7 +7578,6 @@ void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) { 
// Float-point vector arithmetic void Assembler::addpd(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_rex_vex_w_reverted(); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -7792,7 +7585,6 @@ void Assembler::addpd(XMMRegister dst, XMMRegister src) { } void Assembler::addpd(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_rex_vex_w_reverted(); @@ -7804,7 +7596,6 @@ void Assembler::addpd(XMMRegister dst, Address src) { void Assembler::addps(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); emit_int16(0x58, (0xC0 | encode)); @@ -7847,7 +7638,6 @@ void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector } void Assembler::subpd(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_rex_vex_w_reverted(); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -7855,7 +7645,6 @@ void Assembler::subpd(XMMRegister dst, XMMRegister src) { } void Assembler::subps(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, 
/* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); emit_int16(0x5C, (0xC0 | encode)); @@ -7898,7 +7687,6 @@ void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector } void Assembler::mulpd(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_rex_vex_w_reverted(); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -7906,7 +7694,6 @@ void Assembler::mulpd(XMMRegister dst, XMMRegister src) { } void Assembler::mulpd(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); @@ -7917,7 +7704,6 @@ void Assembler::mulpd(XMMRegister dst, Address src) { } void Assembler::mulps(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); emit_int16(0x59, (0xC0 | encode)); @@ -7994,7 +7780,6 @@ void Assembler::vfmadd231ps(XMMRegister dst, XMMRegister src1, Address src2, int } void Assembler::divpd(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); 
attributes.set_rex_vex_w_reverted(); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -8002,7 +7787,6 @@ void Assembler::divpd(XMMRegister dst, XMMRegister src) { } void Assembler::divps(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); emit_int16(0x5E, (0xC0 | encode)); @@ -8136,7 +7920,6 @@ void Assembler::vsqrtps(XMMRegister dst, Address src, int vector_len) { } void Assembler::andpd(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_rex_vex_w_reverted(); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -8144,7 +7927,6 @@ void Assembler::andpd(XMMRegister dst, XMMRegister src) { } void Assembler::andnpd(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_rex_vex_w_reverted(); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -8152,14 +7934,12 @@ void Assembler::andnpd(XMMRegister dst, XMMRegister src) { } void Assembler::andps(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); emit_int16(0x54, (0xC0 | 
encode)); } void Assembler::andps(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); @@ -8169,7 +7949,6 @@ void Assembler::andps(XMMRegister dst, Address src) { } void Assembler::andpd(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); @@ -8216,7 +7995,6 @@ void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector } void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_rex_vex_w_reverted(); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -8225,7 +8003,6 @@ void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) { } void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_rex_vex_w_reverted(); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -8233,7 +8010,6 @@ void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) { } void Assembler::xorpd(XMMRegister dst, XMMRegister src) { - 
NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_rex_vex_w_reverted(); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -8241,14 +8017,12 @@ void Assembler::xorpd(XMMRegister dst, XMMRegister src) { } void Assembler::xorps(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); emit_int16(0x57, (0xC0 | encode)); } void Assembler::xorpd(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_dq, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); @@ -8259,7 +8033,6 @@ void Assembler::xorpd(XMMRegister dst, Address src) { } void Assembler::xorps(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); @@ -8322,28 +8095,24 @@ void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int v } void Assembler::paddb(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); int encode = 
simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16((unsigned char)0xFC, (0xC0 | encode)); } void Assembler::paddw(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16((unsigned char)0xFD, (0xC0 | encode)); } void Assembler::paddd(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16((unsigned char)0xFE, (0xC0 | encode)); } void Assembler::paddd(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -8352,7 +8121,6 @@ void Assembler::paddd(XMMRegister dst, Address src) { } void Assembler::paddq(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_rex_vex_w_reverted(); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -8599,14 +8367,12 @@ void Assembler::vpsubusw(XMMRegister dst, XMMRegister nds, Address src, int vect void Assembler::psubb(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode 
*/ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16((unsigned char)0xF8, (0xC0 | encode)); } void Assembler::psubw(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16((unsigned char)0xF9, (0xC0 | encode)); @@ -8619,7 +8385,6 @@ void Assembler::psubd(XMMRegister dst, XMMRegister src) { } void Assembler::psubq(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_rex_vex_w_reverted(); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -8698,7 +8463,6 @@ void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector } void Assembler::pmullw(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16((unsigned char)0xD5, (0xC0 | encode)); @@ -8712,7 +8476,6 @@ void Assembler::pmulld(XMMRegister dst, XMMRegister src) { } void Assembler::pmuludq(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "");) InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); 
emit_int16((unsigned char)0xF4, (0xC0 | encode)); @@ -8813,7 +8576,6 @@ void Assembler::vpminsb(XMMRegister dst, XMMRegister nds, XMMRegister src, int v } void Assembler::pminsw(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "");) InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16((unsigned char)0xEA, (0xC0 | encode)); @@ -8851,7 +8613,6 @@ void Assembler::vpminsq(XMMRegister dst, XMMRegister nds, XMMRegister src, int v } void Assembler::minps(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); emit_int16(0x5D, (0xC0 | encode)); @@ -8864,7 +8625,6 @@ void Assembler::vminps(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve } void Assembler::minpd(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16(0x5D, (0xC0 | encode)); @@ -8892,7 +8652,6 @@ void Assembler::vpmaxsb(XMMRegister dst, XMMRegister nds, XMMRegister src, int v } void Assembler::pmaxsw(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "");) InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16((unsigned char)0xEE, (0xC0 | encode)); @@ 
-8930,7 +8689,6 @@ void Assembler::vpmaxsq(XMMRegister dst, XMMRegister nds, XMMRegister src, int v } void Assembler::maxps(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); emit_int16(0x5F, (0xC0 | encode)); @@ -8944,7 +8702,6 @@ void Assembler::vmaxps(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve } void Assembler::maxpd(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16(0x5F, (0xC0 | encode)); @@ -9299,7 +9056,6 @@ void Assembler::evpmaxuq(XMMRegister dst, KRegister mask, XMMRegister nds, Addre // Shift packed integers left by specified number of bits. 
void Assembler::psllw(XMMRegister dst, int shift) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); // XMM6 is for /6 encoding: 66 0F 71 /6 ib int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -9307,7 +9063,6 @@ void Assembler::psllw(XMMRegister dst, int shift) { } void Assembler::pslld(XMMRegister dst, int shift) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); // XMM6 is for /6 encoding: 66 0F 72 /6 ib int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -9315,7 +9070,6 @@ void Assembler::pslld(XMMRegister dst, int shift) { } void Assembler::psllq(XMMRegister dst, int shift) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); // XMM6 is for /6 encoding: 66 0F 73 /6 ib int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -9323,21 +9077,18 @@ void Assembler::psllq(XMMRegister dst, int shift) { } void Assembler::psllw(XMMRegister dst, XMMRegister shift) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16((unsigned char)0xF1, (0xC0 | encode)); } void Assembler::pslld(XMMRegister dst, XMMRegister shift) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode 
= simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16((unsigned char)0xF2, (0xC0 | encode)); } void Assembler::psllq(XMMRegister dst, XMMRegister shift) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_rex_vex_w_reverted(); int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -9354,7 +9105,6 @@ void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_l void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); // XMM6 is for /6 encoding: 66 0F 72 /6 ib int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -9394,7 +9144,6 @@ void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int // Shift packed integers logically right by specified number of bits. 
void Assembler::psrlw(XMMRegister dst, int shift) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); // XMM2 is for /2 encoding: 66 0F 71 /2 ib int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -9402,7 +9151,6 @@ void Assembler::psrlw(XMMRegister dst, int shift) { } void Assembler::psrld(XMMRegister dst, int shift) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); // XMM2 is for /2 encoding: 66 0F 72 /2 ib int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -9412,7 +9160,6 @@ void Assembler::psrld(XMMRegister dst, int shift) { void Assembler::psrlq(XMMRegister dst, int shift) { // Do not confuse it with psrldq SSE2 instruction which // shifts 128 bit value in xmm register by number of bytes. 
- NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_rex_vex_w_reverted(); // XMM2 is for /2 encoding: 66 0F 73 /2 ib @@ -9421,21 +9168,18 @@ void Assembler::psrlq(XMMRegister dst, int shift) { } void Assembler::psrlw(XMMRegister dst, XMMRegister shift) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16((unsigned char)0xD1, (0xC0 | encode)); } void Assembler::psrld(XMMRegister dst, XMMRegister shift) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16((unsigned char)0xD2, (0xC0 | encode)); } void Assembler::psrlq(XMMRegister dst, XMMRegister shift) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_rex_vex_w_reverted(); int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -9507,7 +9251,6 @@ void Assembler::evpsllvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int // Shift packed integers arithmetically right by specified number of bits. 
void Assembler::psraw(XMMRegister dst, int shift) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); // XMM4 is for /4 encoding: 66 0F 71 /4 ib int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -9515,7 +9258,6 @@ void Assembler::psraw(XMMRegister dst, int shift) { } void Assembler::psrad(XMMRegister dst, int shift) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); // XMM4 is for /4 encoding: 66 0F 72 /4 ib int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -9525,14 +9267,12 @@ void Assembler::psrad(XMMRegister dst, int shift) { } void Assembler::psraw(XMMRegister dst, XMMRegister shift) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16((unsigned char)0xE1, (0xC0 | encode)); } void Assembler::psrad(XMMRegister dst, XMMRegister shift) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16((unsigned char)0xE2, (0xC0 | encode)); @@ -9588,7 +9328,6 @@ void Assembler::evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int // logical operations packed integers void Assembler::pand(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* 
legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16((unsigned char)0xDB, (0xC0 | encode)); @@ -9691,7 +9430,6 @@ void Assembler::vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, in } void Assembler::pandn(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_rex_vex_w_reverted(); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); @@ -9706,7 +9444,6 @@ void Assembler::vpandn(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve } void Assembler::por(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16((unsigned char)0xEB, (0xC0 | encode)); @@ -9767,7 +9504,6 @@ void Assembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, Address } void Assembler::pxor(XMMRegister dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16((unsigned char)0xEF, (0xC0 | encode)); @@ -12401,7 +12137,6 @@ void Assembler::evpternlogq(XMMRegister dst, int imm8, KRegister mask, XMMRegist void Assembler::gf2p8affineqb(XMMRegister dst, XMMRegister src, int imm8) { assert(VM_Version::supports_gfni(), ""); - NOT_LP64(assert(VM_Version::supports_sse(), "");) InstructionAttr attributes(AVX_128bit, /* rex_w 
*/ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int24((unsigned char)0xCE, (unsigned char)(0xC0 | encode), imm8); @@ -12409,7 +12144,6 @@ void Assembler::gf2p8affineqb(XMMRegister dst, XMMRegister src, int imm8) { void Assembler::vgf2p8affineqb(XMMRegister dst, XMMRegister src2, XMMRegister src3, int imm8, int vector_len) { assert(VM_Version::supports_gfni(), "requires GFNI support"); - NOT_LP64(assert(VM_Version::supports_sse(), "");) InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src3->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int24((unsigned char)0xCE, (unsigned char)(0xC0 | encode), imm8); @@ -12901,428 +12635,6 @@ void Assembler::emit_farith(int b1, int b2, int i) { emit_int16(b1, b2 + i); } -#ifndef _LP64 -// 32bit only pieces of the assembler - -void Assembler::emms() { - NOT_LP64(assert(VM_Version::supports_mmx(), "")); - emit_int16(0x0F, 0x77); -} - -void Assembler::vzeroupper() { - vzeroupper_uncached(); -} - -void Assembler::cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec) { - // NO PREFIX AS NEVER 64BIT - InstructionMark im(this); - emit_int16((unsigned char)0x81, (0xF8 | src1->encoding())); - emit_data(imm32, rspec, 0); -} - -void Assembler::cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec) { - // NO PREFIX AS NEVER 64BIT (not even 32bit versions of 64bit regs - InstructionMark im(this); - emit_int8((unsigned char)0x81); - emit_operand(rdi, src1, 4); - emit_data(imm32, rspec, 0); -} - -// The 64-bit (32bit platform) cmpxchg compares the value at adr with the contents of rdx:rax, -// and stores rcx:rbx into adr if so; otherwise, the value at adr is loaded -// into rdx:rax. 
The ZF is set if the compared values were equal, and cleared otherwise. -void Assembler::cmpxchg8(Address adr) { - InstructionMark im(this); - emit_int16(0x0F, (unsigned char)0xC7); - emit_operand(rcx, adr, 0); -} - -void Assembler::decl(Register dst) { - // Don't use it directly. Use MacroAssembler::decrementl() instead. - emit_int8(0x48 | dst->encoding()); -} - -void Assembler::edecl(Register dst, Register src, bool no_flags) { - InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - (void) evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_int8(0x48 | src->encoding()); -} - -// 64bit doesn't use the x87 - -void Assembler::fabs() { - emit_int16((unsigned char)0xD9, (unsigned char)0xE1); -} - -void Assembler::fadd(int i) { - emit_farith(0xD8, 0xC0, i); -} - -void Assembler::fadd_d(Address src) { - InstructionMark im(this); - emit_int8((unsigned char)0xDC); - emit_operand32(rax, src, 0); -} - -void Assembler::fadd_s(Address src) { - InstructionMark im(this); - emit_int8((unsigned char)0xD8); - emit_operand32(rax, src, 0); -} - -void Assembler::fadda(int i) { - emit_farith(0xDC, 0xC0, i); -} - -void Assembler::faddp(int i) { - emit_farith(0xDE, 0xC0, i); -} - -void Assembler::fchs() { - emit_int16((unsigned char)0xD9, (unsigned char)0xE0); -} - -void Assembler::fcom(int i) { - emit_farith(0xD8, 0xD0, i); -} - -void Assembler::fcomp(int i) { - emit_farith(0xD8, 0xD8, i); -} - -void Assembler::fcomp_d(Address src) { - InstructionMark im(this); - emit_int8((unsigned char)0xDC); - emit_operand32(rbx, src, 0); -} - -void Assembler::fcomp_s(Address src) { - InstructionMark im(this); - emit_int8((unsigned char)0xD8); - emit_operand32(rbx, src, 0); -} - -void Assembler::fcompp() { - emit_int16((unsigned char)0xDE, (unsigned char)0xD9); -} - -void Assembler::fcos() { - emit_int16((unsigned char)0xD9, (unsigned char)0xFF); -} 
- -void Assembler::fdecstp() { - emit_int16((unsigned char)0xD9, (unsigned char)0xF6); -} - -void Assembler::fdiv(int i) { - emit_farith(0xD8, 0xF0, i); -} - -void Assembler::fdiv_d(Address src) { - InstructionMark im(this); - emit_int8((unsigned char)0xDC); - emit_operand32(rsi, src, 0); -} - -void Assembler::fdiv_s(Address src) { - InstructionMark im(this); - emit_int8((unsigned char)0xD8); - emit_operand32(rsi, src, 0); -} - -void Assembler::fdiva(int i) { - emit_farith(0xDC, 0xF8, i); -} - -// Note: The Intel manual (Pentium Processor User's Manual, Vol.3, 1994) -// is erroneous for some of the floating-point instructions below. - -void Assembler::fdivp(int i) { - emit_farith(0xDE, 0xF8, i); // ST(0) <- ST(0) / ST(1) and pop (Intel manual wrong) -} - -void Assembler::fdivr(int i) { - emit_farith(0xD8, 0xF8, i); -} - -void Assembler::fdivr_d(Address src) { - InstructionMark im(this); - emit_int8((unsigned char)0xDC); - emit_operand32(rdi, src, 0); -} - -void Assembler::fdivr_s(Address src) { - InstructionMark im(this); - emit_int8((unsigned char)0xD8); - emit_operand32(rdi, src, 0); -} - -void Assembler::fdivra(int i) { - emit_farith(0xDC, 0xF0, i); -} - -void Assembler::fdivrp(int i) { - emit_farith(0xDE, 0xF0, i); // ST(0) <- ST(1) / ST(0) and pop (Intel manual wrong) -} - -void Assembler::ffree(int i) { - emit_farith(0xDD, 0xC0, i); -} - -void Assembler::fild_d(Address adr) { - InstructionMark im(this); - emit_int8((unsigned char)0xDF); - emit_operand32(rbp, adr, 0); -} - -void Assembler::fild_s(Address adr) { - InstructionMark im(this); - emit_int8((unsigned char)0xDB); - emit_operand32(rax, adr, 0); -} - -void Assembler::fincstp() { - emit_int16((unsigned char)0xD9, (unsigned char)0xF7); -} - -void Assembler::finit() { - emit_int24((unsigned char)0x9B, (unsigned char)0xDB, (unsigned char)0xE3); -} - -void Assembler::fist_s(Address adr) { - InstructionMark im(this); - emit_int8((unsigned char)0xDB); - emit_operand32(rdx, adr, 0); -} - -void 
Assembler::fistp_d(Address adr) { - InstructionMark im(this); - emit_int8((unsigned char)0xDF); - emit_operand32(rdi, adr, 0); -} - -void Assembler::fistp_s(Address adr) { - InstructionMark im(this); - emit_int8((unsigned char)0xDB); - emit_operand32(rbx, adr, 0); -} - -void Assembler::fld1() { - emit_int16((unsigned char)0xD9, (unsigned char)0xE8); -} - -void Assembler::fld_s(Address adr) { - InstructionMark im(this); - emit_int8((unsigned char)0xD9); - emit_operand32(rax, adr, 0); -} - - -void Assembler::fld_s(int index) { - emit_farith(0xD9, 0xC0, index); -} - -void Assembler::fldcw(Address src) { - InstructionMark im(this); - emit_int8((unsigned char)0xD9); - emit_operand32(rbp, src, 0); -} - -void Assembler::fldenv(Address src) { - InstructionMark im(this); - emit_int8((unsigned char)0xD9); - emit_operand32(rsp, src, 0); -} - -void Assembler::fldlg2() { - emit_int16((unsigned char)0xD9, (unsigned char)0xEC); -} - -void Assembler::fldln2() { - emit_int16((unsigned char)0xD9, (unsigned char)0xED); -} - -void Assembler::fldz() { - emit_int16((unsigned char)0xD9, (unsigned char)0xEE); -} - -void Assembler::flog() { - fldln2(); - fxch(); - fyl2x(); -} - -void Assembler::flog10() { - fldlg2(); - fxch(); - fyl2x(); -} - -void Assembler::fmul(int i) { - emit_farith(0xD8, 0xC8, i); -} - -void Assembler::fmul_d(Address src) { - InstructionMark im(this); - emit_int8((unsigned char)0xDC); - emit_operand32(rcx, src, 0); -} - -void Assembler::fmul_s(Address src) { - InstructionMark im(this); - emit_int8((unsigned char)0xD8); - emit_operand32(rcx, src, 0); -} - -void Assembler::fmula(int i) { - emit_farith(0xDC, 0xC8, i); -} - -void Assembler::fmulp(int i) { - emit_farith(0xDE, 0xC8, i); -} - -void Assembler::fnsave(Address dst) { - InstructionMark im(this); - emit_int8((unsigned char)0xDD); - emit_operand32(rsi, dst, 0); -} - -void Assembler::fnstcw(Address src) { - InstructionMark im(this); - emit_int16((unsigned char)0x9B, (unsigned char)0xD9); - emit_operand32(rdi, src, 
0); -} - -void Assembler::fprem1() { - emit_int16((unsigned char)0xD9, (unsigned char)0xF5); -} - -void Assembler::frstor(Address src) { - InstructionMark im(this); - emit_int8((unsigned char)0xDD); - emit_operand32(rsp, src, 0); -} - -void Assembler::fsin() { - emit_int16((unsigned char)0xD9, (unsigned char)0xFE); -} - -void Assembler::fsqrt() { - emit_int16((unsigned char)0xD9, (unsigned char)0xFA); -} - -void Assembler::fst_d(Address adr) { - InstructionMark im(this); - emit_int8((unsigned char)0xDD); - emit_operand32(rdx, adr, 0); -} - -void Assembler::fst_s(Address adr) { - InstructionMark im(this); - emit_int8((unsigned char)0xD9); - emit_operand32(rdx, adr, 0); -} - -void Assembler::fstp_s(Address adr) { - InstructionMark im(this); - emit_int8((unsigned char)0xD9); - emit_operand32(rbx, adr, 0); -} - -void Assembler::fsub(int i) { - emit_farith(0xD8, 0xE0, i); -} - -void Assembler::fsub_d(Address src) { - InstructionMark im(this); - emit_int8((unsigned char)0xDC); - emit_operand32(rsp, src, 0); -} - -void Assembler::fsub_s(Address src) { - InstructionMark im(this); - emit_int8((unsigned char)0xD8); - emit_operand32(rsp, src, 0); -} - -void Assembler::fsuba(int i) { - emit_farith(0xDC, 0xE8, i); -} - -void Assembler::fsubp(int i) { - emit_farith(0xDE, 0xE8, i); // ST(0) <- ST(0) - ST(1) and pop (Intel manual wrong) -} - -void Assembler::fsubr(int i) { - emit_farith(0xD8, 0xE8, i); -} - -void Assembler::fsubr_d(Address src) { - InstructionMark im(this); - emit_int8((unsigned char)0xDC); - emit_operand32(rbp, src, 0); -} - -void Assembler::fsubr_s(Address src) { - InstructionMark im(this); - emit_int8((unsigned char)0xD8); - emit_operand32(rbp, src, 0); -} - -void Assembler::fsubra(int i) { - emit_farith(0xDC, 0xE0, i); -} - -void Assembler::fsubrp(int i) { - emit_farith(0xDE, 0xE0, i); // ST(0) <- ST(1) - ST(0) and pop (Intel manual wrong) -} - -void Assembler::ftan() { - emit_int32((unsigned char)0xD9, (unsigned char)0xF2, (unsigned char)0xDD, (unsigned 
char)0xD8); -} - -void Assembler::ftst() { - emit_int16((unsigned char)0xD9, (unsigned char)0xE4); -} - -void Assembler::fucomi(int i) { - // make sure the instruction is supported (introduced for P6, together with cmov) - guarantee(VM_Version::supports_cmov(), "illegal instruction"); - emit_farith(0xDB, 0xE8, i); -} - -void Assembler::fucomip(int i) { - // make sure the instruction is supported (introduced for P6, together with cmov) - guarantee(VM_Version::supports_cmov(), "illegal instruction"); - emit_farith(0xDF, 0xE8, i); -} - -void Assembler::fwait() { - emit_int8((unsigned char)0x9B); -} - -void Assembler::fxch(int i) { - emit_farith(0xD9, 0xC8, i); -} - -void Assembler::fyl2x() { - emit_int16((unsigned char)0xD9, (unsigned char)0xF1); -} - -void Assembler::frndint() { - emit_int16((unsigned char)0xD9, (unsigned char)0xFC); -} - -void Assembler::f2xm1() { - emit_int16((unsigned char)0xD9, (unsigned char)0xF0); -} - -void Assembler::fldl2e() { - emit_int16((unsigned char)0xD9, (unsigned char)0xEA); -} -#endif // !_LP64 - // SSE SIMD prefix byte values corresponding to VexSimdPrefix encoding. static int simd_pre[4] = { 0, 0x66, 0xF3, 0xF2 }; // SSE opcode second byte values (first is 0x0F) corresponding to VexOpcode encoding. @@ -13456,7 +12768,7 @@ void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix // is allowed in legacy mode and has resources which will fit in it. // Pure EVEX instructions will have is_evex_instruction set in their definition. 
if (!attributes->is_legacy_mode()) { - if (UseAVX > 2 && !attributes->is_evex_instruction() && !is_managed()) { + if (UseAVX > 2 && !attributes->is_evex_instruction()) { if ((attributes->get_vector_len() != AVX_512bit) && !is_extended) { attributes->set_is_legacy_mode(); } @@ -13471,7 +12783,6 @@ void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix assert((!is_extended || (!attributes->is_legacy_mode())),"XMM register should be 0-15"); } - clear_managed(); if (UseAVX > 2 && !attributes->is_legacy_mode()) { bool evex_r = (xreg_enc >= 16); @@ -13519,7 +12830,7 @@ int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexS // is allowed in legacy mode and has resources which will fit in it. // Pure EVEX instructions will have is_evex_instruction set in their definition. if (!attributes->is_legacy_mode()) { - if (UseAVX > 2 && !attributes->is_evex_instruction() && !is_managed()) { + if (UseAVX > 2 && !attributes->is_evex_instruction()) { if ((!attributes->uses_vl() || (attributes->get_vector_len() != AVX_512bit)) && !is_extended) { attributes->set_is_legacy_mode(); @@ -13541,7 +12852,6 @@ int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexS assert(((!is_extended) || (!attributes->is_legacy_mode())),"XMM register should be 0-15"); } - clear_managed(); if (UseAVX > 2 && !attributes->is_legacy_mode()) { bool evex_r = (dst_enc >= 16); @@ -14411,55 +13721,6 @@ void Assembler::evcompresspd(XMMRegister dst, KRegister mask, XMMRegister src, b emit_int16((unsigned char)0x8A, (0xC0 | encode)); } -#ifndef _LP64 - -void Assembler::incl(Register dst) { - // Don't use it directly. Use MacroAssembler::incrementl() instead. 
- emit_int8(0x40 | dst->encoding()); -} - -void Assembler::eincl(Register dst, Register src, bool no_flags) { - InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); - (void) evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); - emit_int8(0x40 | src->encoding()); -} - -void Assembler::lea(Register dst, Address src) { - leal(dst, src); -} - -void Assembler::mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec) { - InstructionMark im(this); - emit_int8((unsigned char)0xC7); - emit_operand(rax, dst, 4); - emit_data((int)imm32, rspec, 0); -} - -void Assembler::mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec) { - InstructionMark im(this); - int encode = prefix_and_encode(dst->encoding()); - emit_int8((0xB8 | encode)); - emit_data((int)imm32, rspec, 0); -} - -void Assembler::popa() { // 32bit - emit_int8(0x61); -} - -void Assembler::push_literal32(int32_t imm32, RelocationHolder const& rspec) { - InstructionMark im(this); - emit_int8(0x68); - emit_data(imm32, rspec, 0); -} - -void Assembler::pusha() { // 32bit - emit_int8(0x60); -} - -#else // LP64 - -// 64bit only pieces of the assembler - // This should only be used by 64bit instructions that can use rip-relative // it cannot be used by instructions that want an immediate value. 
@@ -15413,14 +14674,12 @@ void Assembler::cmpxchgq(Register reg, Address adr) { } void Assembler::cvtsi2sdq(XMMRegister dst, Register src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes, true); emit_int16(0x2A, (0xC0 | encode)); } void Assembler::cvtsi2sdq(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); @@ -15430,7 +14689,6 @@ void Assembler::cvtsi2sdq(XMMRegister dst, Address src) { } void Assembler::cvtsi2ssq(XMMRegister dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionMark im(this); InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); @@ -15440,7 +14698,6 @@ void Assembler::cvtsi2ssq(XMMRegister dst, Address src) { } void Assembler::cvttsd2siq(Register dst, Address src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); // F2 REX.W 0F 2C /r // CVTTSD2SI r64, xmm1/m64 InstructionMark im(this); @@ -15451,21 +14708,18 @@ void Assembler::cvttsd2siq(Register dst, Address src) { } void Assembler::cvttsd2siq(Register dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, 
&attributes); emit_int16(0x2C, (0xC0 | encode)); } void Assembler::cvtsd2siq(Register dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); emit_int16(0x2D, (0xC0 | encode)); } void Assembler::cvttss2siq(Register dst, XMMRegister src) { - NOT_LP64(assert(VM_Version::supports_sse(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int16(0x2C, (0xC0 | encode)); @@ -15812,7 +15066,6 @@ void Assembler::elzcntq(Register dst, Address src, bool no_flags) { void Assembler::movdq(XMMRegister dst, Register src) { // table D-1 says MMX/SSE2 - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes, true); emit_int16(0x6E, (0xC0 | encode)); @@ -15820,7 +15073,6 @@ void Assembler::movdq(XMMRegister dst, Register src) { void Assembler::movdq(Register dst, XMMRegister src) { // table D-1 says MMX/SSE2 - NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); // swap src/dst to get correct prefix int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes, true); @@ -16423,7 +15675,6 @@ void Assembler::rorxq(Register dst, Address src, int imm8) { emit_int8(imm8); } -#ifdef _LP64 
void Assembler::salq(Address dst, int imm8) { InstructionMark im(this); assert(isShiftCount(imm8 >> 1), "illegal shift count"); @@ -16578,7 +15829,6 @@ void Assembler::esarq(Register dst, Register src, bool no_flags) { int encode = evex_prefix_and_encode_ndd(0, dst->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F_3C, &attributes, no_flags); emit_int16((unsigned char)0xD3, (0xF8 | encode)); } -#endif void Assembler::sbbq(Address dst, int32_t imm32) { InstructionMark im(this); @@ -16923,8 +16173,6 @@ void Assembler::exorq(Register dst, Address src1, Register src2, bool no_flags) emit_operand(src2, src1, 0); } -#endif // !LP64 - void InstructionAttr::set_address_attributes(int tuple_type, int input_size_in_bits) { if (VM_Version::supports_evex()) { _tuple_type = tuple_type; diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp index 25be0d6a48d32..356d7741b796d 100644 --- a/src/hotspot/cpu/x86/assembler_x86.hpp +++ b/src/hotspot/cpu/x86/assembler_x86.hpp @@ -35,7 +35,6 @@ class Argument { public: enum { -#ifdef _LP64 #ifdef _WIN64 n_int_register_parameters_c = 4, // rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) n_float_register_parameters_c = 4, // xmm0 - xmm3 (c_farg0, c_farg1, ... ) @@ -49,16 +48,10 @@ class Argument { #endif // _WIN64 n_int_register_parameters_j = 6, // j_rarg0, j_rarg1, ... n_float_register_parameters_j = 8 // j_farg0, j_farg1, ... -#else - n_register_parameters = 0, // 0 registers used to pass arguments - n_int_register_parameters_j = 0, - n_float_register_parameters_j = 0 -#endif // _LP64 }; }; -#ifdef _LP64 // Symbolically name the register arguments used by the c calling convention. // Windows is different from linux/solaris. So much for standards... 
@@ -138,14 +131,6 @@ constexpr Register rscratch2 = r11; // volatile constexpr Register r12_heapbase = r12; // callee-saved constexpr Register r15_thread = r15; // callee-saved -#else -// rscratch1 will appear in 32bit code that is dead but of course must compile -// Using noreg ensures if the dead code is incorrectly live and executed it -// will cause an assertion failure -#define rscratch1 noreg -#define rscratch2 noreg - -#endif // _LP64 // JSR 292 // On x86, the SP does not have to be saved when invoking method handle intrinsics @@ -168,7 +153,7 @@ class Address { times_2 = 1, times_4 = 2, times_8 = 3, - times_ptr = LP64_ONLY(times_8) NOT_LP64(times_4) + times_ptr = times_8 }; static ScaleFactor times(int size) { assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); @@ -197,7 +182,6 @@ class Address { // Easily misused constructors make them private // %%% can we make these go away? - NOT_LP64(Address(address loc, RelocationHolder spec);) Address(int disp, address loc, relocInfo::relocType rtype); Address(int disp, address loc, RelocationHolder spec); @@ -456,7 +440,7 @@ class InstructionAttr; // 64-bit reflect the fxsave size which is 512 bytes and the new xsave area on EVEX which is another 2176 bytes // See fxsave and xsave(EVEX enabled) documentation for layout -const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY(2688 / wordSize); +const int FPUStateSizeInWords = 2688 / wordSize; // The Intel x86/Amd64 Assembler: Pure assembler doing NO optimizations on the instruction // level (e.g. 
mov rax, 0 is not translated into xor rax, rax!); i.e., what you write @@ -626,12 +610,8 @@ class Assembler : public AbstractAssembler { imm_operand = 0, // embedded 32-bit|64-bit immediate operand disp32_operand = 1, // embedded 32-bit displacement or address call32_operand = 2, // embedded 32-bit self-relative displacement -#ifndef _LP64 - _WhichOperand_limit = 3 -#else - narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop + narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop _WhichOperand_limit = 4 -#endif }; // Comparison predicates for integral types & FP types when using SSE @@ -719,7 +699,6 @@ class Assembler : public AbstractAssembler { bool _legacy_mode_dq; bool _legacy_mode_vl; bool _legacy_mode_vlbw; - NOT_LP64(bool _is_managed;) InstructionAttr *_attributes; void set_attributes(InstructionAttr* attributes); @@ -904,25 +883,13 @@ class Assembler : public AbstractAssembler { void emit_opcode_prefix_and_encoding(int byte1, int ocp_and_encoding); void emit_opcode_prefix_and_encoding(int byte1, int byte2, int ocp_and_encoding); void emit_opcode_prefix_and_encoding(int byte1, int byte2, int ocp_and_encoding, int byte3); - bool always_reachable(AddressLiteral adr) NOT_LP64( { return true; } ); - bool reachable(AddressLiteral adr) NOT_LP64( { return true; } ); + bool always_reachable(AddressLiteral adr); + bool reachable(AddressLiteral adr); // These are all easily abused and hence protected public: - // 32BIT ONLY SECTION -#ifndef _LP64 - // Make these disappear in 64bit mode since they would never be correct - void cmp_literal32(Register src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY - void cmp_literal32(Address src1, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY - - void mov_literal32(Register dst, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY - void mov_literal32(Address dst, int32_t imm32, RelocationHolder const& rspec); // 32BIT ONLY - - void push_literal32(int32_t imm32, 
RelocationHolder const& rspec); // 32BIT ONLY -#else - // 64BIT ONLY SECTION void mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec); // 64BIT ONLY void cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec); @@ -930,7 +897,6 @@ class Assembler : public AbstractAssembler { void mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec); void mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec); -#endif // _LP64 protected: // These are unique in that we are ensured by the caller that the 32bit @@ -1008,23 +974,14 @@ class Assembler : public AbstractAssembler { int cur_tuple_type, int in_size_in_bits, int cur_encoding); // Generic instructions - // Does 32bit or 64bit as needed for the platform. In some sense these - // belong in macro assembler but there is no need for both varieties to exist void init_attributes(void); void clear_attributes(void) { _attributes = nullptr; } - void set_managed(void) { NOT_LP64(_is_managed = true;) } - void clear_managed(void) { NOT_LP64(_is_managed = false;) } - bool is_managed(void) { - NOT_LP64(return _is_managed;) - LP64_ONLY(return false;) } - void lea(Register dst, Address src); void mov(Register dst, Register src); -#ifdef _LP64 // support caching the result of some routines // must be called before pusha(), popa(), vzeroupper() - checked with asserts @@ -1044,7 +1001,6 @@ class Assembler : public AbstractAssembler { // New Zero Upper setcc instruction. 
void esetzucc(Condition cc, Register dst); -#endif void vzeroupper_uncached(); void decq(Register dst); void edecq(Register dst, Register src, bool no_flags); @@ -1066,9 +1022,7 @@ class Assembler : public AbstractAssembler { void rep_stos(); void rep_stosb(); void repne_scan(); -#ifdef _LP64 void repne_scanl(); -#endif // Vanilla instructions in lexical order @@ -1118,15 +1072,13 @@ class Assembler : public AbstractAssembler { void eincq(Register dst, Register src, bool no_flags); void eincq(Register dst, Address src, bool no_flags); -#ifdef _LP64 - //Add Unsigned Integers with Carry Flag + // Add Unsigned Integers with Carry Flag void adcxq(Register dst, Register src); void eadcxq(Register dst, Register src1, Register src2); - //Add Unsigned Integers with Overflow Flag + // Add Unsigned Integers with Overflow Flag void adoxq(Register dst, Register src); void eadoxq(Register dst, Register src1, Register src2); -#endif void addr_nop_4(); void addr_nop_5(); @@ -1203,10 +1155,8 @@ class Assembler : public AbstractAssembler { void bsfl(Register dst, Register src); void bsrl(Register dst, Register src); -#ifdef _LP64 void bsfq(Register dst, Register src); void bsrq(Register dst, Register src); -#endif void bswapl(Register reg); @@ -1392,138 +1342,6 @@ class Assembler : public AbstractAssembler { void emit_farith(int b1, int b2, int i); public: -#ifndef _LP64 - void emms(); - - void fabs(); - - void fadd(int i); - - void fadd_d(Address src); - void fadd_s(Address src); - - // "Alternate" versions of x87 instructions place result down in FPU - // stack instead of on TOS - - void fadda(int i); // "alternate" fadd - void faddp(int i = 1); - - void fchs(); - - void fcom(int i); - - void fcomp(int i = 1); - void fcomp_d(Address src); - void fcomp_s(Address src); - - void fcompp(); - - void fcos(); - - void fdecstp(); - - void fdiv(int i); - void fdiv_d(Address src); - void fdivr_s(Address src); - void fdiva(int i); // "alternate" fdiv - void fdivp(int i = 1); - - void 
fdivr(int i); - void fdivr_d(Address src); - void fdiv_s(Address src); - - void fdivra(int i); // "alternate" reversed fdiv - - void fdivrp(int i = 1); - - void ffree(int i = 0); - - void fild_d(Address adr); - void fild_s(Address adr); - - void fincstp(); - - void finit(); - - void fist_s (Address adr); - void fistp_d(Address adr); - void fistp_s(Address adr); - - void fld1(); - - void fld_s(Address adr); - void fld_s(int index); - - void fldcw(Address src); - - void fldenv(Address src); - - void fldlg2(); - - void fldln2(); - - void fldz(); - - void flog(); - void flog10(); - - void fmul(int i); - - void fmul_d(Address src); - void fmul_s(Address src); - - void fmula(int i); // "alternate" fmul - - void fmulp(int i = 1); - - void fnsave(Address dst); - - void fnstcw(Address src); - void fprem1(); - - void frstor(Address src); - - void fsin(); - - void fsqrt(); - - void fst_d(Address adr); - void fst_s(Address adr); - - void fstp_s(Address adr); - - void fsub(int i); - void fsub_d(Address src); - void fsub_s(Address src); - - void fsuba(int i); // "alternate" fsub - - void fsubp(int i = 1); - - void fsubr(int i); - void fsubr_d(Address src); - void fsubr_s(Address src); - - void fsubra(int i); // "alternate" reversed fsub - - void fsubrp(int i = 1); - - void ftan(); - - void ftst(); - - void fucomi(int i = 1); - void fucomip(int i = 1); - - void fwait(); - - void fxch(int i = 1); - - void fyl2x(); - void frndint(); - void f2xm1(); - void fldl2e(); -#endif // !_LP64 // operands that only take the original 32bit registers void emit_operand32(Register reg, Address adr, int post_addr_length); @@ -1543,12 +1361,10 @@ class Assembler : public AbstractAssembler { void divl(Register src); // Unsigned division void edivl(Register src, bool no_flags); // Unsigned division -#ifdef _LP64 void idivq(Register src); void eidivq(Register src, bool no_flags); void divq(Register src); // Unsigned division void edivq(Register src, bool no_flags); // Unsigned division -#endif void 
imull(Register src); void eimull(Register src, bool no_flags); @@ -1561,7 +1377,6 @@ class Assembler : public AbstractAssembler { void imull(Register dst, Address src); void eimull(Register dst, Register src1, Address src2, bool no_flags); -#ifdef _LP64 void imulq(Register dst, Register src); void eimulq(Register dst, Register src, bool no_flags); void eimulq(Register dst, Register src1, Register src2, bool no_flags); @@ -1574,7 +1389,6 @@ class Assembler : public AbstractAssembler { void eimulq(Register dst, Register src1, Address src2, bool no_flags); void imulq(Register dst); void eimulq(Register dst, bool no_flags); -#endif // jcc is the generic conditional branch generator to run- // time routines, jcc is used for branches to labels. jcc @@ -1626,9 +1440,7 @@ class Assembler : public AbstractAssembler { void leaq(Register dst, Address src); -#ifdef _LP64 void lea(Register dst, Label& L); -#endif void lfence(); @@ -1640,12 +1452,10 @@ class Assembler : public AbstractAssembler { void lzcntl(Register dst, Address src); void elzcntl(Register dst, Address src, bool no_flags); -#ifdef _LP64 void lzcntq(Register dst, Register src); void elzcntq(Register dst, Register src, bool no_flags); void lzcntq(Register dst, Address src); void elzcntq(Register dst, Address src, bool no_flags); -#endif enum Membar_mask_bits { StoreStore = 1 << 3, @@ -1797,13 +1607,11 @@ class Assembler : public AbstractAssembler { void movl(Register dst, Address src); void movl(Address dst, Register src); -#ifdef _LP64 void movq(Register dst, Register src); void movq(Register dst, Address src); void movq(Address dst, Register src); void movq(Address dst, int32_t imm32); void movq(Register dst, int32_t imm32); -#endif // Move Quadword void movq(Address dst, XMMRegister src); @@ -1815,7 +1623,6 @@ class Assembler : public AbstractAssembler { void movsbl(Register dst, Address src); void movsbl(Register dst, Register src); -#ifdef _LP64 void movsbq(Register dst, Address src); void movsbq(Register 
dst, Register src); @@ -1824,15 +1631,12 @@ class Assembler : public AbstractAssembler { void movslq(Register dst, Address src); void movslq(Register dst, Register src); -#endif void movswl(Register dst, Address src); void movswl(Register dst, Register src); -#ifdef _LP64 void movswq(Register dst, Address src); void movswq(Register dst, Register src); -#endif void movups(XMMRegister dst, Address src); void vmovups(XMMRegister dst, Address src, int vector_len); @@ -1846,18 +1650,14 @@ class Assembler : public AbstractAssembler { void movzbl(Register dst, Address src); void movzbl(Register dst, Register src); -#ifdef _LP64 void movzbq(Register dst, Address src); void movzbq(Register dst, Register src); -#endif void movzwl(Register dst, Address src); void movzwl(Register dst, Register src); -#ifdef _LP64 void movzwq(Register dst, Address src); void movzwq(Register dst, Register src); -#endif // Unsigned multiply with RAX destination register void mull(Address src); @@ -1865,13 +1665,11 @@ class Assembler : public AbstractAssembler { void mull(Register src); void emull(Register src, bool no_flags); -#ifdef _LP64 void mulq(Address src); void emulq(Address src, bool no_flags); void mulq(Register src); void emulq(Register src, bool no_flags); void mulxq(Register dst1, Register dst2, Register src); -#endif // Multiply Scalar Double-Precision Floating-Point Values void mulsd(XMMRegister dst, Address src); @@ -1886,26 +1684,22 @@ class Assembler : public AbstractAssembler { void negl(Address dst); void enegl(Register dst, Address src, bool no_flags); -#ifdef _LP64 void negq(Register dst); void enegq(Register dst, Register src, bool no_flags); void negq(Address dst); void enegq(Register dst, Address src, bool no_flags); -#endif void nop(uint i = 1); void notl(Register dst); void enotl(Register dst, Register src); -#ifdef _LP64 void notq(Register dst); void enotq(Register dst, Register src); void btsq(Address dst, int imm8); void btrq(Address dst, int imm8); void btq(Register 
src, int imm8); -#endif void btq(Register dst, Register src); void eorw(Register dst, Register src1, Register src2, bool no_flags); @@ -2124,14 +1918,8 @@ class Assembler : public AbstractAssembler { // Multiply add accumulate void evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); -#ifndef _LP64 // no 32bit push/pop on amd64 - void popl(Address dst); -#endif - -#ifdef _LP64 void popq(Address dst); void popq(Register dst); -#endif void popcntl(Register dst, Address src); void epopcntl(Register dst, Address src, bool no_flags); @@ -2143,12 +1931,10 @@ class Assembler : public AbstractAssembler { void evpopcntd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len); void evpopcntq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len); -#ifdef _LP64 void popcntq(Register dst, Address src); void epopcntq(Register dst, Address src, bool no_flags); void popcntq(Register dst, Register src); void epopcntq(Register dst, Register src, bool no_flags); -#endif // Prefetches (SSE, SSE2, 3DNOW only) @@ -2240,10 +2026,6 @@ class Assembler : public AbstractAssembler { // Vector sum of absolute difference. 
void vpsadbw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); -#ifndef _LP64 // no 32bit push/pop on amd64 - void pushl(Address src); -#endif - void pushq(Address src); void rcll(Register dst, int imm8); @@ -2275,7 +2057,6 @@ class Assembler : public AbstractAssembler { void rorl(Register dst, int imm8); void erorl(Register dst, Register src, int imm8, bool no_flags); -#ifdef _LP64 void rolq(Register dst); void erolq(Register dst, Register src, bool no_flags); void rolq(Register dst, int imm8); @@ -2288,7 +2069,6 @@ class Assembler : public AbstractAssembler { void rorxl(Register dst, Address src, int imm8); void rorxq(Register dst, Register src, int imm8); void rorxq(Register dst, Address src, int imm8); -#endif void sahf(); @@ -2310,7 +2090,6 @@ class Assembler : public AbstractAssembler { void sarl(Register dst); void esarl(Register dst, Register src, bool no_flags); -#ifdef _LP64 void salq(Register dst, int imm8); void esalq(Register dst, Register src, int imm8, bool no_flags); void salq(Register dst); @@ -2328,7 +2107,6 @@ class Assembler : public AbstractAssembler { void esarq(Register dst, Register src, int imm8, bool no_flags); void sarq(Register dst); void esarq(Register dst, Register src, bool no_flags); -#endif void sbbl(Address dst, int32_t imm32); void sbbl(Register dst, int32_t imm32); @@ -2369,12 +2147,10 @@ class Assembler : public AbstractAssembler { void eshrdl(Register dst, Register src1, Register src2, bool no_flags); void shrdl(Register dst, Register src, int8_t imm8); void eshrdl(Register dst, Register src1, Register src2, int8_t imm8, bool no_flags); -#ifdef _LP64 void shldq(Register dst, Register src, int8_t imm8); void eshldq(Register dst, Register src1, Register src2, int8_t imm8, bool no_flags); void shrdq(Register dst, Register src, int8_t imm8); void eshrdq(Register dst, Register src1, Register src2, int8_t imm8, bool no_flags); -#endif void shll(Register dst, int imm8); void eshll(Register dst, Register src, int 
imm8, bool no_flags); diff --git a/src/hotspot/cpu/x86/assembler_x86.inline.hpp b/src/hotspot/cpu/x86/assembler_x86.inline.hpp index f5cc75a55c5d8..69e826c99be90 100644 --- a/src/hotspot/cpu/x86/assembler_x86.inline.hpp +++ b/src/hotspot/cpu/x86/assembler_x86.inline.hpp @@ -25,58 +25,6 @@ #ifndef CPU_X86_ASSEMBLER_X86_INLINE_HPP #define CPU_X86_ASSEMBLER_X86_INLINE_HPP -#include "asm/assembler.inline.hpp" -#include "asm/codeBuffer.hpp" -#include "code/codeCache.hpp" - -#ifndef _LP64 -inline int Assembler::prefix_and_encode(int reg_enc, bool byteinst, bool is_map1) -{ - int opc_prefix = is_map1 ? 0x0F00 : 0; - return opc_prefix | reg_enc; -} - -inline int Assembler::prefixq_and_encode(int reg_enc, bool is_map1) { - int opc_prefix = is_map1 ? 0xF00 : 0; - return opc_prefix | reg_enc; -} - -inline int Assembler::prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte, bool is_map1) { - int opc_prefix = is_map1 ? 0xF00 : 0; - return opc_prefix | (dst_enc << 3 | src_enc); -} - -inline int Assembler::prefixq_and_encode(int dst_enc, int src_enc, bool is_map1) { - int opc_prefix = is_map1 ? 0xF00 : 0; - return opc_prefix | dst_enc << 3 | src_enc; -} - -inline void Assembler::prefix(Register reg) {} -inline void Assembler::prefix(Register dst, Register src, Prefix p) {} -inline void Assembler::prefix(Register dst, Address adr, Prefix p) {} - -inline void Assembler::prefix(Address adr, bool is_map1) { - if (is_map1) { - emit_int8(0x0F); - } -} - -inline void Assembler::prefixq(Address adr) {} - -inline void Assembler::prefix(Address adr, Register reg, bool byteinst, bool is_map1) { - if (is_map1) { - emit_int8(0x0F); - } -} -inline void Assembler::prefixq(Address adr, Register reg, bool is_map1) { - if (is_map1) { - emit_int8(0x0F); - } -} - -inline void Assembler::prefix(Address adr, XMMRegister reg) {} -inline void Assembler::prefixq(Address adr, XMMRegister reg) {} - -#endif // _LP64 +// Nothing is here. 
Left to allow CPU_HEADER_INLINE macro to work. #endif // CPU_X86_ASSEMBLER_X86_INLINE_HPP diff --git a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp index 71ca9351f86c9..0ea261c1c5145 100644 --- a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp +++ b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp @@ -38,66 +38,11 @@ #define __ ce->masm()-> -#ifndef _LP64 -float ConversionStub::float_zero = 0.0; -double ConversionStub::double_zero = 0.0; - -void ConversionStub::emit_code(LIR_Assembler* ce) { - __ bind(_entry); - assert(bytecode() == Bytecodes::_f2i || bytecode() == Bytecodes::_d2i, "other conversions do not require stub"); - - - if (input()->is_single_xmm()) { - __ comiss(input()->as_xmm_float_reg(), - ExternalAddress((address)&float_zero)); - } else if (input()->is_double_xmm()) { - __ comisd(input()->as_xmm_double_reg(), - ExternalAddress((address)&double_zero)); - } else { - __ push(rax); - __ ftst(); - __ fnstsw_ax(); - __ sahf(); - __ pop(rax); - } - - Label NaN, do_return; - __ jccb(Assembler::parity, NaN); - __ jccb(Assembler::below, do_return); - - // input is > 0 -> return maxInt - // result register already contains 0x80000000, so subtracting 1 gives 0x7fffffff - __ decrement(result()->as_register()); - __ jmpb(do_return); - - // input is NaN -> return 0 - __ bind(NaN); - __ xorptr(result()->as_register(), result()->as_register()); - - __ bind(do_return); - __ jmp(_continuation); -} -#endif // !_LP64 - void C1SafepointPollStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); InternalAddress safepoint_pc(ce->masm()->pc() - ce->masm()->offset() + safepoint_offset()); -#ifdef _LP64 __ lea(rscratch1, safepoint_pc); __ movptr(Address(r15_thread, JavaThread::saved_exception_pc_offset()), rscratch1); -#else - const Register tmp1 = rcx; - const Register tmp2 = rdx; - __ push(tmp1); - __ push(tmp2); - - __ lea(tmp1, safepoint_pc); - __ get_thread(tmp2); - __ movptr(Address(tmp2, JavaThread::saved_exception_pc_offset()), tmp1); - - __ 
pop(tmp2); - __ pop(tmp1); -#endif /* _LP64 */ assert(SharedRuntime::polling_page_return_handler_blob() != nullptr, "polling page return stub not created yet"); diff --git a/src/hotspot/cpu/x86/c1_Defs_x86.hpp b/src/hotspot/cpu/x86/c1_Defs_x86.hpp index 1125097ee0567..0ce6aaf82b1c5 100644 --- a/src/hotspot/cpu/x86/c1_Defs_x86.hpp +++ b/src/hotspot/cpu/x86/c1_Defs_x86.hpp @@ -33,21 +33,17 @@ enum { // explicit rounding operations are required to implement the strictFP mode enum { - pd_strict_fp_requires_explicit_rounding = LP64_ONLY( false ) NOT_LP64 ( true ) + pd_strict_fp_requires_explicit_rounding = false }; // registers enum { - pd_nof_cpu_regs_frame_map = NOT_LP64(8) LP64_ONLY(16), // number of registers used during code emission + pd_nof_cpu_regs_frame_map = 16, // number of registers used during code emission pd_nof_fpu_regs_frame_map = FloatRegister::number_of_registers, // number of registers used during code emission pd_nof_xmm_regs_frame_map = XMMRegister::number_of_registers, // number of registers used during code emission -#ifdef _LP64 #define UNALLOCATED 4 // rsp, rbp, r15, r10 -#else - #define UNALLOCATED 2 // rsp, rbp -#endif // LP64 pd_nof_caller_save_cpu_regs_frame_map = pd_nof_cpu_regs_frame_map - UNALLOCATED, // number of registers killed by calls pd_nof_caller_save_fpu_regs_frame_map = pd_nof_fpu_regs_frame_map, // number of registers killed by calls @@ -60,9 +56,9 @@ enum { pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of registers visible to linear scan pd_nof_xmm_regs_linearscan = pd_nof_xmm_regs_frame_map, // number of registers visible to linear scan pd_first_cpu_reg = 0, - pd_last_cpu_reg = NOT_LP64(5) LP64_ONLY(11), - pd_first_byte_reg = NOT_LP64(2) LP64_ONLY(0), - pd_last_byte_reg = NOT_LP64(5) LP64_ONLY(11), + pd_last_cpu_reg = 11, + pd_first_byte_reg = 0, + pd_last_byte_reg = 11, pd_first_fpu_reg = pd_nof_cpu_regs_frame_map, pd_last_fpu_reg = pd_first_fpu_reg + 7, pd_first_xmm_reg = pd_nof_cpu_regs_frame_map + 
pd_nof_fpu_regs_frame_map, diff --git a/src/hotspot/cpu/x86/c1_FpuStackSim_x86.cpp b/src/hotspot/cpu/x86/c1_FpuStackSim_x86.cpp deleted file mode 100644 index 3ec182a350b8d..0000000000000 --- a/src/hotspot/cpu/x86/c1_FpuStackSim_x86.cpp +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "precompiled.hpp" -#include "c1/c1_FpuStackSim.hpp" -#include "c1/c1_FrameMap.hpp" -#include "utilities/growableArray.hpp" -#include "utilities/ostream.hpp" - -//-------------------------------------------------------- -// FpuStackSim -//-------------------------------------------------------- - -// This class maps the FPU registers to their stack locations; it computes -// the offsets between individual registers and simulates the FPU stack. 
- -const int EMPTY = -1; - -int FpuStackSim::regs_at(int i) const { - assert(i >= 0 && i < FrameMap::nof_fpu_regs, "out of bounds"); - return _regs[i]; -} - -void FpuStackSim::set_regs_at(int i, int val) { - assert(i >= 0 && i < FrameMap::nof_fpu_regs, "out of bounds"); - _regs[i] = val; -} - -void FpuStackSim::dec_stack_size() { - _stack_size--; - assert(_stack_size >= 0, "FPU stack underflow"); -} - -void FpuStackSim::inc_stack_size() { - _stack_size++; - assert(_stack_size <= FrameMap::nof_fpu_regs, "FPU stack overflow"); -} - -FpuStackSim::FpuStackSim(Compilation* compilation) - : _compilation(compilation) -{ - _stack_size = 0; - for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { - set_regs_at(i, EMPTY); - } -} - - -void FpuStackSim::pop() { - if (TraceFPUStack) { tty->print("FPU-pop "); print(); tty->cr(); } - set_regs_at(tos_index(), EMPTY); - dec_stack_size(); -} - -void FpuStackSim::pop(int rnr) { - if (TraceFPUStack) { tty->print("FPU-pop %d", rnr); print(); tty->cr(); } - assert(regs_at(tos_index()) == rnr, "rnr is not on TOS"); - set_regs_at(tos_index(), EMPTY); - dec_stack_size(); -} - - -void FpuStackSim::push(int rnr) { - if (TraceFPUStack) { tty->print("FPU-push %d", rnr); print(); tty->cr(); } - assert(regs_at(stack_size()) == EMPTY, "should be empty"); - set_regs_at(stack_size(), rnr); - inc_stack_size(); -} - - -void FpuStackSim::swap(int offset) { - if (TraceFPUStack) { tty->print("FPU-swap %d", offset); print(); tty->cr(); } - int t = regs_at(tos_index() - offset); - set_regs_at(tos_index() - offset, regs_at(tos_index())); - set_regs_at(tos_index(), t); -} - - -int FpuStackSim::offset_from_tos(int rnr) const { - for (int i = tos_index(); i >= 0; i--) { - if (regs_at(i) == rnr) { - return tos_index() - i; - } - } - assert(false, "FpuStackSim: register not found"); - BAILOUT_("FpuStackSim: register not found", 0); -} - - -int FpuStackSim::get_slot(int tos_offset) const { - return regs_at(tos_index() - tos_offset); -} - -void 
FpuStackSim::set_slot(int tos_offset, int rnr) { - set_regs_at(tos_index() - tos_offset, rnr); -} - -void FpuStackSim::rename(int old_rnr, int new_rnr) { - if (TraceFPUStack) { tty->print("FPU-rename %d %d", old_rnr, new_rnr); print(); tty->cr(); } - if (old_rnr == new_rnr) - return; - bool found = false; - for (int i = 0; i < stack_size(); i++) { - assert(regs_at(i) != new_rnr, "should not see old occurrences of new_rnr on the stack"); - if (regs_at(i) == old_rnr) { - set_regs_at(i, new_rnr); - found = true; - } - } - assert(found, "should have found at least one instance of old_rnr"); -} - - -bool FpuStackSim::contains(int rnr) { - for (int i = 0; i < stack_size(); i++) { - if (regs_at(i) == rnr) { - return true; - } - } - return false; -} - -bool FpuStackSim::is_empty() { -#ifdef ASSERT - if (stack_size() == 0) { - for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { - assert(regs_at(i) == EMPTY, "must be empty"); - } - } -#endif - return stack_size() == 0; -} - - -bool FpuStackSim::slot_is_empty(int tos_offset) { - return (regs_at(tos_index() - tos_offset) == EMPTY); -} - - -void FpuStackSim::clear() { - if (TraceFPUStack) { tty->print("FPU-clear"); print(); tty->cr(); } - for (int i = tos_index(); i >= 0; i--) { - set_regs_at(i, EMPTY); - } - _stack_size = 0; -} - - -intArray* FpuStackSim::write_state() { - intArray* res = new intArray(1 + FrameMap::nof_fpu_regs); - res->append(stack_size()); - for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { - res->append(regs_at(i)); - } - return res; -} - - -void FpuStackSim::read_state(intArray* fpu_stack_state) { - _stack_size = fpu_stack_state->at(0); - for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { - set_regs_at(i, fpu_stack_state->at(1 + i)); - } -} - - -#ifndef PRODUCT -void FpuStackSim::print() { - tty->print(" N=%d[", stack_size());\ - for (int i = 0; i < stack_size(); i++) { - int reg = regs_at(i); - if (reg != EMPTY) { - tty->print("%d", reg); - } else { - tty->print("_"); - } - }; - tty->print(" ]"); -} 
-#endif diff --git a/src/hotspot/cpu/x86/c1_FpuStackSim_x86.hpp b/src/hotspot/cpu/x86/c1_FpuStackSim_x86.hpp deleted file mode 100644 index 405aebe9a7571..0000000000000 --- a/src/hotspot/cpu/x86/c1_FpuStackSim_x86.hpp +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- * - */ - -#ifndef CPU_X86_C1_FPUSTACKSIM_X86_HPP -#define CPU_X86_C1_FPUSTACKSIM_X86_HPP - -// Simulates the FPU stack and maintains mapping [fpu-register -> stack offset] -// FPU registers are described as numbers from 0..nof_fpu_regs-1 - -class Compilation; - -class FpuStackSim { - private: - Compilation* _compilation; - int _stack_size; - int _regs[FrameMap::nof_fpu_regs]; - - int tos_index() const { return _stack_size - 1; } - - int regs_at(int i) const; - void set_regs_at(int i, int val); - void dec_stack_size(); - void inc_stack_size(); - - // unified bailout support - Compilation* compilation() const { return _compilation; } - void bailout(const char* msg) const { compilation()->bailout(msg); } - bool bailed_out() const { return compilation()->bailed_out(); } - - public: - FpuStackSim(Compilation* compilation); - void pop (); - void pop (int rnr); // rnr must be on tos - void push(int rnr); - void swap(int offset); // exchange tos with tos + offset - int offset_from_tos(int rnr) const; // return the offset of the topmost instance of rnr from TOS - int get_slot(int tos_offset) const; // return the entry at the given offset from TOS - void set_slot(int tos_offset, int rnr); // set the entry at the given offset from TOS - void rename(int old_rnr, int new_rnr); // rename all instances of old_rnr to new_rnr - bool contains(int rnr); // debugging support only - bool is_empty(); - bool slot_is_empty(int tos_offset); - int stack_size() const { return _stack_size; } - void clear(); - intArray* write_state(); - void read_state(intArray* fpu_stack_state); - - void print() PRODUCT_RETURN; -}; - -#endif // CPU_X86_C1_FPUSTACKSIM_X86_HPP diff --git a/src/hotspot/cpu/x86/c1_FrameMap_x86.cpp b/src/hotspot/cpu/x86/c1_FrameMap_x86.cpp index 4153c37729bfc..1b7622d509bfd 100644 --- a/src/hotspot/cpu/x86/c1_FrameMap_x86.cpp +++ b/src/hotspot/cpu/x86/c1_FrameMap_x86.cpp @@ -44,12 +44,8 @@ LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) { Register reg = 
r_1->as_Register(); if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) { Register reg2 = r_2->as_Register(); -#ifdef _LP64 assert(reg2 == reg, "must be same register"); opr = as_long_opr(reg); -#else - opr = as_long_opr(reg2, reg); -#endif // _LP64 } else if (is_reference_type(type)) { opr = as_oop_opr(reg); } else if (type == T_METADATA) { @@ -109,13 +105,9 @@ LIR_Opr FrameMap::rcx_metadata_opr; LIR_Opr FrameMap::long0_opr; LIR_Opr FrameMap::long1_opr; -LIR_Opr FrameMap::fpu0_float_opr; -LIR_Opr FrameMap::fpu0_double_opr; LIR_Opr FrameMap::xmm0_float_opr; LIR_Opr FrameMap::xmm0_double_opr; -#ifdef _LP64 - LIR_Opr FrameMap::r8_opr; LIR_Opr FrameMap::r9_opr; LIR_Opr FrameMap::r10_opr; @@ -140,7 +132,6 @@ LIR_Opr FrameMap::r11_metadata_opr; LIR_Opr FrameMap::r12_metadata_opr; LIR_Opr FrameMap::r13_metadata_opr; LIR_Opr FrameMap::r14_metadata_opr; -#endif // _LP64 LIR_Opr FrameMap::_caller_save_cpu_regs[] = {}; LIR_Opr FrameMap::_caller_save_fpu_regs[] = {}; @@ -160,7 +151,7 @@ XMMRegister FrameMap::nr2xmmreg(int rnr) { void FrameMap::initialize() { assert(!_init_done, "once"); - assert(nof_cpu_regs == LP64_ONLY(16) NOT_LP64(8), "wrong number of CPU registers"); + assert(nof_cpu_regs == 16, "wrong number of CPU registers"); map_register(0, rsi); rsi_opr = LIR_OprFact::single_cpu(0); map_register(1, rdi); rdi_opr = LIR_OprFact::single_cpu(1); map_register(2, rbx); rbx_opr = LIR_OprFact::single_cpu(2); @@ -168,11 +159,6 @@ void FrameMap::initialize() { map_register(4, rdx); rdx_opr = LIR_OprFact::single_cpu(4); map_register(5, rcx); rcx_opr = LIR_OprFact::single_cpu(5); -#ifndef _LP64 - // The unallocatable registers are at the end - map_register(6, rsp); - map_register(7, rbp); -#else map_register( 6, r8); r8_opr = LIR_OprFact::single_cpu(6); map_register( 7, r9); r9_opr = LIR_OprFact::single_cpu(7); map_register( 8, r11); r11_opr = LIR_OprFact::single_cpu(8); @@ -186,17 +172,10 @@ void FrameMap::initialize() { map_register(13, r15); r15_opr = 
LIR_OprFact::single_cpu(13); map_register(14, rsp); map_register(15, rbp); -#endif // _LP64 -#ifdef _LP64 long0_opr = LIR_OprFact::double_cpu(3 /*eax*/, 3 /*eax*/); long1_opr = LIR_OprFact::double_cpu(2 /*ebx*/, 2 /*ebx*/); -#else - long0_opr = LIR_OprFact::double_cpu(3 /*eax*/, 4 /*edx*/); - long1_opr = LIR_OprFact::double_cpu(2 /*ebx*/, 5 /*ecx*/); -#endif // _LP64 - fpu0_float_opr = LIR_OprFact::single_fpu(0); - fpu0_double_opr = LIR_OprFact::double_fpu(0); + xmm0_float_opr = LIR_OprFact::single_xmm(0); xmm0_double_opr = LIR_OprFact::double_xmm(0); @@ -207,15 +186,12 @@ void FrameMap::initialize() { _caller_save_cpu_regs[4] = rdx_opr; _caller_save_cpu_regs[5] = rcx_opr; -#ifdef _LP64 _caller_save_cpu_regs[6] = r8_opr; _caller_save_cpu_regs[7] = r9_opr; _caller_save_cpu_regs[8] = r11_opr; _caller_save_cpu_regs[9] = r13_opr; _caller_save_cpu_regs[10] = r14_opr; _caller_save_cpu_regs[11] = r12_opr; -#endif // _LP64 - _xmm_regs[0] = xmm0; _xmm_regs[1] = xmm1; @@ -225,33 +201,30 @@ void FrameMap::initialize() { _xmm_regs[5] = xmm5; _xmm_regs[6] = xmm6; _xmm_regs[7] = xmm7; - -#ifdef _LP64 - _xmm_regs[8] = xmm8; - _xmm_regs[9] = xmm9; - _xmm_regs[10] = xmm10; - _xmm_regs[11] = xmm11; - _xmm_regs[12] = xmm12; - _xmm_regs[13] = xmm13; - _xmm_regs[14] = xmm14; - _xmm_regs[15] = xmm15; - _xmm_regs[16] = xmm16; - _xmm_regs[17] = xmm17; - _xmm_regs[18] = xmm18; - _xmm_regs[19] = xmm19; - _xmm_regs[20] = xmm20; - _xmm_regs[21] = xmm21; - _xmm_regs[22] = xmm22; - _xmm_regs[23] = xmm23; - _xmm_regs[24] = xmm24; - _xmm_regs[25] = xmm25; - _xmm_regs[26] = xmm26; - _xmm_regs[27] = xmm27; - _xmm_regs[28] = xmm28; - _xmm_regs[29] = xmm29; - _xmm_regs[30] = xmm30; - _xmm_regs[31] = xmm31; -#endif // _LP64 + _xmm_regs[8] = xmm8; + _xmm_regs[9] = xmm9; + _xmm_regs[10] = xmm10; + _xmm_regs[11] = xmm11; + _xmm_regs[12] = xmm12; + _xmm_regs[13] = xmm13; + _xmm_regs[14] = xmm14; + _xmm_regs[15] = xmm15; + _xmm_regs[16] = xmm16; + _xmm_regs[17] = xmm17; + _xmm_regs[18] = xmm18; + 
_xmm_regs[19] = xmm19; + _xmm_regs[20] = xmm20; + _xmm_regs[21] = xmm21; + _xmm_regs[22] = xmm22; + _xmm_regs[23] = xmm23; + _xmm_regs[24] = xmm24; + _xmm_regs[25] = xmm25; + _xmm_regs[26] = xmm26; + _xmm_regs[27] = xmm27; + _xmm_regs[28] = xmm28; + _xmm_regs[29] = xmm29; + _xmm_regs[30] = xmm30; + _xmm_regs[31] = xmm31; for (int i = 0; i < 8; i++) { _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); @@ -281,7 +254,6 @@ void FrameMap::initialize() { rsp_opr = as_pointer_opr(rsp); rbp_opr = as_pointer_opr(rbp); -#ifdef _LP64 r8_oop_opr = as_oop_opr(r8); r9_oop_opr = as_oop_opr(r9); r11_oop_opr = as_oop_opr(r11); @@ -295,7 +267,6 @@ void FrameMap::initialize() { r12_metadata_opr = as_metadata_opr(r12); r13_metadata_opr = as_metadata_opr(r13); r14_metadata_opr = as_metadata_opr(r14); -#endif // _LP64 VMRegPair regs; BasicType sig_bt = T_OBJECT; diff --git a/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp b/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp index f2bedcb1d273b..08b872cb0951d 100644 --- a/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp +++ b/src/hotspot/cpu/x86/c1_FrameMap_x86.hpp @@ -41,13 +41,8 @@ nof_xmm_regs = pd_nof_xmm_regs_frame_map, nof_caller_save_xmm_regs = pd_nof_caller_save_xmm_regs_frame_map, first_available_sp_in_frame = 0, -#ifndef _LP64 - frame_pad_in_bytes = 8, - nof_reg_args = 2 -#else frame_pad_in_bytes = 16, nof_reg_args = 6 -#endif // _LP64 }; private: @@ -81,8 +76,6 @@ static LIR_Opr rdx_metadata_opr; static LIR_Opr rcx_metadata_opr; -#ifdef _LP64 - static LIR_Opr r8_opr; static LIR_Opr r9_opr; static LIR_Opr r10_opr; @@ -108,30 +101,17 @@ static LIR_Opr r13_metadata_opr; static LIR_Opr r14_metadata_opr; -#endif // _LP64 - static LIR_Opr long0_opr; static LIR_Opr long1_opr; - static LIR_Opr fpu0_float_opr; - static LIR_Opr fpu0_double_opr; static LIR_Opr xmm0_float_opr; static LIR_Opr xmm0_double_opr; -#ifdef _LP64 static LIR_Opr as_long_opr(Register r) { return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); } static LIR_Opr 
as_pointer_opr(Register r) { return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); } -#else - static LIR_Opr as_long_opr(Register r, Register r2) { - return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r2)); - } - static LIR_Opr as_pointer_opr(Register r) { - return LIR_OprFact::single_cpu(cpu_reg2rnr(r)); - } -#endif // _LP64 // VMReg name for spilled physical FPU stack slot n static VMReg fpu_regname (int n); diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index ff6d18e48e1a8..303f40e62e8f0 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -162,24 +162,6 @@ address LIR_Assembler::double_constant(double d) { } } -#ifndef _LP64 -void LIR_Assembler::fpop() { - __ fpop(); -} - -void LIR_Assembler::fxch(int i) { - __ fxch(i); -} - -void LIR_Assembler::fld(int i) { - __ fld_s(i); -} - -void LIR_Assembler::ffree(int i) { - __ ffree(i); -} -#endif // !_LP64 - void LIR_Assembler::breakpoint() { __ int3(); } @@ -188,7 +170,6 @@ void LIR_Assembler::push(LIR_Opr opr) { if (opr->is_single_cpu()) { __ push_reg(opr->as_register()); } else if (opr->is_double_cpu()) { - NOT_LP64(__ push_reg(opr->as_register_hi())); __ push_reg(opr->as_register_lo()); } else if (opr->is_stack()) { __ push_addr(frame_map()->address_for_slot(opr->single_stack_ix())); @@ -344,11 +325,9 @@ void LIR_Assembler::clinit_barrier(ciMethod* method) { Label L_skip_barrier; Register klass = rscratch1; - Register thread = LP64_ONLY( r15_thread ) NOT_LP64( noreg ); - assert(thread != noreg, "x86_32 not implemented"); __ mov_metadata(klass, method->holder()->constant_encoding()); - __ clinit_barrier(klass, thread, &L_skip_barrier /*L_fast_path*/); + __ clinit_barrier(klass, r15_thread, &L_skip_barrier /*L_fast_path*/); __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); @@ -420,11 +399,9 @@ int LIR_Assembler::emit_unwind_handler() { int offset = code_offset(); // 
Fetch the exception from TLS and clear out exception related thread state - Register thread = NOT_LP64(rsi) LP64_ONLY(r15_thread); - NOT_LP64(__ get_thread(thread)); - __ movptr(rax, Address(thread, JavaThread::exception_oop_offset())); - __ movptr(Address(thread, JavaThread::exception_oop_offset()), NULL_WORD); - __ movptr(Address(thread, JavaThread::exception_pc_offset()), NULL_WORD); + __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset())); + __ movptr(Address(r15_thread, JavaThread::exception_oop_offset()), NULL_WORD); + __ movptr(Address(r15_thread, JavaThread::exception_pc_offset()), NULL_WORD); __ bind(_unwind_handler_entry); __ verify_not_null_oop(rax); @@ -446,14 +423,8 @@ int LIR_Assembler::emit_unwind_handler() { } if (compilation()->env()->dtrace_method_probes()) { -#ifdef _LP64 __ mov(rdi, r15_thread); __ mov_metadata(rsi, method()->constant_encoding()); -#else - __ get_thread(rax); - __ movptr(Address(rsp, 0), rax); - __ mov_metadata(Address(rsp, sizeof(void*)), method()->constant_encoding(), noreg); -#endif __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit))); } @@ -510,15 +481,9 @@ void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { // Note: we do not need to round double result; float result has the right precision // the poll sets the condition code, but no data registers -#ifdef _LP64 - const Register thread = r15_thread; -#else - const Register thread = rbx; - __ get_thread(thread); -#endif code_stub->set_safepoint_offset(__ offset()); __ relocate(relocInfo::poll_return_type); - __ safepoint_poll(*code_stub->entry(), thread, true /* at_return */, true /* in_nmethod */); + __ safepoint_poll(*code_stub->entry(), r15_thread, true /* at_return */, true /* in_nmethod */); __ ret(0); } @@ -526,21 +491,14 @@ void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { guarantee(info != 
nullptr, "Shouldn't be null"); int offset = __ offset(); -#ifdef _LP64 const Register poll_addr = rscratch1; __ movptr(poll_addr, Address(r15_thread, JavaThread::polling_page_offset())); -#else - assert(tmp->is_cpu_register(), "needed"); - const Register poll_addr = tmp->as_register(); - __ get_thread(poll_addr); - __ movptr(poll_addr, Address(poll_addr, in_bytes(JavaThread::polling_page_offset()))); -#endif add_debug_info_for_branch(info); __ relocate(relocInfo::poll_type); address pre_pc = __ pc(); __ testl(rax, Address(poll_addr, 0)); address post_pc = __ pc(); - guarantee(pointer_delta(post_pc, pre_pc, 1) == 2 LP64_ONLY(+1), "must be exact length"); + guarantee(pointer_delta(post_pc, pre_pc, 1) == 3, "must be exact length"); return offset; } @@ -574,12 +532,7 @@ void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_cod case T_LONG: { assert(patch_code == lir_patch_none, "no patching handled here"); -#ifdef _LP64 __ movptr(dest->as_register_lo(), (intptr_t)c->as_jlong()); -#else - __ movptr(dest->as_register_lo(), c->as_jint_lo()); - __ movptr(dest->as_register_hi(), c->as_jint_hi()); -#endif // _LP64 break; } @@ -603,52 +556,28 @@ void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_cod case T_FLOAT: { if (dest->is_single_xmm()) { - if (LP64_ONLY(UseAVX <= 2 &&) c->is_zero_float()) { + if (UseAVX <= 2 && c->is_zero_float()) { __ xorps(dest->as_xmm_float_reg(), dest->as_xmm_float_reg()); } else { __ movflt(dest->as_xmm_float_reg(), InternalAddress(float_constant(c->as_jfloat()))); } } else { -#ifndef _LP64 - assert(dest->is_single_fpu(), "must be"); - assert(dest->fpu_regnr() == 0, "dest must be TOS"); - if (c->is_zero_float()) { - __ fldz(); - } else if (c->is_one_float()) { - __ fld1(); - } else { - __ fld_s (InternalAddress(float_constant(c->as_jfloat()))); - } -#else ShouldNotReachHere(); -#endif // !_LP64 } break; } case T_DOUBLE: { if (dest->is_double_xmm()) { - if (LP64_ONLY(UseAVX <= 2 &&) 
c->is_zero_double()) { + if (UseAVX <= 2 && c->is_zero_double()) { __ xorpd(dest->as_xmm_double_reg(), dest->as_xmm_double_reg()); } else { __ movdbl(dest->as_xmm_double_reg(), InternalAddress(double_constant(c->as_jdouble()))); } } else { -#ifndef _LP64 - assert(dest->is_double_fpu(), "must be"); - assert(dest->fpu_regnrLo() == 0, "dest must be TOS"); - if (c->is_zero_double()) { - __ fldz(); - } else if (c->is_one_double()) { - __ fld1(); - } else { - __ fld_d (InternalAddress(double_constant(c->as_jdouble()))); - } -#else ShouldNotReachHere(); -#endif // !_LP64 } break; } @@ -679,17 +608,10 @@ void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) { case T_LONG: // fall through case T_DOUBLE: -#ifdef _LP64 __ movptr(frame_map()->address_for_slot(dest->double_stack_ix(), lo_word_offset_in_bytes), (intptr_t)c->as_jlong_bits(), rscratch1); -#else - __ movptr(frame_map()->address_for_slot(dest->double_stack_ix(), - lo_word_offset_in_bytes), c->as_jint_lo_bits()); - __ movptr(frame_map()->address_for_slot(dest->double_stack_ix(), - hi_word_offset_in_bytes), c->as_jint_hi_bits()); -#endif // _LP64 break; default: @@ -720,20 +642,15 @@ void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmi if (UseCompressedOops && !wide) { __ movl(as_Address(addr), NULL_WORD); } else { -#ifdef _LP64 __ xorptr(rscratch1, rscratch1); null_check_here = code_offset(); __ movptr(as_Address(addr), rscratch1); -#else - __ movptr(as_Address(addr), NULL_WORD); -#endif } } else { if (is_literal_address(addr)) { ShouldNotReachHere(); __ movoop(as_Address(addr, noreg), c->as_jobject(), rscratch1); } else { -#ifdef _LP64 __ movoop(rscratch1, c->as_jobject()); if (UseCompressedOops && !wide) { __ encode_heap_oop(rscratch1); @@ -743,16 +660,12 @@ void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmi null_check_here = code_offset(); __ movptr(as_Address_lo(addr), rscratch1); } -#else - __ movoop(as_Address(addr), c->as_jobject(), noreg); -#endif 
} } break; case T_LONG: // fall through case T_DOUBLE: -#ifdef _LP64 if (is_literal_address(addr)) { ShouldNotReachHere(); __ movptr(as_Address(addr, r15_thread), (intptr_t)c->as_jlong_bits()); @@ -761,11 +674,6 @@ void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmi null_check_here = code_offset(); __ movptr(as_Address_lo(addr), r10); } -#else - // Always reachable in 32bit so this doesn't produce useless move literal - __ movptr(as_Address_hi(addr), c->as_jint_hi_bits()); - __ movptr(as_Address_lo(addr), c->as_jint_lo_bits()); -#endif // _LP64 break; case T_BOOLEAN: // fall through @@ -794,13 +702,11 @@ void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) { // move between cpu-registers if (dest->is_single_cpu()) { -#ifdef _LP64 if (src->type() == T_LONG) { // Can do LONG -> OBJECT move_regs(src->as_register_lo(), dest->as_register()); return; } -#endif assert(src->is_single_cpu(), "must match"); if (src->type() == T_OBJECT) { __ verify_oop(src->as_register()); @@ -808,56 +714,20 @@ void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) { move_regs(src->as_register(), dest->as_register()); } else if (dest->is_double_cpu()) { -#ifdef _LP64 if (is_reference_type(src->type())) { // Surprising to me but we can see move of a long to t_object __ verify_oop(src->as_register()); move_regs(src->as_register(), dest->as_register_lo()); return; } -#endif assert(src->is_double_cpu(), "must match"); Register f_lo = src->as_register_lo(); Register f_hi = src->as_register_hi(); Register t_lo = dest->as_register_lo(); Register t_hi = dest->as_register_hi(); -#ifdef _LP64 assert(f_hi == f_lo, "must be same"); assert(t_hi == t_lo, "must be same"); move_regs(f_lo, t_lo); -#else - assert(f_lo != f_hi && t_lo != t_hi, "invalid register allocation"); - - - if (f_lo == t_hi && f_hi == t_lo) { - swap_reg(f_lo, f_hi); - } else if (f_hi == t_lo) { - assert(f_lo != t_hi, "overwriting register"); - move_regs(f_hi, t_hi); - move_regs(f_lo, t_lo); - } else { - 
assert(f_hi != t_lo, "overwriting register"); - move_regs(f_lo, t_lo); - move_regs(f_hi, t_hi); - } -#endif // LP64 - -#ifndef _LP64 - // special moves from fpu-register to xmm-register - // necessary for method results - } else if (src->is_single_xmm() && !dest->is_single_xmm()) { - __ movflt(Address(rsp, 0), src->as_xmm_float_reg()); - __ fld_s(Address(rsp, 0)); - } else if (src->is_double_xmm() && !dest->is_double_xmm()) { - __ movdbl(Address(rsp, 0), src->as_xmm_double_reg()); - __ fld_d(Address(rsp, 0)); - } else if (dest->is_single_xmm() && !src->is_single_xmm()) { - __ fstp_s(Address(rsp, 0)); - __ movflt(dest->as_xmm_float_reg(), Address(rsp, 0)); - } else if (dest->is_double_xmm() && !src->is_double_xmm()) { - __ fstp_d(Address(rsp, 0)); - __ movdbl(dest->as_xmm_double_reg(), Address(rsp, 0)); -#endif // !_LP64 // move between xmm-registers } else if (dest->is_single_xmm()) { @@ -867,13 +737,6 @@ void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) { assert(src->is_double_xmm(), "must match"); __ movdbl(dest->as_xmm_double_reg(), src->as_xmm_double_reg()); -#ifndef _LP64 - // move between fpu-registers (no instruction necessary because of fpu-stack) - } else if (dest->is_single_fpu() || dest->is_double_fpu()) { - assert(src->is_single_fpu() || src->is_double_fpu(), "must match"); - assert(src->fpu() == dest->fpu(), "currently should be nothing to do"); -#endif // !_LP64 - } else { ShouldNotReachHere(); } @@ -898,7 +761,6 @@ void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool po Address dstLO = frame_map()->address_for_slot(dest->double_stack_ix(), lo_word_offset_in_bytes); Address dstHI = frame_map()->address_for_slot(dest->double_stack_ix(), hi_word_offset_in_bytes); __ movptr (dstLO, src->as_register_lo()); - NOT_LP64(__ movptr (dstHI, src->as_register_hi())); } else if (src->is_single_xmm()) { Address dst_addr = frame_map()->address_for_slot(dest->single_stack_ix()); @@ -908,20 +770,6 @@ void LIR_Assembler::reg2stack(LIR_Opr 
src, LIR_Opr dest, BasicType type, bool po Address dst_addr = frame_map()->address_for_slot(dest->double_stack_ix()); __ movdbl(dst_addr, src->as_xmm_double_reg()); -#ifndef _LP64 - } else if (src->is_single_fpu()) { - assert(src->fpu_regnr() == 0, "argument must be on TOS"); - Address dst_addr = frame_map()->address_for_slot(dest->single_stack_ix()); - if (pop_fpu_stack) __ fstp_s (dst_addr); - else __ fst_s (dst_addr); - - } else if (src->is_double_fpu()) { - assert(src->fpu_regnrLo() == 0, "argument must be on TOS"); - Address dst_addr = frame_map()->address_for_slot(dest->double_stack_ix()); - if (pop_fpu_stack) __ fstp_d (dst_addr); - else __ fst_d (dst_addr); -#endif // !_LP64 - } else { ShouldNotReachHere(); } @@ -935,7 +783,6 @@ void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch if (is_reference_type(type)) { __ verify_oop(src->as_register()); -#ifdef _LP64 if (UseCompressedOops && !wide) { __ movptr(compressed_src, src->as_register()); __ encode_heap_oop(compressed_src); @@ -943,7 +790,6 @@ void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch info->oop_map()->set_narrowoop(compressed_src->as_VMReg()); } } -#endif } if (patch_code != lir_patch_none) { @@ -955,36 +801,14 @@ void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch int null_check_here = code_offset(); switch (type) { case T_FLOAT: { -#ifdef _LP64 assert(src->is_single_xmm(), "not a float"); __ movflt(as_Address(to_addr), src->as_xmm_float_reg()); -#else - if (src->is_single_xmm()) { - __ movflt(as_Address(to_addr), src->as_xmm_float_reg()); - } else { - assert(src->is_single_fpu(), "must be"); - assert(src->fpu_regnr() == 0, "argument must be on TOS"); - if (pop_fpu_stack) __ fstp_s(as_Address(to_addr)); - else __ fst_s (as_Address(to_addr)); - } -#endif // _LP64 break; } case T_DOUBLE: { -#ifdef _LP64 assert(src->is_double_xmm(), "not a double"); __ movdbl(as_Address(to_addr), src->as_xmm_double_reg()); -#else - 
if (src->is_double_xmm()) { - __ movdbl(as_Address(to_addr), src->as_xmm_double_reg()); - } else { - assert(src->is_double_fpu(), "must be"); - assert(src->fpu_regnrLo() == 0, "argument must be on TOS"); - if (pop_fpu_stack) __ fstp_d(as_Address(to_addr)); - else __ fst_d (as_Address(to_addr)); - } -#endif // _LP64 break; } @@ -1001,8 +825,7 @@ void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch // a dtrace runtime call. This can't work on 64 bit with // compressed klass ptrs: T_METADATA can be a compressed klass // ptr or a 64 bit method pointer. - LP64_ONLY(ShouldNotReachHere()); - __ movptr(as_Address(to_addr), src->as_register()); + ShouldNotReachHere(); break; case T_ADDRESS: __ movptr(as_Address(to_addr), src->as_register()); @@ -1014,35 +837,7 @@ void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch case T_LONG: { Register from_lo = src->as_register_lo(); Register from_hi = src->as_register_hi(); -#ifdef _LP64 __ movptr(as_Address_lo(to_addr), from_lo); -#else - Register base = to_addr->base()->as_register(); - Register index = noreg; - if (to_addr->index()->is_register()) { - index = to_addr->index()->as_register(); - } - if (base == from_lo || index == from_lo) { - assert(base != from_hi, "can't be"); - assert(index == noreg || (index != base && index != from_hi), "can't handle this"); - __ movl(as_Address_hi(to_addr), from_hi); - if (patch != nullptr) { - patching_epilog(patch, lir_patch_high, base, info); - patch = new PatchingStub(_masm, PatchingStub::access_field_id); - patch_code = lir_patch_low; - } - __ movl(as_Address_lo(to_addr), from_lo); - } else { - assert(index == noreg || (index != base && index != from_lo), "can't handle this"); - __ movl(as_Address_lo(to_addr), from_lo); - if (patch != nullptr) { - patching_epilog(patch, lir_patch_low, base, info); - patch = new PatchingStub(_masm, PatchingStub::access_field_id); - patch_code = lir_patch_high; - } - __ movl(as_Address_hi(to_addr), 
from_hi); - } -#endif // _LP64 break; } @@ -1091,7 +886,6 @@ void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) { Address src_addr_LO = frame_map()->address_for_slot(src->double_stack_ix(), lo_word_offset_in_bytes); Address src_addr_HI = frame_map()->address_for_slot(src->double_stack_ix(), hi_word_offset_in_bytes); __ movptr(dest->as_register_lo(), src_addr_LO); - NOT_LP64(__ movptr(dest->as_register_hi(), src_addr_HI)); } else if (dest->is_single_xmm()) { Address src_addr = frame_map()->address_for_slot(src->single_stack_ix()); @@ -1101,18 +895,6 @@ void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) { Address src_addr = frame_map()->address_for_slot(src->double_stack_ix()); __ movdbl(dest->as_xmm_double_reg(), src_addr); -#ifndef _LP64 - } else if (dest->is_single_fpu()) { - assert(dest->fpu_regnr() == 0, "dest must be TOS"); - Address src_addr = frame_map()->address_for_slot(src->single_stack_ix()); - __ fld_s(src_addr); - - } else if (dest->is_double_fpu()) { - assert(dest->fpu_regnrLo() == 0, "dest must be TOS"); - Address src_addr = frame_map()->address_for_slot(src->double_stack_ix()); - __ fld_d(src_addr); -#endif // _LP64 - } else { ShouldNotReachHere(); } @@ -1125,28 +907,13 @@ void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { __ pushptr(frame_map()->address_for_slot(src ->single_stack_ix())); __ popptr (frame_map()->address_for_slot(dest->single_stack_ix())); } else { -#ifndef _LP64 - __ pushl(frame_map()->address_for_slot(src ->single_stack_ix())); - __ popl (frame_map()->address_for_slot(dest->single_stack_ix())); -#else - //no pushl on 64bits __ movl(rscratch1, frame_map()->address_for_slot(src ->single_stack_ix())); __ movl(frame_map()->address_for_slot(dest->single_stack_ix()), rscratch1); -#endif } } else if (src->is_double_stack()) { -#ifdef _LP64 __ pushptr(frame_map()->address_for_slot(src ->double_stack_ix())); __ popptr 
(frame_map()->address_for_slot(dest->double_stack_ix())); -#else - __ pushl(frame_map()->address_for_slot(src ->double_stack_ix(), 0)); - // push and pop the part at src + wordSize, adding wordSize for the previous push - __ pushl(frame_map()->address_for_slot(src ->double_stack_ix(), 2 * wordSize)); - __ popl (frame_map()->address_for_slot(dest->double_stack_ix(), 2 * wordSize)); - __ popl (frame_map()->address_for_slot(dest->double_stack_ix(), 0)); -#endif // _LP64 - } else { ShouldNotReachHere(); } @@ -1195,13 +962,7 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch if (dest->is_single_xmm()) { __ movflt(dest->as_xmm_float_reg(), from_addr); } else { -#ifndef _LP64 - assert(dest->is_single_fpu(), "must be"); - assert(dest->fpu_regnr() == 0, "dest must be TOS"); - __ fld_s(from_addr); -#else ShouldNotReachHere(); -#endif // !LP64 } break; } @@ -1210,13 +971,7 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch if (dest->is_double_xmm()) { __ movdbl(dest->as_xmm_double_reg(), from_addr); } else { -#ifndef _LP64 - assert(dest->is_double_fpu(), "must be"); - assert(dest->fpu_regnrLo() == 0, "dest must be TOS"); - __ fld_d(from_addr); -#else ShouldNotReachHere(); -#endif // !LP64 } break; } @@ -1240,44 +995,7 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch case T_LONG: { Register to_lo = dest->as_register_lo(); Register to_hi = dest->as_register_hi(); -#ifdef _LP64 __ movptr(to_lo, as_Address_lo(addr)); -#else - Register base = addr->base()->as_register(); - Register index = noreg; - if (addr->index()->is_register()) { - index = addr->index()->as_register(); - } - if ((base == to_lo && index == to_hi) || - (base == to_hi && index == to_lo)) { - // addresses with 2 registers are only formed as a result of - // array access so this code will never have to deal with - // patches or null checks. 
- assert(info == nullptr && patch == nullptr, "must be"); - __ lea(to_hi, as_Address(addr)); - __ movl(to_lo, Address(to_hi, 0)); - __ movl(to_hi, Address(to_hi, BytesPerWord)); - } else if (base == to_lo || index == to_lo) { - assert(base != to_hi, "can't be"); - assert(index == noreg || (index != base && index != to_hi), "can't handle this"); - __ movl(to_hi, as_Address_hi(addr)); - if (patch != nullptr) { - patching_epilog(patch, lir_patch_high, base, info); - patch = new PatchingStub(_masm, PatchingStub::access_field_id); - patch_code = lir_patch_low; - } - __ movl(to_lo, as_Address_lo(addr)); - } else { - assert(index == noreg || (index != base && index != to_lo), "can't handle this"); - __ movl(to_lo, as_Address_lo(addr)); - if (patch != nullptr) { - patching_epilog(patch, lir_patch_low, base, info); - patch = new PatchingStub(_masm, PatchingStub::access_field_id); - patch_code = lir_patch_high; - } - __ movl(to_hi, as_Address_hi(addr)); - } -#endif // _LP64 break; } @@ -1327,12 +1045,9 @@ void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_Patch } if (is_reference_type(type)) { -#ifdef _LP64 if (UseCompressedOops && !wide) { __ decode_heap_oop(dest->as_register()); } -#endif - __ verify_oop(dest->as_register()); } } @@ -1426,21 +1141,11 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { switch (op->bytecode()) { case Bytecodes::_i2l: -#ifdef _LP64 __ movl2ptr(dest->as_register_lo(), src->as_register()); -#else - move_regs(src->as_register(), dest->as_register_lo()); - move_regs(src->as_register(), dest->as_register_hi()); - __ sarl(dest->as_register_hi(), 31); -#endif // LP64 break; case Bytecodes::_l2i: -#ifdef _LP64 __ movl(dest->as_register(), src->as_register_lo()); -#else - move_regs(src->as_register_lo(), dest->as_register()); -#endif break; case Bytecodes::_i2b: @@ -1458,8 +1163,6 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { __ sign_extend_short(dest->as_register()); break; - -#ifdef _LP64 case 
Bytecodes::_f2d: __ cvtss2sd(dest->as_xmm_double_reg(), src->as_xmm_float_reg()); break; @@ -1499,74 +1202,6 @@ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { case Bytecodes::_d2l: __ convert_d2l(dest->as_register_lo(), src->as_xmm_double_reg()); break; -#else - case Bytecodes::_f2d: - case Bytecodes::_d2f: - if (dest->is_single_xmm()) { - __ cvtsd2ss(dest->as_xmm_float_reg(), src->as_xmm_double_reg()); - } else if (dest->is_double_xmm()) { - __ cvtss2sd(dest->as_xmm_double_reg(), src->as_xmm_float_reg()); - } else { - assert(src->fpu() == dest->fpu(), "register must be equal"); - // do nothing (float result is rounded later through spilling) - } - break; - - case Bytecodes::_i2f: - case Bytecodes::_i2d: - if (dest->is_single_xmm()) { - __ cvtsi2ssl(dest->as_xmm_float_reg(), src->as_register()); - } else if (dest->is_double_xmm()) { - __ cvtsi2sdl(dest->as_xmm_double_reg(), src->as_register()); - } else { - assert(dest->fpu() == 0, "result must be on TOS"); - __ movl(Address(rsp, 0), src->as_register()); - __ fild_s(Address(rsp, 0)); - } - break; - - case Bytecodes::_l2f: - case Bytecodes::_l2d: - assert(!dest->is_xmm_register(), "result in xmm register not supported (no SSE instruction present)"); - assert(dest->fpu() == 0, "result must be on TOS"); - __ movptr(Address(rsp, 0), src->as_register_lo()); - __ movl(Address(rsp, BytesPerWord), src->as_register_hi()); - __ fild_d(Address(rsp, 0)); - // float result is rounded later through spilling - break; - - case Bytecodes::_f2i: - case Bytecodes::_d2i: - if (src->is_single_xmm()) { - __ cvttss2sil(dest->as_register(), src->as_xmm_float_reg()); - } else if (src->is_double_xmm()) { - __ cvttsd2sil(dest->as_register(), src->as_xmm_double_reg()); - } else { - assert(src->fpu() == 0, "input must be on TOS"); - __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc())); - __ fist_s(Address(rsp, 0)); - __ movl(dest->as_register(), Address(rsp, 0)); - __ 
fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); - } - // IA32 conversion instructions do not match JLS for overflow, underflow and NaN -> fixup in stub - assert(op->stub() != nullptr, "stub required"); - __ cmpl(dest->as_register(), 0x80000000); - __ jcc(Assembler::equal, *op->stub()->entry()); - __ bind(*op->stub()->continuation()); - break; - - case Bytecodes::_f2l: - case Bytecodes::_d2l: - assert(!src->is_xmm_register(), "input in xmm register not supported (no SSE instruction present)"); - assert(src->fpu() == 0, "input must be on TOS"); - assert(dest == FrameMap::long0_opr, "runtime stub places result in these registers"); - - // instruction sequence too long to inline it here - { - __ call(RuntimeAddress(Runtime1::entry_for(C1StubId::fpu2long_stub_id))); - } - break; -#endif // _LP64 default: ShouldNotReachHere(); } @@ -1593,7 +1228,7 @@ void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { Register len = op->len()->as_register(); - LP64_ONLY( __ movslq(len, len); ) + __ movslq(len, len); if (UseSlowPath || (!UseFastNewObjectArray && is_reference_type(op->type())) || @@ -1661,7 +1296,7 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L Register dst = op->result_opr()->as_register(); ciKlass* k = op->klass(); Register Rtmp1 = noreg; - Register tmp_load_klass = LP64_ONLY(rscratch1) NOT_LP64(noreg); + Register tmp_load_klass = rscratch1; // check if it needs to be profiled ciMethodData* md = nullptr; @@ -1723,29 +1358,19 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L if (!k->is_loaded()) { klass2reg_with_patching(k_RInfo, op->info_for_patch()); } else { -#ifdef _LP64 __ mov_metadata(k_RInfo, k->constant_encoding()); -#endif // _LP64 } __ verify_oop(obj); if (op->fast_check()) { // get object class // not a safepoint as obj null check happens earlier -#ifdef _LP64 if (UseCompressedClassPointers) { __ 
load_klass(Rtmp1, obj, tmp_load_klass); __ cmpptr(k_RInfo, Rtmp1); } else { __ cmpptr(k_RInfo, Address(obj, oopDesc::klass_offset_in_bytes())); } -#else - if (k->is_loaded()) { - __ cmpklass(Address(obj, oopDesc::klass_offset_in_bytes()), k->constant_encoding()); - } else { - __ cmpptr(k_RInfo, Address(obj, oopDesc::klass_offset_in_bytes())); - } -#endif __ jcc(Assembler::notEqual, *failure_target); // successful cast, fall through to profile or jump } else { @@ -1754,11 +1379,7 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L __ load_klass(klass_RInfo, obj, tmp_load_klass); if (k->is_loaded()) { // See if we get an immediate positive hit -#ifdef _LP64 __ cmpptr(k_RInfo, Address(klass_RInfo, k->super_check_offset())); -#else - __ cmpklass(Address(klass_RInfo, k->super_check_offset()), k->constant_encoding()); -#endif // _LP64 if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) { __ jcc(Assembler::notEqual, *failure_target); // successful cast, fall through to profile or jump @@ -1766,19 +1387,11 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L // See if we get an immediate positive hit __ jcc(Assembler::equal, *success_target); // check for self -#ifdef _LP64 __ cmpptr(klass_RInfo, k_RInfo); -#else - __ cmpklass(klass_RInfo, k->constant_encoding()); -#endif // _LP64 __ jcc(Assembler::equal, *success_target); __ push(klass_RInfo); -#ifdef _LP64 __ push(k_RInfo); -#else - __ pushklass(k->constant_encoding(), noreg); -#endif // _LP64 __ call(RuntimeAddress(Runtime1::entry_for(C1StubId::slow_subtype_check_id))); __ pop(klass_RInfo); __ pop(klass_RInfo); @@ -1807,7 +1420,7 @@ void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, L void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { - Register tmp_load_klass = LP64_ONLY(rscratch1) NOT_LP64(noreg); + Register tmp_load_klass = rscratch1; LIR_Code code = op->code(); if (code == 
lir_store_check) { Register value = op->object()->as_register(); @@ -1911,17 +1524,7 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { - if (LP64_ONLY(false &&) op->code() == lir_cas_long) { - assert(op->cmp_value()->as_register_lo() == rax, "wrong register"); - assert(op->cmp_value()->as_register_hi() == rdx, "wrong register"); - assert(op->new_value()->as_register_lo() == rbx, "wrong register"); - assert(op->new_value()->as_register_hi() == rcx, "wrong register"); - Register addr = op->addr()->as_register(); - __ lock(); - NOT_LP64(__ cmpxchg8(Address(addr, 0))); - - } else if (op->code() == lir_cas_int || op->code() == lir_cas_obj ) { - NOT_LP64(assert(op->addr()->is_single_cpu(), "must be single");) + if (op->code() == lir_cas_int || op->code() == lir_cas_obj ) { Register addr = (op->addr()->is_single_cpu() ? op->addr()->as_register() : op->addr()->as_register_lo()); Register newval = op->new_value()->as_register(); Register cmpval = op->cmp_value()->as_register(); @@ -1931,8 +1534,7 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { assert(cmpval != addr, "cmp and addr must be in different registers"); assert(newval != addr, "new value and addr must be in different registers"); - if ( op->code() == lir_cas_obj) { -#ifdef _LP64 + if (op->code() == lir_cas_obj) { if (UseCompressedOops) { __ encode_heap_oop(cmpval); __ mov(rscratch1, newval); @@ -1940,9 +1542,7 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { __ lock(); // cmpval (rax) is implicitly used by this instruction __ cmpxchgl(rscratch1, Address(addr, 0)); - } else -#endif - { + } else { __ lock(); __ cmpxchgptr(newval, Address(addr, 0)); } @@ -1951,7 +1551,6 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { __ lock(); __ cmpxchgl(newval, Address(addr, 0)); } -#ifdef _LP64 } else if (op->code() == lir_cas_long) { Register addr = 
(op->addr()->is_single_cpu() ? op->addr()->as_register() : op->addr()->as_register_lo()); Register newval = op->new_value()->as_register_lo(); @@ -1963,7 +1562,6 @@ void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { assert(newval != addr, "new value and addr must be in different registers"); __ lock(); __ cmpxchgq(newval, Address(addr, 0)); -#endif // _LP64 } else { Unimplemented(); } @@ -2006,12 +1604,10 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L assert(opr2->cpu_regnrLo() != result->cpu_regnrLo() && opr2->cpu_regnrLo() != result->cpu_regnrHi(), "opr2 already overwritten by previous move"); assert(opr2->cpu_regnrHi() != result->cpu_regnrLo() && opr2->cpu_regnrHi() != result->cpu_regnrHi(), "opr2 already overwritten by previous move"); __ cmovptr(ncond, result->as_register_lo(), opr2->as_register_lo()); - NOT_LP64(__ cmovptr(ncond, result->as_register_hi(), opr2->as_register_hi());) } else if (opr2->is_single_stack()) { __ cmovl(ncond, result->as_register(), frame_map()->address_for_slot(opr2->single_stack_ix())); } else if (opr2->is_double_stack()) { __ cmovptr(ncond, result->as_register_lo(), frame_map()->address_for_slot(opr2->double_stack_ix(), lo_word_offset_in_bytes)); - NOT_LP64(__ cmovptr(ncond, result->as_register_hi(), frame_map()->address_for_slot(opr2->double_stack_ix(), hi_word_offset_in_bytes));) } else { ShouldNotReachHere(); } @@ -2087,28 +1683,16 @@ void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr // cpu register - cpu register Register rreg_lo = right->as_register_lo(); Register rreg_hi = right->as_register_hi(); - NOT_LP64(assert_different_registers(lreg_lo, lreg_hi, rreg_lo, rreg_hi)); - LP64_ONLY(assert_different_registers(lreg_lo, rreg_lo)); + assert_different_registers(lreg_lo, rreg_lo); switch (code) { case lir_add: __ addptr(lreg_lo, rreg_lo); - NOT_LP64(__ adcl(lreg_hi, rreg_hi)); break; case lir_sub: __ subptr(lreg_lo, rreg_lo); - NOT_LP64(__ 
sbbl(lreg_hi, rreg_hi)); break; case lir_mul: -#ifdef _LP64 __ imulq(lreg_lo, rreg_lo); -#else - assert(lreg_lo == rax && lreg_hi == rdx, "must be"); - __ imull(lreg_hi, rreg_lo); - __ imull(rreg_hi, lreg_lo); - __ addl (rreg_hi, lreg_hi); - __ mull (rreg_lo); - __ addl (lreg_hi, rreg_hi); -#endif // _LP64 break; default: ShouldNotReachHere(); @@ -2116,7 +1700,6 @@ void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr } else if (right->is_constant()) { // cpu register - constant -#ifdef _LP64 jlong c = right->as_constant_ptr()->as_jlong_bits(); __ movptr(r10, (intptr_t) c); switch (code) { @@ -2129,23 +1712,6 @@ void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr default: ShouldNotReachHere(); } -#else - jint c_lo = right->as_constant_ptr()->as_jint_lo(); - jint c_hi = right->as_constant_ptr()->as_jint_hi(); - switch (code) { - case lir_add: - __ addptr(lreg_lo, c_lo); - __ adcl(lreg_hi, c_hi); - break; - case lir_sub: - __ subptr(lreg_lo, c_lo); - __ sbbl(lreg_hi, c_hi); - break; - default: - ShouldNotReachHere(); - } -#endif // _LP64 - } else { ShouldNotReachHere(); } @@ -2214,80 +1780,6 @@ void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr } } -#ifndef _LP64 - } else if (left->is_single_fpu()) { - assert(dest->is_single_fpu(), "fpu stack allocation required"); - - if (right->is_single_fpu()) { - arith_fpu_implementation(code, left->fpu_regnr(), right->fpu_regnr(), dest->fpu_regnr(), pop_fpu_stack); - - } else { - assert(left->fpu_regnr() == 0, "left must be on TOS"); - assert(dest->fpu_regnr() == 0, "dest must be on TOS"); - - Address raddr; - if (right->is_single_stack()) { - raddr = frame_map()->address_for_slot(right->single_stack_ix()); - } else if (right->is_constant()) { - address const_addr = float_constant(right->as_jfloat()); - assert(const_addr != nullptr, "incorrect float/double constant maintenance"); - // hack for now - raddr = __ 
as_Address(InternalAddress(const_addr)); - } else { - ShouldNotReachHere(); - } - - switch (code) { - case lir_add: __ fadd_s(raddr); break; - case lir_sub: __ fsub_s(raddr); break; - case lir_mul: __ fmul_s(raddr); break; - case lir_div: __ fdiv_s(raddr); break; - default: ShouldNotReachHere(); - } - } - - } else if (left->is_double_fpu()) { - assert(dest->is_double_fpu(), "fpu stack allocation required"); - - if (code == lir_mul || code == lir_div) { - // Double values require special handling for strictfp mul/div on x86 - __ fld_x(ExternalAddress(StubRoutines::x86::addr_fpu_subnormal_bias1())); - __ fmulp(left->fpu_regnrLo() + 1); - } - - if (right->is_double_fpu()) { - arith_fpu_implementation(code, left->fpu_regnrLo(), right->fpu_regnrLo(), dest->fpu_regnrLo(), pop_fpu_stack); - - } else { - assert(left->fpu_regnrLo() == 0, "left must be on TOS"); - assert(dest->fpu_regnrLo() == 0, "dest must be on TOS"); - - Address raddr; - if (right->is_double_stack()) { - raddr = frame_map()->address_for_slot(right->double_stack_ix()); - } else if (right->is_constant()) { - // hack for now - raddr = __ as_Address(InternalAddress(double_constant(right->as_jdouble()))); - } else { - ShouldNotReachHere(); - } - - switch (code) { - case lir_add: __ fadd_d(raddr); break; - case lir_sub: __ fsub_d(raddr); break; - case lir_mul: __ fmul_d(raddr); break; - case lir_div: __ fdiv_d(raddr); break; - default: ShouldNotReachHere(); - } - } - - if (code == lir_mul || code == lir_div) { - // Double values require special handling for strictfp mul/div on x86 - __ fld_x(ExternalAddress(StubRoutines::x86::addr_fpu_subnormal_bias2())); - __ fmulp(dest->fpu_regnrLo() + 1); - } -#endif // !_LP64 - } else if (left->is_single_stack() || left->is_address()) { assert(left == dest, "left and dest must be equal"); @@ -2329,77 +1821,15 @@ void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr } } -#ifndef _LP64 -void LIR_Assembler::arith_fpu_implementation(LIR_Code code, int 
left_index, int right_index, int dest_index, bool pop_fpu_stack) { - assert(pop_fpu_stack || (left_index == dest_index || right_index == dest_index), "invalid LIR"); - assert(!pop_fpu_stack || (left_index - 1 == dest_index || right_index - 1 == dest_index), "invalid LIR"); - assert(left_index == 0 || right_index == 0, "either must be on top of stack"); - - bool left_is_tos = (left_index == 0); - bool dest_is_tos = (dest_index == 0); - int non_tos_index = (left_is_tos ? right_index : left_index); - - switch (code) { - case lir_add: - if (pop_fpu_stack) __ faddp(non_tos_index); - else if (dest_is_tos) __ fadd (non_tos_index); - else __ fadda(non_tos_index); - break; - - case lir_sub: - if (left_is_tos) { - if (pop_fpu_stack) __ fsubrp(non_tos_index); - else if (dest_is_tos) __ fsub (non_tos_index); - else __ fsubra(non_tos_index); - } else { - if (pop_fpu_stack) __ fsubp (non_tos_index); - else if (dest_is_tos) __ fsubr (non_tos_index); - else __ fsuba (non_tos_index); - } - break; - - case lir_mul: - if (pop_fpu_stack) __ fmulp(non_tos_index); - else if (dest_is_tos) __ fmul (non_tos_index); - else __ fmula(non_tos_index); - break; - - case lir_div: - if (left_is_tos) { - if (pop_fpu_stack) __ fdivrp(non_tos_index); - else if (dest_is_tos) __ fdiv (non_tos_index); - else __ fdivra(non_tos_index); - } else { - if (pop_fpu_stack) __ fdivp (non_tos_index); - else if (dest_is_tos) __ fdivr (non_tos_index); - else __ fdiva (non_tos_index); - } - break; - - case lir_rem: - assert(left_is_tos && dest_is_tos && right_index == 1, "must be guaranteed by FPU stack allocation"); - __ fremr(noreg); - break; - - default: - ShouldNotReachHere(); - } -} -#endif // _LP64 - - void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr tmp, LIR_Opr dest, LIR_Op* op) { if (value->is_double_xmm()) { switch(code) { case lir_abs : { -#ifdef _LP64 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { assert(tmp->is_valid(), "need temporary"); __ vpandn(dest->as_xmm_double_reg(), 
tmp->as_xmm_double_reg(), value->as_xmm_double_reg(), 2); - } else -#endif - { + } else { if (dest->as_xmm_double_reg() != value->as_xmm_double_reg()) { __ movdbl(dest->as_xmm_double_reg(), value->as_xmm_double_reg()); } @@ -2416,15 +1846,6 @@ void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr tmp, LIR_ default : ShouldNotReachHere(); } -#ifndef _LP64 - } else if (value->is_double_fpu()) { - assert(value->fpu_regnrLo() == 0 && dest->fpu_regnrLo() == 0, "both must be on TOS"); - switch(code) { - case lir_abs : __ fabs() ; break; - case lir_sqrt : __ fsqrt(); break; - default : ShouldNotReachHere(); - } -#endif // !_LP64 } else if (code == lir_f2hf) { __ flt_to_flt16(dest->as_register(), value->as_xmm_float_reg(), tmp->as_xmm_float_reg()); } else if (code == lir_hf2f) { @@ -2469,7 +1890,6 @@ void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr Register l_lo = left->as_register_lo(); Register l_hi = left->as_register_hi(); if (right->is_constant()) { -#ifdef _LP64 __ mov64(rscratch1, right->as_constant_ptr()->as_jlong()); switch (code) { case lir_logic_and: @@ -2483,50 +1903,22 @@ void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr break; default: ShouldNotReachHere(); } -#else - int r_lo = right->as_constant_ptr()->as_jint_lo(); - int r_hi = right->as_constant_ptr()->as_jint_hi(); - switch (code) { - case lir_logic_and: - __ andl(l_lo, r_lo); - __ andl(l_hi, r_hi); - break; - case lir_logic_or: - __ orl(l_lo, r_lo); - __ orl(l_hi, r_hi); - break; - case lir_logic_xor: - __ xorl(l_lo, r_lo); - __ xorl(l_hi, r_hi); - break; - default: ShouldNotReachHere(); - } -#endif // _LP64 } else { -#ifdef _LP64 Register r_lo; if (is_reference_type(right->type())) { r_lo = right->as_register(); } else { r_lo = right->as_register_lo(); } -#else - Register r_lo = right->as_register_lo(); - Register r_hi = right->as_register_hi(); - assert(l_lo != r_hi, "overwriting registers"); -#endif switch (code) { case 
lir_logic_and: __ andptr(l_lo, r_lo); - NOT_LP64(__ andptr(l_hi, r_hi);) break; case lir_logic_or: __ orptr(l_lo, r_lo); - NOT_LP64(__ orptr(l_hi, r_hi);) break; case lir_logic_xor: __ xorptr(l_lo, r_lo); - NOT_LP64(__ xorptr(l_hi, r_hi);) break; default: ShouldNotReachHere(); } @@ -2535,19 +1927,7 @@ void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr Register dst_lo = dst->as_register_lo(); Register dst_hi = dst->as_register_hi(); -#ifdef _LP64 move_regs(l_lo, dst_lo); -#else - if (dst_lo == l_hi) { - assert(dst_hi != l_lo, "overwriting registers"); - move_regs(l_hi, dst_hi); - move_regs(l_lo, dst_lo); - } else { - assert(dst_lo != l_hi, "overwriting registers"); - move_regs(l_lo, dst_lo); - move_regs(l_hi, dst_hi); - } -#endif // _LP64 } } @@ -2675,27 +2055,11 @@ void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, Register xlo = opr1->as_register_lo(); Register xhi = opr1->as_register_hi(); if (opr2->is_double_cpu()) { -#ifdef _LP64 __ cmpptr(xlo, opr2->as_register_lo()); -#else - // cpu register - cpu register - Register ylo = opr2->as_register_lo(); - Register yhi = opr2->as_register_hi(); - __ subl(xlo, ylo); - __ sbbl(xhi, yhi); - if (condition == lir_cond_equal || condition == lir_cond_notEqual) { - __ orl(xhi, xlo); - } -#endif // _LP64 } else if (opr2->is_constant()) { // cpu register - constant 0 assert(opr2->as_jlong() == (jlong)0, "only handles zero"); -#ifdef _LP64 __ cmpptr(xlo, (int32_t)opr2->as_jlong()); -#else - assert(condition == lir_cond_equal || condition == lir_cond_notEqual, "only handles equals case"); - __ orl(xhi, xlo); -#endif // _LP64 } else { ShouldNotReachHere(); } @@ -2742,21 +2106,12 @@ void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, ShouldNotReachHere(); } -#ifndef _LP64 - } else if(opr1->is_single_fpu() || opr1->is_double_fpu()) { - assert(opr1->is_fpu_register() && opr1->fpu() == 0, "currently left-hand side must be on TOS (relax this 
restriction)"); - assert(opr2->is_fpu_register(), "both must be registers"); - __ fcmp(noreg, opr2->fpu(), op->fpu_pop_count() > 0, op->fpu_pop_count() > 1); -#endif // LP64 - } else if (opr1->is_address() && opr2->is_constant()) { LIR_Const* c = opr2->as_constant_ptr(); -#ifdef _LP64 if (is_reference_type(c->type())) { assert(condition == lir_cond_equal || condition == lir_cond_notEqual, "need to reverse"); __ movoop(rscratch1, c->as_jobject()); } -#endif // LP64 if (op->info() != nullptr) { add_debug_info_for_null_check_here(op->info()); } @@ -2765,13 +2120,9 @@ void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, if (c->type() == T_INT) { __ cmpl(as_Address(addr), c->as_jint()); } else if (is_reference_type(c->type())) { -#ifdef _LP64 // %%% Make this explode if addr isn't reachable until we figure out a // better strategy by giving noreg as the temp for as_Address __ cmpoop(rscratch1, as_Address(addr, noreg)); -#else - __ cmpoop(as_Address(addr), c->as_jobject()); -#endif // _LP64 } else { ShouldNotReachHere(); } @@ -2791,20 +2142,10 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op __ cmpsd2int(left->as_xmm_double_reg(), right->as_xmm_double_reg(), dst->as_register(), code == lir_ucmp_fd2i); } else { -#ifdef _LP64 ShouldNotReachHere(); -#else - assert(left->is_single_fpu() || left->is_double_fpu(), "must be"); - assert(right->is_single_fpu() || right->is_double_fpu(), "must match"); - - assert(left->fpu() == 0, "left must be on TOS"); - __ fcmp2int(dst->as_register(), code == lir_ucmp_fd2i, right->fpu(), - op->fpu_pop_count() > 0, op->fpu_pop_count() > 1); -#endif // LP64 } } else { assert(code == lir_cmp_l2i, "check"); -#ifdef _LP64 Label done; Register dest = dst->as_register(); __ cmpptr(left->as_register_lo(), right->as_register_lo()); @@ -2813,13 +2154,6 @@ void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Op __ setb(Assembler::notZero, dest); __ movzbl(dest, dest); 
__ bind(done); -#else - __ lcmp2int(left->as_register_hi(), - left->as_register_lo(), - right->as_register_hi(), - right->as_register_lo()); - move_regs(left->as_register_hi(), dst->as_register()); -#endif // _LP64 } } @@ -2945,22 +2279,12 @@ void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr Register lo = left->as_register_lo(); Register hi = left->as_register_hi(); assert(lo != SHIFT_count && hi != SHIFT_count, "left cannot be ECX"); -#ifdef _LP64 switch (code) { case lir_shl: __ shlptr(lo); break; case lir_shr: __ sarptr(lo); break; case lir_ushr: __ shrptr(lo); break; default: ShouldNotReachHere(); } -#else - - switch (code) { - case lir_shl: __ lshl(hi, lo); break; - case lir_shr: __ lshr(hi, lo, true); break; - case lir_ushr: __ lshr(hi, lo, false); break; - default: ShouldNotReachHere(); - } -#endif // LP64 } else { ShouldNotReachHere(); } @@ -2981,9 +2305,6 @@ void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr de default: ShouldNotReachHere(); } } else if (dest->is_double_cpu()) { -#ifndef _LP64 - Unimplemented(); -#else // first move left into dest so that left is not destroyed by the shift Register value = dest->as_register_lo(); count = count & 0x1F; // Java spec @@ -2995,7 +2316,6 @@ void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr de case lir_ushr: __ shrptr(value, count); break; default: ShouldNotReachHere(); } -#endif // _LP64 } else { ShouldNotReachHere(); } @@ -3045,7 +2365,7 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { Register dst_pos = op->dst_pos()->as_register(); Register length = op->length()->as_register(); Register tmp = op->tmp()->as_register(); - Register tmp_load_klass = LP64_ONLY(rscratch1) NOT_LP64(noreg); + Register tmp_load_klass = rscratch1; Register tmp2 = UseCompactObjectHeaders ? 
rscratch2 : noreg; CodeStub* stub = op->stub(); @@ -3070,13 +2390,11 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { // these are just temporary placements until we need to reload store_parameter(src_pos, 3); store_parameter(src, 4); - NOT_LP64(assert(src == rcx && src_pos == rdx, "mismatch in calling convention");) address copyfunc_addr = StubRoutines::generic_arraycopy(); assert(copyfunc_addr != nullptr, "generic arraycopy stub required"); // pass arguments: may push as this is not a safepoint; SP must be fix at each safepoint -#ifdef _LP64 // The arguments are in java calling convention so we can trivially shift them to C // convention assert_different_registers(c_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4); @@ -3107,21 +2425,6 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { #endif __ call(RuntimeAddress(copyfunc_addr)); #endif // _WIN64 -#else - __ push(length); - __ push(dst_pos); - __ push(dst); - __ push(src_pos); - __ push(src); - -#ifndef PRODUCT - if (PrintC1Statistics) { - __ incrementl(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt), rscratch1); - } -#endif - __ call_VM_leaf(copyfunc_addr, 5); // removes pushed parameter from the stack - -#endif // _LP64 __ testl(rax, rax); __ jcc(Assembler::equal, *stub->continuation()); @@ -3227,10 +2530,8 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { __ jcc(Assembler::less, *stub->entry()); } -#ifdef _LP64 __ movl2ptr(src_pos, src_pos); //higher 32bits must be null __ movl2ptr(dst_pos, dst_pos); //higher 32bits must be null -#endif if (flags & LIR_OpArrayCopy::type_check) { // We don't know the array types are compatible @@ -3294,21 +2595,6 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { store_parameter(src_pos, 3); store_parameter(src, 4); -#ifndef _LP64 - Address dst_klass_addr = Address(dst, oopDesc::klass_offset_in_bytes()); - __ movptr(tmp, dst_klass_addr); - __ movptr(tmp, Address(tmp, ObjArrayKlass::element_klass_offset())); - __ push(tmp); - 
__ movl(tmp, Address(tmp, Klass::super_check_offset_offset())); - __ push(tmp); - __ push(length); - __ lea(tmp, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type))); - __ push(tmp); - __ lea(tmp, Address(src, src_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type))); - __ push(tmp); - - __ call_VM_leaf(copyfunc_addr, 5); -#else __ movl2ptr(length, length); //higher 32bits must be null __ lea(c_rarg0, Address(src, src_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type))); @@ -3335,8 +2621,6 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { __ call(RuntimeAddress(copyfunc_addr)); #endif -#endif - #ifndef PRODUCT if (PrintC1Statistics) { Label failed; @@ -3392,11 +2676,9 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { // but not necessarily exactly of type default_type. Label known_ok, halt; __ mov_metadata(tmp, default_type->constant_encoding()); -#ifdef _LP64 if (UseCompressedClassPointers) { __ encode_klass_not_null(tmp, rscratch1); } -#endif if (basic_type != T_OBJECT) { __ cmp_klass(tmp, dst, tmp2); @@ -3421,21 +2703,12 @@ void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { } #endif -#ifdef _LP64 assert_different_registers(c_rarg0, dst, dst_pos, length); __ lea(c_rarg0, Address(src, src_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type))); assert_different_registers(c_rarg1, length); __ lea(c_rarg1, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type))); __ mov(c_rarg2, length); -#else - __ lea(tmp, Address(src, src_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type))); - store_parameter(tmp, 0); - __ lea(tmp, Address(dst, dst_pos, scale, arrayOopDesc::base_offset_in_bytes(basic_type))); - store_parameter(tmp, 1); - store_parameter(length, 2); -#endif // _LP64 - bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0; bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0; const char *name; @@ -3508,7 +2781,7 @@ void 
LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { ciMethod* method = op->profiled_method(); int bci = op->profiled_bci(); ciMethod* callee = op->profiled_callee(); - Register tmp_load_klass = LP64_ONLY(rscratch1) NOT_LP64(noreg); + Register tmp_load_klass = rscratch1; // Update counter for all call types ciMethodData* md = method->method_data_or_null(); @@ -3579,7 +2852,7 @@ void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { Register obj = op->obj()->as_register(); Register tmp = op->tmp()->as_pointer_register(); - Register tmp_load_klass = LP64_ONLY(rscratch1) NOT_LP64(noreg); + Register tmp_load_klass = rscratch1; Address mdo_addr = as_Address(op->mdp()->as_address_ptr()); ciKlass* exact_klass = op->exact_klass(); intptr_t current_klass = op->current_klass(); @@ -3599,17 +2872,9 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { #ifdef ASSERT if (obj == tmp) { -#ifdef _LP64 assert_different_registers(obj, rscratch1, mdo_addr.base(), mdo_addr.index()); -#else - assert_different_registers(obj, mdo_addr.base(), mdo_addr.index()); -#endif } else { -#ifdef _LP64 assert_different_registers(obj, tmp, rscratch1, mdo_addr.base(), mdo_addr.index()); -#else - assert_different_registers(obj, tmp, mdo_addr.base(), mdo_addr.index()); -#endif } #endif if (do_null) { @@ -3663,9 +2928,7 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { } else { __ load_klass(tmp, obj, tmp_load_klass); } -#ifdef _LP64 __ mov(rscratch1, tmp); // save original value before XOR -#endif __ xorptr(tmp, mdo_addr); __ testptr(tmp, TypeEntries::type_klass_mask); // klass seen before, nothing to do. 
The unknown bit may have been @@ -3678,7 +2941,6 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { if (TypeEntries::is_type_none(current_klass)) { __ testptr(mdo_addr, TypeEntries::type_mask); __ jccb(Assembler::zero, none); -#ifdef _LP64 // There is a chance that the checks above (re-reading profiling // data from memory) fail if another thread has just set the // profiling to this obj's klass @@ -3686,7 +2948,6 @@ void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { __ xorptr(tmp, mdo_addr); __ testptr(tmp, TypeEntries::type_klass_mask); __ jccb(Assembler::zero, next); -#endif } } else { assert(ciTypeEntries::valid_ciklass(current_klass) != nullptr && @@ -3780,33 +3041,16 @@ void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) { } else if (left->is_double_cpu()) { Register lo = left->as_register_lo(); -#ifdef _LP64 Register dst = dest->as_register_lo(); __ movptr(dst, lo); __ negptr(dst); -#else - Register hi = left->as_register_hi(); - __ lneg(hi, lo); - if (dest->as_register_lo() == hi) { - assert(dest->as_register_hi() != lo, "destroying register"); - move_regs(hi, dest->as_register_hi()); - move_regs(lo, dest->as_register_lo()); - } else { - move_regs(lo, dest->as_register_lo()); - move_regs(hi, dest->as_register_hi()); - } -#endif // _LP64 } else if (dest->is_single_xmm()) { -#ifdef _LP64 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { assert(tmp->is_valid(), "need temporary"); assert_different_registers(left->as_xmm_float_reg(), tmp->as_xmm_float_reg()); __ vpxor(dest->as_xmm_float_reg(), tmp->as_xmm_float_reg(), left->as_xmm_float_reg(), 2); - } - else -#endif - { + } else { assert(!tmp->is_valid(), "do not need temporary"); if (left->as_xmm_float_reg() != dest->as_xmm_float_reg()) { __ movflt(dest->as_xmm_float_reg(), left->as_xmm_float_reg()); @@ -3816,15 +3060,11 @@ void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) { rscratch1); } } else if (dest->is_double_xmm()) { -#ifdef _LP64 if 
(UseAVX > 2 && !VM_Version::supports_avx512vl()) { assert(tmp->is_valid(), "need temporary"); assert_different_registers(left->as_xmm_double_reg(), tmp->as_xmm_double_reg()); __ vpxor(dest->as_xmm_double_reg(), tmp->as_xmm_double_reg(), left->as_xmm_double_reg(), 2); - } - else -#endif - { + } else { assert(!tmp->is_valid(), "do not need temporary"); if (left->as_xmm_double_reg() != dest->as_xmm_double_reg()) { __ movdbl(dest->as_xmm_double_reg(), left->as_xmm_double_reg()); @@ -3833,12 +3073,6 @@ void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) { ExternalAddress((address)double_signflip_pool), rscratch1); } -#ifndef _LP64 - } else if (left->is_single_fpu() || left->is_double_fpu()) { - assert(left->fpu() == 0, "arg must be on TOS"); - assert(dest->fpu() == 0, "dest must be TOS"); - __ fchs(); -#endif // !_LP64 } else { ShouldNotReachHere(); @@ -3885,13 +3119,7 @@ void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, if (src->is_double_xmm()) { if (dest->is_double_cpu()) { -#ifdef _LP64 __ movdq(dest->as_register_lo(), src->as_xmm_double_reg()); -#else - __ movdl(dest->as_register_lo(), src->as_xmm_double_reg()); - __ psrlq(src->as_xmm_double_reg(), 32); - __ movdl(dest->as_register_hi(), src->as_xmm_double_reg()); -#endif // _LP64 } else if (dest->is_double_stack()) { __ movdbl(frame_map()->address_for_slot(dest->double_stack_ix()), src->as_xmm_double_reg()); } else if (dest->is_address()) { @@ -3909,28 +3137,6 @@ void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, ShouldNotReachHere(); } -#ifndef _LP64 - } else if (src->is_double_fpu()) { - assert(src->fpu_regnrLo() == 0, "must be TOS"); - if (dest->is_double_stack()) { - __ fistp_d(frame_map()->address_for_slot(dest->double_stack_ix())); - } else if (dest->is_address()) { - __ fistp_d(as_Address(dest->as_address_ptr())); - } else { - ShouldNotReachHere(); - } - - } else if (dest->is_double_fpu()) { - assert(dest->fpu_regnrLo() == 0, "must 
be TOS"); - if (src->is_double_stack()) { - __ fild_d(frame_map()->address_for_slot(src->double_stack_ix())); - } else if (src->is_address()) { - __ fild_d(as_Address(src->as_address_ptr())); - } else { - ShouldNotReachHere(); - } -#endif // !_LP64 - } else { ShouldNotReachHere(); } @@ -4013,12 +3219,7 @@ void LIR_Assembler::on_spin_wait() { void LIR_Assembler::get_thread(LIR_Opr result_reg) { assert(result_reg->is_register(), "check"); -#ifdef _LP64 - // __ get_thread(result_reg->as_register_lo()); __ mov(result_reg->as_register(), r15_thread); -#else - __ get_thread(result_reg->as_register()); -#endif // _LP64 } @@ -4039,7 +3240,6 @@ void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr } else if (data->is_oop()) { assert (code == lir_xchg, "xadd for oops"); Register obj = data->as_register(); -#ifdef _LP64 if (UseCompressedOops) { __ encode_heap_oop(obj); __ xchgl(obj, as_Address(src->as_address_ptr())); @@ -4047,11 +3247,7 @@ void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr } else { __ xchgptr(obj, as_Address(src->as_address_ptr())); } -#else - __ xchgl(obj, as_Address(src->as_address_ptr())); -#endif } else if (data->type() == T_LONG) { -#ifdef _LP64 assert(data->as_register_lo() == data->as_register_hi(), "should be a single register"); if (code == lir_xadd) { __ lock(); @@ -4059,9 +3255,6 @@ void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr } else { __ xchgq(data->as_register_lo(), as_Address(src->as_address_ptr())); } -#else - ShouldNotReachHere(); -#endif } else { ShouldNotReachHere(); } diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.hpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.hpp index 7dae8ba8a5e82..8524dc90276f0 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.hpp @@ -46,9 +46,9 @@ Register recv, Label* update_done); enum { - _call_stub_size = NOT_LP64(15) LP64_ONLY(28), + _call_stub_size = 28, 
_exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175), - _deopt_handler_size = NOT_LP64(10) LP64_ONLY(17) + _deopt_handler_size = 17 }; public: @@ -58,13 +58,4 @@ void store_parameter(jobject c, int offset_from_esp_in_words); void store_parameter(Metadata* c, int offset_from_esp_in_words); -#ifndef _LP64 - void arith_fpu_implementation(LIR_Code code, int left_index, int right_index, int dest_index, bool pop_fpu_stack); - - void fpop(); - void fxch(int i); - void fld(int i); - void ffree(int i); -#endif // !_LP64 - #endif // CPU_X86_C1_LIRASSEMBLER_X86_HPP diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp index 36e2021138f2e..f20f2057f9062 100644 --- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp @@ -93,13 +93,8 @@ LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) { case intTag: opr = FrameMap::rax_opr; break; case objectTag: opr = FrameMap::rax_oop_opr; break; case longTag: opr = FrameMap::long0_opr; break; -#ifdef _LP64 case floatTag: opr = FrameMap::xmm0_float_opr; break; case doubleTag: opr = FrameMap::xmm0_double_opr; break; -#else - case floatTag: opr = UseSSE >= 1 ? FrameMap::xmm0_float_opr : FrameMap::fpu0_float_opr; break; - case doubleTag: opr = UseSSE >= 2 ? 
FrameMap::xmm0_double_opr : FrameMap::fpu0_double_opr; break; -#endif // _LP64 case addressTag: default: ShouldNotReachHere(); return LIR_OprFact::illegalOpr; } @@ -148,7 +143,6 @@ bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { LIR_Opr LIRGenerator::safepoint_poll_register() { - NOT_LP64( return new_register(T_ADDRESS); ) return LIR_OprFact::illegalOpr; } @@ -158,7 +152,6 @@ LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, assert(base->is_register(), "must be"); if (index->is_constant()) { LIR_Const *constant = index->as_constant_ptr(); -#ifdef _LP64 jlong c; if (constant->type() == T_INT) { c = (jlong(index->as_jint()) << shift) + disp; @@ -173,11 +166,6 @@ LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, __ move(index, tmp); return new LIR_Address(base, tmp, type); } -#else - return new LIR_Address(base, - ((intx)(constant->as_jint()) << shift) + disp, - type); -#endif } else { return new LIR_Address(base, index, (LIR_Address::Scale)shift, disp, type); } @@ -191,7 +179,6 @@ LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_o LIR_Address* addr; if (index_opr->is_constant()) { int elem_size = type2aelembytes(type); -#ifdef _LP64 jint index = index_opr->as_jint(); jlong disp = offset_in_bytes + (jlong)(index) * elem_size; if (disp > max_jint) { @@ -203,28 +190,12 @@ LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_o } else { addr = new LIR_Address(array_opr, (intx)disp, type); } -#else - // A displacement overflow can also occur for x86 but that is not a problem due to the 32-bit address range! - // Let's assume an array 'a' and an access with displacement 'disp'. When disp overflows, then "a + disp" will - // always be negative (i.e. underflows the 32-bit address range): - // Let N = 2^32: a + signed_overflow(disp) = a + disp - N. - // "a + disp" is always smaller than N. 
If an index was chosen which would point to an address beyond N, then - // range checks would catch that and throw an exception. Thus, a + disp < 0 holds which means that it always - // underflows the 32-bit address range: - // unsigned_underflow(a + signed_overflow(disp)) = unsigned_underflow(a + disp - N) - // = (a + disp - N) + N = a + disp - // This shows that we still end up at the correct address with a displacement overflow due to the 32-bit address - // range limitation. This overflow only needs to be handled if addresses can be larger as on 64-bit platforms. - addr = new LIR_Address(array_opr, offset_in_bytes + (intx)(index_opr->as_jint()) * elem_size, type); -#endif // _LP64 } else { -#ifdef _LP64 if (index_opr->type() == T_INT) { LIR_Opr tmp = new_register(T_LONG); __ convert(Bytecodes::_i2l, index_opr, tmp); index_opr = tmp; } -#endif // _LP64 addr = new LIR_Address(array_opr, index_opr, LIR_Address::scale(type), @@ -345,7 +316,6 @@ void LIRGenerator::do_NegateOp(NegateOp* x) { LIR_Opr reg = rlock(x); LIR_Opr tmp = LIR_OprFact::illegalOpr; -#ifdef _LP64 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { if (x->type()->tag() == doubleTag) { tmp = new_register(T_DOUBLE); @@ -356,10 +326,9 @@ void LIRGenerator::do_NegateOp(NegateOp* x) { __ move(LIR_OprFact::floatConst(-0.0), tmp); } } -#endif __ negate(value.result(), reg, tmp); - set_result(x, round_item(reg)); + set_result(x, reg); } // for _fadd, _fmul, _fsub, _fdiv, _frem @@ -377,34 +346,12 @@ void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { left.dont_load_item(); } -#ifndef _LP64 - // do not load right operand if it is a constant. 
only 0 and 1 are - // loaded because there are special instructions for loading them - // without memory access (not needed for SSE2 instructions) - bool must_load_right = false; - if (right.is_constant()) { - LIR_Const* c = right.result()->as_constant_ptr(); - assert(c != nullptr, "invalid constant"); - assert(c->type() == T_FLOAT || c->type() == T_DOUBLE, "invalid type"); - - if (c->type() == T_FLOAT) { - must_load_right = UseSSE < 1 && (c->is_one_float() || c->is_zero_float()); - } else { - must_load_right = UseSSE < 2 && (c->is_one_double() || c->is_zero_double()); - } - } -#endif // !LP64 - if (must_load_both) { // frem and drem destroy also right operand, so move it to a new register right.set_destroys_register(); right.load_item(); } else if (right.is_register()) { right.load_item(); -#ifndef _LP64 - } else if (must_load_right) { - right.load_item(); -#endif // !LP64 } else { right.dont_load_item(); } @@ -414,7 +361,6 @@ void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { tmp = new_register(T_DOUBLE); } -#ifdef _LP64 if (x->op() == Bytecodes::_frem || x->op() == Bytecodes::_drem) { // frem and drem are implemented as a direct call into the runtime. LIRItem left(x->x(), this); @@ -447,29 +393,8 @@ void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { __ move(result_reg, result); } else { arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), tmp); - set_result(x, round_item(reg)); - } -#else - if ((UseSSE >= 1 && x->op() == Bytecodes::_frem) || (UseSSE >= 2 && x->op() == Bytecodes::_drem)) { - // special handling for frem and drem: no SSE instruction, so must use FPU with temporary fpu stack slots - LIR_Opr fpu0, fpu1; - if (x->op() == Bytecodes::_frem) { - fpu0 = LIR_OprFact::single_fpu(0); - fpu1 = LIR_OprFact::single_fpu(1); - } else { - fpu0 = LIR_OprFact::double_fpu(0); - fpu1 = LIR_OprFact::double_fpu(1); - } - __ move(right.result(), fpu1); // order of left and right operand is important! 
- __ move(left.result(), fpu0); - __ rem (fpu0, fpu1, fpu0); - __ move(fpu0, reg); - - } else { - arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), tmp); + set_result(x, reg); } - set_result(x, round_item(reg)); -#endif // _LP64 } @@ -759,7 +684,7 @@ LIR_Opr LIRGenerator::atomic_xchg(BasicType type, LIR_Opr addr, LIRItem& value) value.load_item(); // Because we want a 2-arg form of xchg and xadd __ move(value.result(), result); - assert(type == T_INT || is_oop LP64_ONLY( || type == T_LONG ), "unexpected type"); + assert(type == T_INT || is_oop || type == T_LONG, "unexpected type"); __ xchg(addr, result, result, LIR_OprFact::illegalOpr); return result; } @@ -769,7 +694,7 @@ LIR_Opr LIRGenerator::atomic_add(BasicType type, LIR_Opr addr, LIRItem& value) { value.load_item(); // Because we want a 2-arg form of xchg and xadd __ move(value.result(), result); - assert(type == T_INT LP64_ONLY( || type == T_LONG ), "unexpected type"); + assert(type == T_INT || type == T_LONG, "unexpected type"); __ xadd(addr, result, result, LIR_OprFact::illegalOpr); return result; } @@ -807,11 +732,7 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog || x->id() == vmIntrinsics::_dpow || x->id() == vmIntrinsics::_dcos || x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan || - x->id() == vmIntrinsics::_dlog10 -#ifdef _LP64 - || x->id() == vmIntrinsics::_dtanh -#endif - ) { + x->id() == vmIntrinsics::_dlog10 || x->id() == vmIntrinsics::_dtanh) { do_LibmIntrinsic(x); return; } @@ -819,24 +740,17 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { LIRItem value(x->argument_at(0), this); bool use_fpu = false; -#ifndef _LP64 - if (UseSSE < 2) { - value.set_destroys_register(); - } -#endif // !LP64 value.load_item(); LIR_Opr calc_input = value.result(); LIR_Opr calc_result = rlock_result(x); LIR_Opr tmp = LIR_OprFact::illegalOpr; -#ifdef _LP64 if (UseAVX > 2 && (!VM_Version::supports_avx512vl()) 
&& (x->id() == vmIntrinsics::_dabs)) { tmp = new_register(T_DOUBLE); __ move(LIR_OprFact::doubleConst(-0.0), tmp); } -#endif if (x->id() == vmIntrinsics::_floatToFloat16) { tmp = new_register(T_FLOAT); __ move(LIR_OprFact::floatConst(-0.0), tmp); @@ -892,62 +806,6 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { value.load_item_force(cc->at(0)); } -#ifndef _LP64 - LIR_Opr tmp = FrameMap::fpu0_double_opr; - result_reg = tmp; - switch(x->id()) { - case vmIntrinsics::_dexp: - if (StubRoutines::dexp() != nullptr) { - __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); - } else { - __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args()); - } - break; - case vmIntrinsics::_dlog: - if (StubRoutines::dlog() != nullptr) { - __ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args()); - } else { - __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args()); - } - break; - case vmIntrinsics::_dlog10: - if (StubRoutines::dlog10() != nullptr) { - __ call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args()); - } else { - __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args()); - } - break; - case vmIntrinsics::_dpow: - if (StubRoutines::dpow() != nullptr) { - __ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args()); - } else { - __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args()); - } - break; - case vmIntrinsics::_dsin: - if (VM_Version::supports_sse2() && StubRoutines::dsin() != nullptr) { - __ call_runtime_leaf(StubRoutines::dsin(), getThreadTemp(), result_reg, cc->args()); - } else { - __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), getThreadTemp(), result_reg, cc->args()); - } - break; - case vmIntrinsics::_dcos: - if 
(VM_Version::supports_sse2() && StubRoutines::dcos() != nullptr) { - __ call_runtime_leaf(StubRoutines::dcos(), getThreadTemp(), result_reg, cc->args()); - } else { - __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args()); - } - break; - case vmIntrinsics::_dtan: - if (StubRoutines::dtan() != nullptr) { - __ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args()); - } else { - __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args()); - } - break; - default: ShouldNotReachHere(); - } -#else switch (x->id()) { case vmIntrinsics::_dexp: if (StubRoutines::dexp() != nullptr) { @@ -1006,7 +864,6 @@ void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { break; default: ShouldNotReachHere(); } -#endif // _LP64 __ move(result_reg, calc_result); } @@ -1039,20 +896,6 @@ void LIRGenerator::do_ArrayCopy(Intrinsic* x) { flags = 0; } -#ifndef _LP64 - src.load_item_force (FrameMap::rcx_oop_opr); - src_pos.load_item_force (FrameMap::rdx_opr); - dst.load_item_force (FrameMap::rax_oop_opr); - dst_pos.load_item_force (FrameMap::rbx_opr); - length.load_item_force (FrameMap::rdi_opr); - LIR_Opr tmp = (FrameMap::rsi_opr); - - if (expected_type != nullptr && flags == 0) { - FrameMap* f = Compilation::current()->frame_map(); - f->update_reserved_argument_area_size(3 * BytesPerWord); - } -#else - // The java calling convention will give us enough registers // so that on the stub side the args will be perfect already. 
// On the other slow/special case side we call C and the arg @@ -1068,7 +911,6 @@ void LIRGenerator::do_ArrayCopy(Intrinsic* x) { length.load_item_force (FrameMap::as_opr(j_rarg4)); LIR_Opr tmp = FrameMap::as_opr(j_rarg5); -#endif // LP64 set_no_result(x); @@ -1110,18 +952,11 @@ void LIRGenerator::do_update_CRC32(Intrinsic* x) { } LIR_Opr base_op = buf.result(); -#ifndef _LP64 - if (!is_updateBytes) { // long b raw address - base_op = new_register(T_INT); - __ convert(Bytecodes::_l2i, buf.result(), base_op); - } -#else if (index->is_valid()) { LIR_Opr tmp = new_register(T_LONG); __ convert(Bytecodes::_i2l, index, tmp); index = tmp; } -#endif LIR_Address* a = new LIR_Address(base_op, index, @@ -1190,14 +1025,6 @@ void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) { } LIR_Opr result_b = b.result(); -#ifndef _LP64 - result_a = new_register(T_INT); - __ convert(Bytecodes::_l2i, a.result(), result_a); - result_b = new_register(T_INT); - __ convert(Bytecodes::_l2i, b.result(), result_b); -#endif - - LIR_Address* addr_a = new LIR_Address(result_a, result_aOffset, constant_aOffset, @@ -1231,22 +1058,8 @@ void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) { __ move(result_reg, result); } -#ifndef _LP64 -// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f -// _i2b, _i2c, _i2s -static LIR_Opr fixed_register_for(BasicType type) { - switch (type) { - case T_FLOAT: return FrameMap::fpu0_float_opr; - case T_DOUBLE: return FrameMap::fpu0_double_opr; - case T_INT: return FrameMap::rax_opr; - case T_LONG: return FrameMap::long0_opr; - default: ShouldNotReachHere(); return LIR_OprFact::illegalOpr; - } -} -#endif void LIRGenerator::do_Convert(Convert* x) { -#ifdef _LP64 LIRItem value(x->value(), this); value.load_item(); LIR_Opr input = value.result(); @@ -1254,66 +1067,6 @@ void LIRGenerator::do_Convert(Convert* x) { __ convert(x->op(), input, result); assert(result->is_virtual(), "result must be virtual register"); set_result(x, result); -#else - // 
flags that vary for the different operations and different SSE-settings - bool fixed_input = false, fixed_result = false, round_result = false, needs_stub = false; - - switch (x->op()) { - case Bytecodes::_i2l: // fall through - case Bytecodes::_l2i: // fall through - case Bytecodes::_i2b: // fall through - case Bytecodes::_i2c: // fall through - case Bytecodes::_i2s: fixed_input = false; fixed_result = false; round_result = false; needs_stub = false; break; - - case Bytecodes::_f2d: fixed_input = UseSSE == 1; fixed_result = false; round_result = false; needs_stub = false; break; - case Bytecodes::_d2f: fixed_input = false; fixed_result = UseSSE == 1; round_result = UseSSE < 1; needs_stub = false; break; - case Bytecodes::_i2f: fixed_input = false; fixed_result = false; round_result = UseSSE < 1; needs_stub = false; break; - case Bytecodes::_i2d: fixed_input = false; fixed_result = false; round_result = false; needs_stub = false; break; - case Bytecodes::_f2i: fixed_input = false; fixed_result = false; round_result = false; needs_stub = true; break; - case Bytecodes::_d2i: fixed_input = false; fixed_result = false; round_result = false; needs_stub = true; break; - case Bytecodes::_l2f: fixed_input = false; fixed_result = UseSSE >= 1; round_result = UseSSE < 1; needs_stub = false; break; - case Bytecodes::_l2d: fixed_input = false; fixed_result = UseSSE >= 2; round_result = UseSSE < 2; needs_stub = false; break; - case Bytecodes::_f2l: fixed_input = true; fixed_result = true; round_result = false; needs_stub = false; break; - case Bytecodes::_d2l: fixed_input = true; fixed_result = true; round_result = false; needs_stub = false; break; - default: ShouldNotReachHere(); - } - - LIRItem value(x->value(), this); - value.load_item(); - LIR_Opr input = value.result(); - LIR_Opr result = rlock(x); - - // arguments of lir_convert - LIR_Opr conv_input = input; - LIR_Opr conv_result = result; - ConversionStub* stub = nullptr; - - if (fixed_input) { - conv_input = 
fixed_register_for(input->type()); - __ move(input, conv_input); - } - - assert(fixed_result == false || round_result == false, "cannot set both"); - if (fixed_result) { - conv_result = fixed_register_for(result->type()); - } else if (round_result) { - result = new_register(result->type()); - set_vreg_flag(result, must_start_in_memory); - } - - if (needs_stub) { - stub = new ConversionStub(x->op(), conv_input, conv_result); - } - - __ convert(x->op(), conv_input, conv_result, stub); - - if (result != conv_result) { - __ move(conv_result, result); - } - - assert(result->is_virtual(), "result must be virtual register"); - set_result(x, result); -#endif // _LP64 } @@ -1574,13 +1327,7 @@ void LIRGenerator::do_If(If* x) { LIR_Opr LIRGenerator::getThreadPointer() { -#ifdef _LP64 return FrameMap::as_pointer_opr(r15_thread); -#else - LIR_Opr result = new_register(T_INT); - __ get_thread(result); - return result; -#endif // } void LIRGenerator::trace_block_entry(BlockBegin* block) { @@ -1625,12 +1372,6 @@ void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, LIR_Opr temp_double = new_register(T_DOUBLE); __ volatile_move(LIR_OprFact::address(address), temp_double, T_LONG, info); __ volatile_move(temp_double, result, T_LONG); -#ifndef _LP64 - if (UseSSE < 2) { - // no spill slot needed in SSE2 mode because xmm->cpu register move is possible - set_vreg_flag(result, must_start_in_memory); - } -#endif // !LP64 } else { __ load(address, result, info); } diff --git a/src/hotspot/cpu/x86/c1_LIR_x86.cpp b/src/hotspot/cpu/x86/c1_LIR_x86.cpp index 6bdbfd1824caa..5df47bfcde7c0 100644 --- a/src/hotspot/cpu/x86/c1_LIR_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIR_x86.cpp @@ -59,16 +59,9 @@ LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) { #ifndef PRODUCT void LIR_Address::verify() const { -#ifdef _LP64 assert(base()->is_cpu_register(), "wrong base operand"); assert(index()->is_illegal() || index()->is_double_cpu(), "wrong index operand"); assert(base()->type() == 
T_ADDRESS || base()->type() == T_OBJECT || base()->type() == T_LONG || base()->type() == T_METADATA, "wrong type for addresses"); -#else - assert(base()->is_single_cpu(), "wrong base operand"); - assert(index()->is_illegal() || index()->is_single_cpu(), "wrong index operand"); - assert(base()->type() == T_ADDRESS || base()->type() == T_OBJECT || base()->type() == T_INT || base()->type() == T_METADATA, - "wrong type for addresses"); -#endif } #endif // PRODUCT diff --git a/src/hotspot/cpu/x86/c1_LinearScan_x86.cpp b/src/hotspot/cpu/x86/c1_LinearScan_x86.cpp deleted file mode 100644 index 917031faf8962..0000000000000 --- a/src/hotspot/cpu/x86/c1_LinearScan_x86.cpp +++ /dev/null @@ -1,1127 +0,0 @@ -/* - * Copyright (c) 2005, 2023, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- * - */ - -#include "precompiled.hpp" -#include "c1/c1_Instruction.hpp" -#include "c1/c1_LinearScan.hpp" -#include "utilities/bitMap.inline.hpp" - - -#ifdef _LP64 -void LinearScan::allocate_fpu_stack() { - // No FPU stack used on x86-64 -} -#else -//---------------------------------------------------------------------- -// Allocation of FPU stack slots (Intel x86 only) -//---------------------------------------------------------------------- - -void LinearScan::allocate_fpu_stack() { - // First compute which FPU registers are live at the start of each basic block - // (To minimize the amount of work we have to do if we have to merge FPU stacks) - if (ComputeExactFPURegisterUsage) { - Interval* intervals_in_register, *intervals_in_memory; - create_unhandled_lists(&intervals_in_register, &intervals_in_memory, is_in_fpu_register, nullptr); - - // ignore memory intervals by overwriting intervals_in_memory - // the dummy interval is needed to enforce the walker to walk until the given id: - // without it, the walker stops when the unhandled-list is empty -> live information - // beyond this point would be incorrect. - Interval* dummy_interval = new Interval(any_reg); - dummy_interval->add_range(max_jint - 2, max_jint - 1); - dummy_interval->set_next(Interval::end()); - intervals_in_memory = dummy_interval; - - IntervalWalker iw(this, intervals_in_register, intervals_in_memory); - - const int num_blocks = block_count(); - for (int i = 0; i < num_blocks; i++) { - BlockBegin* b = block_at(i); - - // register usage is only needed for merging stacks -> compute only - // when more than one predecessor. - // the block must not have any spill moves at the beginning (checked by assertions) - // spill moves would use intervals that are marked as handled and so the usage bit - // would been set incorrectly - - // NOTE: the check for number_of_preds > 1 is necessary. A block with only one - // predecessor may have spill moves at the begin of the block. 
- // If an interval ends at the current instruction id, it is not possible - // to decide if the register is live or not at the block begin -> the - // register information would be incorrect. - if (b->number_of_preds() > 1) { - int id = b->first_lir_instruction_id(); - ResourceBitMap regs(FrameMap::nof_fpu_regs); - - iw.walk_to(id); // walk after the first instruction (always a label) of the block - assert(iw.current_position() == id, "did not walk completely to id"); - - // Only consider FPU values in registers - Interval* interval = iw.active_first(fixedKind); - while (interval != Interval::end()) { - int reg = interval->assigned_reg(); - assert(reg >= pd_first_fpu_reg && reg <= pd_last_fpu_reg, "no fpu register"); - assert(interval->assigned_regHi() == -1, "must not have hi register (doubles stored in one register)"); - assert(interval->from() <= id && id < interval->to(), "interval out of range"); - -#ifndef PRODUCT - if (TraceFPURegisterUsage) { - tty->print("fpu reg %d is live because of ", reg - pd_first_fpu_reg); interval->print(); - } -#endif - - regs.set_bit(reg - pd_first_fpu_reg); - interval = interval->next(); - } - - b->set_fpu_register_usage(regs); - -#ifndef PRODUCT - if (TraceFPURegisterUsage) { - tty->print("FPU regs for block %d, LIR instr %d): ", b->block_id(), id); regs.print_on(tty); tty->cr(); - } -#endif - } - } - } - - FpuStackAllocator alloc(ir()->compilation(), this); - _fpu_stack_allocator = &alloc; - alloc.allocate(); - _fpu_stack_allocator = nullptr; -} - - -FpuStackAllocator::FpuStackAllocator(Compilation* compilation, LinearScan* allocator) - : _compilation(compilation) - , _allocator(allocator) - , _lir(nullptr) - , _pos(-1) - , _sim(compilation) - , _temp_sim(compilation) -{} - -void FpuStackAllocator::allocate() { - int num_blocks = allocator()->block_count(); - for (int i = 0; i < num_blocks; i++) { - // Set up to process block - BlockBegin* block = allocator()->block_at(i); - intArray* fpu_stack_state = 
block->fpu_stack_state(); - -#ifndef PRODUCT - if (TraceFPUStack) { - tty->cr(); - tty->print_cr("------- Begin of new Block %d -------", block->block_id()); - } -#endif - - assert(fpu_stack_state != nullptr || - block->end()->as_Base() != nullptr || - block->is_set(BlockBegin::exception_entry_flag), - "FPU stack state must be present due to linear-scan order for FPU stack allocation"); - // note: exception handler entries always start with an empty fpu stack - // because stack merging would be too complicated - - if (fpu_stack_state != nullptr) { - sim()->read_state(fpu_stack_state); - } else { - sim()->clear(); - } - -#ifndef PRODUCT - if (TraceFPUStack) { - tty->print("Reading FPU state for block %d:", block->block_id()); - sim()->print(); - tty->cr(); - } -#endif - - allocate_block(block); - CHECK_BAILOUT(); - } -} - -void FpuStackAllocator::allocate_block(BlockBegin* block) { - bool processed_merge = false; - LIR_OpList* insts = block->lir()->instructions_list(); - set_lir(block->lir()); - set_pos(0); - - - // Note: insts->length() may change during loop - while (pos() < insts->length()) { - LIR_Op* op = insts->at(pos()); - _debug_information_computed = false; - -#ifndef PRODUCT - if (TraceFPUStack) { - op->print(); - } - check_invalid_lir_op(op); -#endif - - LIR_OpBranch* branch = op->as_OpBranch(); - LIR_Op1* op1 = op->as_Op1(); - LIR_Op2* op2 = op->as_Op2(); - LIR_OpCall* opCall = op->as_OpCall(); - - if (branch != nullptr && branch->block() != nullptr) { - if (!processed_merge) { - // propagate stack at first branch to a successor - processed_merge = true; - bool required_merge = merge_fpu_stack_with_successors(block); - - assert(!required_merge || branch->cond() == lir_cond_always, "splitting of critical edges should prevent FPU stack mismatches at cond branches"); - } - - } else if (op1 != nullptr) { - handle_op1(op1); - } else if (op2 != nullptr) { - handle_op2(op2); - } else if (opCall != nullptr) { - handle_opCall(opCall); - } - - 
compute_debug_information(op); - - set_pos(1 + pos()); - } - - // Propagate stack when block does not end with branch - if (!processed_merge) { - merge_fpu_stack_with_successors(block); - } -} - - -void FpuStackAllocator::compute_debug_information(LIR_Op* op) { - if (!_debug_information_computed && op->id() != -1 && allocator()->has_info(op->id())) { - visitor.visit(op); - - // exception handling - if (allocator()->compilation()->has_exception_handlers()) { - XHandlers* xhandlers = visitor.all_xhandler(); - int n = xhandlers->length(); - for (int k = 0; k < n; k++) { - allocate_exception_handler(xhandlers->handler_at(k)); - } - } else { - assert(visitor.all_xhandler()->length() == 0, "missed exception handler"); - } - - // compute debug information - int n = visitor.info_count(); - assert(n > 0, "should not visit operation otherwise"); - - for (int j = 0; j < n; j++) { - CodeEmitInfo* info = visitor.info_at(j); - // Compute debug information - allocator()->compute_debug_info(info, op->id()); - } - } - _debug_information_computed = true; -} - -void FpuStackAllocator::allocate_exception_handler(XHandler* xhandler) { - if (!sim()->is_empty()) { - LIR_List* old_lir = lir(); - int old_pos = pos(); - intArray* old_state = sim()->write_state(); - -#ifndef PRODUCT - if (TraceFPUStack) { - tty->cr(); - tty->print_cr("------- begin of exception handler -------"); - } -#endif - - if (xhandler->entry_code() == nullptr) { - // need entry code to clear FPU stack - LIR_List* entry_code = new LIR_List(_compilation); - entry_code->jump(xhandler->entry_block()); - xhandler->set_entry_code(entry_code); - } - - LIR_OpList* insts = xhandler->entry_code()->instructions_list(); - set_lir(xhandler->entry_code()); - set_pos(0); - - // Note: insts->length() may change during loop - while (pos() < insts->length()) { - LIR_Op* op = insts->at(pos()); - -#ifndef PRODUCT - if (TraceFPUStack) { - op->print(); - } - check_invalid_lir_op(op); -#endif - - switch (op->code()) { - case lir_move: - 
assert(op->as_Op1() != nullptr, "must be LIR_Op1"); - assert(pos() != insts->length() - 1, "must not be last operation"); - - handle_op1((LIR_Op1*)op); - break; - - case lir_branch: - assert(op->as_OpBranch()->cond() == lir_cond_always, "must be unconditional branch"); - assert(pos() == insts->length() - 1, "must be last operation"); - - // remove all remaining dead registers from FPU stack - clear_fpu_stack(LIR_OprFact::illegalOpr); - break; - - default: - // other operations not allowed in exception entry code - ShouldNotReachHere(); - } - - set_pos(pos() + 1); - } - -#ifndef PRODUCT - if (TraceFPUStack) { - tty->cr(); - tty->print_cr("------- end of exception handler -------"); - } -#endif - - set_lir(old_lir); - set_pos(old_pos); - sim()->read_state(old_state); - } -} - - -int FpuStackAllocator::fpu_num(LIR_Opr opr) { - assert(opr->is_fpu_register() && !opr->is_xmm_register(), "shouldn't call this otherwise"); - return opr->is_single_fpu() ? opr->fpu_regnr() : opr->fpu_regnrLo(); -} - -int FpuStackAllocator::tos_offset(LIR_Opr opr) { - return sim()->offset_from_tos(fpu_num(opr)); -} - - -LIR_Opr FpuStackAllocator::to_fpu_stack(LIR_Opr opr) { - assert(opr->is_fpu_register() && !opr->is_xmm_register(), "shouldn't call this otherwise"); - - int stack_offset = tos_offset(opr); - if (opr->is_single_fpu()) { - return LIR_OprFact::single_fpu(stack_offset)->make_fpu_stack_offset(); - } else { - assert(opr->is_double_fpu(), "shouldn't call this otherwise"); - return LIR_OprFact::double_fpu(stack_offset)->make_fpu_stack_offset(); - } -} - -LIR_Opr FpuStackAllocator::to_fpu_stack_top(LIR_Opr opr, bool dont_check_offset) { - assert(opr->is_fpu_register() && !opr->is_xmm_register(), "shouldn't call this otherwise"); - assert(dont_check_offset || tos_offset(opr) == 0, "operand is not on stack top"); - - int stack_offset = 0; - if (opr->is_single_fpu()) { - return LIR_OprFact::single_fpu(stack_offset)->make_fpu_stack_offset(); - } else { - assert(opr->is_double_fpu(), 
"shouldn't call this otherwise"); - return LIR_OprFact::double_fpu(stack_offset)->make_fpu_stack_offset(); - } -} - - - -void FpuStackAllocator::insert_op(LIR_Op* op) { - lir()->insert_before(pos(), op); - set_pos(1 + pos()); -} - - -void FpuStackAllocator::insert_exchange(int offset) { - if (offset > 0) { - LIR_Op1* fxch_op = new LIR_Op1(lir_fxch, LIR_OprFact::intConst(offset), LIR_OprFact::illegalOpr); - insert_op(fxch_op); - sim()->swap(offset); - -#ifndef PRODUCT - if (TraceFPUStack) { - tty->print("Exchanged register: %d New state: ", sim()->get_slot(0)); sim()->print(); tty->cr(); - } -#endif - - } -} - -void FpuStackAllocator::insert_exchange(LIR_Opr opr) { - insert_exchange(tos_offset(opr)); -} - - -void FpuStackAllocator::insert_free(int offset) { - // move stack slot to the top of stack and then pop it - insert_exchange(offset); - - LIR_Op* fpop = new LIR_Op0(lir_fpop_raw); - insert_op(fpop); - sim()->pop(); - -#ifndef PRODUCT - if (TraceFPUStack) { - tty->print("Inserted pop New state: "); sim()->print(); tty->cr(); - } -#endif -} - - -void FpuStackAllocator::insert_free_if_dead(LIR_Opr opr) { - if (sim()->contains(fpu_num(opr))) { - int res_slot = tos_offset(opr); - insert_free(res_slot); - } -} - -void FpuStackAllocator::insert_free_if_dead(LIR_Opr opr, LIR_Opr ignore) { - if (fpu_num(opr) != fpu_num(ignore) && sim()->contains(fpu_num(opr))) { - int res_slot = tos_offset(opr); - insert_free(res_slot); - } -} - -void FpuStackAllocator::insert_copy(LIR_Opr from, LIR_Opr to) { - int offset = tos_offset(from); - LIR_Op1* fld = new LIR_Op1(lir_fld, LIR_OprFact::intConst(offset), LIR_OprFact::illegalOpr); - insert_op(fld); - - sim()->push(fpu_num(to)); - -#ifndef PRODUCT - if (TraceFPUStack) { - tty->print("Inserted copy (%d -> %d) New state: ", fpu_num(from), fpu_num(to)); sim()->print(); tty->cr(); - } -#endif -} - -void FpuStackAllocator::do_rename(LIR_Opr from, LIR_Opr to) { - sim()->rename(fpu_num(from), fpu_num(to)); -} - -void 
FpuStackAllocator::do_push(LIR_Opr opr) { - sim()->push(fpu_num(opr)); -} - -void FpuStackAllocator::pop_if_last_use(LIR_Op* op, LIR_Opr opr) { - assert(op->fpu_pop_count() == 0, "fpu_pop_count already set"); - assert(tos_offset(opr) == 0, "can only pop stack top"); - - if (opr->is_last_use()) { - op->set_fpu_pop_count(1); - sim()->pop(); - } -} - -void FpuStackAllocator::pop_always(LIR_Op* op, LIR_Opr opr) { - assert(op->fpu_pop_count() == 0, "fpu_pop_count already set"); - assert(tos_offset(opr) == 0, "can only pop stack top"); - - op->set_fpu_pop_count(1); - sim()->pop(); -} - -void FpuStackAllocator::clear_fpu_stack(LIR_Opr preserve) { - int result_stack_size = (preserve->is_fpu_register() && !preserve->is_xmm_register() ? 1 : 0); - while (sim()->stack_size() > result_stack_size) { - assert(!sim()->slot_is_empty(0), "not allowed"); - - if (result_stack_size == 0 || sim()->get_slot(0) != fpu_num(preserve)) { - insert_free(0); - } else { - // move "preserve" to bottom of stack so that all other stack slots can be popped - insert_exchange(sim()->stack_size() - 1); - } - } -} - - -void FpuStackAllocator::handle_op1(LIR_Op1* op1) { - LIR_Opr in = op1->in_opr(); - LIR_Opr res = op1->result_opr(); - - LIR_Opr new_in = in; // new operands relative to the actual fpu stack top - LIR_Opr new_res = res; - - // Note: this switch is processed for all LIR_Op1, regardless if they have FPU-arguments, - // so checks for is_float_kind() are necessary inside the cases - switch (op1->code()) { - - case lir_return: { - // FPU-Stack must only contain the (optional) fpu return value. 
- // All remaining dead values are popped from the stack - // If the input operand is a fpu-register, it is exchanged to the bottom of the stack - - clear_fpu_stack(in); - if (in->is_fpu_register() && !in->is_xmm_register()) { - new_in = to_fpu_stack_top(in); - } - - break; - } - - case lir_move: { - if (in->is_fpu_register() && !in->is_xmm_register()) { - if (res->is_xmm_register()) { - // move from fpu register to xmm register (necessary for operations that - // are not available in the SSE instruction set) - insert_exchange(in); - new_in = to_fpu_stack_top(in); - pop_always(op1, in); - - } else if (res->is_fpu_register() && !res->is_xmm_register()) { - // move from fpu-register to fpu-register: - // * input and result register equal: - // nothing to do - // * input register is last use: - // rename the input register to result register -> input register - // not present on fpu-stack afterwards - // * input register not last use: - // duplicate input register to result register to preserve input - // - // Note: The LIR-Assembler does not produce any code for fpu register moves, - // so input and result stack index must be equal - - if (fpu_num(in) == fpu_num(res)) { - // nothing to do - } else if (in->is_last_use()) { - insert_free_if_dead(res);//, in); - do_rename(in, res); - } else { - insert_free_if_dead(res); - insert_copy(in, res); - } - new_in = to_fpu_stack(res); - new_res = new_in; - - } else { - // move from fpu-register to memory - // input operand must be on top of stack - - insert_exchange(in); - - // create debug information here because afterwards the register may have been popped - compute_debug_information(op1); - - new_in = to_fpu_stack_top(in); - pop_if_last_use(op1, in); - } - - } else if (res->is_fpu_register() && !res->is_xmm_register()) { - // move from memory/constant to fpu register - // result is pushed on the stack - - insert_free_if_dead(res); - - // create debug information before register is pushed - compute_debug_information(op1); - 
- do_push(res); - new_res = to_fpu_stack_top(res); - } - break; - } - - case lir_convert: { - Bytecodes::Code bc = op1->as_OpConvert()->bytecode(); - switch (bc) { - case Bytecodes::_d2f: - case Bytecodes::_f2d: - assert(res->is_fpu_register(), "must be"); - assert(in->is_fpu_register(), "must be"); - - if (!in->is_xmm_register() && !res->is_xmm_register()) { - // this is quite the same as a move from fpu-register to fpu-register - // Note: input and result operands must have different types - if (fpu_num(in) == fpu_num(res)) { - // nothing to do - new_in = to_fpu_stack(in); - } else if (in->is_last_use()) { - insert_free_if_dead(res);//, in); - new_in = to_fpu_stack(in); - do_rename(in, res); - } else { - insert_free_if_dead(res); - insert_copy(in, res); - new_in = to_fpu_stack_top(in, true); - } - new_res = to_fpu_stack(res); - } - - break; - - case Bytecodes::_i2f: - case Bytecodes::_l2f: - case Bytecodes::_i2d: - case Bytecodes::_l2d: - assert(res->is_fpu_register(), "must be"); - if (!res->is_xmm_register()) { - insert_free_if_dead(res); - do_push(res); - new_res = to_fpu_stack_top(res); - } - break; - - case Bytecodes::_f2i: - case Bytecodes::_d2i: - assert(in->is_fpu_register(), "must be"); - if (!in->is_xmm_register()) { - insert_exchange(in); - new_in = to_fpu_stack_top(in); - - // TODO: update registers of stub - } - break; - - case Bytecodes::_f2l: - case Bytecodes::_d2l: - assert(in->is_fpu_register(), "must be"); - if (!in->is_xmm_register()) { - insert_exchange(in); - new_in = to_fpu_stack_top(in); - pop_always(op1, in); - } - break; - - case Bytecodes::_i2l: - case Bytecodes::_l2i: - case Bytecodes::_i2b: - case Bytecodes::_i2c: - case Bytecodes::_i2s: - // no fpu operands - break; - - default: - ShouldNotReachHere(); - } - break; - } - - case lir_roundfp: { - assert(in->is_fpu_register() && !in->is_xmm_register(), "input must be in register"); - assert(res->is_stack(), "result must be on stack"); - - insert_exchange(in); - new_in = 
to_fpu_stack_top(in); - pop_if_last_use(op1, in); - break; - } - - default: { - assert(!in->is_float_kind() && !res->is_float_kind(), "missed a fpu-operation"); - } - } - - op1->set_in_opr(new_in); - op1->set_result_opr(new_res); -} - -void FpuStackAllocator::handle_op2(LIR_Op2* op2) { - LIR_Opr left = op2->in_opr1(); - if (!left->is_float_kind()) { - return; - } - if (left->is_xmm_register()) { - return; - } - - LIR_Opr right = op2->in_opr2(); - LIR_Opr res = op2->result_opr(); - LIR_Opr new_left = left; // new operands relative to the actual fpu stack top - LIR_Opr new_right = right; - LIR_Opr new_res = res; - - assert(!left->is_xmm_register() && !right->is_xmm_register() && !res->is_xmm_register(), "not for xmm registers"); - - switch (op2->code()) { - case lir_cmp: - case lir_cmp_fd2i: - case lir_ucmp_fd2i: - case lir_assert: { - assert(left->is_fpu_register(), "invalid LIR"); - assert(right->is_fpu_register(), "invalid LIR"); - - // the left-hand side must be on top of stack. - // the right-hand side is never popped, even if is_last_use is set - insert_exchange(left); - new_left = to_fpu_stack_top(left); - new_right = to_fpu_stack(right); - pop_if_last_use(op2, left); - break; - } - - case lir_mul: - case lir_div: { - if (res->is_double_fpu()) { - assert(op2->tmp1_opr()->is_fpu_register(), "strict operations need temporary fpu stack slot"); - insert_free_if_dead(op2->tmp1_opr()); - assert(sim()->stack_size() <= 7, "at least one stack slot must be free"); - } - // fall-through: continue with the normal handling of lir_mul and lir_div - } - case lir_add: - case lir_sub: { - assert(left->is_fpu_register(), "must be"); - assert(res->is_fpu_register(), "must be"); - assert(left->is_equal(res), "must be"); - - // either the left-hand or the right-hand side must be on top of stack - // (if right is not a register, left must be on top) - if (!right->is_fpu_register()) { - insert_exchange(left); - new_left = to_fpu_stack_top(left); - } else { - // no exchange necessary 
if right is already on top of stack - if (tos_offset(right) == 0) { - new_left = to_fpu_stack(left); - new_right = to_fpu_stack_top(right); - } else { - insert_exchange(left); - new_left = to_fpu_stack_top(left); - new_right = to_fpu_stack(right); - } - - if (right->is_last_use()) { - op2->set_fpu_pop_count(1); - - if (tos_offset(right) == 0) { - sim()->pop(); - } else { - // if left is on top of stack, the result is placed in the stack - // slot of right, so a renaming from right to res is necessary - assert(tos_offset(left) == 0, "must be"); - sim()->pop(); - do_rename(right, res); - } - } - } - new_res = to_fpu_stack(res); - - break; - } - - case lir_rem: { - assert(left->is_fpu_register(), "must be"); - assert(right->is_fpu_register(), "must be"); - assert(res->is_fpu_register(), "must be"); - assert(left->is_equal(res), "must be"); - - // Must bring both operands to top of stack with following operand ordering: - // * fpu stack before rem: ... right left - // * fpu stack after rem: ... 
left - if (tos_offset(right) != 1) { - insert_exchange(right); - insert_exchange(1); - } - insert_exchange(left); - assert(tos_offset(right) == 1, "check"); - assert(tos_offset(left) == 0, "check"); - - new_left = to_fpu_stack_top(left); - new_right = to_fpu_stack(right); - - op2->set_fpu_pop_count(1); - sim()->pop(); - do_rename(right, res); - - new_res = to_fpu_stack_top(res); - break; - } - - case lir_abs: - case lir_sqrt: - case lir_neg: { - // Right argument appears to be unused - assert(right->is_illegal(), "must be"); - assert(left->is_fpu_register(), "must be"); - assert(res->is_fpu_register(), "must be"); - assert(left->is_last_use(), "old value gets destroyed"); - - insert_free_if_dead(res, left); - insert_exchange(left); - do_rename(left, res); - - new_left = to_fpu_stack_top(res); - new_res = new_left; - - op2->set_fpu_stack_size(sim()->stack_size()); - break; - } - - default: { - assert(false, "missed a fpu-operation"); - } - } - - op2->set_in_opr1(new_left); - op2->set_in_opr2(new_right); - op2->set_result_opr(new_res); -} - -void FpuStackAllocator::handle_opCall(LIR_OpCall* opCall) { - LIR_Opr res = opCall->result_opr(); - - // clear fpu-stack before call - // it may contain dead values that could not have been removed by previous operations - clear_fpu_stack(LIR_OprFact::illegalOpr); - assert(sim()->is_empty(), "fpu stack must be empty now"); - - // compute debug information before (possible) fpu result is pushed - compute_debug_information(opCall); - - if (res->is_fpu_register() && !res->is_xmm_register()) { - do_push(res); - opCall->set_result_opr(to_fpu_stack_top(res)); - } -} - -#ifndef PRODUCT -void FpuStackAllocator::check_invalid_lir_op(LIR_Op* op) { - switch (op->code()) { - case lir_fpop_raw: - case lir_fxch: - case lir_fld: - assert(false, "operations only inserted by FpuStackAllocator"); - break; - - default: - break; - } -} -#endif - - -void FpuStackAllocator::merge_insert_add(LIR_List* instrs, FpuStackSim* cur_sim, int reg) { - LIR_Op1* 
move = new LIR_Op1(lir_move, LIR_OprFact::doubleConst(0), LIR_OprFact::double_fpu(reg)->make_fpu_stack_offset()); - - instrs->instructions_list()->push(move); - - cur_sim->push(reg); - move->set_result_opr(to_fpu_stack(move->result_opr())); - - #ifndef PRODUCT - if (TraceFPUStack) { - tty->print("Added new register: %d New state: ", reg); cur_sim->print(); tty->cr(); - } - #endif -} - -void FpuStackAllocator::merge_insert_xchg(LIR_List* instrs, FpuStackSim* cur_sim, int slot) { - assert(slot > 0, "no exchange necessary"); - - LIR_Op1* fxch = new LIR_Op1(lir_fxch, LIR_OprFact::intConst(slot)); - instrs->instructions_list()->push(fxch); - cur_sim->swap(slot); - - #ifndef PRODUCT - if (TraceFPUStack) { - tty->print("Exchanged register: %d New state: ", cur_sim->get_slot(slot)); cur_sim->print(); tty->cr(); - } - #endif -} - -void FpuStackAllocator::merge_insert_pop(LIR_List* instrs, FpuStackSim* cur_sim) { - int reg = cur_sim->get_slot(0); - - LIR_Op* fpop = new LIR_Op0(lir_fpop_raw); - instrs->instructions_list()->push(fpop); - cur_sim->pop(reg); - - #ifndef PRODUCT - if (TraceFPUStack) { - tty->print("Removed register: %d New state: ", reg); cur_sim->print(); tty->cr(); - } - #endif -} - -bool FpuStackAllocator::merge_rename(FpuStackSim* cur_sim, FpuStackSim* sux_sim, int start_slot, int change_slot) { - int reg = cur_sim->get_slot(change_slot); - - for (int slot = start_slot; slot >= 0; slot--) { - int new_reg = sux_sim->get_slot(slot); - - if (!cur_sim->contains(new_reg)) { - cur_sim->set_slot(change_slot, new_reg); - - #ifndef PRODUCT - if (TraceFPUStack) { - tty->print("Renamed register %d to %d New state: ", reg, new_reg); cur_sim->print(); tty->cr(); - } - #endif - - return true; - } - } - return false; -} - - -void FpuStackAllocator::merge_fpu_stack(LIR_List* instrs, FpuStackSim* cur_sim, FpuStackSim* sux_sim) { -#ifndef PRODUCT - if (TraceFPUStack) { - tty->cr(); - tty->print("before merging: pred: "); cur_sim->print(); tty->cr(); - tty->print(" sux: "); 
sux_sim->print(); tty->cr(); - } - - int slot; - for (slot = 0; slot < cur_sim->stack_size(); slot++) { - assert(!cur_sim->slot_is_empty(slot), "not handled by algorithm"); - } - for (slot = 0; slot < sux_sim->stack_size(); slot++) { - assert(!sux_sim->slot_is_empty(slot), "not handled by algorithm"); - } -#endif - - // size difference between cur and sux that must be resolved by adding or removing values form the stack - int size_diff = cur_sim->stack_size() - sux_sim->stack_size(); - - if (!ComputeExactFPURegisterUsage) { - // add slots that are currently free, but used in successor - // When the exact FPU register usage is computed, the stack does - // not contain dead values at merging -> no values must be added - - int sux_slot = sux_sim->stack_size() - 1; - while (size_diff < 0) { - assert(sux_slot >= 0, "slot out of bounds -> error in algorithm"); - - int reg = sux_sim->get_slot(sux_slot); - if (!cur_sim->contains(reg)) { - merge_insert_add(instrs, cur_sim, reg); - size_diff++; - - if (sux_slot + size_diff != 0) { - merge_insert_xchg(instrs, cur_sim, sux_slot + size_diff); - } - } - sux_slot--; - } - } - - assert(cur_sim->stack_size() >= sux_sim->stack_size(), "stack size must be equal or greater now"); - assert(size_diff == cur_sim->stack_size() - sux_sim->stack_size(), "must be"); - - // stack merge algorithm: - // 1) as long as the current stack top is not in the right location (that means - // it should not be on the stack top), exchange it into the right location - // 2) if the stack top is right, but the remaining stack is not ordered correctly, - // the stack top is exchanged away to get another value on top -> - // now step 1) can be continued - // the stack can also contain unused items -> these items are removed from stack - - int finished_slot = sux_sim->stack_size() - 1; - while (finished_slot >= 0 || size_diff > 0) { - while (size_diff > 0 || (cur_sim->stack_size() > 0 && cur_sim->get_slot(0) != sux_sim->get_slot(0))) { - int reg = 
cur_sim->get_slot(0); - if (sux_sim->contains(reg)) { - int sux_slot = sux_sim->offset_from_tos(reg); - merge_insert_xchg(instrs, cur_sim, sux_slot + size_diff); - - } else if (!merge_rename(cur_sim, sux_sim, finished_slot, 0)) { - assert(size_diff > 0, "must be"); - - merge_insert_pop(instrs, cur_sim); - size_diff--; - } - assert(cur_sim->stack_size() == 0 || cur_sim->get_slot(0) != reg, "register must have been changed"); - } - - while (finished_slot >= 0 && cur_sim->get_slot(finished_slot) == sux_sim->get_slot(finished_slot)) { - finished_slot--; - } - - if (finished_slot >= 0) { - int reg = cur_sim->get_slot(finished_slot); - - if (sux_sim->contains(reg) || !merge_rename(cur_sim, sux_sim, finished_slot, finished_slot)) { - assert(sux_sim->contains(reg) || size_diff > 0, "must be"); - merge_insert_xchg(instrs, cur_sim, finished_slot); - } - assert(cur_sim->get_slot(finished_slot) != reg, "register must have been changed"); - } - } - -#ifndef PRODUCT - if (TraceFPUStack) { - tty->print("after merging: pred: "); cur_sim->print(); tty->cr(); - tty->print(" sux: "); sux_sim->print(); tty->cr(); - tty->cr(); - } -#endif - assert(cur_sim->stack_size() == sux_sim->stack_size(), "stack size must be equal now"); -} - - -void FpuStackAllocator::merge_cleanup_fpu_stack(LIR_List* instrs, FpuStackSim* cur_sim, BitMap& live_fpu_regs) { -#ifndef PRODUCT - if (TraceFPUStack) { - tty->cr(); - tty->print("before cleanup: state: "); cur_sim->print(); tty->cr(); - tty->print(" live: "); live_fpu_regs.print_on(tty); tty->cr(); - } -#endif - - int slot = 0; - while (slot < cur_sim->stack_size()) { - int reg = cur_sim->get_slot(slot); - if (!live_fpu_regs.at(reg)) { - if (slot != 0) { - merge_insert_xchg(instrs, cur_sim, slot); - } - merge_insert_pop(instrs, cur_sim); - } else { - slot++; - } - } - -#ifndef PRODUCT - if (TraceFPUStack) { - tty->print("after cleanup: state: "); cur_sim->print(); tty->cr(); - tty->print(" live: "); live_fpu_regs.print_on(tty); tty->cr(); - tty->cr(); - 
} - - // check if fpu stack only contains live registers - for (unsigned int i = 0; i < live_fpu_regs.size(); i++) { - if (live_fpu_regs.at(i) != cur_sim->contains(i)) { - tty->print_cr("mismatch between required and actual stack content"); - break; - } - } -#endif -} - - -bool FpuStackAllocator::merge_fpu_stack_with_successors(BlockBegin* block) { -#ifndef PRODUCT - if (TraceFPUStack) { - tty->print_cr("Propagating FPU stack state for B%d at LIR_Op position %d to successors:", - block->block_id(), pos()); - sim()->print(); - tty->cr(); - } -#endif - - bool changed = false; - int number_of_sux = block->number_of_sux(); - - if (number_of_sux == 1 && block->sux_at(0)->number_of_preds() > 1) { - // The successor has at least two incoming edges, so a stack merge will be necessary - // If this block is the first predecessor, cleanup the current stack and propagate it - // If this block is not the first predecessor, a stack merge will be necessary - - BlockBegin* sux = block->sux_at(0); - intArray* state = sux->fpu_stack_state(); - LIR_List* instrs = new LIR_List(_compilation); - - if (state != nullptr) { - // Merge with a successors that already has a FPU stack state - // the block must only have one successor because critical edges must been split - FpuStackSim* cur_sim = sim(); - FpuStackSim* sux_sim = temp_sim(); - sux_sim->read_state(state); - - merge_fpu_stack(instrs, cur_sim, sux_sim); - - } else { - // propagate current FPU stack state to successor without state - // clean up stack first so that there are no dead values on the stack - if (ComputeExactFPURegisterUsage) { - FpuStackSim* cur_sim = sim(); - ResourceBitMap live_fpu_regs = block->sux_at(0)->fpu_register_usage(); - assert(live_fpu_regs.size() == FrameMap::nof_fpu_regs, "missing register usage"); - - merge_cleanup_fpu_stack(instrs, cur_sim, live_fpu_regs); - } - - intArray* state = sim()->write_state(); - if (TraceFPUStack) { - tty->print_cr("Setting FPU stack state of B%d (merge path)", 
sux->block_id()); - sim()->print(); tty->cr(); - } - sux->set_fpu_stack_state(state); - } - - if (instrs->instructions_list()->length() > 0) { - lir()->insert_before(pos(), instrs); - set_pos(instrs->instructions_list()->length() + pos()); - changed = true; - } - - } else { - // Propagate unmodified Stack to successors where a stack merge is not necessary - intArray* state = sim()->write_state(); - for (int i = 0; i < number_of_sux; i++) { - BlockBegin* sux = block->sux_at(i); - -#ifdef ASSERT - for (int j = 0; j < sux->number_of_preds(); j++) { - assert(block == sux->pred_at(j), "all critical edges must be broken"); - } - - // check if new state is same - if (sux->fpu_stack_state() != nullptr) { - intArray* sux_state = sux->fpu_stack_state(); - assert(state->length() == sux_state->length(), "overwriting existing stack state"); - for (int j = 0; j < state->length(); j++) { - assert(state->at(j) == sux_state->at(j), "overwriting existing stack state"); - } - } -#endif -#ifndef PRODUCT - if (TraceFPUStack) { - tty->print_cr("Setting FPU stack state of B%d", sux->block_id()); - sim()->print(); tty->cr(); - } -#endif - - sux->set_fpu_stack_state(state); - } - } - -#ifndef PRODUCT - // assertions that FPU stack state conforms to all successors' states - intArray* cur_state = sim()->write_state(); - for (int i = 0; i < number_of_sux; i++) { - BlockBegin* sux = block->sux_at(i); - intArray* sux_state = sux->fpu_stack_state(); - - assert(sux_state != nullptr, "no fpu state"); - assert(cur_state->length() == sux_state->length(), "incorrect length"); - for (int i = 0; i < cur_state->length(); i++) { - assert(cur_state->at(i) == sux_state->at(i), "element not equal"); - } - } -#endif - - return changed; -} -#endif // _LP64 diff --git a/src/hotspot/cpu/x86/c1_LinearScan_x86.hpp b/src/hotspot/cpu/x86/c1_LinearScan_x86.hpp index 50cdd14154c42..1bbbe13f3ad7f 100644 --- a/src/hotspot/cpu/x86/c1_LinearScan_x86.hpp +++ b/src/hotspot/cpu/x86/c1_LinearScan_x86.hpp @@ -26,12 +26,6 @@ 
#define CPU_X86_C1_LINEARSCAN_X86_HPP inline bool LinearScan::is_processed_reg_num(int reg_num) { -#ifndef _LP64 - // rsp and rbp (numbers 6 ancd 7) are ignored - assert(FrameMap::rsp_opr->cpu_regnr() == 6, "wrong assumption below"); - assert(FrameMap::rbp_opr->cpu_regnr() == 7, "wrong assumption below"); - assert(reg_num >= 0, "invalid reg_num"); -#else // rsp and rbp, r10, r15 (numbers [12,15]) are ignored // r12 (number 11) is conditional on compressed oops. assert(FrameMap::r12_opr->cpu_regnr() == 11, "wrong assumption below"); @@ -40,16 +34,10 @@ inline bool LinearScan::is_processed_reg_num(int reg_num) { assert(FrameMap::rsp_opr->cpu_regnrLo() == 14, "wrong assumption below"); assert(FrameMap::rbp_opr->cpu_regnrLo() == 15, "wrong assumption below"); assert(reg_num >= 0, "invalid reg_num"); -#endif // _LP64 return reg_num <= FrameMap::last_cpu_reg() || reg_num >= pd_nof_cpu_regs_frame_map; } inline int LinearScan::num_physical_regs(BasicType type) { - // Intel requires two cpu registers for long, - // but requires only one fpu register for double - if (LP64_ONLY(false &&) type == T_LONG) { - return 2; - } return 1; } @@ -77,18 +65,16 @@ inline void LinearScan::pd_add_temps(LIR_Op* op) { // could also consider not killing all xmm registers if we // assume that slow paths are uncommon but it's not clear that // would be a good idea. 
- if (UseSSE > 0) { #ifdef ASSERT - if (TraceLinearScanLevel >= 2) { - tty->print_cr("killing XMMs for trig"); - } + if (TraceLinearScanLevel >= 2) { + tty->print_cr("killing XMMs for trig"); + } #endif - int num_caller_save_xmm_regs = FrameMap::get_num_caller_save_xmms(); - int op_id = op->id(); - for (int xmm = 0; xmm < num_caller_save_xmm_regs; xmm++) { - LIR_Opr opr = FrameMap::caller_save_xmm_reg_at(xmm); - add_temp(reg_num(opr), op_id, noUse, T_ILLEGAL); - } + int num_caller_save_xmm_regs = FrameMap::get_num_caller_save_xmms(); + int op_id = op->id(); + for (int xmm = 0; xmm < num_caller_save_xmm_regs; xmm++) { + LIR_Opr opr = FrameMap::caller_save_xmm_reg_at(xmm); + add_temp(reg_num(opr), op_id, noUse, T_ILLEGAL); } break; } @@ -107,7 +93,7 @@ inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) { _first_reg = pd_first_byte_reg; _last_reg = FrameMap::last_byte_reg(); return true; - } else if ((UseSSE >= 1 && cur->type() == T_FLOAT) || (UseSSE >= 2 && cur->type() == T_DOUBLE)) { + } else if ((cur->type() == T_FLOAT) || (cur->type() == T_DOUBLE)) { _first_reg = pd_first_xmm_reg; _last_reg = last_xmm_reg; return true; @@ -116,78 +102,4 @@ inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) { return false; } - -class FpuStackAllocator { - private: - Compilation* _compilation; - LinearScan* _allocator; - - LIR_OpVisitState visitor; - - LIR_List* _lir; - int _pos; - FpuStackSim _sim; - FpuStackSim _temp_sim; - - bool _debug_information_computed; - - LinearScan* allocator() { return _allocator; } - Compilation* compilation() const { return _compilation; } - - // unified bailout support - void bailout(const char* msg) const { compilation()->bailout(msg); } - bool bailed_out() const { return compilation()->bailed_out(); } - - int pos() { return _pos; } - void set_pos(int pos) { _pos = pos; } - LIR_Op* cur_op() { return lir()->instructions_list()->at(pos()); } - LIR_List* lir() { return _lir; } - void set_lir(LIR_List* lir) { _lir = lir; 
} - FpuStackSim* sim() { return &_sim; } - FpuStackSim* temp_sim() { return &_temp_sim; } - - int fpu_num(LIR_Opr opr); - int tos_offset(LIR_Opr opr); - LIR_Opr to_fpu_stack_top(LIR_Opr opr, bool dont_check_offset = false); - - // Helper functions for handling operations - void insert_op(LIR_Op* op); - void insert_exchange(int offset); - void insert_exchange(LIR_Opr opr); - void insert_free(int offset); - void insert_free_if_dead(LIR_Opr opr); - void insert_free_if_dead(LIR_Opr opr, LIR_Opr ignore); - void insert_copy(LIR_Opr from, LIR_Opr to); - void do_rename(LIR_Opr from, LIR_Opr to); - void do_push(LIR_Opr opr); - void pop_if_last_use(LIR_Op* op, LIR_Opr opr); - void pop_always(LIR_Op* op, LIR_Opr opr); - void clear_fpu_stack(LIR_Opr preserve); - void handle_op1(LIR_Op1* op1); - void handle_op2(LIR_Op2* op2); - void handle_opCall(LIR_OpCall* opCall); - void compute_debug_information(LIR_Op* op); - void allocate_exception_handler(XHandler* xhandler); - void allocate_block(BlockBegin* block); - -#ifndef PRODUCT - void check_invalid_lir_op(LIR_Op* op); -#endif - - // Helper functions for merging of fpu stacks - void merge_insert_add(LIR_List* instrs, FpuStackSim* cur_sim, int reg); - void merge_insert_xchg(LIR_List* instrs, FpuStackSim* cur_sim, int slot); - void merge_insert_pop(LIR_List* instrs, FpuStackSim* cur_sim); - bool merge_rename(FpuStackSim* cur_sim, FpuStackSim* sux_sim, int start_slot, int change_slot); - void merge_fpu_stack(LIR_List* instrs, FpuStackSim* cur_sim, FpuStackSim* sux_sim); - void merge_cleanup_fpu_stack(LIR_List* instrs, FpuStackSim* cur_sim, BitMap& live_fpu_regs); - bool merge_fpu_stack_with_successors(BlockBegin* block); - - public: - LIR_Opr to_fpu_stack(LIR_Opr opr); // used by LinearScan for creation of debug information - - FpuStackAllocator(Compilation* compilation, LinearScan* allocator); - void allocate(); -}; - #endif // CPU_X86_C1_LINEARSCAN_X86_HPP diff --git a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp 
b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp index f53a25ed3e646..a033a9ec7d6a5 100644 --- a/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_MacroAssembler_x86.cpp @@ -63,15 +63,7 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr } if (LockingMode == LM_LIGHTWEIGHT) { -#ifdef _LP64 - const Register thread = r15_thread; - lightweight_lock(disp_hdr, obj, hdr, thread, tmp, slow_case); -#else - // Implicit null check. - movptr(hdr, Address(obj, oopDesc::mark_offset_in_bytes())); - // Lacking registers and thread on x86_32. Always take slow path. - jmp(slow_case); -#endif + lightweight_lock(disp_hdr, obj, hdr, r15_thread, tmp, slow_case); } else if (LockingMode == LM_LEGACY) { Label done; // Load object header @@ -136,12 +128,7 @@ void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_ verify_oop(obj); if (LockingMode == LM_LIGHTWEIGHT) { -#ifdef _LP64 lightweight_unlock(obj, disp_hdr, r15_thread, hdr, slow_case); -#else - // Lacking registers and thread on x86_32. Always take slow path. 
- jmp(slow_case); -#endif } else if (LockingMode == LM_LEGACY) { // test if object header is pointing to the displaced header, and if so, restore // the displaced header in the object - if the object header is not pointing to @@ -170,7 +157,6 @@ void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, i void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register t1, Register t2) { assert_different_registers(obj, klass, len, t1, t2); -#ifdef _LP64 if (UseCompactObjectHeaders) { movptr(t1, Address(klass, Klass::prototype_header_offset())); movptr(Address(obj, oopDesc::mark_offset_in_bytes()), t1); @@ -179,16 +165,13 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register movptr(t1, klass); encode_klass_not_null(t1, rscratch1); movl(Address(obj, oopDesc::klass_offset_in_bytes()), t1); - } else -#endif - { + } else { movptr(Address(obj, oopDesc::mark_offset_in_bytes()), checked_cast(markWord::prototype().value())); movptr(Address(obj, oopDesc::klass_offset_in_bytes()), klass); } if (len->is_valid()) { movl(Address(obj, arrayOopDesc::length_offset_in_bytes()), len); -#ifdef _LP64 int base_offset = arrayOopDesc::length_offset_in_bytes() + BytesPerInt; if (!is_aligned(base_offset, BytesPerWord)) { assert(is_aligned(base_offset, BytesPerInt), "must be 4-byte aligned"); @@ -196,14 +179,10 @@ void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register xorl(t1, t1); movl(Address(obj, base_offset), t1); } -#endif - } -#ifdef _LP64 - else if (UseCompressedClassPointers && !UseCompactObjectHeaders) { + } else if (UseCompressedClassPointers && !UseCompactObjectHeaders) { xorptr(t1, t1); store_klass_gap(obj, t1); } -#endif } @@ -266,8 +245,6 @@ void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register bind(loop); movptr(Address(obj, index, Address::times_8, hdr_size_in_bytes - (1*BytesPerWord)), t1_zero); - NOT_LP64(movptr(Address(obj, index, 
Address::times_8, hdr_size_in_bytes - (2*BytesPerWord)), - t1_zero);) decrement(index); jcc(Assembler::notZero, loop); } @@ -333,12 +310,6 @@ void C1_MacroAssembler::build_frame(int frame_size_in_bytes, int bang_size_in_by if (PreserveFramePointer) { mov(rbp, rsp); } -#if !defined(_LP64) && defined(COMPILER2) - if (UseSSE < 2 && !CompilerConfig::is_c1_only_no_jvmci()) { - // c2 leaves fpu stack dirty. Clean it on entry - empty_FPU_stack(); - } -#endif // !_LP64 && COMPILER2 decrement(rsp, frame_size_in_bytes); // does not emit code for frame_size == 0 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); @@ -354,19 +325,17 @@ void C1_MacroAssembler::remove_frame(int frame_size_in_bytes) { void C1_MacroAssembler::verified_entry(bool breakAtEntry) { - if (breakAtEntry || VerifyFPU) { + if (breakAtEntry) { // Verified Entry first instruction should be 5 bytes long for correct // patching by patch_verified_entry(). // - // Breakpoint and VerifyFPU have one byte first instruction. + // Breakpoint has one byte first instruction. // Also first instruction will be one byte "push(rbp)" if stack banging // code is not generated (see build_frame() above). // For all these cases generate long instruction first. 
fat_nop(); } if (breakAtEntry) int3(); - // build frame - IA32_ONLY( verify_FPU(0, "method_entry"); ) } void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { diff --git a/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp b/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp index 5cc8ffd9befe4..5ebb9d35adad9 100644 --- a/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp +++ b/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp @@ -52,27 +52,17 @@ int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, int args_size) { // setup registers - const Register thread = NOT_LP64(rdi) LP64_ONLY(r15_thread); // is callee-saved register (Visual C++ calling conventions) + const Register thread = r15_thread; // is callee-saved register (Visual C++ calling conventions) assert(!(oop_result1->is_valid() || metadata_result->is_valid()) || oop_result1 != metadata_result, "registers must be different"); assert(oop_result1 != thread && metadata_result != thread, "registers must be different"); assert(args_size >= 0, "illegal args_size"); bool align_stack = false; -#ifdef _LP64 // At a method handle call, the stack may not be properly aligned // when returning with an exception. 
align_stack = (stub_id() == (int)C1StubId::handle_exception_from_callee_id); -#endif -#ifdef _LP64 mov(c_rarg0, thread); set_num_rt_args(0); // Nothing on stack -#else - set_num_rt_args(1 + args_size); - - // push java thread (becomes first argument of C function) - get_thread(thread); - push(thread); -#endif // _LP64 int call_offset = -1; if (!align_stack) { @@ -105,9 +95,6 @@ int StubAssembler::call_RT(Register oop_result1, Register metadata_result, addre #endif reset_last_Java_frame(thread, true); - // discard thread and arguments - NOT_LP64(addptr(rsp, num_rt_args()*BytesPerWord)); - // check for pending exceptions { Label L; cmpptr(Address(thread, Thread::pending_exception_offset()), NULL_WORD); @@ -145,17 +132,12 @@ int StubAssembler::call_RT(Register oop_result1, Register metadata_result, addre int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1) { -#ifdef _LP64 mov(c_rarg1, arg1); -#else - push(arg1); -#endif // _LP64 return call_RT(oop_result1, metadata_result, entry, 1); } int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2) { -#ifdef _LP64 if (c_rarg1 == arg2) { if (c_rarg2 == arg1) { xchgq(arg1, arg2); @@ -167,16 +149,11 @@ int StubAssembler::call_RT(Register oop_result1, Register metadata_result, addre mov(c_rarg1, arg1); mov(c_rarg2, arg2); } -#else - push(arg2); - push(arg1); -#endif // _LP64 return call_RT(oop_result1, metadata_result, entry, 2); } int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2, Register arg3) { -#ifdef _LP64 // if there is any conflict use the stack if (arg1 == c_rarg2 || arg1 == c_rarg3 || arg2 == c_rarg1 || arg2 == c_rarg3 || @@ -192,11 +169,6 @@ int StubAssembler::call_RT(Register oop_result1, Register metadata_result, addre mov(c_rarg2, arg2); mov(c_rarg3, arg3); } -#else - push(arg3); - push(arg2); - push(arg1); -#endif // _LP64 return 
call_RT(oop_result1, metadata_result, entry, 3); } @@ -263,20 +235,13 @@ const int xmm_regs_as_doubles_size_in_slots = FrameMap::nof_xmm_regs * 2; // but the code in save_live_registers will take the argument count into // account. // -#ifdef _LP64 - #define SLOT2(x) x, - #define SLOT_PER_WORD 2 -#else - #define SLOT2(x) - #define SLOT_PER_WORD 1 -#endif // _LP64 +#define SLOT2(x) x, +#define SLOT_PER_WORD 2 enum reg_save_layout { // 64bit needs to keep stack 16 byte aligned. So we add some alignment dummies to make that // happen and will assert if the stack size we create is misaligned -#ifdef _LP64 align_dummy_0, align_dummy_1, -#endif // _LP64 #ifdef _WIN64 // Windows always allocates space for it's argument registers (see // frame::arg_reg_save_area_bytes). @@ -292,7 +257,6 @@ enum reg_save_layout { fpu_state_end_off = fpu_state_off + (FPUStateSizeInWords / SLOT_PER_WORD), // 352 marker = fpu_state_end_off, SLOT2(markerH) // 352, 356 extra_space_offset, // 360 -#ifdef _LP64 r15_off = extra_space_offset, r15H_off, // 360, 364 r14_off, r14H_off, // 368, 372 r13_off, r13H_off, // 376, 380 @@ -302,9 +266,6 @@ enum reg_save_layout { r9_off, r9H_off, // 408, 412 r8_off, r8H_off, // 416, 420 rdi_off, rdiH_off, // 424, 428 -#else - rdi_off = extra_space_offset, -#endif // _LP64 rsi_off, SLOT2(rsiH_off) // 432, 436 rbp_off, SLOT2(rbpH_off) // 440, 444 rsp_off, SLOT2(rspH_off) // 448, 452 @@ -330,8 +291,8 @@ static OopMap* generate_oop_map(StubAssembler* sasm, int num_rt_args, bool save_fpu_registers = true) { // In 64bit all the args are in regs so there are no additional stack slots - LP64_ONLY(num_rt_args = 0); - LP64_ONLY(assert((reg_save_frame_size * VMRegImpl::stack_slot_size) % 16 == 0, "must be 16 byte aligned");) + num_rt_args = 0; + assert((reg_save_frame_size * VMRegImpl::stack_slot_size) % 16 == 0, "must be 16 byte aligned"); int frame_size_in_slots = reg_save_frame_size + num_rt_args; // args + thread sasm->set_frame_size(frame_size_in_slots / 
VMRegImpl::slots_per_word); @@ -344,7 +305,6 @@ static OopMap* generate_oop_map(StubAssembler* sasm, int num_rt_args, map->set_callee_saved(VMRegImpl::stack2reg(rbx_off + num_rt_args), rbx->as_VMReg()); map->set_callee_saved(VMRegImpl::stack2reg(rsi_off + num_rt_args), rsi->as_VMReg()); map->set_callee_saved(VMRegImpl::stack2reg(rdi_off + num_rt_args), rdi->as_VMReg()); -#ifdef _LP64 map->set_callee_saved(VMRegImpl::stack2reg(r8_off + num_rt_args), r8->as_VMReg()); map->set_callee_saved(VMRegImpl::stack2reg(r9_off + num_rt_args), r9->as_VMReg()); map->set_callee_saved(VMRegImpl::stack2reg(r10_off + num_rt_args), r10->as_VMReg()); @@ -370,52 +330,21 @@ static OopMap* generate_oop_map(StubAssembler* sasm, int num_rt_args, map->set_callee_saved(VMRegImpl::stack2reg(r13H_off + num_rt_args), r13->as_VMReg()->next()); map->set_callee_saved(VMRegImpl::stack2reg(r14H_off + num_rt_args), r14->as_VMReg()->next()); map->set_callee_saved(VMRegImpl::stack2reg(r15H_off + num_rt_args), r15->as_VMReg()->next()); -#endif // _LP64 int xmm_bypass_limit = FrameMap::get_num_caller_save_xmms(); if (save_fpu_registers) { -#ifndef _LP64 - if (UseSSE < 2) { - int fpu_off = float_regs_as_doubles_off; - for (int n = 0; n < FrameMap::nof_fpu_regs; n++) { - VMReg fpu_name_0 = FrameMap::fpu_regname(n); - map->set_callee_saved(VMRegImpl::stack2reg(fpu_off + num_rt_args), fpu_name_0); + int xmm_off = xmm_regs_as_doubles_off; + for (int n = 0; n < FrameMap::nof_xmm_regs; n++) { + if (n < xmm_bypass_limit) { + VMReg xmm_name_0 = as_XMMRegister(n)->as_VMReg(); + map->set_callee_saved(VMRegImpl::stack2reg(xmm_off + num_rt_args), xmm_name_0); // %%% This is really a waste but we'll keep things as they were for now - if (true) { - map->set_callee_saved(VMRegImpl::stack2reg(fpu_off + 1 + num_rt_args), fpu_name_0->next()); - } - fpu_off += 2; - } - assert(fpu_off == fpu_state_off, "incorrect number of fpu stack slots"); - - if (UseSSE == 1) { - int xmm_off = xmm_regs_as_doubles_off; - for (int n = 0; n < 
FrameMap::nof_fpu_regs; n++) { - VMReg xmm_name_0 = as_XMMRegister(n)->as_VMReg(); - map->set_callee_saved(VMRegImpl::stack2reg(xmm_off + num_rt_args), xmm_name_0); - xmm_off += 2; - } - assert(xmm_off == float_regs_as_doubles_off, "incorrect number of xmm registers"); + map->set_callee_saved(VMRegImpl::stack2reg(xmm_off + 1 + num_rt_args), xmm_name_0->next()); } + xmm_off += 2; } -#endif // !LP64 - - if (UseSSE >= 2) { - int xmm_off = xmm_regs_as_doubles_off; - for (int n = 0; n < FrameMap::nof_xmm_regs; n++) { - if (n < xmm_bypass_limit) { - VMReg xmm_name_0 = as_XMMRegister(n)->as_VMReg(); - map->set_callee_saved(VMRegImpl::stack2reg(xmm_off + num_rt_args), xmm_name_0); - // %%% This is really a waste but we'll keep things as they were for now - if (true) { - map->set_callee_saved(VMRegImpl::stack2reg(xmm_off + 1 + num_rt_args), xmm_name_0->next()); - } - } - xmm_off += 2; - } - assert(xmm_off == float_regs_as_doubles_off, "incorrect number of xmm registers"); - } + assert(xmm_off == float_regs_as_doubles_off, "incorrect number of xmm registers"); } return map; @@ -427,11 +356,7 @@ void C1_MacroAssembler::save_live_registers_no_oop_map(bool save_fpu_registers) __ block_comment("save_live_registers"); // Push CPU state in multiple of 16 bytes -#ifdef _LP64 __ save_legacy_gprs(); -#else - __ pusha(); -#endif // assert(float_regs_as_doubles_off % 2 == 0, "misaligned offset"); // assert(xmm_regs_as_doubles_off % 2 == 0, "misaligned offset"); @@ -443,71 +368,25 @@ void C1_MacroAssembler::save_live_registers_no_oop_map(bool save_fpu_registers) #endif if (save_fpu_registers) { -#ifndef _LP64 - if (UseSSE < 2) { - // save FPU stack - __ fnsave(Address(rsp, fpu_state_off * VMRegImpl::stack_slot_size)); - __ fwait(); - -#ifdef ASSERT - Label ok; - __ cmpw(Address(rsp, fpu_state_off * VMRegImpl::stack_slot_size), StubRoutines::x86::fpu_cntrl_wrd_std()); - __ jccb(Assembler::equal, ok); - __ stop("corrupted control word detected"); - __ bind(ok); -#endif - - // Reset the 
control word to guard against exceptions being unmasked - // since fstp_d can cause FPU stack underflow exceptions. Write it - // into the on stack copy and then reload that to make sure that the - // current and future values are correct. - __ movw(Address(rsp, fpu_state_off * VMRegImpl::stack_slot_size), StubRoutines::x86::fpu_cntrl_wrd_std()); - __ frstor(Address(rsp, fpu_state_off * VMRegImpl::stack_slot_size)); - - // Save the FPU registers in de-opt-able form - int offset = 0; - for (int n = 0; n < FrameMap::nof_fpu_regs; n++) { - __ fstp_d(Address(rsp, float_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset)); - offset += 8; - } - - if (UseSSE == 1) { - // save XMM registers as float because double not supported without SSE2(num MMX == num fpu) - int offset = 0; - for (int n = 0; n < FrameMap::nof_fpu_regs; n++) { - XMMRegister xmm_name = as_XMMRegister(n); - __ movflt(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset), xmm_name); - offset += 8; - } - } - } -#endif // !_LP64 - - if (UseSSE >= 2) { - // save XMM registers - // XMM registers can contain float or double values, but this is not known here, - // so always save them as doubles. - // note that float values are _not_ converted automatically, so for float values - // the second word contains only garbage data. - int xmm_bypass_limit = FrameMap::get_num_caller_save_xmms(); - int offset = 0; - for (int n = 0; n < xmm_bypass_limit; n++) { - XMMRegister xmm_name = as_XMMRegister(n); - __ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset), xmm_name); - offset += 8; - } + // save XMM registers + // XMM registers can contain float or double values, but this is not known here, + // so always save them as doubles. + // note that float values are _not_ converted automatically, so for float values + // the second word contains only garbage data. 
+ int xmm_bypass_limit = FrameMap::get_num_caller_save_xmms(); + int offset = 0; + for (int n = 0; n < xmm_bypass_limit; n++) { + XMMRegister xmm_name = as_XMMRegister(n); + __ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset), xmm_name); + offset += 8; } } - - // FPU stack must be empty now - NOT_LP64( __ verify_FPU(0, "save_live_registers"); ) } #undef __ #define __ sasm-> static void restore_fpu(C1_MacroAssembler* sasm, bool restore_fpu_registers) { -#ifdef _LP64 if (restore_fpu_registers) { // restore XMM registers int xmm_bypass_limit = FrameMap::get_num_caller_save_xmms(); @@ -518,38 +397,6 @@ static void restore_fpu(C1_MacroAssembler* sasm, bool restore_fpu_registers) { offset += 8; } } -#else - if (restore_fpu_registers) { - if (UseSSE >= 2) { - // restore XMM registers - int xmm_bypass_limit = FrameMap::nof_xmm_regs; - int offset = 0; - for (int n = 0; n < xmm_bypass_limit; n++) { - XMMRegister xmm_name = as_XMMRegister(n); - __ movdbl(xmm_name, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset)); - offset += 8; - } - } else if (UseSSE == 1) { - // restore XMM registers(num MMX == num fpu) - int offset = 0; - for (int n = 0; n < FrameMap::nof_fpu_regs; n++) { - XMMRegister xmm_name = as_XMMRegister(n); - __ movflt(xmm_name, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + offset)); - offset += 8; - } - } - - if (UseSSE < 2) { - __ frstor(Address(rsp, fpu_state_off * VMRegImpl::stack_slot_size)); - } else { - // check that FPU stack is really empty - __ verify_FPU(0, "restore_live_registers"); - } - } else { - // check that FPU stack is really empty - __ verify_FPU(0, "restore_live_registers"); - } -#endif // _LP64 #ifdef ASSERT { @@ -571,12 +418,7 @@ void C1_MacroAssembler::restore_live_registers(bool restore_fpu_registers) { __ block_comment("restore_live_registers"); restore_fpu(this, restore_fpu_registers); -#ifdef _LP64 __ restore_legacy_gprs(); -#else - __ popa(); -#endif - } 
@@ -585,7 +427,6 @@ void C1_MacroAssembler::restore_live_registers_except_rax(bool restore_fpu_regis restore_fpu(this, restore_fpu_registers); -#ifdef _LP64 __ movptr(r15, Address(rsp, 0)); __ movptr(r14, Address(rsp, wordSize)); __ movptr(r13, Address(rsp, 2 * wordSize)); @@ -603,17 +444,6 @@ void C1_MacroAssembler::restore_live_registers_except_rax(bool restore_fpu_regis __ movptr(rcx, Address(rsp, 14 * wordSize)); __ addptr(rsp, 16 * wordSize); -#else - - __ pop(rdi); - __ pop(rsi); - __ pop(rbp); - __ pop(rbx); // skip this value - __ pop(rbx); - __ pop(rdx); - __ pop(rcx); - __ addptr(rsp, BytesPerWord); -#endif // _LP64 } #undef __ @@ -640,12 +470,7 @@ void Runtime1::initialize_pd() { // return: offset in 64-bit words. uint Runtime1::runtime_blob_current_thread_offset(frame f) { -#ifdef _LP64 return r15_off / 2; // rsp offsets are in halfwords -#else - Unimplemented(); - return 0; -#endif } // Target: the entry point of the method that creates and posts the exception oop. @@ -665,15 +490,8 @@ OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address targe // Load arguments for exception that are passed as arguments into the stub. if (has_argument) { -#ifdef _LP64 __ movptr(c_rarg1, Address(rbp, 2*BytesPerWord)); __ movptr(c_rarg2, Address(rbp, 3*BytesPerWord)); -#else - __ movptr(temp_reg, Address(rbp, 3*BytesPerWord)); - __ push(temp_reg); - __ movptr(temp_reg, Address(rbp, 2*BytesPerWord)); - __ push(temp_reg); -#endif // _LP64 } int call_offset = __ call_RT(noreg, noreg, target, num_rt_args - 1); @@ -693,7 +511,7 @@ OopMapSet* Runtime1::generate_handle_exception(C1StubId id, StubAssembler *sasm) const Register exception_oop = rax; const Register exception_pc = rdx; // other registers used in this stub - const Register thread = NOT_LP64(rdi) LP64_ONLY(r15_thread); + const Register thread = r15_thread; // Save registers, if required. 
OopMapSet* oop_maps = new OopMapSet(); @@ -726,7 +544,7 @@ OopMapSet* Runtime1::generate_handle_exception(C1StubId id, StubAssembler *sasm) case C1StubId::handle_exception_from_callee_id: { // At this point all registers except exception oop (RAX) and // exception pc (RDX) are dead. - const int frame_size = 2 /*BP, return address*/ NOT_LP64(+ 1 /*thread*/) WIN64_ONLY(+ frame::arg_reg_save_area_bytes / BytesPerWord); + const int frame_size = 2 /*BP, return address*/ WIN64_ONLY(+ frame::arg_reg_save_area_bytes / BytesPerWord); oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0); sasm->set_frame_size(frame_size); WIN64_ONLY(__ subq(rsp, frame::arg_reg_save_area_bytes)); @@ -735,21 +553,11 @@ OopMapSet* Runtime1::generate_handle_exception(C1StubId id, StubAssembler *sasm) default: ShouldNotReachHere(); } -#if !defined(_LP64) && defined(COMPILER2) - if (UseSSE < 2 && !CompilerConfig::is_c1_only_no_jvmci()) { - // C2 can leave the fpu stack dirty - __ empty_FPU_stack(); - } -#endif // !_LP64 && COMPILER2 - // verify that only rax, and rdx is valid at this time __ invalidate_registers(false, true, true, false, true, true); // verify that rax, contains a valid exception __ verify_not_null_oop(exception_oop); - // load address of JavaThread object for thread-local data - NOT_LP64(__ get_thread(thread);) - #ifdef ASSERT // check that fields in JavaThread for exception oop and issuing pc are // empty before writing to them @@ -816,11 +624,11 @@ void Runtime1::generate_unwind_exception(StubAssembler *sasm) { // incoming parameters const Register exception_oop = rax; // callee-saved copy of exception_oop during runtime call - const Register exception_oop_callee_saved = NOT_LP64(rsi) LP64_ONLY(r14); + const Register exception_oop_callee_saved = r14; // other registers used in this stub const Register exception_pc = rdx; const Register handler_addr = rbx; - const Register thread = NOT_LP64(rdi) LP64_ONLY(r15_thread); + const Register thread = r15_thread; if 
(AbortVMOnException) { __ enter(); @@ -835,7 +643,6 @@ void Runtime1::generate_unwind_exception(StubAssembler *sasm) { #ifdef ASSERT // check that fields in JavaThread for exception oop and issuing pc are empty - NOT_LP64(__ get_thread(thread);) Label oop_empty; __ cmpptr(Address(thread, JavaThread::exception_oop_offset()), 0); __ jcc(Assembler::equal, oop_empty); @@ -849,14 +656,10 @@ void Runtime1::generate_unwind_exception(StubAssembler *sasm) { __ bind(pc_empty); #endif - // clear the FPU stack in case any FPU results are left behind - NOT_LP64( __ empty_FPU_stack(); ) - // save exception_oop in callee-saved register to preserve it during runtime calls __ verify_not_null_oop(exception_oop); __ movptr(exception_oop_callee_saved, exception_oop); - NOT_LP64(__ get_thread(thread);) // Get return address (is on top of stack after leave). __ movptr(exception_pc, Address(rsp, 0)); @@ -906,18 +709,9 @@ OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { OopMap* oop_map = save_live_registers(sasm, num_rt_args); -#ifdef _LP64 const Register thread = r15_thread; // No need to worry about dummy __ mov(c_rarg0, thread); -#else - __ push(rax); // push dummy - - const Register thread = rdi; // is callee-saved register (Visual C++ calling conventions) - // push java thread (becomes first argument of C function) - __ get_thread(thread); - __ push(thread); -#endif // _LP64 __ set_last_Java_frame(thread, noreg, rbp, nullptr, rscratch1); // do the call __ call(RuntimeAddress(target)); @@ -937,10 +731,6 @@ OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { __ pop(rax); #endif __ reset_last_Java_frame(thread, true); -#ifndef _LP64 - __ pop(rcx); // discard thread arg - __ pop(rcx); // discard dummy -#endif // _LP64 // check for pending exceptions { Label L; @@ -1167,15 +957,8 @@ OopMapSet* Runtime1::generate_code_for(C1StubId id, StubAssembler* sasm) { // This is called via call_runtime so the arguments // will be place in C abi 
locations -#ifdef _LP64 __ verify_oop(c_rarg0); __ mov(rax, c_rarg0); -#else - // The object is passed on the stack and we haven't pushed a - // frame yet so it's one work away from top of stack. - __ movptr(rax, Address(rsp, 1 * BytesPerWord)); - __ verify_oop(rax); -#endif // _LP64 // load the klass and check the has finalizer flag Label register_finalizer; @@ -1414,9 +1197,8 @@ OopMapSet* Runtime1::generate_code_for(C1StubId id, StubAssembler* sasm) { // the live registers get saved. save_live_registers(sasm, 1); - __ NOT_LP64(push(rax)) LP64_ONLY(mov(c_rarg0, rax)); + __ mov(c_rarg0, rax); __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, static_cast(SharedRuntime::dtrace_object_alloc)))); - NOT_LP64(__ pop(rax)); restore_live_registers(sasm); } @@ -1424,7 +1206,6 @@ OopMapSet* Runtime1::generate_code_for(C1StubId id, StubAssembler* sasm) { case C1StubId::fpu2long_stub_id: { -#ifdef _LP64 Label done; __ cvttsd2siq(rax, Address(rsp, wordSize)); __ cmp64(rax, ExternalAddress((address) StubRoutines::x86::double_sign_flip())); @@ -1436,78 +1217,6 @@ OopMapSet* Runtime1::generate_code_for(C1StubId id, StubAssembler* sasm) { __ pop(rax); __ bind(done); __ ret(0); -#else - // rax, and rdx are destroyed, but should be free since the result is returned there - // preserve rsi,ecx - __ push(rsi); - __ push(rcx); - - // check for NaN - Label return0, do_return, return_min_jlong, do_convert; - - Address value_high_word(rsp, wordSize + 4); - Address value_low_word(rsp, wordSize); - Address result_high_word(rsp, 3*wordSize + 4); - Address result_low_word(rsp, 3*wordSize); - - __ subptr(rsp, 32); // more than enough on 32bit - __ fst_d(value_low_word); - __ movl(rax, value_high_word); - __ andl(rax, 0x7ff00000); - __ cmpl(rax, 0x7ff00000); - __ jcc(Assembler::notEqual, do_convert); - __ movl(rax, value_high_word); - __ andl(rax, 0xfffff); - __ orl(rax, value_low_word); - __ jcc(Assembler::notZero, return0); - - __ bind(do_convert); - __ fnstcw(Address(rsp, 0)); - __ 
movzwl(rax, Address(rsp, 0)); - __ orl(rax, 0xc00); - __ movw(Address(rsp, 2), rax); - __ fldcw(Address(rsp, 2)); - __ fwait(); - __ fistp_d(result_low_word); - __ fldcw(Address(rsp, 0)); - __ fwait(); - // This gets the entire long in rax on 64bit - __ movptr(rax, result_low_word); - // testing of high bits - __ movl(rdx, result_high_word); - __ mov(rcx, rax); - // What the heck is the point of the next instruction??? - __ xorl(rcx, 0x0); - __ movl(rsi, 0x80000000); - __ xorl(rsi, rdx); - __ orl(rcx, rsi); - __ jcc(Assembler::notEqual, do_return); - __ fldz(); - __ fcomp_d(value_low_word); - __ fnstsw_ax(); - __ sahf(); - __ jcc(Assembler::above, return_min_jlong); - // return max_jlong - __ movl(rdx, 0x7fffffff); - __ movl(rax, 0xffffffff); - __ jmp(do_return); - - __ bind(return_min_jlong); - __ movl(rdx, 0x80000000); - __ xorl(rax, rax); - __ jmp(do_return); - - __ bind(return0); - __ fpop(); - __ xorptr(rdx,rdx); - __ xorptr(rax,rax); - - __ bind(do_return); - __ addptr(rsp, 32); - __ pop(rcx); - __ pop(rsi); - __ ret(0); -#endif // _LP64 } break; diff --git a/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp b/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp index 44f897529e7ce..3aba7fcf9d579 100644 --- a/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp +++ b/src/hotspot/cpu/x86/c2_CodeStubs_x86.cpp @@ -44,22 +44,8 @@ void C2SafepointPollStub::emit(C2_MacroAssembler& masm) { __ bind(entry()); InternalAddress safepoint_pc(masm.pc() - masm.offset() + _safepoint_offset); -#ifdef _LP64 __ lea(rscratch1, safepoint_pc); __ movptr(Address(r15_thread, JavaThread::saved_exception_pc_offset()), rscratch1); -#else - const Register tmp1 = rcx; - const Register tmp2 = rdx; - __ push(tmp1); - __ push(tmp2); - - __ lea(tmp1, safepoint_pc); - __ get_thread(tmp2); - __ movptr(Address(tmp2, JavaThread::saved_exception_pc_offset()), tmp1); - - __ pop(tmp2); - __ pop(tmp1); -#endif __ jump(callback_addr); } diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp 
b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index 50ed4750d47fd..4df26c69c5bd5 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -50,7 +50,7 @@ #endif // C2 compiled method's prolog code. -void C2_MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub) { +void C2_MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool is_stub) { // WARNING: Initial instruction MUST be 5 bytes or longer so that // NativeJump::patch_verified_entry will be able to patch out the entry @@ -108,16 +108,6 @@ void C2_MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool movptr(Address(rsp, framesize), (int32_t)0xbadb100d); } -#ifndef _LP64 - // If method sets FPU control word do it now - if (fp_mode_24b) { - fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); - } - if (UseSSE >= 2 && VerifyFPU) { - verify_FPU(0, "FPU stack must be clean on entry"); - } -#endif - #ifdef ASSERT if (VerifyStackAtCalls) { Label L; @@ -134,7 +124,6 @@ void C2_MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool if (!is_stub) { BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); - #ifdef _LP64 if (BarrierSet::barrier_set()->barrier_set_nmethod() != nullptr) { // We put the non-hot code of the nmethod entry barrier out-of-line in a stub. Label dummy_slow_path; @@ -150,10 +139,6 @@ void C2_MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool } bs->nmethod_entry_barrier(this, slow_path, continuation); } -#else - // Don't bother with out-of-line nmethod entry barrier stub for x86_32. - bs->nmethod_entry_barrier(this, nullptr /* slow_path */, nullptr /* continuation */); -#endif } } @@ -302,7 +287,7 @@ void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmp // Locked by current thread if difference with current SP is less than one page. 
subptr(tmpReg, rsp); // Next instruction set ZFlag == 1 (Success) if difference is less then one page. - andptr(tmpReg, (int32_t) (NOT_LP64(0xFFFFF003) LP64_ONLY(7 - (int)os::vm_page_size())) ); + andptr(tmpReg, (int32_t) (7 - (int)os::vm_page_size())); movptr(Address(boxReg, 0), tmpReg); } jmp(DONE_LABEL); @@ -310,10 +295,6 @@ void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmp bind(IsInflated); // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markWord::monitor_value -#ifndef _LP64 - // Just take slow path to avoid dealing with 64 bit atomic instructions here. - orl(boxReg, 1); // set ICC.ZF=0 to indicate failure -#else // Unconditionally set box->_displaced_header = markWord::unused_mark(). // Without cast to int32_t this style of movptr will destroy r10 which is typically obj. movptr(Address(boxReg, 0), checked_cast(markWord::unused_mark().value())); @@ -332,7 +313,7 @@ void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmp jccb(Assembler::notEqual, NO_COUNT); // If not recursive, ZF = 0 at this point (fail) incq(Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); xorq(rax, rax); // Set ZF = 1 (success) for recursive lock, denoting locking success -#endif // _LP64 + bind(DONE_LABEL); // ZFlag == 1 count in fast path @@ -341,10 +322,8 @@ void C2_MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmp bind(COUNT); if (LockingMode == LM_LEGACY) { -#ifdef _LP64 // Count monitors in fast path increment(Address(thread, JavaThread::held_monitor_count_offset())); -#endif } xorl(tmpReg, tmpReg); // Set ZF == 1 @@ -407,11 +386,6 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t // It's inflated. -#ifndef _LP64 - // Just take slow path to avoid dealing with 64 bit atomic instructions here. 
- orl(boxReg, 1); // set ICC.ZF=0 to indicate failure - jmpb(DONE_LABEL); -#else // Despite our balanced locking property we still check that m->_owner == Self // as java routines or native JNI code called by this thread might // have released the lock. @@ -468,7 +442,6 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t bind (LSuccess); testl (boxReg, 0); // set ICC.ZF=1 to indicate success jmpb (DONE_LABEL); -#endif // _LP64 if (LockingMode == LM_LEGACY) { bind (Stacked); @@ -488,9 +461,7 @@ void C2_MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register t if (LockingMode == LM_LEGACY) { // Count monitors in fast path -#ifdef _LP64 decrementq(Address(r15_thread, JavaThread::held_monitor_count_offset())); -#endif } xorl(tmpReg, tmpReg); // Set ZF == 1 @@ -569,11 +540,6 @@ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Regist { // Handle inflated monitor. bind(inflated); -#ifndef _LP64 - // Just take slow path to avoid dealing with 64 bit atomic instructions here. - orl(box, 1); // set ICC.ZF=0 to indicate failure - jmpb(slow_path); -#else const Register monitor = t; if (!UseObjectMonitorTable) { @@ -639,7 +605,6 @@ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box, Regist increment(recursions_address); bind(monitor_locked); -#endif // _LP64 } bind(locked); @@ -752,11 +717,6 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, bind(inflated); -#ifndef _LP64 - // Just take slow path to avoid dealing with 64 bit atomic instructions here. - orl(t, 1); // set ICC.ZF=0 to indicate failure - jmpb(slow_path); -#else if (!UseObjectMonitorTable) { assert(mark == monitor, "should be the same here"); } else { @@ -808,7 +768,6 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register reg_rax, // Recursive unlock. 
bind(recursive); decrement(recursions_address); -#endif // _LP64 } bind(unlocked); @@ -1182,7 +1141,6 @@ void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst, XMMRegister zero, Label DONE_LABEL; if (opcode == Op_SignumF) { - assert(UseSSE > 0, "required"); ucomiss(dst, zero); jcc(Assembler::equal, DONE_LABEL); // handle special case +0.0/-0.0, if argument is +0.0/-0.0, return argument jcc(Assembler::parity, DONE_LABEL); // handle special case NaN, if argument NaN, return NaN @@ -1190,7 +1148,6 @@ void C2_MacroAssembler::signum_fp(int opcode, XMMRegister dst, XMMRegister zero, jcc(Assembler::above, DONE_LABEL); xorps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), noreg); } else if (opcode == Op_SignumD) { - assert(UseSSE > 1, "required"); ucomisd(dst, zero); jcc(Assembler::equal, DONE_LABEL); // handle special case +0.0/-0.0, if argument is +0.0/-0.0, return argument jcc(Assembler::parity, DONE_LABEL); // handle special case NaN, if argument NaN, return NaN @@ -1530,7 +1487,6 @@ void C2_MacroAssembler::vinsert(BasicType typ, XMMRegister dst, XMMRegister src, } } -#ifdef _LP64 void C2_MacroAssembler::vgather8b_masked_offset(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base, @@ -1569,7 +1525,6 @@ void C2_MacroAssembler::vgather8b_masked_offset(BasicType elem_bt, } } } -#endif // _LP64 void C2_MacroAssembler::vgather8b_offset(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base, @@ -1641,7 +1596,7 @@ void C2_MacroAssembler::vgather_subword(BasicType elem_ty, XMMRegister dst, if (mask == noreg) { vgather8b_offset(elem_ty, temp_dst, base, idx_base, offset, rtmp, vlen_enc); } else { - LP64_ONLY(vgather8b_masked_offset(elem_ty, temp_dst, base, idx_base, offset, mask, mask_idx, rtmp, vlen_enc)); + vgather8b_masked_offset(elem_ty, temp_dst, base, idx_base, offset, mask, mask_idx, rtmp, vlen_enc); } // TEMP_PERM_VEC(temp_dst) = PERMUTE TMP_VEC_64(temp_dst) PERM_INDEX(xtmp1) vpermd(temp_dst, xtmp1, temp_dst, 
vlen_enc == Assembler::AVX_512bit ? vlen_enc : Assembler::AVX_256bit); @@ -2045,7 +2000,6 @@ void C2_MacroAssembler::reduceI(int opcode, int vlen, } } -#ifdef _LP64 void C2_MacroAssembler::reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { @@ -2057,7 +2011,6 @@ void C2_MacroAssembler::reduceL(int opcode, int vlen, default: assert(false, "wrong vector length"); } } -#endif // _LP64 void C2_MacroAssembler::reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) { switch (vlen) { @@ -2307,7 +2260,6 @@ void C2_MacroAssembler::reduce32S(int opcode, Register dst, Register src1, XMMRe reduce16S(opcode, dst, src1, vtmp1, vtmp1, vtmp2); } -#ifdef _LP64 void C2_MacroAssembler::reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { pshufd(vtmp2, src2, 0xE); reduce_operation_128(T_LONG, opcode, vtmp2, src2); @@ -2333,7 +2285,6 @@ void C2_MacroAssembler::genmask(KRegister dst, Register len, Register temp) { bzhiq(temp, temp, len); kmovql(dst, temp); } -#endif // _LP64 void C2_MacroAssembler::reduce2F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp) { reduce_operation_128(T_FLOAT, opcode, dst, src); @@ -2749,7 +2700,6 @@ void C2_MacroAssembler::vpadd(BasicType elem_bt, XMMRegister dst, XMMRegister sr } } -#ifdef _LP64 void C2_MacroAssembler::vpbroadcast(BasicType elem_bt, XMMRegister dst, Register src, int vlen_enc) { assert(UseAVX >= 2, "required"); bool is_bw = ((elem_bt == T_BYTE) || (elem_bt == T_SHORT)); @@ -2778,7 +2728,6 @@ void C2_MacroAssembler::vpbroadcast(BasicType elem_bt, XMMRegister dst, Register } } } -#endif void C2_MacroAssembler::vconvert_b2x(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vlen_enc) { switch (to_elem_bt) { @@ -3846,7 +3795,6 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2, // In a loop, compare 16-chars (32-bytes) at once using 
(vpxor+vptest) bind(COMPARE_WIDE_VECTORS_LOOP); -#ifdef _LP64 if ((AVX3Threshold == 0) && VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop cmpl(cnt2, stride2x2); jccb(Assembler::below, COMPARE_WIDE_VECTORS_LOOP_AVX2); @@ -3870,8 +3818,6 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2, vpxor(vec1, vec1); jmpb(COMPARE_WIDE_TAIL); }//if (VM_Version::supports_avx512vlbw()) -#endif // _LP64 - bind(COMPARE_WIDE_VECTORS_LOOP_AVX2); if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) { @@ -4040,7 +3986,6 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2, } jmpb(DONE_LABEL); -#ifdef _LP64 if (VM_Version::supports_avx512vlbw()) { bind(COMPARE_WIDE_VECTORS_LOOP_FAILED); @@ -4066,7 +4011,6 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2, subl(result, cnt1); jmpb(POP_LABEL); }//if (VM_Version::supports_avx512vlbw()) -#endif // _LP64 // Discard the stored length difference bind(POP_LABEL); @@ -4141,7 +4085,6 @@ void C2_MacroAssembler::count_positives(Register ary1, Register len, // check the tail for absense of negatives // ~(~0 << len) applied up to two times (for 32-bit scenario) -#ifdef _LP64 { Register tmp3_aliased = len; mov64(tmp3_aliased, 0xFFFFFFFFFFFFFFFF); @@ -4149,33 +4092,7 @@ void C2_MacroAssembler::count_positives(Register ary1, Register len, notq(tmp3_aliased); kmovql(mask2, tmp3_aliased); } -#else - Label k_init; - jmp(k_init); - - // We could not read 64-bits from a general purpose register thus we move - // data required to compose 64 1's to the instruction stream - // We emit 64 byte wide series of elements from 0..63 which later on would - // be used as a compare targets with tail count contained in tmp1 register. - // Result would be a k register having tmp1 consecutive number or 1 - // counting from least significant bit. 
- address tmp = pc(); - emit_int64(0x0706050403020100); - emit_int64(0x0F0E0D0C0B0A0908); - emit_int64(0x1716151413121110); - emit_int64(0x1F1E1D1C1B1A1918); - emit_int64(0x2726252423222120); - emit_int64(0x2F2E2D2C2B2A2928); - emit_int64(0x3736353433323130); - emit_int64(0x3F3E3D3C3B3A3938); - - bind(k_init); - lea(len, InternalAddress(tmp)); - // create mask to test for negative byte inside a vector - evpbroadcastb(vec1, tmp1, Assembler::AVX_512bit); - evpcmpgtb(mask2, vec1, Address(len, 0), Assembler::AVX_512bit); -#endif evpcmpgtb(mask1, mask2, vec2, Address(ary1, 0), Assembler::AVX_512bit); ktestq(mask1, mask2); jcc(Assembler::zero, DONE); @@ -4198,7 +4115,7 @@ void C2_MacroAssembler::count_positives(Register ary1, Register len, // Fallthru to tail compare } else { - if (UseAVX >= 2 && UseSSE >= 2) { + if (UseAVX >= 2) { // With AVX2, use 32-byte vector compare Label COMPARE_WIDE_VECTORS, BREAK_LOOP; @@ -4345,7 +4262,7 @@ void C2_MacroAssembler::count_positives(Register ary1, Register len, // That's it bind(DONE); - if (UseAVX >= 2 && UseSSE >= 2) { + if (UseAVX >= 2) { // clean upper bits of YMM registers vpxor(vec1, vec1); vpxor(vec2, vec2); @@ -4422,7 +4339,6 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register lea(ary2, Address(ary2, limit, Address::times_1)); negptr(limit); -#ifdef _LP64 if ((AVX3Threshold == 0) && VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop Label COMPARE_WIDE_VECTORS_LOOP_AVX2, COMPARE_WIDE_VECTORS_LOOP_AVX3; @@ -4459,7 +4375,7 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register bind(COMPARE_WIDE_VECTORS_LOOP_AVX2); }//if (VM_Version::supports_avx512vlbw()) -#endif //_LP64 + bind(COMPARE_WIDE_VECTORS); vmovdqu(vec1, Address(ary1, limit, scaleFactor)); if (expand_ary2) { @@ -4626,8 +4542,6 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register } } -#ifdef _LP64 - static void convertF2I_slowpath(C2_MacroAssembler& masm, 
C2GeneralStub& stub) { #define __ masm. Register dst = stub.data<0>(); @@ -4674,8 +4588,6 @@ void C2_MacroAssembler::convertF2I(BasicType dst_bt, BasicType src_bt, Register bind(stub->continuation()); } -#endif // _LP64 - void C2_MacroAssembler::evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst, XMMRegister src1, int imm8, bool merge, int vlen_enc) { switch(ideal_opc) { @@ -5335,7 +5247,6 @@ void C2_MacroAssembler::vector_castD2X_evex(BasicType to_elem_bt, XMMRegister ds } } -#ifdef _LP64 void C2_MacroAssembler::vector_round_double_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, AddressLiteral new_mxcsr, int vec_enc, Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2) { @@ -5387,7 +5298,6 @@ void C2_MacroAssembler::vector_round_float_avx(XMMRegister dst, XMMRegister src, ldmxcsr(ExternalAddress(StubRoutines::x86::addr_mxcsr_std()), tmp /*rscratch*/); } -#endif // _LP64 void C2_MacroAssembler::vector_unsigned_cast(XMMRegister dst, XMMRegister src, int vlen_enc, BasicType from_elem_bt, BasicType to_elem_bt) { @@ -5518,7 +5428,6 @@ void C2_MacroAssembler::evpternlog(XMMRegister dst, int func, KRegister mask, XM } } -#ifdef _LP64 void C2_MacroAssembler::vector_long_to_maskvec(XMMRegister dst, Register src, Register rtmp1, Register rtmp2, XMMRegister xtmp, int mask_len, int vec_enc) { @@ -5776,7 +5685,6 @@ void C2_MacroAssembler::vector_compress_expand(int opcode, XMMRegister dst, XMMR } } } -#endif void C2_MacroAssembler::vector_signum_evex(int opcode, XMMRegister dst, XMMRegister src, XMMRegister zero, XMMRegister one, KRegister ktmp1, int vec_enc) { @@ -5841,8 +5749,7 @@ void C2_MacroAssembler::vector_maskall_operation(KRegister dst, Register src, in void C2_MacroAssembler::vbroadcast(BasicType bt, XMMRegister dst, int imm32, Register rtmp, int vec_enc) { int lane_size = type2aelembytes(bt); - bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); - if ((is_LP64 || lane_size < 8) && + if 
((lane_size < 8) && ((is_non_subword_integral_type(bt) && VM_Version::supports_avx512vl()) || (is_subword_type(bt) && VM_Version::supports_avx512vlbw()))) { movptr(rtmp, imm32); @@ -5856,7 +5763,7 @@ void C2_MacroAssembler::vbroadcast(BasicType bt, XMMRegister dst, int imm32, Reg } } else { movptr(rtmp, imm32); - LP64_ONLY(movq(dst, rtmp)) NOT_LP64(movdl(dst, rtmp)); + movq(dst, rtmp); switch(lane_size) { case 1 : vpbroadcastb(dst, dst, vec_enc); break; case 2 : vpbroadcastw(dst, dst, vec_enc); break; @@ -5991,14 +5898,6 @@ void C2_MacroAssembler::vector_popcount_integral_evex(BasicType bt, XMMRegister } } -#ifndef _LP64 -void C2_MacroAssembler::vector_maskall_operation32(KRegister dst, Register src, KRegister tmp, int mask_len) { - assert(VM_Version::supports_avx512bw(), ""); - kmovdl(tmp, src); - kunpckdql(dst, tmp, tmp); -} -#endif - // Bit reversal algorithm first reverses the bits of each byte followed by // a byte level reversal for multi-byte primitive types (short/int/long). // Algorithm performs a lookup table access to get reverse bit sequence @@ -6452,7 +6351,6 @@ void C2_MacroAssembler::udivmodI(Register rax, Register divisor, Register rdx, R bind(done); } -#ifdef _LP64 void C2_MacroAssembler::reverseI(Register dst, Register src, XMMRegister xtmp1, XMMRegister xtmp2, Register rtmp) { if(VM_Version::supports_gfni()) { @@ -6616,7 +6514,6 @@ void C2_MacroAssembler::udivmodL(Register rax, Register divisor, Register rdx, R subq(rdx, tmp); // remainder bind(done); } -#endif void C2_MacroAssembler::rearrange_bytes(XMMRegister dst, XMMRegister shuffle, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, KRegister ktmp, diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp index 6e49cdefa6c94..d3d3f71fc0d8d 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp @@ -29,7 +29,7 @@ public: // C2 compiled method's prolog 
code. - void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub); + void verified_entry(int framesize, int stack_bang_size, bool is_stub); Assembler::AvxVectorLen vector_length_encoding(int vlen_in_bytes); @@ -130,9 +130,7 @@ // Covert B2X void vconvert_b2x(BasicType to_elem_bt, XMMRegister dst, XMMRegister src, int vlen_enc); -#ifdef _LP64 void vpbroadcast(BasicType elem_bt, XMMRegister dst, Register src, int vlen_enc); -#endif // blend void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, XMMRegister src2, int comparison, int vector_len); @@ -152,10 +150,8 @@ // dst = src1 reduce(op, src2) using vtmp as temps void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); -#ifdef _LP64 void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); void genmask(KRegister dst, Register len, Register temp); -#endif // _LP64 // dst = reduce(op, src2) using vtmp as temps void reduce_fp(int opcode, int vlen, @@ -202,11 +198,9 @@ void reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); // Long Reduction -#ifdef _LP64 void reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); void reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); void reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); -#endif // _LP64 // Float Reduction void reduce2F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp); @@ -237,7 +231,6 @@ void unordered_reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2); public: -#ifdef _LP64 void vector_mask_operation_helper(int opc, Register dst, Register tmp, int masklen); void vector_mask_operation(int opc, Register 
dst, KRegister mask, Register tmp, int masklen, int masksize, int vec_enc); @@ -246,14 +239,9 @@ Register tmp, int masklen, BasicType bt, int vec_enc); void vector_long_to_maskvec(XMMRegister dst, Register src, Register rtmp1, Register rtmp2, XMMRegister xtmp, int mask_len, int vec_enc); -#endif void vector_maskall_operation(KRegister dst, Register src, int mask_len); -#ifndef _LP64 - void vector_maskall_operation32(KRegister dst, Register src, KRegister ktmp, int mask_len); -#endif - void string_indexof_char(Register str1, Register cnt1, Register ch, Register result, XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp); @@ -313,9 +301,7 @@ void arrays_hashcode_elvload(XMMRegister dst, AddressLiteral src, BasicType eltype); void arrays_hashcode_elvcast(XMMRegister dst, BasicType eltype); -#ifdef _LP64 void convertF2I(BasicType dst_bt, BasicType src_bt, Register dst, XMMRegister src); -#endif void evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst, XMMRegister src1, XMMRegister src2, @@ -390,7 +376,6 @@ void vector_mask_cast(XMMRegister dst, XMMRegister src, BasicType dst_bt, BasicType src_bt, int vlen); -#ifdef _LP64 void vector_round_double_evex(XMMRegister dst, XMMRegister src, AddressLiteral double_sign_flip, AddressLiteral new_mxcsr, int vec_enc, Register tmp, XMMRegister xtmp1, XMMRegister xtmp2, KRegister ktmp1, KRegister ktmp2); @@ -403,13 +388,11 @@ void vector_compress_expand_avx2(int opcode, XMMRegister dst, XMMRegister src, XMMRegister mask, Register rtmp, Register rscratch, XMMRegister permv, XMMRegister xtmp, BasicType bt, int vec_enc); -#endif // _LP64 void udivI(Register rax, Register divisor, Register rdx); void umodI(Register rax, Register divisor, Register rdx); void udivmodI(Register rax, Register divisor, Register rdx, Register tmp); -#ifdef _LP64 void reverseI(Register dst, Register src, XMMRegister xtmp1, XMMRegister xtmp2, Register rtmp); void reverseL(Register dst, Register src, XMMRegister xtmp1, @@ 
-417,7 +400,6 @@ void udivL(Register rax, Register divisor, Register rdx); void umodL(Register rax, Register divisor, Register rdx); void udivmodL(Register rax, Register divisor, Register rdx, Register tmp); -#endif void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, XMMRegister src3, bool merge, BasicType bt, int vlen_enc); @@ -510,10 +492,8 @@ Register mask, XMMRegister xtmp1, XMMRegister xtmp2, XMMRegister xtmp3, Register rtmp, Register midx, Register length, int vector_len, int vlen_enc); -#ifdef _LP64 void vgather8b_masked_offset(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base, Register offset, Register mask, Register midx, Register rtmp, int vlen_enc); -#endif void vgather8b_offset(BasicType elem_bt, XMMRegister dst, Register base, Register idx_base, Register offset, Register rtmp, int vlen_enc); diff --git a/src/hotspot/cpu/x86/c2_globals_x86.hpp b/src/hotspot/cpu/x86/c2_globals_x86.hpp index 64dda0a8947e4..5673e7141370b 100644 --- a/src/hotspot/cpu/x86/c2_globals_x86.hpp +++ b/src/hotspot/cpu/x86/c2_globals_x86.hpp @@ -45,7 +45,6 @@ define_pd_global(intx, ConditionalMoveLimit, 3); define_pd_global(intx, FreqInlineSize, 325); define_pd_global(intx, MinJumpTableSize, 10); define_pd_global(intx, LoopPercentProfileLimit, 10); -#ifdef AMD64 define_pd_global(intx, InteriorEntryAlignment, 16); define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K)); define_pd_global(intx, LoopUnrollLimit, 60); @@ -55,17 +54,6 @@ define_pd_global(uintx, CodeCacheExpansionSize, 64*K); // Ergonomics related flags define_pd_global(uint64_t, MaxRAM, 128ULL*G); -#else -define_pd_global(intx, InteriorEntryAlignment, 4); -define_pd_global(size_t, NewSizeThreadIncrease, 4*K); -define_pd_global(intx, LoopUnrollLimit, 50); // Design center runs on 1.3.1 -// InitialCodeCacheSize derived from specjbb2000 run. 
-define_pd_global(uintx, InitialCodeCacheSize, 2304*K); // Integral multiple of CodeCacheExpansionSize -define_pd_global(uintx, CodeCacheExpansionSize, 32*K); - -// Ergonomics related flags -define_pd_global(uint64_t, MaxRAM, 4ULL*G); -#endif // AMD64 define_pd_global(intx, RegisterCostAreaRatio, 16000); // Peephole and CISC spilling both break the graph, and so makes the diff --git a/src/hotspot/cpu/x86/c2_init_x86.cpp b/src/hotspot/cpu/x86/c2_init_x86.cpp index ee8937230b7b4..9e2d917bdcdaa 100644 --- a/src/hotspot/cpu/x86/c2_init_x86.cpp +++ b/src/hotspot/cpu/x86/c2_init_x86.cpp @@ -34,13 +34,6 @@ extern void reg_mask_init(); void Compile::pd_compiler2_init() { guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); - // QQQ presumably all 64bit cpu's support this. Seems like the ifdef could - // simply be left out. -#ifndef AMD64 - if (!VM_Version::supports_cmov()) { - ConditionalMoveLimit = 0; - } -#endif // AMD64 if (UseAVX < 3) { int delta = XMMRegister::max_slots_per_register * XMMRegister::number_of_registers; diff --git a/src/hotspot/cpu/x86/compiledIC_x86.cpp b/src/hotspot/cpu/x86/compiledIC_x86.cpp index 51563d35d5dfd..72f5100a06479 100644 --- a/src/hotspot/cpu/x86/compiledIC_x86.cpp +++ b/src/hotspot/cpu/x86/compiledIC_x86.cpp @@ -62,8 +62,7 @@ address CompiledDirectCall::emit_to_interp_stub(MacroAssembler *masm, address ma #undef __ int CompiledDirectCall::to_interp_stub_size() { - return NOT_LP64(10) // movl; jmp - LP64_ONLY(15); // movq (1+1+8); jmp (1+4) + return 15; // movq (1+1+8); jmp (1+4) } int CompiledDirectCall::to_trampoline_stub_size() { diff --git a/src/hotspot/cpu/x86/compressedKlass_x86.cpp b/src/hotspot/cpu/x86/compressedKlass_x86.cpp index 5b5a405bcef86..5eb6d98b4ea8a 100644 --- a/src/hotspot/cpu/x86/compressedKlass_x86.cpp +++ b/src/hotspot/cpu/x86/compressedKlass_x86.cpp @@ -25,8 +25,6 @@ #include "precompiled.hpp" -#ifdef _LP64 - #include "oops/compressedKlass.hpp" #include "utilities/globalDefinitions.hpp" @@ -45,5 +43,3 @@ 
char* CompressedKlassPointers::reserve_address_space_for_compressed_classes(size // Nothing more to optimize for on x64. If base != 0, we will always emit the full 64-bit immediate. return result; } - -#endif // _LP64 diff --git a/src/hotspot/cpu/x86/continuationFreezeThaw_x86.inline.hpp b/src/hotspot/cpu/x86/continuationFreezeThaw_x86.inline.hpp index ba8fcb3aa9c51..ab536d12a75ed 100644 --- a/src/hotspot/cpu/x86/continuationFreezeThaw_x86.inline.hpp +++ b/src/hotspot/cpu/x86/continuationFreezeThaw_x86.inline.hpp @@ -261,14 +261,12 @@ template frame ThawBase::new_stack_frame(const frame& hf, frame& } inline intptr_t* ThawBase::align(const frame& hf, intptr_t* frame_sp, frame& caller, bool bottom) { -#ifdef _LP64 if (((intptr_t)frame_sp & 0xf) != 0) { assert(caller.is_interpreted_frame() || (bottom && hf.compiled_frame_stack_argsize() % 2 != 0), ""); frame_sp--; caller.set_sp(caller.sp() - 1); } assert(is_aligned(frame_sp, frame::frame_alignment), ""); -#endif return frame_sp; } diff --git a/src/hotspot/cpu/x86/continuationHelper_x86.inline.hpp b/src/hotspot/cpu/x86/continuationHelper_x86.inline.hpp index 46fe0946951e5..6d72e1b80e893 100644 --- a/src/hotspot/cpu/x86/continuationHelper_x86.inline.hpp +++ b/src/hotspot/cpu/x86/continuationHelper_x86.inline.hpp @@ -55,18 +55,11 @@ static inline void patch_return_pc_with_preempt_stub(frame& f) { } inline int ContinuationHelper::frame_align_words(int size) { -#ifdef _LP64 return size & 1; -#else - return 0; -#endif } inline intptr_t* ContinuationHelper::frame_align_pointer(intptr_t* sp) { -#ifdef _LP64 - sp = align_down(sp, frame::frame_alignment); -#endif - return sp; + return align_down(sp, frame::frame_alignment); } template diff --git a/src/hotspot/cpu/x86/copy_x86.hpp b/src/hotspot/cpu/x86/copy_x86.hpp index 1798e74eb0636..110900af62f24 100644 --- a/src/hotspot/cpu/x86/copy_x86.hpp +++ b/src/hotspot/cpu/x86/copy_x86.hpp @@ -28,19 +28,11 @@ #include OS_CPU_HEADER(copy) static void pd_fill_to_words(HeapWord* tohw, 
size_t count, juint value) { -#ifdef AMD64 julong* to = (julong*) tohw; julong v = ((julong) value << 32) | value; while (count-- > 0) { *to++ = v; } -#else - juint* to = (juint*)tohw; - count *= HeapWordSize / BytesPerInt; - while (count-- > 0) { - *to++ = value; - } -#endif // AMD64 } static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { @@ -60,52 +52,10 @@ static void pd_zero_to_bytes(void* to, size_t count) { } static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -#if defined AMD64 || defined _WINDOWS (void)memmove(to, from, count * HeapWordSize); -#else - // Includes a zero-count check. - intx temp = 0; - __asm__ volatile(" testl %6,%6 ;" - " jz 7f ;" - " cmpl %4,%5 ;" - " leal -4(%4,%6,4),%3;" - " jbe 1f ;" - " cmpl %7,%5 ;" - " jbe 4f ;" - "1: cmpl $32,%6 ;" - " ja 3f ;" - " subl %4,%1 ;" - "2: movl (%4),%3 ;" - " movl %7,(%5,%4,1) ;" - " addl $4,%0 ;" - " subl $1,%2 ;" - " jnz 2b ;" - " jmp 7f ;" - "3: rep; smovl ;" - " jmp 7f ;" - "4: cmpl $32,%2 ;" - " movl %7,%0 ;" - " leal -4(%5,%6,4),%1;" - " ja 6f ;" - " subl %4,%1 ;" - "5: movl (%4),%3 ;" - " movl %7,(%5,%4,1) ;" - " subl $4,%0 ;" - " subl $1,%2 ;" - " jnz 5b ;" - " jmp 7f ;" - "6: std ;" - " rep; smovl ;" - " cld ;" - "7: nop " - : "=S" (from), "=D" (to), "=c" (count), "=r" (temp) - : "0" (from), "1" (to), "2" (count), "3" (temp) - : "memory", "flags"); -#endif // AMD64 } static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { -#ifdef AMD64 switch (count) { case 8: to[7] = from[7]; case 7: to[6] = from[6]; @@ -120,39 +70,10 @@ static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) (void)memcpy(to, from, count * HeapWordSize); break; } -#else -#if defined _WINDOWS - (void)memcpy(to, from, count * HeapWordSize); -#else - // Includes a zero-count check. 
- intx temp = 0; - __asm__ volatile(" testl %6,%6 ;" - " jz 3f ;" - " cmpl $32,%6 ;" - " ja 2f ;" - " subl %4,%1 ;" - "1: movl (%4),%3 ;" - " movl %7,(%5,%4,1);" - " addl $4,%0 ;" - " subl $1,%2 ;" - " jnz 1b ;" - " jmp 3f ;" - "2: rep; smovl ;" - "3: nop " - : "=S" (from), "=D" (to), "=c" (count), "=r" (temp) - : "0" (from), "1" (to), "2" (count), "3" (temp) - : "memory", "cc"); -#endif // _WINDOWS -#endif // AMD64 } static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { -#ifdef AMD64 shared_disjoint_words_atomic(from, to, count); -#else - // pd_disjoint_words is word-atomic in this implementation. - pd_disjoint_words(from, to, count); -#endif // AMD64 } static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { @@ -164,82 +85,7 @@ static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t } static void pd_conjoint_bytes(const void* from, void* to, size_t count) { -#if defined AMD64 || defined _WINDOWS (void)memmove(to, from, count); -#else - // Includes a zero-count check. 
- intx temp = 0; - __asm__ volatile(" testl %6,%6 ;" - " jz 13f ;" - " cmpl %4,%5 ;" - " leal -1(%4,%6),%3 ;" - " jbe 1f ;" - " cmpl %7,%5 ;" - " jbe 8f ;" - "1: cmpl $3,%6 ;" - " jbe 6f ;" - " movl %6,%3 ;" - " movl $4,%2 ;" - " subl %4,%2 ;" - " andl $3,%2 ;" - " jz 2f ;" - " subl %6,%3 ;" - " rep; smovb ;" - "2: movl %7,%2 ;" - " shrl $2,%2 ;" - " jz 5f ;" - " cmpl $32,%2 ;" - " ja 4f ;" - " subl %4,%1 ;" - "3: movl (%4),%%edx ;" - " movl %%edx,(%5,%4,1);" - " addl $4,%0 ;" - " subl $1,%2 ;" - " jnz 3b ;" - " addl %4,%1 ;" - " jmp 5f ;" - "4: rep; smovl ;" - "5: movl %7,%2 ;" - " andl $3,%2 ;" - " jz 13f ;" - "6: xorl %7,%3 ;" - "7: movb (%4,%7,1),%%dl ;" - " movb %%dl,(%5,%7,1) ;" - " addl $1,%3 ;" - " subl $1,%2 ;" - " jnz 7b ;" - " jmp 13f ;" - "8: std ;" - " cmpl $12,%2 ;" - " ja 9f ;" - " movl %7,%0 ;" - " leal -1(%6,%5),%1 ;" - " jmp 11f ;" - "9: xchgl %3,%2 ;" - " movl %6,%0 ;" - " addl $1,%2 ;" - " leal -1(%7,%5),%1 ;" - " andl $3,%2 ;" - " jz 10f ;" - " subl %6,%3 ;" - " rep; smovb ;" - "10: movl %7,%2 ;" - " subl $3,%0 ;" - " shrl $2,%2 ;" - " subl $3,%1 ;" - " rep; smovl ;" - " andl $3,%3 ;" - " jz 12f ;" - " movl %7,%2 ;" - " addl $3,%0 ;" - " addl $3,%1 ;" - "11: rep; smovb ;" - "12: cld ;" - "13: nop ;" - : "=S" (from), "=D" (to), "=c" (count), "=r" (temp) - : "0" (from), "1" (to), "2" (count), "3" (temp) - : "memory", "flags", "%edx"); -#endif // AMD64 } static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { @@ -253,49 +99,16 @@ static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t co } static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { -#ifdef AMD64 _Copy_conjoint_jints_atomic(from, to, count); -#else - assert(HeapWordSize == BytesPerInt, "heapwords and jints must be the same size"); - // pd_conjoint_words is word-atomic in this implementation. 
- pd_conjoint_words((const HeapWord*)from, (HeapWord*)to, count); -#endif // AMD64 } static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { -#ifdef AMD64 _Copy_conjoint_jlongs_atomic(from, to, count); -#else - // Guarantee use of fild/fistp or xmm regs via some asm code, because compilers won't. - if (from > to) { - while (count-- > 0) { - __asm__ volatile("fildll (%0); fistpll (%1)" - : - : "r" (from), "r" (to) - : "memory" ); - ++from; - ++to; - } - } else { - while (count-- > 0) { - __asm__ volatile("fildll (%0,%2,8); fistpll (%1,%2,8)" - : - : "r" (from), "r" (to), "r" (count) - : "memory" ); - } - } -#endif // AMD64 } static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { -#ifdef AMD64 assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); _Copy_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); -#else - assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size"); - // pd_conjoint_words is word-atomic in this implementation. 
- pd_conjoint_words((const HeapWord*)from, (HeapWord*)to, count); -#endif // AMD64 } static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { @@ -307,28 +120,16 @@ static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size } static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { -#ifdef AMD64 - _Copy_arrayof_conjoint_jints(from, to, count); -#else - pd_conjoint_jints_atomic((const jint*)from, (jint*)to, count); -#endif // AMD64 + _Copy_arrayof_conjoint_jints(from, to, count); } static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { -#ifdef AMD64 _Copy_arrayof_conjoint_jlongs(from, to, count); -#else - pd_conjoint_jlongs_atomic((const jlong*)from, (jlong*)to, count); -#endif // AMD64 } static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { -#ifdef AMD64 assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); _Copy_arrayof_conjoint_jlongs(from, to, count); -#else - pd_conjoint_oops_atomic((const oop*)from, (oop*)to, count); -#endif // AMD64 } #endif // _WINDOWS diff --git a/src/hotspot/cpu/x86/downcallLinker_x86_32.cpp b/src/hotspot/cpu/x86/downcallLinker_x86_32.cpp deleted file mode 100644 index 4e549552e96da..0000000000000 --- a/src/hotspot/cpu/x86/downcallLinker_x86_32.cpp +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - -#include "precompiled.hpp" -#include "prims/downcallLinker.hpp" - -RuntimeStub* DowncallLinker::make_downcall_stub(BasicType* signature, - int num_args, - BasicType ret_bt, - const ABIDescriptor& abi, - const GrowableArray& input_registers, - const GrowableArray& output_registers, - bool needs_return_buffer, - int captured_state_mask, - bool needs_transition) { - Unimplemented(); - return nullptr; -} - -void DowncallLinker::StubGenerator::pd_add_offset_to_oop(VMStorage reg_oop, VMStorage reg_offset, - VMStorage tmp1, VMStorage tmp2) const { - Unimplemented(); -} diff --git a/src/hotspot/cpu/x86/foreignGlobals_x86_32.cpp b/src/hotspot/cpu/x86/foreignGlobals_x86_32.cpp deleted file mode 100644 index c62021c32637c..0000000000000 --- a/src/hotspot/cpu/x86/foreignGlobals_x86_32.cpp +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (c) 2022, 2023, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ - -#include "precompiled.hpp" -#include "code/vmreg.hpp" -#include "prims/foreignGlobals.hpp" -#include "utilities/debug.hpp" - -class MacroAssembler; - -bool ForeignGlobals::is_foreign_linker_supported() { - return false; -} - -const ABIDescriptor ForeignGlobals::parse_abi_descriptor(jobject jabi) { - Unimplemented(); - return {}; -} - -int RegSpiller::pd_reg_size(VMStorage reg) { - Unimplemented(); - return -1; -} - -void RegSpiller::pd_store_reg(MacroAssembler* masm, int offset, VMStorage reg) { - Unimplemented(); -} - -void RegSpiller::pd_load_reg(MacroAssembler* masm, int offset, VMStorage reg) { - Unimplemented(); -} - -void ArgumentShuffle::pd_generate(MacroAssembler* masm, VMStorage tmp, int in_stk_bias, int out_stk_bias) const { - Unimplemented(); -} diff --git a/src/hotspot/cpu/x86/frame_x86.cpp b/src/hotspot/cpu/x86/frame_x86.cpp index 4e28dc125341a..28a4eb9418962 100644 --- a/src/hotspot/cpu/x86/frame_x86.cpp +++ b/src/hotspot/cpu/x86/frame_x86.cpp @@ -574,14 +574,9 @@ BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) // then ST0 is saved before EAX/EDX. See the note in generate_native_result tos_addr = (intptr_t*)sp(); if (type == T_FLOAT || type == T_DOUBLE) { - // QQQ seems like this code is equivalent on the two platforms -#ifdef AMD64 // This is times two because we do a push(ltos) after pushing XMM0 // and that takes two interpreter stack slots. 
tos_addr += 2 * Interpreter::stackElementWords; -#else - tos_addr += 2; -#endif // AMD64 } } else { tos_addr = (intptr_t*)interpreter_frame_tos_address(); @@ -607,19 +602,7 @@ BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) case T_SHORT : value_result->s = *(jshort*)tos_addr; break; case T_INT : value_result->i = *(jint*)tos_addr; break; case T_LONG : value_result->j = *(jlong*)tos_addr; break; - case T_FLOAT : { -#ifdef AMD64 - value_result->f = *(jfloat*)tos_addr; -#else - if (method->is_native()) { - jdouble d = *(jdouble*)tos_addr; // Result was in ST0 so need to convert to jfloat - value_result->f = (jfloat)d; - } else { - value_result->f = *(jfloat*)tos_addr; - } -#endif // AMD64 - break; - } + case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; case T_VOID : /* Nothing to do */ break; default : ShouldNotReachHere(); @@ -649,7 +632,6 @@ void frame::describe_pd(FrameValues& values, int frame_no) { DESCRIBE_FP_OFFSET(interpreter_frame_locals); DESCRIBE_FP_OFFSET(interpreter_frame_bcp); DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); -#ifdef AMD64 } else if (is_entry_frame()) { // This could be more descriptive if we use the enum in // stubGenerator to map to real names but it's most important to @@ -657,7 +639,6 @@ void frame::describe_pd(FrameValues& values, int frame_no) { for (int i = 0; i < entry_frame_after_call_words; i++) { values.describe(frame_no, fp() - i, err_msg("call_stub word fp - %d", i)); } -#endif // AMD64 } if (is_java_frame() || Continuation::is_continuation_enterSpecial(*this)) { diff --git a/src/hotspot/cpu/x86/frame_x86.hpp b/src/hotspot/cpu/x86/frame_x86.hpp index f3034ee9263a5..f09a61ecbf770 100644 --- a/src/hotspot/cpu/x86/frame_x86.hpp +++ b/src/hotspot/cpu/x86/frame_x86.hpp @@ -80,7 +80,6 @@ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, // Entry frames -#ifdef AMD64 #ifdef _WIN64 
entry_frame_after_call_words = 28, entry_frame_call_wrapper_offset = 2, @@ -92,9 +91,6 @@ arg_reg_save_area_bytes = 0, #endif // _WIN64 -#else - entry_frame_call_wrapper_offset = 2, -#endif // AMD64 // size, in words, of frame metadata (e.g. pc and link) metadata_words = sender_sp_offset, diff --git a/src/hotspot/cpu/x86/frame_x86.inline.hpp b/src/hotspot/cpu/x86/frame_x86.inline.hpp index c74731d041095..48c2e0ff92c03 100644 --- a/src/hotspot/cpu/x86/frame_x86.inline.hpp +++ b/src/hotspot/cpu/x86/frame_x86.inline.hpp @@ -439,14 +439,10 @@ void frame::update_map_with_saved_link(RegisterMapT* map, intptr_t** link_addr) // we don't have to always save EBP/RBP on entry and exit to c2 compiled // code, on entry will be enough. map->set_location(rbp->as_VMReg(), (address) link_addr); -#ifdef AMD64 // this is weird "H" ought to be at a higher address however the // oopMaps seems to have the "H" regs at the same address and the // vanilla register. // XXXX make this go away - if (true) { - map->set_location(rbp->as_VMReg()->next(), (address) link_addr); - } -#endif // AMD64 + map->set_location(rbp->as_VMReg()->next(), (address) link_addr); } #endif // CPU_X86_FRAME_X86_INLINE_HPP diff --git a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp index 5af36d84e6ed8..4f92c44dc526f 100644 --- a/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/g1/g1BarrierSetAssembler_x86.cpp @@ -50,14 +50,8 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; if (!dest_uninitialized) { - Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread); -#ifndef _LP64 - __ push(thread); - __ get_thread(thread); -#endif - Label filtered; - Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); + Address in_progress(r15_thread, 
in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); // Is marking active? if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { __ cmpl(in_progress, 0); @@ -66,12 +60,9 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm __ cmpb(in_progress, 0); } - NOT_LP64(__ pop(thread);) - __ jcc(Assembler::equal, filtered); __ push_call_clobbered_registers(false /* save_fpu */); -#ifdef _LP64 if (count == c_rarg0) { if (addr == c_rarg1) { // exactly backwards!! @@ -89,10 +80,6 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm } else { __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); } -#else - __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), - addr, count); -#endif __ pop_call_clobbered_registers(false /* save_fpu */); __ bind(filtered); @@ -102,7 +89,6 @@ void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp) { __ push_call_clobbered_registers(false /* save_fpu */); -#ifdef _LP64 if (c_rarg0 == count) { // On win64 c_rarg0 == rcx assert_different_registers(c_rarg1, addr); __ mov(c_rarg1, count); @@ -113,10 +99,6 @@ void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* mas __ mov(c_rarg1, count); } __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); -#else - __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), - addr, count); -#endif __ pop_call_clobbered_registers(false /* save_fpu */); } @@ -128,38 +110,16 @@ void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorator bool on_reference = on_weak || on_phantom; ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); if 
(on_oop && on_reference) { - Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread); - -#ifndef _LP64 - // Work around the x86_32 bug that only manifests with Loom for some reason. - // MacroAssembler::resolve_weak_handle calls this barrier with tmp_thread == noreg. - if (thread == noreg) { - if (dst != rcx && tmp1 != rcx) { - thread = rcx; - } else if (dst != rdx && tmp1 != rdx) { - thread = rdx; - } else if (dst != rdi && tmp1 != rdi) { - thread = rdi; - } - } - assert_different_registers(dst, tmp1, thread); - __ push(thread); - __ get_thread(thread); -#endif - // Generate the G1 pre-barrier code to log the value of // the referent field in an SATB buffer. g1_write_barrier_pre(masm /* masm */, noreg /* obj */, dst /* pre_val */, - thread /* thread */, + r15_thread /* thread */, tmp1 /* tmp */, true /* tosca_live */, true /* expand_call */); -#ifndef _LP64 - __ pop(thread); -#endif } } @@ -224,9 +184,7 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, // directly to skip generating the check by // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. -#ifdef _LP64 assert(thread == r15_thread, "must be"); -#endif // _LP64 Label done; Label runtime; @@ -261,18 +219,13 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, // expand_call should be passed true. 
if (expand_call) { - LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); ) -#ifdef _LP64 + assert(pre_val != c_rarg1, "smashed arg"); if (c_rarg1 != thread) { __ mov(c_rarg1, thread); } if (c_rarg0 != pre_val) { __ mov(c_rarg0, pre_val); } -#else - __ push(thread); - __ push(pre_val); -#endif __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), 2); } else { __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); @@ -337,9 +290,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, Register thread, Register tmp, Register tmp2) { -#ifdef _LP64 assert(thread == r15_thread, "must be"); -#endif // _LP64 Label done; Label runtime; @@ -351,7 +302,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, __ bind(runtime); // save the live input values - RegSet saved = RegSet::of(store_addr NOT_LP64(COMMA thread)); + RegSet saved = RegSet::of(store_addr); __ push_set(saved); __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), tmp, thread); __ pop_set(saved); @@ -362,7 +313,6 @@ void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, #if defined(COMPILER2) static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStubC2* stub, const Register arg, const address runtime_path) { -#ifdef _LP64 SaveLiveRegisters save_registers(masm, stub); if (c_rarg0 != arg) { __ mov(c_rarg0, arg); @@ -374,9 +324,6 @@ static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStub // call. If it did not contain any live value, it is free to be used. In // either case, it is safe to use it here as a call scratch register. 
__ call(RuntimeAddress(runtime_path), rax); -#else - Unimplemented(); -#endif // _LP64 } void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm, @@ -385,9 +332,7 @@ void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm, Register thread, Register tmp, G1PreBarrierStubC2* stub) { -#ifdef _LP64 assert(thread == r15_thread, "must be"); -#endif // _LP64 assert(pre_val != noreg, "check this code"); if (obj != noreg) { assert_different_registers(obj, pre_val, tmp); @@ -427,9 +372,7 @@ void G1BarrierSetAssembler::g1_write_barrier_post_c2(MacroAssembler* masm, Register tmp, Register tmp2, G1PostBarrierStubC2* stub) { -#ifdef _LP64 assert(thread == r15_thread, "must be"); -#endif // _LP64 stub->initialize_registers(thread, tmp, tmp2); @@ -468,7 +411,6 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco bool needs_pre_barrier = as_normal; bool needs_post_barrier = val != noreg && in_heap; - Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx); // flatten object address if needed // We do it regardless of precise because we need the registers if (dst.index() == noreg && dst.disp() == 0) { @@ -479,18 +421,11 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco __ lea(tmp1, dst); } -#ifndef _LP64 - InterpreterMacroAssembler *imasm = static_cast(masm); -#endif - - NOT_LP64(__ get_thread(rcx)); - NOT_LP64(imasm->save_bcp()); - if (needs_pre_barrier) { g1_write_barrier_pre(masm /*masm*/, tmp1 /* obj */, tmp2 /* pre_val */, - rthread /* thread */, + r15_thread /* thread */, tmp3 /* tmp */, val != noreg /* tosca_live */, false /* expand_call */); @@ -511,12 +446,11 @@ void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet deco g1_write_barrier_post(masm /*masm*/, tmp1 /* store_adr */, new_val /* new_val */, - rthread /* thread */, + r15_thread /* thread */, tmp3 /* tmp */, tmp2 /* tmp2 */); } } - NOT_LP64(imasm->restore_bcp()); } #ifdef COMPILER1 @@ -576,11 
+510,9 @@ void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* __ push(rdx); const Register pre_val = rax; - const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread); + const Register thread = r15_thread; const Register tmp = rdx; - NOT_LP64(__ get_thread(thread);) - Address queue_active(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); @@ -642,7 +574,7 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* // At this point we know new_value is non-null and the new_value crosses regions. // Must check to see if card is already dirty - const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread); + const Register thread = r15_thread; Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); @@ -660,8 +592,6 @@ void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* __ movptr(cardtable, (intptr_t)ct->card_table()->byte_map_base()); __ addptr(card_addr, cardtable); - NOT_LP64(__ get_thread(thread);) - __ cmpb(Address(card_addr, 0), G1CardTable::g1_young_card_val()); __ jcc(Assembler::equal, done); diff --git a/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp index cd0e43b68bf9e..e10eaa620d73b 100644 --- a/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.cpp @@ -51,7 +51,6 @@ void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, case T_OBJECT: case T_ARRAY: { if (in_heap) { -#ifdef _LP64 if (UseCompressedOops) { __ movl(dst, src); if (is_not_null) { @@ -59,9 +58,7 @@ void 
BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, } else { __ decode_heap_oop(dst); } - } else -#endif - { + } else { __ movptr(dst, src); } } else { @@ -86,20 +83,7 @@ void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, break; case T_LONG: assert(dst == noreg, "only to ltos"); -#ifdef _LP64 __ movq(rax, src); -#else - if (atomic) { - __ fild_d(src); // Must load atomically - __ subptr(rsp,2*wordSize); // Make space for store - __ fistp_d(Address(rsp,0)); - __ pop(rax); - __ pop(rdx); - } else { - __ movl(rax, src); - __ movl(rdx, src.plus_disp(wordSize)); - } -#endif break; default: Unimplemented(); } @@ -118,17 +102,12 @@ void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators if (in_heap) { if (val == noreg) { assert(!is_not_null, "inconsistent access"); -#ifdef _LP64 if (UseCompressedOops) { __ movl(dst, NULL_WORD); } else { __ movslq(dst, NULL_WORD); } -#else - __ movl(dst, NULL_WORD); -#endif } else { -#ifdef _LP64 if (UseCompressedOops) { assert(!dst.uses(val), "not enough registers"); if (is_not_null) { @@ -137,9 +116,7 @@ void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators __ encode_heap_oop(val); } __ movl(dst, val); - } else -#endif - { + } else { __ movptr(dst, val); } } @@ -168,20 +145,7 @@ void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators break; case T_LONG: assert(val == noreg, "only tos"); -#ifdef _LP64 __ movq(dst, rax); -#else - if (atomic) { - __ push(rdx); - __ push(rax); // Must update atomically with FIST - __ fild_d(Address(rsp,0)); // So load into FPU register - __ fistp_d(dst); // and put into memory atomically - __ addptr(rsp, 2*wordSize); - } else { - __ movptr(dst, rax); - __ movptr(dst.plus_disp(wordSize), rdx); - } -#endif break; case T_FLOAT: assert(val == noreg, "only tos"); @@ -217,20 +181,14 @@ void BarrierSetAssembler::copy_load_at(MacroAssembler* masm, __ movl(dst, src); break; case 8: 
-#ifdef _LP64 __ movq(dst, src); -#else - fatal("No support for 8 bytes copy"); -#endif break; default: fatal("Unexpected size"); } -#ifdef _LP64 if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) { __ decode_heap_oop(dst); } -#endif } void BarrierSetAssembler::copy_store_at(MacroAssembler* masm, @@ -240,11 +198,9 @@ void BarrierSetAssembler::copy_store_at(MacroAssembler* masm, Address dst, Register src, Register tmp) { -#ifdef _LP64 if ((decorators & ARRAYCOPY_CHECKCAST) != 0 && UseCompressedOops) { __ encode_heap_oop(src); } -#endif assert(bytes <= 8, "can only deal with non-vector registers"); switch (bytes) { case 1: @@ -257,11 +213,7 @@ void BarrierSetAssembler::copy_store_at(MacroAssembler* masm, __ movl(dst, src); break; case 8: -#ifdef _LP64 __ movq(dst, src); -#else - fatal("No support for 8 bytes copy"); -#endif break; default: fatal("Unexpected size"); @@ -322,13 +274,7 @@ void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, assert_different_registers(obj, var_size_in_bytes, t1); Register end = t2; if (!thread->is_valid()) { -#ifdef _LP64 thread = r15_thread; -#else - assert(t1->is_valid(), "need temp reg"); - thread = t1; - __ get_thread(thread); -#endif } __ verify_tlab(); @@ -352,7 +298,6 @@ void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, __ verify_tlab(); } -#ifdef _LP64 void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slow_path, Label* continuation) { BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); if (bs_nm == nullptr) { @@ -379,27 +324,6 @@ void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slo __ bind(done); } } -#else -void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label*, Label*) { - BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); - if (bs_nm == nullptr) { - return; - } - - Label continuation; - - Register tmp = rdi; - __ push(tmp); - __ movptr(tmp, 
(intptr_t)bs_nm->disarmed_guard_value_address()); - Address disarmed_addr(tmp, 0); - __ align(4); - __ cmpl_imm32(disarmed_addr, 0); - __ pop(tmp); - __ jcc(Assembler::equal, continuation); - __ call(RuntimeAddress(StubRoutines::method_entry_barrier())); - __ bind(continuation); -} -#endif void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) { BarrierSetNMethod* bs = BarrierSet::barrier_set()->barrier_set_nmethod(); @@ -411,12 +335,8 @@ void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) { __ cmpptr(rbx, 0); // rbx contains the incoming method for c2i adapters. __ jcc(Assembler::equal, bad_call); - Register tmp1 = LP64_ONLY( rscratch1 ) NOT_LP64( rax ); - Register tmp2 = LP64_ONLY( rscratch2 ) NOT_LP64( rcx ); -#ifndef _LP64 - __ push(tmp1); - __ push(tmp2); -#endif // !_LP64 + Register tmp1 = rscratch1; + Register tmp2 = rscratch2; // Pointer chase to the method holder to find out if the method is concurrently unloading. Label method_live; @@ -432,19 +352,9 @@ void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) { __ cmpptr(tmp1, 0); __ jcc(Assembler::notEqual, method_live); -#ifndef _LP64 - __ pop(tmp2); - __ pop(tmp1); -#endif - __ bind(bad_call); __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); __ bind(method_live); - -#ifndef _LP64 - __ pop(tmp2); - __ pop(tmp1); -#endif } void BarrierSetAssembler::check_oop(MacroAssembler* masm, Register obj, Register tmp1, Register tmp2, Label& error) { @@ -464,8 +374,6 @@ void BarrierSetAssembler::check_oop(MacroAssembler* masm, Register obj, Register #ifdef COMPILER2 -#ifdef _LP64 - OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) { if (!OptoReg::is_reg(opto_reg)) { return OptoReg::Bad; @@ -741,12 +649,4 @@ SaveLiveRegisters::~SaveLiveRegisters() { } } -#else // !_LP64 - -OptoReg::Name BarrierSetAssembler::refine_register(const Node* node, OptoReg::Name opto_reg) { - Unimplemented(); // This must be implemented to 
support late barrier expansion. -} - -#endif // _LP64 - #endif // COMPILER2 diff --git a/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.hpp index 5dde1c7aeedbb..e23dc4334dceb 100644 --- a/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/gc/shared/barrierSetAssembler_x86.hpp @@ -114,8 +114,6 @@ class BarrierSetAssembler: public CHeapObj { #ifdef COMPILER2 -#ifdef _LP64 - // This class saves and restores the registers that need to be preserved across // the runtime call represented by a given C2 barrier stub. Use as follows: // { @@ -160,8 +158,6 @@ class SaveLiveRegisters { ~SaveLiveRegisters(); }; -#endif // _LP64 - #endif // COMPILER2 #endif // CPU_X86_GC_SHARED_BARRIERSETASSEMBLER_X86_HPP diff --git a/src/hotspot/cpu/x86/gc/shared/barrierSetNMethod_x86.cpp b/src/hotspot/cpu/x86/gc/shared/barrierSetNMethod_x86.cpp index dfd9d59016f0a..69f6af91a8dbe 100644 --- a/src/hotspot/cpu/x86/gc/shared/barrierSetNMethod_x86.cpp +++ b/src/hotspot/cpu/x86/gc/shared/barrierSetNMethod_x86.cpp @@ -40,7 +40,6 @@ class NativeNMethodCmpBarrier: public NativeInstruction { public: -#ifdef _LP64 enum Intel_specific_constants { instruction_code = 0x81, instruction_size = 8, @@ -48,14 +47,6 @@ class NativeNMethodCmpBarrier: public NativeInstruction { instruction_rex_prefix = Assembler::REX | Assembler::REX_B, instruction_modrm = 0x7f // [r15 + offset] }; -#else - enum Intel_specific_constants { - instruction_code = 0x81, - instruction_size = 7, - imm_offset = 2, - instruction_modrm = 0x3f // [rdi] - }; -#endif address instruction_address() const { return addr_at(0); } address immediate_address() const { return addr_at(imm_offset); } @@ -71,7 +62,6 @@ class NativeNMethodCmpBarrier: public NativeInstruction { } }; -#ifdef _LP64 bool NativeNMethodCmpBarrier::check_barrier(err_msg& msg) const { // Only require 4 byte alignment if (((uintptr_t) instruction_address()) & 0x3) { @@ -98,29 +88,6 
@@ bool NativeNMethodCmpBarrier::check_barrier(err_msg& msg) const { } return true; } -#else -bool NativeNMethodCmpBarrier::check_barrier(err_msg& msg) const { - if (((uintptr_t) instruction_address()) & 0x3) { - msg.print("Addr: " INTPTR_FORMAT " not properly aligned", p2i(instruction_address())); - return false; - } - - int inst = ubyte_at(0); - if (inst != instruction_code) { - msg.print("Addr: " INTPTR_FORMAT " Code: 0x%x", p2i(instruction_address()), - inst); - return false; - } - - int modrm = ubyte_at(1); - if (modrm != instruction_modrm) { - msg.print("Addr: " INTPTR_FORMAT " mod/rm: 0x%x", p2i(instruction_address()), - modrm); - return false; - } - return true; -} -#endif // _LP64 void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) { /* @@ -170,15 +137,11 @@ void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) { // not find the expected native instruction at this offset, which needs updating. // Note that this offset is invariant of PreserveFramePointer. 
static int entry_barrier_offset(nmethod* nm) { -#ifdef _LP64 if (nm->is_compiled_by_c2()) { return -14; } else { return -15; } -#else - return -18; -#endif } static NativeNMethodCmpBarrier* native_nmethod_barrier(nmethod* nm) { diff --git a/src/hotspot/cpu/x86/gc/shared/cardTableBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/shared/cardTableBarrierSetAssembler_x86.cpp index b04ab35862a3e..4ad342fdcfbda 100644 --- a/src/hotspot/cpu/x86/gc/shared/cardTableBarrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/shared/cardTableBarrierSetAssembler_x86.cpp @@ -57,7 +57,6 @@ void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembl __ jcc(Assembler::zero, L_done); // zero count - nothing to do -#ifdef _LP64 __ leaq(end, Address(addr, count, TIMES_OOP, 0)); // end == addr+count*oop_size __ subptr(end, BytesPerHeapOop); // end - 1 to make inclusive __ shrptr(addr, CardTable::card_shift()); @@ -70,17 +69,6 @@ __ BIND(L_loop); __ movb(Address(addr, count, Address::times_1), 0); __ decrement(count); __ jcc(Assembler::greaterEqual, L_loop); -#else - __ lea(end, Address(addr, count, Address::times_ptr, -wordSize)); - __ shrptr(addr, CardTable::card_shift()); - __ shrptr(end, CardTable::card_shift()); - __ subptr(end, addr); // end --> count -__ BIND(L_loop); - Address cardtable(addr, count, Address::times_1, disp); - __ movb(cardtable, 0); - __ decrement(count); - __ jcc(Assembler::greaterEqual, L_loop); -#endif __ BIND(L_done); } diff --git a/src/hotspot/cpu/x86/gc/shared/modRefBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/shared/modRefBarrierSetAssembler_x86.cpp index 618095bdfa634..5d4d5dc1fc34e 100644 --- a/src/hotspot/cpu/x86/gc/shared/modRefBarrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/shared/modRefBarrierSetAssembler_x86.cpp @@ -32,10 +32,9 @@ void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, Decorat Register src, Register dst, Register count) { bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 
0; bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0; - bool obj_int = type == T_OBJECT LP64_ONLY(&& UseCompressedOops); + bool obj_int = (type == T_OBJECT) && UseCompressedOops; if (is_reference_type(type)) { -#ifdef _LP64 if (!checkcast) { if (!obj_int) { // Save count for barrier @@ -45,11 +44,6 @@ void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, Decorat __ movq(r11, dst); } } -#else - if (disjoint) { - __ mov(rdx, dst); // save 'to' - } -#endif gen_write_ref_array_pre_barrier(masm, decorators, dst, count); } } @@ -58,11 +52,10 @@ void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, Decorat Register src, Register dst, Register count) { bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0; bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0; - bool obj_int = type == T_OBJECT LP64_ONLY(&& UseCompressedOops); + bool obj_int = (type == T_OBJECT) && UseCompressedOops; Register tmp = rax; if (is_reference_type(type)) { -#ifdef _LP64 if (!checkcast) { if (!obj_int) { // Save count for barrier @@ -74,11 +67,6 @@ void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, Decorat } else { tmp = rscratch1; } -#else - if (disjoint) { - __ mov(dst, rdx); // restore 'to' - } -#endif gen_write_ref_array_post_barrier(masm, decorators, dst, count, tmp); } } diff --git a/src/hotspot/cpu/x86/gc/shenandoah/c1/shenandoahBarrierSetC1_x86.cpp b/src/hotspot/cpu/x86/gc/shenandoah/c1/shenandoahBarrierSetC1_x86.cpp index eb6da25d1bc7a..788f0ecaa53d9 100644 --- a/src/hotspot/cpu/x86/gc/shenandoah/c1/shenandoahBarrierSetC1_x86.cpp +++ b/src/hotspot/cpu/x86/gc/shenandoah/c1/shenandoahBarrierSetC1_x86.cpp @@ -34,7 +34,6 @@ #define __ masm->masm()-> void LIR_OpShenandoahCompareAndSwap::emit_code(LIR_Assembler* masm) { - NOT_LP64(assert(_addr->is_single_cpu(), "must be single");) Register addr = _addr->is_single_cpu() ? 
_addr->as_register() : _addr->as_register_lo(); Register newval = _new_value->as_register(); Register cmpval = _cmp_value->as_register(); @@ -47,14 +46,12 @@ void LIR_OpShenandoahCompareAndSwap::emit_code(LIR_Assembler* masm) { assert(cmpval != addr, "cmp and addr must be in different registers"); assert(newval != addr, "new value and addr must be in different registers"); -#ifdef _LP64 if (UseCompressedOops) { __ encode_heap_oop(cmpval); __ mov(rscratch1, newval); __ encode_heap_oop(rscratch1); newval = rscratch1; } -#endif ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm->masm(), result, Address(addr, 0), cmpval, newval, false, tmp1, tmp2); } @@ -106,7 +103,7 @@ LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRIt // Because we want a 2-arg form of xchg and xadd __ move(value_opr, result); - assert(type == T_INT || is_reference_type(type) LP64_ONLY( || type == T_LONG ), "unexpected type"); + assert(type == T_INT || is_reference_type(type) || type == T_LONG, "unexpected type"); __ xchg(access.resolved_addr(), result, result, LIR_OprFact::illegalOpr); if (access.is_oop()) { diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp index a452850b1e814..36df91453b550 100644 --- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp @@ -52,63 +52,33 @@ static void save_machine_state(MacroAssembler* masm, bool handle_gpr, bool handl if (handle_fp) { // Some paths can be reached from the c2i adapter with live fp arguments in registers. 
- LP64_ONLY(assert(Argument::n_float_register_parameters_j == 8, "8 fp registers to save at java call")); - - if (UseSSE >= 2) { - const int xmm_size = wordSize * LP64_ONLY(2) NOT_LP64(4); - __ subptr(rsp, xmm_size * 8); - __ movdbl(Address(rsp, xmm_size * 0), xmm0); - __ movdbl(Address(rsp, xmm_size * 1), xmm1); - __ movdbl(Address(rsp, xmm_size * 2), xmm2); - __ movdbl(Address(rsp, xmm_size * 3), xmm3); - __ movdbl(Address(rsp, xmm_size * 4), xmm4); - __ movdbl(Address(rsp, xmm_size * 5), xmm5); - __ movdbl(Address(rsp, xmm_size * 6), xmm6); - __ movdbl(Address(rsp, xmm_size * 7), xmm7); - } else if (UseSSE >= 1) { - const int xmm_size = wordSize * LP64_ONLY(1) NOT_LP64(2); - __ subptr(rsp, xmm_size * 8); - __ movflt(Address(rsp, xmm_size * 0), xmm0); - __ movflt(Address(rsp, xmm_size * 1), xmm1); - __ movflt(Address(rsp, xmm_size * 2), xmm2); - __ movflt(Address(rsp, xmm_size * 3), xmm3); - __ movflt(Address(rsp, xmm_size * 4), xmm4); - __ movflt(Address(rsp, xmm_size * 5), xmm5); - __ movflt(Address(rsp, xmm_size * 6), xmm6); - __ movflt(Address(rsp, xmm_size * 7), xmm7); - } else { - __ push_FPU_state(); - } + assert(Argument::n_float_register_parameters_j == 8, "8 fp registers to save at java call"); + + const int xmm_size = wordSize * 2; + __ subptr(rsp, xmm_size * 8); + __ movdbl(Address(rsp, xmm_size * 0), xmm0); + __ movdbl(Address(rsp, xmm_size * 1), xmm1); + __ movdbl(Address(rsp, xmm_size * 2), xmm2); + __ movdbl(Address(rsp, xmm_size * 3), xmm3); + __ movdbl(Address(rsp, xmm_size * 4), xmm4); + __ movdbl(Address(rsp, xmm_size * 5), xmm5); + __ movdbl(Address(rsp, xmm_size * 6), xmm6); + __ movdbl(Address(rsp, xmm_size * 7), xmm7); } } static void restore_machine_state(MacroAssembler* masm, bool handle_gpr, bool handle_fp) { if (handle_fp) { - if (UseSSE >= 2) { - const int xmm_size = wordSize * LP64_ONLY(2) NOT_LP64(4); - __ movdbl(xmm0, Address(rsp, xmm_size * 0)); - __ movdbl(xmm1, Address(rsp, xmm_size * 1)); - __ movdbl(xmm2, Address(rsp, xmm_size 
* 2)); - __ movdbl(xmm3, Address(rsp, xmm_size * 3)); - __ movdbl(xmm4, Address(rsp, xmm_size * 4)); - __ movdbl(xmm5, Address(rsp, xmm_size * 5)); - __ movdbl(xmm6, Address(rsp, xmm_size * 6)); - __ movdbl(xmm7, Address(rsp, xmm_size * 7)); - __ addptr(rsp, xmm_size * 8); - } else if (UseSSE >= 1) { - const int xmm_size = wordSize * LP64_ONLY(1) NOT_LP64(2); - __ movflt(xmm0, Address(rsp, xmm_size * 0)); - __ movflt(xmm1, Address(rsp, xmm_size * 1)); - __ movflt(xmm2, Address(rsp, xmm_size * 2)); - __ movflt(xmm3, Address(rsp, xmm_size * 3)); - __ movflt(xmm4, Address(rsp, xmm_size * 4)); - __ movflt(xmm5, Address(rsp, xmm_size * 5)); - __ movflt(xmm6, Address(rsp, xmm_size * 6)); - __ movflt(xmm7, Address(rsp, xmm_size * 7)); - __ addptr(rsp, xmm_size * 8); - } else { - __ pop_FPU_state(); - } + const int xmm_size = wordSize * 2; + __ movdbl(xmm0, Address(rsp, xmm_size * 0)); + __ movdbl(xmm1, Address(rsp, xmm_size * 1)); + __ movdbl(xmm2, Address(rsp, xmm_size * 2)); + __ movdbl(xmm3, Address(rsp, xmm_size * 3)); + __ movdbl(xmm4, Address(rsp, xmm_size * 4)); + __ movdbl(xmm5, Address(rsp, xmm_size * 5)); + __ movdbl(xmm6, Address(rsp, xmm_size * 6)); + __ movdbl(xmm7, Address(rsp, xmm_size * 7)); + __ addptr(rsp, xmm_size * 8); } if (handle_gpr) { @@ -125,11 +95,10 @@ void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, Dec if (ShenandoahCardBarrier) { bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0; bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0; - bool obj_int = type == T_OBJECT LP64_ONLY(&& UseCompressedOops); + bool obj_int = type == T_OBJECT && UseCompressedOops; // We need to save the original element count because the array copy stub // will destroy the value and we need it for the card marking barrier. 
-#ifdef _LP64 if (!checkcast) { if (!obj_int) { // Save count for barrier @@ -139,31 +108,10 @@ void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, Dec __ movq(r11, dst); } } -#else - if (disjoint) { - __ mov(rdx, dst); // save 'to' - } -#endif } if ((ShenandoahSATBBarrier && !dest_uninitialized) || ShenandoahLoadRefBarrier) { -#ifdef _LP64 - Register thread = r15_thread; -#else - Register thread = rax; - if (thread == src || thread == dst || thread == count) { - thread = rbx; - } - if (thread == src || thread == dst || thread == count) { - thread = rcx; - } - if (thread == src || thread == dst || thread == count) { - thread = rdx; - } - __ push(thread); - __ get_thread(thread); -#endif - assert_different_registers(src, dst, count, thread); + assert_different_registers(src, dst, count, r15_thread); Label L_done; // Short-circuit if count == 0. @@ -171,7 +119,7 @@ void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, Dec __ jcc(Assembler::zero, L_done); // Avoid runtime call when not active. 
- Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); + Address gc_state(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); int flags; if (ShenandoahSATBBarrier && dest_uninitialized) { flags = ShenandoahHeap::HAS_FORWARDED; @@ -183,16 +131,13 @@ void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, Dec save_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ false); -#ifdef _LP64 assert(src == rdi, "expected"); assert(dst == rsi, "expected"); assert(count == rdx, "expected"); if (UseCompressedOops) { __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_narrow_oop), src, dst, count); - } else -#endif - { + } else { __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::arraycopy_barrier_oop), src, dst, count); } @@ -200,7 +145,6 @@ void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, Dec restore_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ false); __ bind(L_done); - NOT_LP64(__ pop(thread);) } } @@ -212,10 +156,9 @@ void ShenandoahBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, Dec if (ShenandoahCardBarrier && is_reference_type(type)) { bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0; bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0; - bool obj_int = type == T_OBJECT LP64_ONLY(&& UseCompressedOops); + bool obj_int = type == T_OBJECT && UseCompressedOops; Register tmp = rax; -#ifdef _LP64 if (!checkcast) { if (!obj_int) { // Save count for barrier @@ -227,11 +170,6 @@ void ShenandoahBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, Dec } else { tmp = rscratch1; } -#else - if (disjoint) { - __ mov(dst, rdx); // restore 'to' - } -#endif gen_write_ref_array_post_barrier(masm, decorators, dst, count, tmp); } } @@ -260,9 +198,7 @@ void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, // directly to skip generating the check by // 
InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. -#ifdef _LP64 assert(thread == r15_thread, "must be"); -#endif // _LP64 Label done; Label runtime; @@ -328,9 +264,6 @@ void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, // So when we do not have have a full interpreter frame on the stack // expand_call should be passed true. - NOT_LP64( __ push(thread); ) - -#ifdef _LP64 // We move pre_val into c_rarg0 early, in order to avoid smashing it, should // pre_val be c_rarg1 (where the call prologue would copy thread argument). // Note: this should not accidentally smash thread, because thread is always r15. @@ -338,26 +271,18 @@ void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, if (c_rarg0 != pre_val) { __ mov(c_rarg0, pre_val); } -#endif if (expand_call) { - LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); ) -#ifdef _LP64 + assert(pre_val != c_rarg1, "smashed arg"); if (c_rarg1 != thread) { __ mov(c_rarg1, thread); } // Already moved pre_val into c_rarg0 above -#else - __ push(thread); - __ push(pre_val); -#endif __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), 2); } else { - __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), LP64_ONLY(c_rarg0) NOT_LP64(pre_val), thread); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::write_ref_field_pre), c_rarg0, thread); } - NOT_LP64( __ pop(thread); ) - // save the live input values if (pre_val != rax) __ pop(pre_val); @@ -384,16 +309,7 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, __ block_comment("load_reference_barrier { "); // Check if GC is active -#ifdef _LP64 Register thread = r15_thread; -#else - Register thread = rcx; - if (thread == dst) { - thread = rbx; - } - __ push(thread); - __ get_thread(thread); -#endif Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); int flags = 
ShenandoahHeap::HAS_FORWARDED; @@ -439,7 +355,7 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, // The rest is saved with the optimized path - uint num_saved_regs = 4 + (dst != rax ? 1 : 0) LP64_ONLY(+4); + uint num_saved_regs = 4 + (dst != rax ? 1 : 0) + 4; __ subptr(rsp, num_saved_regs * wordSize); uint slot = num_saved_regs; if (dst != rax) { @@ -449,21 +365,15 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, __ movptr(Address(rsp, (--slot) * wordSize), rdx); __ movptr(Address(rsp, (--slot) * wordSize), rdi); __ movptr(Address(rsp, (--slot) * wordSize), rsi); -#ifdef _LP64 __ movptr(Address(rsp, (--slot) * wordSize), r8); __ movptr(Address(rsp, (--slot) * wordSize), r9); __ movptr(Address(rsp, (--slot) * wordSize), r10); __ movptr(Address(rsp, (--slot) * wordSize), r11); // r12-r15 are callee saved in all calling conventions -#endif assert(slot == 0, "must use all slots"); // Shuffle registers such that dst is in c_rarg0 and addr in c_rarg1. 
-#ifdef _LP64 Register arg0 = c_rarg0, arg1 = c_rarg1; -#else - Register arg0 = rdi, arg1 = rsi; -#endif if (dst == arg1) { __ lea(arg0, src); __ xchgptr(arg1, arg0); @@ -490,12 +400,10 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom), arg0, arg1); } -#ifdef _LP64 __ movptr(r11, Address(rsp, (slot++) * wordSize)); __ movptr(r10, Address(rsp, (slot++) * wordSize)); __ movptr(r9, Address(rsp, (slot++) * wordSize)); __ movptr(r8, Address(rsp, (slot++) * wordSize)); -#endif __ movptr(rsi, Address(rsp, (slot++) * wordSize)); __ movptr(rdi, Address(rsp, (slot++) * wordSize)); __ movptr(rdx, Address(rsp, (slot++) * wordSize)); @@ -521,10 +429,6 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, __ bind(heap_stable); __ block_comment("} load_reference_barrier"); - -#ifndef _LP64 - __ pop(thread); -#endif } // @@ -590,18 +494,13 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet d if (ShenandoahBarrierSet::need_keep_alive_barrier(decorators, type)) { save_machine_state(masm, /* handle_gpr = */ true, /* handle_fp = */ true); - Register thread = NOT_LP64(tmp_thread) LP64_ONLY(r15_thread); - assert_different_registers(dst, tmp1, tmp_thread); - if (!thread->is_valid()) { - thread = rdx; - } - NOT_LP64(__ get_thread(thread)); + assert_different_registers(dst, tmp1, r15_thread); // Generate the SATB pre-barrier code to log the value of // the referent field in an SATB buffer. 
shenandoah_write_barrier_pre(masm /* masm */, noreg /* obj */, dst /* pre_val */, - thread /* thread */, + r15_thread /* thread */, tmp1 /* tmp */, true /* tosca_live */, true /* expand_call */); @@ -662,7 +561,6 @@ void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet if (on_oop && in_heap) { bool needs_pre_barrier = as_normal; - Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx); // flatten object address if needed // We do it regardless of precise because we need the registers if (dst.index() == noreg && dst.disp() == 0) { @@ -673,19 +571,13 @@ void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet __ lea(tmp1, dst); } - assert_different_registers(val, tmp1, tmp2, tmp3, rthread); - -#ifndef _LP64 - __ get_thread(rthread); - InterpreterMacroAssembler *imasm = static_cast(masm); - imasm->save_bcp(); -#endif + assert_different_registers(val, tmp1, tmp2, tmp3, r15_thread); if (needs_pre_barrier) { shenandoah_write_barrier_pre(masm /*masm*/, tmp1 /* obj */, tmp2 /* pre_val */, - rthread /* thread */, + r15_thread /* thread */, tmp3 /* tmp */, val != noreg /* tosca_live */, false /* expand_call */); @@ -697,7 +589,6 @@ void ShenandoahBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet store_check(masm, tmp1); } } - NOT_LP64(imasm->restore_bcp()); } else { BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2, tmp3); } @@ -732,12 +623,9 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, Label L_success, L_failure; // Remember oldval for retry logic below -#ifdef _LP64 if (UseCompressedOops) { __ movl(tmp1, oldval); - } else -#endif - { + } else { __ movptr(tmp1, oldval); } @@ -745,13 +633,10 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, // // Try to CAS with given arguments. If successful, then we are done. 
-#ifdef _LP64 if (UseCompressedOops) { __ lock(); __ cmpxchgl(newval, addr); - } else -#endif - { + } else { __ lock(); __ cmpxchgptr(newval, addr); } @@ -772,23 +657,14 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, __ jcc(Assembler::zero, L_failure); // Filter: when heap is stable, the failure is definitely legitimate -#ifdef _LP64 - const Register thread = r15_thread; -#else - const Register thread = tmp2; - __ get_thread(thread); -#endif - Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); + Address gc_state(r15_thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED); __ jcc(Assembler::zero, L_failure); -#ifdef _LP64 if (UseCompressedOops) { __ movl(tmp2, oldval); __ decode_heap_oop(tmp2); - } else -#endif - { + } else { __ movptr(tmp2, oldval); } @@ -803,11 +679,9 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, __ shrptr(tmp2, 2); __ shlptr(tmp2, 2); -#ifdef _LP64 if (UseCompressedOops) { __ decode_heap_oop(tmp1); // decode for comparison } -#endif // Now we have the forwarded offender in tmp2. // Compare and if they don't match, we have legitimate failure @@ -823,19 +697,11 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, // with to-space ptr store. We still have to do the retry, because the GC might // have updated the reference for us. -#ifdef _LP64 if (UseCompressedOops) { __ encode_heap_oop(tmp2); // previously decoded at step 2. - } -#endif - -#ifdef _LP64 - if (UseCompressedOops) { __ lock(); __ cmpxchgl(tmp2, addr); - } else -#endif - { + } else { __ lock(); __ cmpxchgptr(tmp2, addr); } @@ -847,22 +713,12 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, // from-space ptr into memory anymore. Make sure oldval is restored, after being // garbled during retries. 
// -#ifdef _LP64 if (UseCompressedOops) { __ movl(oldval, tmp2); - } else -#endif - { - __ movptr(oldval, tmp2); - } - -#ifdef _LP64 - if (UseCompressedOops) { __ lock(); __ cmpxchgl(newval, addr); - } else -#endif - { + } else { + __ movptr(oldval, tmp2); __ lock(); __ cmpxchgptr(newval, addr); } @@ -918,7 +774,6 @@ void ShenandoahBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssemb __ testl(count, count); __ jccb(Assembler::zero, L_done); -#ifdef _LP64 __ leaq(end, Address(addr, count, TIMES_OOP, 0)); // end == addr+count*oop_size __ subptr(end, BytesPerHeapOop); // end - 1 to make inclusive __ shrptr(addr, CardTable::card_shift()); @@ -932,18 +787,6 @@ void ShenandoahBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssemb __ movb(Address(addr, count, Address::times_1), 0); __ decrement(count); __ jccb(Assembler::greaterEqual, L_loop); -#else - __ lea(end, Address(addr, count, Address::times_ptr, -wordSize)); - __ shrptr(addr, CardTable::card_shift()); - __ shrptr(end, CardTable::card_shift()); - __ subptr(end, addr); // end --> count - - __ BIND(L_loop); - Address cardtable(addr, count, Address::times_1, disp); - __ movb(cardtable, 0); - __ decrement(count); - __ jccb(Assembler::greaterEqual, L_loop); -#endif __ BIND(L_done); } @@ -1008,15 +851,8 @@ void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assemble __ mov(tmp1, res); __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint()); __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr()); -#ifdef _LP64 __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1)); __ testbool(tmp2); -#else - // On x86_32, C1 register allocator can give us the register without 8-bit support. - // Do the full-register access and test to avoid compilation failures. 
- __ movptr(tmp2, Address(tmp2, tmp1, Address::times_1)); - __ testptr(tmp2, 0xFF); -#endif __ jcc(Assembler::zero, *stub->continuation()); } @@ -1050,11 +886,9 @@ void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAss __ push(rdx); const Register pre_val = rax; - const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread); + const Register thread = r15_thread; const Register tmp = rdx; - NOT_LP64(__ get_thread(thread);) - Address queue_index(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_index_offset())); Address buffer(thread, in_bytes(ShenandoahThreadLocalData::satb_mark_queue_buffer_offset())); @@ -1109,7 +943,6 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s bool is_phantom = ShenandoahBarrierSet::is_phantom_access(decorators); bool is_native = ShenandoahBarrierSet::is_native_access(decorators); -#ifdef _LP64 __ load_parameter(0, c_rarg0); __ load_parameter(1, c_rarg1); if (is_strong) { @@ -1134,18 +967,6 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s assert(is_native, "phantom must only be called off-heap"); __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom), c_rarg0, c_rarg1); } -#else - __ load_parameter(0, rax); - __ load_parameter(1, rbx); - if (is_strong) { - __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_strong), rax, rbx); - } else if (is_weak) { - __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), rax, rbx); - } else { - assert(is_phantom, "only remaining strength"); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_phantom), rax, rbx); - } -#endif __ restore_live_registers_except_rax(true); diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_32.ad b/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_32.ad deleted file mode 100644 index 3cf82bf9fb197..0000000000000 --- 
a/src/hotspot/cpu/x86/gc/shenandoah/shenandoah_x86_32.ad +++ /dev/null @@ -1,71 +0,0 @@ -// -// Copyright (c) 2018, Red Hat, Inc. All rights reserved. -// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -// -// This code is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License version 2 only, as -// published by the Free Software Foundation. -// -// This code is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -// version 2 for more details (a copy is included in the LICENSE file that -// accompanied this code). -// -// You should have received a copy of the GNU General Public License version -// 2 along with this work; if not, write to the Free Software Foundation, -// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -// -// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -// or visit www.oracle.com if you need additional information or have any -// questions. 
-// -// - -source_hpp %{ -#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" -#include "gc/shenandoah/c2/shenandoahSupport.hpp" -%} - -instruct compareAndSwapP_shenandoah(rRegI res, - memory mem_ptr, - eRegP tmp1, eRegP tmp2, - eAXRegP oldval, eRegP newval, - eFlagsReg cr) -%{ - match(Set res (ShenandoahCompareAndSwapP mem_ptr (Binary oldval newval))); - match(Set res (ShenandoahWeakCompareAndSwapP mem_ptr (Binary oldval newval))); - effect(TEMP tmp1, TEMP tmp2, KILL cr, KILL oldval); - - format %{ "shenandoah_cas_oop $mem_ptr,$newval" %} - - ins_encode %{ - ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, - $res$$Register, $mem_ptr$$Address, $oldval$$Register, $newval$$Register, - false, // swap - $tmp1$$Register, $tmp2$$Register - ); - %} - ins_pipe( pipe_cmpxchg ); -%} - -instruct compareAndExchangeP_shenandoah(memory mem_ptr, - eAXRegP oldval, eRegP newval, - eRegP tmp1, eRegP tmp2, - eFlagsReg cr) -%{ - match(Set oldval (ShenandoahCompareAndExchangeP mem_ptr (Binary oldval newval))); - effect(KILL cr, TEMP tmp1, TEMP tmp2); - ins_cost(1000); - - format %{ "shenandoah_cas_oop $mem_ptr,$newval" %} - - ins_encode %{ - ShenandoahBarrierSet::assembler()->cmpxchg_oop(masm, - noreg, $mem_ptr$$Address, $oldval$$Register, $newval$$Register, - true, // exchange - $tmp1$$Register, $tmp2$$Register - ); - %} - ins_pipe( pipe_cmpxchg ); -%} diff --git a/src/hotspot/cpu/x86/globalDefinitions_x86.hpp b/src/hotspot/cpu/x86/globalDefinitions_x86.hpp index 873cfbdcea0ec..3c1474ae8611a 100644 --- a/src/hotspot/cpu/x86/globalDefinitions_x86.hpp +++ b/src/hotspot/cpu/x86/globalDefinitions_x86.hpp @@ -34,9 +34,7 @@ const bool CCallingConventionRequiresIntsAsLongs = false; #define SUPPORTS_NATIVE_CX8 -#ifdef _LP64 #define SUPPORT_MONITOR_COUNT -#endif #define CPU_MULTI_COPY_ATOMIC @@ -44,15 +42,11 @@ const bool CCallingConventionRequiresIntsAsLongs = false; #define DEFAULT_CACHE_LINE_SIZE 64 // The default padding size for data structures to avoid false sharing. 
-#ifdef _LP64 // The common wisdom is that adjacent cache line prefetchers on some hardware // may pull two cache lines on access, so we have to pessimistically assume twice // the cache line size for padding. TODO: Check if this is still true for modern // hardware. If not, DEFAULT_CACHE_LINE_SIZE might as well suffice. #define DEFAULT_PADDING_SIZE (DEFAULT_CACHE_LINE_SIZE*2) -#else -#define DEFAULT_PADDING_SIZE DEFAULT_CACHE_LINE_SIZE -#endif #if defined(LINUX) || defined(__APPLE__) #define SUPPORT_RESERVED_STACK_AREA diff --git a/src/hotspot/cpu/x86/globals_x86.hpp b/src/hotspot/cpu/x86/globals_x86.hpp index 54888a9f849d9..f74831edc9d5b 100644 --- a/src/hotspot/cpu/x86/globals_x86.hpp +++ b/src/hotspot/cpu/x86/globals_x86.hpp @@ -61,7 +61,6 @@ define_pd_global(intx, InlineSmallCode, 1000); #define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES #define MIN_STACK_RESERVED_PAGES (0) -#ifdef _LP64 // Java_java_net_SocketOutputStream_socketWrite0() uses a 64k buffer on the // stack if compiled for unix and LP64. To pass stack overflow tests we need // 20 shadow pages. 
@@ -69,21 +68,13 @@ define_pd_global(intx, InlineSmallCode, 1000); // For those clients that do not use write socket, we allow // the min range value to be below that of the default #define MIN_STACK_SHADOW_PAGES (NOT_WIN64(10) WIN64_ONLY(8) DEBUG_ONLY(+4)) -#else -#define DEFAULT_STACK_SHADOW_PAGES (4 DEBUG_ONLY(+5)) -#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES -#endif // _LP64 define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES); define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES); define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); -#ifdef _LP64 define_pd_global(bool, VMContinuations, true); -#else -define_pd_global(bool, VMContinuations, false); -#endif define_pd_global(bool, RewriteBytecodes, true); define_pd_global(bool, RewriteFrequentPairs, true); @@ -101,9 +92,6 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong); range, \ constraint) \ \ - develop(bool, IEEEPrecision, true, \ - "Enables IEEE precision (for INTEL only)") \ - \ product(bool, UseStoreImmI16, true, \ "Use store immediate 16-bits value instruction on x86") \ \ diff --git a/src/hotspot/cpu/x86/icache_x86.cpp b/src/hotspot/cpu/x86/icache_x86.cpp index b9ec2f6d18649..85d3e76a24a27 100644 --- a/src/hotspot/cpu/x86/icache_x86.cpp +++ b/src/hotspot/cpu/x86/icache_x86.cpp @@ -32,7 +32,6 @@ void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flu StubCodeMark mark(this, "ICache", "flush_icache_stub"); address start = __ pc(); -#ifdef AMD64 const Register addr = c_rarg0; const Register lines = c_rarg1; @@ -57,10 +56,6 @@ void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flu __ bind(done); -#else - const Address magic(rsp, 3*wordSize); - __ lock(); __ addl(Address(rsp, 0), 0); -#endif // AMD64 __ movptr(rax, magic); // Handshake with caller to make sure it happened! 
__ ret(0); diff --git a/src/hotspot/cpu/x86/icache_x86.hpp b/src/hotspot/cpu/x86/icache_x86.hpp index 48286a7e3b385..f4dc49c732c88 100644 --- a/src/hotspot/cpu/x86/icache_x86.hpp +++ b/src/hotspot/cpu/x86/icache_x86.hpp @@ -40,21 +40,11 @@ class ICache : public AbstractICache { public: -#ifdef AMD64 enum { stub_size = 64, // Size of the icache flush stub in bytes line_size = 64, // Icache line size in bytes log2_line_size = 6 // log2(line_size) }; - - // Use default implementation -#else - enum { - stub_size = 16, // Size of the icache flush stub in bytes - line_size = BytesPerWord, // conservative - log2_line_size = LogBytesPerWord // log2(line_size) - }; -#endif // AMD64 }; #endif // CPU_X86_ICACHE_X86_HPP diff --git a/src/hotspot/cpu/x86/interp_masm_x86.cpp b/src/hotspot/cpu/x86/interp_masm_x86.cpp index 3a3f01a640983..9c98f305129e3 100644 --- a/src/hotspot/cpu/x86/interp_masm_x86.cpp +++ b/src/hotspot/cpu/x86/interp_masm_x86.cpp @@ -54,11 +54,7 @@ void InterpreterMacroAssembler::jump_to_entry(address entry) { void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { Label update, next, none; -#ifdef _LP64 assert_different_registers(obj, rscratch1, mdo_addr.base(), mdo_addr.index()); -#else - assert_different_registers(obj, mdo_addr.base(), mdo_addr.index()); -#endif interp_verify_oop(obj, atos); @@ -73,9 +69,7 @@ void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& md bind(update); load_klass(obj, obj, rscratch1); -#ifdef _LP64 mov(rscratch1, obj); -#endif xorptr(obj, mdo_addr); testptr(obj, TypeEntries::type_klass_mask); @@ -90,7 +84,6 @@ void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& md jccb(Assembler::equal, none); cmpptr(mdo_addr, TypeEntries::null_seen); jccb(Assembler::equal, none); -#ifdef _LP64 // There is a chance that the checks above (re-reading profiling // data from memory) fail if another thread has just set the // profiling to this obj's klass @@ -98,7 +91,6 
@@ void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& md xorptr(obj, mdo_addr); testptr(obj, TypeEntries::type_klass_mask); jccb(Assembler::zero, next); -#endif // different than before. Cannot keep accurate profile. orptr(mdo_addr, TypeEntries::type_unknown); @@ -315,7 +307,6 @@ void InterpreterMacroAssembler::call_VM_base(Register oop_result, // really make a difference for these runtime calls, since they are // slow anyway. Btw., bcp must be saved/restored since it may change // due to GC. - NOT_LP64(assert(java_thread == noreg , "not expecting a precomputed java thread");) save_bcp(); #ifdef ASSERT { @@ -336,7 +327,6 @@ void InterpreterMacroAssembler::call_VM_base(Register oop_result, restore_locals(); } -#ifdef _LP64 void InterpreterMacroAssembler::call_VM_preemptable(Register oop_result, address entry_point, Register arg_1) { @@ -388,13 +378,6 @@ void InterpreterMacroAssembler::restore_after_resume(bool is_native) { push(ltos); } } -#else -void InterpreterMacroAssembler::call_VM_preemptable(Register oop_result, - address entry_point, - Register arg_1) { - MacroAssembler::call_VM(oop_result, entry_point, arg_1); -} -#endif // _LP64 void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { if (JvmtiExport::can_pop_frame()) { @@ -405,8 +388,7 @@ void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) // don't want to reenter. // This method is only called just after the call into the vm in // call_VM_base, so the arg registers are available. 
- Register pop_cond = NOT_LP64(java_thread) // Not clear if any other register is available on 32 bit - LP64_ONLY(c_rarg0); + Register pop_cond = c_rarg0; movl(pop_cond, Address(java_thread, JavaThread::popframe_condition_offset())); testl(pop_cond, JavaThread::popframe_pending_bit); jcc(Assembler::zero, L); @@ -417,18 +399,16 @@ void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); jmp(rax); bind(L); - NOT_LP64(get_thread(java_thread);) } } void InterpreterMacroAssembler::load_earlyret_value(TosState state) { - Register thread = LP64_ONLY(r15_thread) NOT_LP64(rcx); - NOT_LP64(get_thread(thread);) + Register thread = r15_thread; movptr(rcx, Address(thread, JavaThread::jvmti_thread_state_offset())); const Address tos_addr(rcx, JvmtiThreadState::earlyret_tos_offset()); const Address oop_addr(rcx, JvmtiThreadState::earlyret_oop_offset()); const Address val_addr(rcx, JvmtiThreadState::earlyret_value_offset()); -#ifdef _LP64 + switch (state) { case atos: movptr(rax, oop_addr); movptr(oop_addr, NULL_WORD); @@ -447,38 +427,14 @@ void InterpreterMacroAssembler::load_earlyret_value(TosState state) { // Clean up tos value in the thread object movl(tos_addr, ilgl); movl(val_addr, NULL_WORD); -#else - const Address val_addr1(rcx, JvmtiThreadState::earlyret_value_offset() - + in_ByteSize(wordSize)); - switch (state) { - case atos: movptr(rax, oop_addr); - movptr(oop_addr, NULL_WORD); - interp_verify_oop(rax, state); break; - case ltos: - movl(rdx, val_addr1); // fall through - case btos: // fall through - case ztos: // fall through - case ctos: // fall through - case stos: // fall through - case itos: movl(rax, val_addr); break; - case ftos: load_float(val_addr); break; - case dtos: load_double(val_addr); break; - case vtos: /* nothing to do */ break; - default : ShouldNotReachHere(); - } -#endif // _LP64 - // Clean up tos value in the thread object - 
movl(tos_addr, ilgl); - movptr(val_addr, NULL_WORD); - NOT_LP64(movptr(val_addr1, NULL_WORD);) } void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { if (JvmtiExport::can_force_early_return()) { Label L; - Register tmp = LP64_ONLY(c_rarg0) NOT_LP64(java_thread); - Register rthread = LP64_ONLY(r15_thread) NOT_LP64(java_thread); + Register tmp = c_rarg0; + Register rthread = r15_thread; movptr(tmp, Address(rthread, JavaThread::jvmti_thread_state_offset())); testptr(tmp, tmp); @@ -493,18 +449,11 @@ void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) // Call Interpreter::remove_activation_early_entry() to get the address of the // same-named entrypoint in the generated interpreter code. - NOT_LP64(get_thread(java_thread);) movptr(tmp, Address(rthread, JavaThread::jvmti_thread_state_offset())); -#ifdef _LP64 movl(tmp, Address(tmp, JvmtiThreadState::earlyret_tos_offset())); call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), tmp); -#else - pushl(Address(tmp, JvmtiThreadState::earlyret_tos_offset())); - call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), 1); -#endif // _LP64 jmp(rax); bind(L); - NOT_LP64(get_thread(java_thread);) } } @@ -570,8 +519,8 @@ void InterpreterMacroAssembler::load_resolved_klass_at_index(Register klass, void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, Label& ok_is_subtype) { assert(Rsub_klass != rax, "rax holds superklass"); - LP64_ONLY(assert(Rsub_klass != r14, "r14 holds locals");) - LP64_ONLY(assert(Rsub_klass != r13, "r13 holds bcp");) + assert(Rsub_klass != r14, "r14 holds locals"); + assert(Rsub_klass != r13, "r13 holds bcp"); assert(Rsub_klass != rcx, "rcx holds 2ndary super array length"); assert(Rsub_klass != rdi, "rdi holds 2ndary super array scan ptr"); @@ -582,24 +531,6 @@ void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, check_klass_subtype(Rsub_klass, rax, rcx, 
ok_is_subtype); // blows rcx } - -#ifndef _LP64 -void InterpreterMacroAssembler::f2ieee() { - if (IEEEPrecision) { - fstp_s(Address(rsp, 0)); - fld_s(Address(rsp, 0)); - } -} - - -void InterpreterMacroAssembler::d2ieee() { - if (IEEEPrecision) { - fstp_d(Address(rsp, 0)); - fld_d(Address(rsp, 0)); - } -} -#endif // _LP64 - // Java Expression Stack void InterpreterMacroAssembler::pop_ptr(Register r) { @@ -638,7 +569,6 @@ void InterpreterMacroAssembler::pop_d(XMMRegister r) { addptr(rsp, 2 * Interpreter::stackElementSize); } -#ifdef _LP64 void InterpreterMacroAssembler::pop_i(Register r) { // XXX can't use pop currently, upper half non clean movl(r, Address(rsp, 0)); @@ -689,105 +619,6 @@ void InterpreterMacroAssembler::push(TosState state) { default : ShouldNotReachHere(); } } -#else -void InterpreterMacroAssembler::pop_i(Register r) { - pop(r); -} - -void InterpreterMacroAssembler::pop_l(Register lo, Register hi) { - pop(lo); - pop(hi); -} - -void InterpreterMacroAssembler::pop_f() { - fld_s(Address(rsp, 0)); - addptr(rsp, 1 * wordSize); -} - -void InterpreterMacroAssembler::pop_d() { - fld_d(Address(rsp, 0)); - addptr(rsp, 2 * wordSize); -} - - -void InterpreterMacroAssembler::pop(TosState state) { - switch (state) { - case atos: pop_ptr(rax); break; - case btos: // fall through - case ztos: // fall through - case ctos: // fall through - case stos: // fall through - case itos: pop_i(rax); break; - case ltos: pop_l(rax, rdx); break; - case ftos: - if (UseSSE >= 1) { - pop_f(xmm0); - } else { - pop_f(); - } - break; - case dtos: - if (UseSSE >= 2) { - pop_d(xmm0); - } else { - pop_d(); - } - break; - case vtos: /* nothing to do */ break; - default : ShouldNotReachHere(); - } - interp_verify_oop(rax, state); -} - - -void InterpreterMacroAssembler::push_l(Register lo, Register hi) { - push(hi); - push(lo); -} - -void InterpreterMacroAssembler::push_f() { - // Do not schedule for no AGI! Never write beyond rsp! 
- subptr(rsp, 1 * wordSize); - fstp_s(Address(rsp, 0)); -} - -void InterpreterMacroAssembler::push_d() { - // Do not schedule for no AGI! Never write beyond rsp! - subptr(rsp, 2 * wordSize); - fstp_d(Address(rsp, 0)); -} - - -void InterpreterMacroAssembler::push(TosState state) { - interp_verify_oop(rax, state); - switch (state) { - case atos: push_ptr(rax); break; - case btos: // fall through - case ztos: // fall through - case ctos: // fall through - case stos: // fall through - case itos: push_i(rax); break; - case ltos: push_l(rax, rdx); break; - case ftos: - if (UseSSE >= 1) { - push_f(xmm0); - } else { - push_f(); - } - break; - case dtos: - if (UseSSE >= 2) { - push_d(xmm0); - } else { - push_d(); - } - break; - case vtos: /* nothing to do */ break; - default : ShouldNotReachHere(); - } -} -#endif // _LP64 - // Helpers for swap and dup void InterpreterMacroAssembler::load_ptr(int n, Register val) { @@ -822,8 +653,7 @@ void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register // interp_only_mode if these events CAN be enabled. // interp_only is an int, on little endian it is sufficient to test the byte only // Is a cmpl faster? 
- LP64_ONLY(temp = r15_thread;) - NOT_LP64(get_thread(temp);) + temp = r15_thread; cmpb(Address(temp, JavaThread::interp_only_mode_offset()), 0); jccb(Assembler::zero, run_compiled_code); jmp(Address(method, Method::interpreter_entry_offset())); @@ -847,7 +677,6 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, address* table, bool verifyoop, bool generate_poll) { - verify_FPU(1, state); if (VerifyActivationFrameSize) { Label L; mov(rcx, rbp); @@ -865,7 +694,6 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, } address* const safepoint_table = Interpreter::safept_table(state); -#ifdef _LP64 Label no_safepoint, dispatch; if (table != safepoint_table && generate_poll) { NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); @@ -880,27 +708,6 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, lea(rscratch1, ExternalAddress((address)table)); bind(dispatch); jmp(Address(rscratch1, rbx, Address::times_8)); - -#else - Address index(noreg, rbx, Address::times_ptr); - if (table != safepoint_table && generate_poll) { - NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); - Label no_safepoint; - const Register thread = rcx; - get_thread(thread); - testb(Address(thread, JavaThread::polling_word_offset()), SafepointMechanism::poll_bit()); - - jccb(Assembler::zero, no_safepoint); - ArrayAddress dispatch_addr(ExternalAddress((address)safepoint_table), index); - jump(dispatch_addr, noreg); - bind(no_safepoint); - } - - { - ArrayAddress dispatch_addr(ExternalAddress((address)table), index); - jump(dispatch_addr, noreg); - } -#endif // _LP64 } void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll) { @@ -952,24 +759,19 @@ void InterpreterMacroAssembler::narrow(Register result) { bind(notBool); cmpl(rcx, T_BYTE); jcc(Assembler::notEqual, notByte); - LP64_ONLY(movsbl(result, result);) - NOT_LP64(shll(result, 24);) // truncate upper 24 bits - NOT_LP64(sarl(result, 24);) // and sign-extend byte + 
movsbl(result, result); jmp(done); bind(notByte); cmpl(rcx, T_CHAR); jcc(Assembler::notEqual, notChar); - LP64_ONLY(movzwl(result, result);) - NOT_LP64(andl(result, 0xFFFF);) // truncate upper 16 bits + movzwl(result, result); jmp(done); bind(notChar); // cmpl(rcx, T_SHORT); // all that's left // jcc(Assembler::notEqual, done); - LP64_ONLY(movswl(result, result);) - NOT_LP64(shll(result, 16);) // truncate upper 16 bits - NOT_LP64(sarl(result, 16);) // and sign-extend short + movswl(result, result); // Nothing to do for T_INT bind(done); @@ -999,12 +801,11 @@ void InterpreterMacroAssembler::remove_activation( // result check if synchronized method Label unlocked, unlock, no_unlock; - const Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx); - const Register robj = LP64_ONLY(c_rarg1) NOT_LP64(rdx); - const Register rmon = LP64_ONLY(c_rarg1) NOT_LP64(rcx); + const Register rthread = r15_thread; + const Register robj = c_rarg1; + const Register rmon = c_rarg1; // monitor pointers need different register // because rdx may have the result in it - NOT_LP64(get_thread(rthread);) // The below poll is for the stack watermark barrier. It allows fixing up frames lazily, // that would normally not be safe to use. 
Such bad returns into unsafe territory of @@ -1017,7 +818,6 @@ void InterpreterMacroAssembler::remove_activation( push(state); set_last_Java_frame(rthread, noreg, rbp, (address)pc(), rscratch1); super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), rthread); - NOT_LP64(get_thread(rthread);) // call_VM clobbered it, restore reset_last_Java_frame(rthread, true); pop(state); bind(fast_path); @@ -1058,7 +858,6 @@ void InterpreterMacroAssembler::remove_activation( pop(state); if (throw_monitor_exception) { // Entry already unlocked, need to throw exception - NOT_LP64(empty_FPU_stack();) // remove possible return value from FPU-stack, otherwise stack could overflow call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_illegal_monitor_state_exception)); should_not_reach_here(); @@ -1067,7 +866,6 @@ void InterpreterMacroAssembler::remove_activation( // install an illegal_monitor_state_exception. Continue with // stack unrolling. if (install_monitor_exception) { - NOT_LP64(empty_FPU_stack();) call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::new_illegal_monitor_state_exception)); } @@ -1109,7 +907,6 @@ void InterpreterMacroAssembler::remove_activation( if (throw_monitor_exception) { // Throw exception - NOT_LP64(empty_FPU_stack();) MacroAssembler::call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime:: throw_illegal_monitor_state_exception)); @@ -1125,7 +922,6 @@ void InterpreterMacroAssembler::remove_activation( pop(state); if (install_monitor_exception) { - NOT_LP64(empty_FPU_stack();) call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime:: new_illegal_monitor_state_exception)); @@ -1160,11 +956,9 @@ void InterpreterMacroAssembler::remove_activation( Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize)); if (StackReservedPages > 0) { // testing if reserved zone needs to be re-enabled - Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx); + Register rthread = r15_thread; Label 
no_reserved_zone_enabling; - NOT_LP64(get_thread(rthread);) - // check if already enabled - if so no re-enabling needed assert(sizeof(StackOverflow::StackGuardState) == 4, "unexpected size"); cmpl(Address(rthread, JavaThread::stack_guard_state_offset()), StackOverflow::stack_guard_enabled); @@ -1210,8 +1004,7 @@ void InterpreterMacroAssembler::get_method_counters(Register method, // Kills: // rax, rbx void InterpreterMacroAssembler::lock_object(Register lock_reg) { - assert(lock_reg == LP64_ONLY(c_rarg1) NOT_LP64(rdx), - "The argument is only for looks. It must be c_rarg1"); + assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1"); if (LockingMode == LM_MONITOR) { call_VM_preemptable(noreg, @@ -1222,7 +1015,7 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) { const Register swap_reg = rax; // Must use rax for cmpxchg instruction const Register tmp_reg = rbx; - const Register obj_reg = LP64_ONLY(c_rarg3) NOT_LP64(rcx); // Will contain the oop + const Register obj_reg = c_rarg3; // Will contain the oop const Register rklass_decode_tmp = rscratch1; const int obj_offset = in_bytes(BasicObjectLock::obj_offset()); @@ -1240,13 +1033,7 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) { } if (LockingMode == LM_LIGHTWEIGHT) { -#ifdef _LP64 - const Register thread = r15_thread; - lightweight_lock(lock_reg, obj_reg, swap_reg, thread, tmp_reg, slow_case); -#else - // Lacking registers and thread on x86_32. Always take slow path. 
- jmp(slow_case); -#endif + lightweight_lock(lock_reg, obj_reg, swap_reg, r15_thread, tmp_reg, slow_case); } else if (LockingMode == LM_LEGACY) { // Load immediate 1 into swap_reg %rax movl(swap_reg, 1); @@ -1264,7 +1051,7 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) { cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); jcc(Assembler::zero, count_locking); - const int zero_bits = LP64_ONLY(7) NOT_LP64(3); + const int zero_bits = 7; // Fast check for recursive lock. // @@ -1329,8 +1116,7 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg) { // rscratch1 (scratch reg) // rax, rbx, rcx, rdx void InterpreterMacroAssembler::unlock_object(Register lock_reg) { - assert(lock_reg == LP64_ONLY(c_rarg1) NOT_LP64(rdx), - "The argument is only for looks. It must be c_rarg1"); + assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1"); if (LockingMode == LM_MONITOR) { call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); @@ -1338,8 +1124,8 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) { Label count_locking, done, slow_case; const Register swap_reg = rax; // Must use rax for cmpxchg instruction - const Register header_reg = LP64_ONLY(c_rarg2) NOT_LP64(rbx); // Will contain the old oopMark - const Register obj_reg = LP64_ONLY(c_rarg3) NOT_LP64(rcx); // Will contain the oop + const Register header_reg = c_rarg2; // Will contain the old oopMark + const Register obj_reg = c_rarg3; // Will contain the oop save_bcp(); // Save in case of exception @@ -1356,12 +1142,7 @@ void InterpreterMacroAssembler::unlock_object(Register lock_reg) { movptr(Address(lock_reg, BasicObjectLock::obj_offset()), NULL_WORD); if (LockingMode == LM_LIGHTWEIGHT) { -#ifdef _LP64 lightweight_unlock(obj_reg, swap_reg, r15_thread, header_reg, slow_case); -#else - // Lacking registers and thread on x86_32. Always take slow path. 
- jmp(slow_case); -#endif } else if (LockingMode == LM_LEGACY) { // Load the old header from BasicLock structure movptr(header_reg, Address(swap_reg, @@ -1437,8 +1218,8 @@ void InterpreterMacroAssembler::verify_method_data_pointer() { Label verify_continue; push(rax); push(rbx); - Register arg3_reg = LP64_ONLY(c_rarg3) NOT_LP64(rcx); - Register arg2_reg = LP64_ONLY(c_rarg2) NOT_LP64(rdx); + Register arg3_reg = c_rarg3; + Register arg2_reg = c_rarg2; push(arg3_reg); push(arg2_reg); test_method_data_pointer(arg3_reg, verify_continue); // If mdp is zero, continue @@ -1896,8 +1677,6 @@ void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, // Record the object type. record_klass_in_profile(klass, mdp, reg2, false); - NOT_LP64(assert(reg2 == rdi, "we know how to fix this blown reg");) - NOT_LP64(restore_locals();) // Restore EDI } update_mdp_by_constant(mdp, mdp_delta); @@ -1965,15 +1744,6 @@ void InterpreterMacroAssembler::_interp_verify_oop(Register reg, TosState state, } } -void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { -#ifndef _LP64 - if ((state == ftos && UseSSE < 1) || - (state == dtos && UseSSE < 2)) { - MacroAssembler::verify_FPU(stack_depth); - } -#endif -} - // Jump if ((*counter_addr += increment) & mask) == 0 void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, Address mask, Register scratch, Label* where) { @@ -1993,11 +1763,10 @@ void InterpreterMacroAssembler::notify_method_entry() { // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to // track stack depth. If it is possible to enter interp_only_mode we add // the code to check if the event should be sent. 
- Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx); - Register rarg = LP64_ONLY(c_rarg1) NOT_LP64(rbx); + Register rthread = r15_thread; + Register rarg = c_rarg1; if (JvmtiExport::can_post_interpreter_events()) { Label L; - NOT_LP64(get_thread(rthread);) movl(rdx, Address(rthread, JavaThread::interp_only_mode_offset())); testl(rdx, rdx); jcc(Assembler::zero, L); @@ -2007,7 +1776,6 @@ void InterpreterMacroAssembler::notify_method_entry() { } if (DTraceMethodProbes) { - NOT_LP64(get_thread(rthread);) get_method(rarg); call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), rthread, rarg); @@ -2015,7 +1783,6 @@ void InterpreterMacroAssembler::notify_method_entry() { // RedefineClasses() tracing support for obsolete method entry if (log_is_enabled(Trace, redefine, class, obsolete)) { - NOT_LP64(get_thread(rthread);) get_method(rarg); call_VM_leaf( CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), @@ -2029,8 +1796,8 @@ void InterpreterMacroAssembler::notify_method_exit( // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to // track stack depth. If it is possible to enter interp_only_mode we add // the code to check if the event should be sent. - Register rthread = LP64_ONLY(r15_thread) NOT_LP64(rcx); - Register rarg = LP64_ONLY(c_rarg1) NOT_LP64(rbx); + Register rthread = r15_thread; + Register rarg = c_rarg1; if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { Label L; // Note: frame::interpreter_frame_result has a dependency on how the @@ -2040,7 +1807,6 @@ void InterpreterMacroAssembler::notify_method_exit( // template interpreter will leave the result on the top of the stack. 
push(state); - NOT_LP64(get_thread(rthread);) movl(rdx, Address(rthread, JavaThread::interp_only_mode_offset())); testl(rdx, rdx); jcc(Assembler::zero, L); @@ -2052,7 +1818,6 @@ void InterpreterMacroAssembler::notify_method_exit( if (DTraceMethodProbes) { push(state); - NOT_LP64(get_thread(rthread);) get_method(rarg); call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), rthread, rarg); diff --git a/src/hotspot/cpu/x86/interp_masm_x86.hpp b/src/hotspot/cpu/x86/interp_masm_x86.hpp index 5d9a9071f8add..e537e9efc9678 100644 --- a/src/hotspot/cpu/x86/interp_masm_x86.hpp +++ b/src/hotspot/cpu/x86/interp_masm_x86.hpp @@ -53,8 +53,8 @@ class InterpreterMacroAssembler: public MacroAssembler { public: InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code), - _locals_register(LP64_ONLY(r14) NOT_LP64(rdi)), - _bcp_register(LP64_ONLY(r13) NOT_LP64(rsi)) {} + _locals_register(r14), + _bcp_register(r13) {} void jump_to_entry(address entry); @@ -121,9 +121,6 @@ class InterpreterMacroAssembler: public MacroAssembler { Register cpool, // the constant pool (corrupted on return) Register index); // the constant pool index (corrupted on return) - NOT_LP64(void f2ieee();) // truncate ftos to 32bits - NOT_LP64(void d2ieee();) // truncate dtos to 64bits - // Expression stack void pop_ptr(Register r = rax); void pop_i(Register r = rax); @@ -143,18 +140,8 @@ class InterpreterMacroAssembler: public MacroAssembler { void pop_f(XMMRegister r); void pop_d(XMMRegister r); void push_d(XMMRegister r); -#ifdef _LP64 void pop_l(Register r = rax); void push_l(Register r = rax); -#else - void pop_l(Register lo = rax, Register hi = rdx); - void pop_f(); - void pop_d(); - - void push_l(Register lo = rax, Register hi = rdx); - void push_d(); - void push_f(); -#endif // _LP64 void pop(Register r) { ((MacroAssembler*)this)->pop(r); } void push(Register r) { ((MacroAssembler*)this)->push(r); } @@ -168,7 +155,6 @@ class InterpreterMacroAssembler: public MacroAssembler { 
lea(rsp, Address(rbp, rcx, Address::times_ptr)); // null last_sp until next java call movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD); - NOT_LP64(empty_FPU_stack()); } // Helpers for swap and dup @@ -273,8 +259,6 @@ class InterpreterMacroAssembler: public MacroAssembler { // only if +VerifyOops && state == atos #define interp_verify_oop(reg, state) _interp_verify_oop(reg, state, __FILE__, __LINE__); void _interp_verify_oop(Register reg, TosState state, const char* file, int line); - // only if +VerifyFPU && (state == ftos || state == dtos) - void verify_FPU(int stack_depth, TosState state = ftos); typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; diff --git a/src/hotspot/cpu/x86/interpreterRT_x86.hpp b/src/hotspot/cpu/x86/interpreterRT_x86.hpp index e914b4947a15e..6875280d9f5e6 100644 --- a/src/hotspot/cpu/x86/interpreterRT_x86.hpp +++ b/src/hotspot/cpu/x86/interpreterRT_x86.hpp @@ -33,7 +33,6 @@ class SignatureHandlerGenerator: public NativeSignatureIterator { private: MacroAssembler* _masm; -#ifdef AMD64 #ifdef _WIN64 unsigned int _num_args; #else @@ -41,17 +40,11 @@ class SignatureHandlerGenerator: public NativeSignatureIterator { unsigned int _num_int_args; #endif // _WIN64 int _stack_offset; -#else - void move(int from_offset, int to_offset); - void box(int from_offset, int to_offset); -#endif // AMD64 void pass_int(); void pass_long(); void pass_float(); -#ifdef AMD64 void pass_double(); -#endif // AMD64 void pass_object(); public: diff --git a/src/hotspot/cpu/x86/interpreterRT_x86_32.cpp b/src/hotspot/cpu/x86/interpreterRT_x86_32.cpp deleted file mode 100644 index 4f463b1d77140..0000000000000 --- a/src/hotspot/cpu/x86/interpreterRT_x86_32.cpp +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright (c) 1998, 2024, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "precompiled.hpp" -#include "interpreter/interp_masm.hpp" -#include "interpreter/interpreter.hpp" -#include "interpreter/interpreterRuntime.hpp" -#include "memory/allocation.inline.hpp" -#include "oops/method.hpp" -#include "oops/oop.inline.hpp" -#include "runtime/handles.inline.hpp" -#include "runtime/icache.hpp" -#include "runtime/interfaceSupport.inline.hpp" -#include "runtime/signature.hpp" - - -#define __ _masm-> - - -// Implementation of SignatureHandlerGenerator -InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer) : - NativeSignatureIterator(method) { - _masm = new MacroAssembler(buffer); -} - -void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { - move(offset(), jni_offset() + 1); -} - -void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { - move(offset(), jni_offset() + 1); -} - -void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { - move(offset(), jni_offset() + 2); - move(offset() + 1, jni_offset() + 1); -} - -void 
InterpreterRuntime::SignatureHandlerGenerator::pass_object() { - box (offset(), jni_offset() + 1); -} - -void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) { - __ movl(temp(), Address(from(), Interpreter::local_offset_in_bytes(from_offset))); - __ movl(Address(to(), to_offset * wordSize), temp()); -} - - -void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) { - __ lea(temp(), Address(from(), Interpreter::local_offset_in_bytes(from_offset))); - __ cmpptr(Address(from(), Interpreter::local_offset_in_bytes(from_offset)), NULL_WORD); // do not use temp() to avoid AGI - Label L; - __ jcc(Assembler::notZero, L); - __ movptr(temp(), NULL_WORD); - __ bind(L); - __ movptr(Address(to(), to_offset * wordSize), temp()); -} - - -void InterpreterRuntime::SignatureHandlerGenerator::generate( uint64_t fingerprint) { - // generate code to handle arguments - iterate(fingerprint); - // return result handler - __ lea(rax, - ExternalAddress((address)Interpreter::result_handler(method()->result_type()))); - // return - __ ret(0); - __ flush(); -} - - -Register InterpreterRuntime::SignatureHandlerGenerator::from() { return rdi; } -Register InterpreterRuntime::SignatureHandlerGenerator::to() { return rsp; } -Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return rcx; } - - -// Implementation of SignatureHandlerLibrary - -void SignatureHandlerLibrary::pd_set_handler(address handler) {} - -class SlowSignatureHandler: public NativeSignatureIterator { - private: - address _from; - intptr_t* _to; - - virtual void pass_int() { - *_to++ = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); - _from -= Interpreter::stackElementSize; - } - - virtual void pass_float() { - *_to++ = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); - _from -= Interpreter::stackElementSize; - } - - virtual void pass_long() { - _to[0] = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); - _to[1] = 
*(intptr_t*)(_from+Interpreter::local_offset_in_bytes(0)); - _to += 2; - _from -= 2*Interpreter::stackElementSize; - } - - virtual void pass_object() { - // pass address of from - intptr_t from_addr = (intptr_t)(_from + Interpreter::local_offset_in_bytes(0)); - *_to++ = (*(intptr_t*)from_addr == 0) ? NULL_WORD : from_addr; - _from -= Interpreter::stackElementSize; - } - - public: - SlowSignatureHandler(const methodHandle& method, address from, intptr_t* to) : - NativeSignatureIterator(method) { - _from = from; - _to = to + (is_static() ? 2 : 1); - } -}; - -JRT_ENTRY(address, InterpreterRuntime::slow_signature_handler(JavaThread* current, Method* method, intptr_t* from, intptr_t* to)) - methodHandle m(current, (Method*)method); - assert(m->is_native(), "sanity check"); - // handle arguments - SlowSignatureHandler(m, (address)from, to + 1).iterate((uint64_t)CONST64(-1)); - // return result handler - return Interpreter::result_handler(m->result_type()); -JRT_END diff --git a/src/hotspot/cpu/x86/jniFastGetField_x86_32.cpp b/src/hotspot/cpu/x86/jniFastGetField_x86_32.cpp deleted file mode 100644 index 123362894122c..0000000000000 --- a/src/hotspot/cpu/x86/jniFastGetField_x86_32.cpp +++ /dev/null @@ -1,324 +0,0 @@ -/* - * Copyright (c) 2004, 2024, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). 
- * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "precompiled.hpp" -#include "asm/macroAssembler.hpp" -#include "memory/resourceArea.hpp" -#include "prims/jniFastGetField.hpp" -#include "prims/jvm_misc.hpp" -#include "prims/jvmtiExport.hpp" -#include "runtime/os.inline.hpp" -#include "runtime/safepoint.hpp" -#include "runtime/stubRoutines.hpp" - -#define __ masm-> - -#define BUFFER_SIZE 30 - -// Instead of issuing lfence for LoadLoad barrier, we create data dependency -// between loads, which is much more efficient than lfence. - -address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { - const char *name = nullptr; - switch (type) { - case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; - case T_BYTE: name = "jni_fast_GetByteField"; break; - case T_CHAR: name = "jni_fast_GetCharField"; break; - case T_SHORT: name = "jni_fast_GetShortField"; break; - case T_INT: name = "jni_fast_GetIntField"; break; - default: ShouldNotReachHere(); - } - ResourceMark rm; - BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE*wordSize); - CodeBuffer cbuf(blob); - MacroAssembler* masm = new MacroAssembler(&cbuf); - address fast_entry = __ pc(); - - Label slow; - - // stack layout: offset from rsp (in words): - // return pc 0 - // jni env 1 - // obj 2 - // jfieldID 3 - - ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr()); - __ mov32 (rcx, counter); - __ testb (rcx, 1); - __ jcc (Assembler::notZero, slow); - - if (JvmtiExport::can_post_field_access()) { - // Check to see if a field access watch has been set before we - // take the fast path. 
- __ cmp32(ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), 0); - __ jcc(Assembler::notZero, slow); - } - - __ mov(rax, rcx); - __ andptr(rax, 1); // rax, must end up 0 - __ movptr(rdx, Address(rsp, rax, Address::times_1, 2*wordSize)); - // obj, notice rax, is 0. - // rdx is data dependent on rcx. - __ movptr(rax, Address(rsp, 3*wordSize)); // jfieldID - - __ clear_jobject_tag(rdx); - - __ movptr(rdx, Address(rdx, 0)); // *obj - __ shrptr (rax, 2); // offset - - assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); - speculative_load_pclist[count] = __ pc(); - switch (type) { - case T_BOOLEAN: __ movzbl (rax, Address(rdx, rax, Address::times_1)); break; - case T_BYTE: __ movsbl (rax, Address(rdx, rax, Address::times_1)); break; - case T_CHAR: __ movzwl (rax, Address(rdx, rax, Address::times_1)); break; - case T_SHORT: __ movswl (rax, Address(rdx, rax, Address::times_1)); break; - case T_INT: __ movl (rax, Address(rdx, rax, Address::times_1)); break; - default: ShouldNotReachHere(); - } - - Address ca1; - __ lea(rdx, counter); - __ xorptr(rdx, rax); - __ xorptr(rdx, rax); - __ cmp32(rcx, Address(rdx, 0)); - // ca1 is the same as ca because - // rax, ^ counter_addr ^ rax, = address - // ca1 is data dependent on rax,. 
- __ jcc (Assembler::notEqual, slow); - - __ ret (0); - - slowcase_entry_pclist[count++] = __ pc(); - __ bind (slow); - address slow_case_addr = nullptr; - switch (type) { - case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; - case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; - case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; - case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; - case T_INT: slow_case_addr = jni_GetIntField_addr(); break; - default: ShouldNotReachHere(); - } - // tail call - __ jump (RuntimeAddress(slow_case_addr)); - - __ flush (); - - return fast_entry; -} - -address JNI_FastGetField::generate_fast_get_boolean_field() { - return generate_fast_get_int_field0(T_BOOLEAN); -} - -address JNI_FastGetField::generate_fast_get_byte_field() { - return generate_fast_get_int_field0(T_BYTE); -} - -address JNI_FastGetField::generate_fast_get_char_field() { - return generate_fast_get_int_field0(T_CHAR); -} - -address JNI_FastGetField::generate_fast_get_short_field() { - return generate_fast_get_int_field0(T_SHORT); -} - -address JNI_FastGetField::generate_fast_get_int_field() { - return generate_fast_get_int_field0(T_INT); -} - -address JNI_FastGetField::generate_fast_get_long_field() { - const char *name = "jni_fast_GetLongField"; - ResourceMark rm; - BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE*wordSize); - CodeBuffer cbuf(blob); - MacroAssembler* masm = new MacroAssembler(&cbuf); - address fast_entry = __ pc(); - - Label slow; - - // stack layout: offset from rsp (in words): - // old rsi 0 - // return pc 1 - // jni env 2 - // obj 3 - // jfieldID 4 - - ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr()); - - __ push (rsi); - __ mov32 (rcx, counter); - __ testb (rcx, 1); - __ jcc (Assembler::notZero, slow); - - if (JvmtiExport::can_post_field_access()) { - // Check to see if a field access watch has been set before we - // take the fast path. 
- __ cmp32(ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), 0); - __ jcc(Assembler::notZero, slow); - } - - __ mov(rax, rcx); - __ andptr(rax, 1); // rax, must end up 0 - __ movptr(rdx, Address(rsp, rax, Address::times_1, 3*wordSize)); - // obj, notice rax, is 0. - // rdx is data dependent on rcx. - __ movptr(rsi, Address(rsp, 4*wordSize)); // jfieldID - - __ clear_jobject_tag(rdx); - - __ movptr(rdx, Address(rdx, 0)); // *obj - __ shrptr(rsi, 2); // offset - - assert(count < LIST_CAPACITY-1, "LIST_CAPACITY too small"); - speculative_load_pclist[count++] = __ pc(); - __ movptr(rax, Address(rdx, rsi, Address::times_1)); - speculative_load_pclist[count] = __ pc(); - __ movl(rdx, Address(rdx, rsi, Address::times_1, 4)); - - __ lea(rsi, counter); - __ xorptr(rsi, rdx); - __ xorptr(rsi, rax); - __ xorptr(rsi, rdx); - __ xorptr(rsi, rax); - __ cmp32(rcx, Address(rsi, 0)); - // ca1 is the same as ca because - // rax, ^ rdx ^ counter_addr ^ rax, ^ rdx = address - // ca1 is data dependent on both rax, and rdx. 
- __ jcc (Assembler::notEqual, slow); - - __ pop (rsi); - - __ ret (0); - - slowcase_entry_pclist[count-1] = __ pc(); - slowcase_entry_pclist[count++] = __ pc(); - __ bind (slow); - __ pop (rsi); - address slow_case_addr = jni_GetLongField_addr();; - // tail call - __ jump (RuntimeAddress(slow_case_addr)); - - __ flush (); - - return fast_entry; -} - -address JNI_FastGetField::generate_fast_get_float_field0(BasicType type) { - const char *name = nullptr; - switch (type) { - case T_FLOAT: name = "jni_fast_GetFloatField"; break; - case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; - default: ShouldNotReachHere(); - } - ResourceMark rm; - BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE*wordSize); - CodeBuffer cbuf(blob); - MacroAssembler* masm = new MacroAssembler(&cbuf); - address fast_entry = __ pc(); - - Label slow_with_pop, slow; - - // stack layout: offset from rsp (in words): - // return pc 0 - // jni env 1 - // obj 2 - // jfieldID 3 - - ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr()); - - __ mov32 (rcx, counter); - __ testb (rcx, 1); - __ jcc (Assembler::notZero, slow); - - if (JvmtiExport::can_post_field_access()) { - // Check to see if a field access watch has been set before we - // take the fast path. - __ cmp32(ExternalAddress((address) JvmtiExport::get_field_access_count_addr()), 0); - __ jcc(Assembler::notZero, slow); - } - - __ mov(rax, rcx); - __ andptr(rax, 1); // rax, must end up 0 - __ movptr(rdx, Address(rsp, rax, Address::times_1, 2*wordSize)); - // obj, notice rax, is 0. - // rdx is data dependent on rcx. 
- __ movptr(rax, Address(rsp, 3*wordSize)); // jfieldID - - __ clear_jobject_tag(rdx); - - __ movptr(rdx, Address(rdx, 0)); // *obj - __ shrptr(rax, 2); // offset - - assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); - speculative_load_pclist[count] = __ pc(); - switch (type) { - case T_FLOAT: __ fld_s (Address(rdx, rax, Address::times_1)); break; - case T_DOUBLE: __ fld_d (Address(rdx, rax, Address::times_1)); break; - default: ShouldNotReachHere(); - } - - Address ca1; - __ fst_s (Address(rsp, -4)); - __ lea(rdx, counter); - __ movl (rax, Address(rsp, -4)); - // garbage hi-order bits on 64bit are harmless. - __ xorptr(rdx, rax); - __ xorptr(rdx, rax); - __ cmp32(rcx, Address(rdx, 0)); - // rax, ^ counter_addr ^ rax, = address - // ca1 is data dependent on the field - // access. - __ jcc (Assembler::notEqual, slow_with_pop); - - __ ret (0); - - __ bind (slow_with_pop); - // invalid load. pop FPU stack. - __ fstp_d (0); - - slowcase_entry_pclist[count++] = __ pc(); - __ bind (slow); - address slow_case_addr = nullptr; - switch (type) { - case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; - case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; - default: ShouldNotReachHere(); - } - // tail call - __ jump (RuntimeAddress(slow_case_addr)); - - __ flush (); - - return fast_entry; -} - -address JNI_FastGetField::generate_fast_get_float_field() { - return generate_fast_get_float_field0(T_FLOAT); -} - -address JNI_FastGetField::generate_fast_get_double_field() { - return generate_fast_get_float_field0(T_DOUBLE); -} diff --git a/src/hotspot/cpu/x86/jniTypes_x86.hpp b/src/hotspot/cpu/x86/jniTypes_x86.hpp index 5c925474796d4..645f5a1abfe8e 100644 --- a/src/hotspot/cpu/x86/jniTypes_x86.hpp +++ b/src/hotspot/cpu/x86/jniTypes_x86.hpp @@ -44,20 +44,12 @@ class JNITypes : AllStatic { private: -#ifndef AMD64 - // 32bit Helper routines. 
- static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1]; - *(jint *)(to ) = from[0]; } - static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; } -#endif // AMD64 - public: // Ints are stored in native format in one JavaCallArgument slot at *to. static inline void put_int(jint from, intptr_t *to) { *(jint *)(to + 0 ) = from; } static inline void put_int(jint from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = from; } static inline void put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; } -#ifdef AMD64 // Longs are stored in native format in one JavaCallArgument slot at // *(to+1). static inline void put_long(jlong from, intptr_t *to) { @@ -73,13 +65,6 @@ class JNITypes : AllStatic { *(jlong*) (to + 1 + pos) = *from; pos += 2; } -#else - // Longs are stored in big-endian word format in two JavaCallArgument slots at *to. - // The high half is in *to and the low half in *(to+1). - static inline void put_long(jlong from, intptr_t *to) { put_int2r((jint *)&from, to); } - static inline void put_long(jlong from, intptr_t *to, int& pos) { put_int2r((jint *)&from, to, pos); } - static inline void put_long(jlong *from, intptr_t *to, int& pos) { put_int2r((jint *) from, to, pos); } -#endif // AMD64 // Oops are stored in native format in one JavaCallArgument slot at *to. static inline void put_obj(const Handle& from_handle, intptr_t *to, int& pos) { *(to + pos++) = (intptr_t)from_handle.raw_value(); } @@ -91,7 +76,7 @@ class JNITypes : AllStatic { static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } #undef _JNI_SLOT_OFFSET -#ifdef AMD64 + #define _JNI_SLOT_OFFSET 1 // Doubles are stored in native word format in one JavaCallArgument // slot at *(to+1). 
@@ -108,15 +93,6 @@ class JNITypes : AllStatic { *(jdouble*) (to + 1 + pos) = *from; pos += 2; } -#else -#define _JNI_SLOT_OFFSET 0 - // Doubles are stored in big-endian word format in two JavaCallArgument slots at *to. - // The high half is in *to and the low half in *(to+1). - static inline void put_double(jdouble from, intptr_t *to) { put_int2r((jint *)&from, to); } - static inline void put_double(jdouble from, intptr_t *to, int& pos) { put_int2r((jint *)&from, to, pos); } - static inline void put_double(jdouble *from, intptr_t *to, int& pos) { put_int2r((jint *) from, to, pos); } -#endif // AMD64 - // The get_xxx routines, on the other hand, actually _do_ fetch // java primitive types from the interpreter stack. diff --git a/src/hotspot/cpu/x86/jvmciCodeInstaller_x86.cpp b/src/hotspot/cpu/x86/jvmciCodeInstaller_x86.cpp index 8eff2590bfcea..3531ebbf36e30 100644 --- a/src/hotspot/cpu/x86/jvmciCodeInstaller_x86.cpp +++ b/src/hotspot/cpu/x86/jvmciCodeInstaller_x86.cpp @@ -78,14 +78,10 @@ void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& obj, bool compre address pc = _instructions->start() + pc_offset; jobject value = JNIHandles::make_local(obj()); if (compressed) { -#ifdef _LP64 address operand = Assembler::locate_operand(pc, Assembler::narrow_oop_operand); int oop_index = _oop_recorder->find_index(value); _instructions->relocate(pc, oop_Relocation::spec(oop_index), Assembler::narrow_oop_operand); JVMCI_event_3("relocating (narrow oop constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(operand)); -#else - JVMCI_ERROR("compressed oop on 32bit"); -#endif } else { address operand = Assembler::locate_operand(pc, Assembler::imm_operand); *((jobject*) operand) = value; @@ -97,13 +93,9 @@ void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& obj, bool compre void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, HotSpotCompiledCodeStream* stream, u1 tag, JVMCI_TRAPS) { address pc = _instructions->start() + pc_offset; if (tag == 
PATCH_NARROW_KLASS) { -#ifdef _LP64 address operand = Assembler::locate_operand(pc, Assembler::narrow_oop_operand); *((narrowKlass*) operand) = record_narrow_metadata_reference(_instructions, operand, stream, tag, JVMCI_CHECK); JVMCI_event_3("relocating (narrow metaspace constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(operand)); -#else - JVMCI_ERROR("compressed Klass* on 32bit"); -#endif } else { address operand = Assembler::locate_operand(pc, Assembler::imm_operand); *((void**) operand) = record_metadata_reference(_instructions, operand, stream, tag, JVMCI_CHECK); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index a798dea08cc79..0b1ef5cec0e4b 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -94,395 +94,6 @@ static const Assembler::Condition reverse[] = { // Implementation of MacroAssembler -// First all the versions that have distinct versions depending on 32/64 bit -// Unless the difference is trivial (1 line or so). 
- -#ifndef _LP64 - -// 32bit versions - -Address MacroAssembler::as_Address(AddressLiteral adr) { - return Address(adr.target(), adr.rspec()); -} - -Address MacroAssembler::as_Address(ArrayAddress adr, Register rscratch) { - assert(rscratch == noreg, ""); - return Address::make_array(adr); -} - -void MacroAssembler::call_VM_leaf_base(address entry_point, - int number_of_arguments) { - call(RuntimeAddress(entry_point)); - increment(rsp, number_of_arguments * wordSize); -} - -void MacroAssembler::cmpklass(Address src1, Metadata* obj) { - cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate()); -} - - -void MacroAssembler::cmpklass(Register src1, Metadata* obj) { - cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate()); -} - -void MacroAssembler::cmpoop(Address src1, jobject obj) { - cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); -} - -void MacroAssembler::cmpoop(Register src1, jobject obj, Register rscratch) { - assert(rscratch == noreg, "redundant"); - cmp_literal32(src1, (int32_t)obj, oop_Relocation::spec_for_immediate()); -} - -void MacroAssembler::extend_sign(Register hi, Register lo) { - // According to Intel Doc. AP-526, "Integer Divide", p.18. 
- if (VM_Version::is_P6() && hi == rdx && lo == rax) { - cdql(); - } else { - movl(hi, lo); - sarl(hi, 31); - } -} - -void MacroAssembler::jC2(Register tmp, Label& L) { - // set parity bit if FPU flag C2 is set (via rax) - save_rax(tmp); - fwait(); fnstsw_ax(); - sahf(); - restore_rax(tmp); - // branch - jcc(Assembler::parity, L); -} - -void MacroAssembler::jnC2(Register tmp, Label& L) { - // set parity bit if FPU flag C2 is set (via rax) - save_rax(tmp); - fwait(); fnstsw_ax(); - sahf(); - restore_rax(tmp); - // branch - jcc(Assembler::noParity, L); -} - -// 32bit can do a case table jump in one instruction but we no longer allow the base -// to be installed in the Address class -void MacroAssembler::jump(ArrayAddress entry, Register rscratch) { - assert(rscratch == noreg, "not needed"); - jmp(as_Address(entry, noreg)); -} - -// Note: y_lo will be destroyed -void MacroAssembler::lcmp2int(Register x_hi, Register x_lo, Register y_hi, Register y_lo) { - // Long compare for Java (semantics as described in JVM spec.) 
- Label high, low, done; - - cmpl(x_hi, y_hi); - jcc(Assembler::less, low); - jcc(Assembler::greater, high); - // x_hi is the return register - xorl(x_hi, x_hi); - cmpl(x_lo, y_lo); - jcc(Assembler::below, low); - jcc(Assembler::equal, done); - - bind(high); - xorl(x_hi, x_hi); - increment(x_hi); - jmp(done); - - bind(low); - xorl(x_hi, x_hi); - decrementl(x_hi); - - bind(done); -} - -void MacroAssembler::lea(Register dst, AddressLiteral src) { - mov_literal32(dst, (int32_t)src.target(), src.rspec()); -} - -void MacroAssembler::lea(Address dst, AddressLiteral adr, Register rscratch) { - assert(rscratch == noreg, "not needed"); - - // leal(dst, as_Address(adr)); - // see note in movl as to why we must use a move - mov_literal32(dst, (int32_t)adr.target(), adr.rspec()); -} - -void MacroAssembler::leave() { - mov(rsp, rbp); - pop(rbp); -} - -void MacroAssembler::lmul(int x_rsp_offset, int y_rsp_offset) { - // Multiplication of two Java long values stored on the stack - // as illustrated below. Result is in rdx:rax. - // - // rsp ---> [ ?? ] \ \ - // .... | y_rsp_offset | - // [ y_lo ] / (in bytes) | x_rsp_offset - // [ y_hi ] | (in bytes) - // .... | - // [ x_lo ] / - // [ x_hi ] - // .... 
- // - // Basic idea: lo(result) = lo(x_lo * y_lo) - // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) - Address x_hi(rsp, x_rsp_offset + wordSize); Address x_lo(rsp, x_rsp_offset); - Address y_hi(rsp, y_rsp_offset + wordSize); Address y_lo(rsp, y_rsp_offset); - Label quick; - // load x_hi, y_hi and check if quick - // multiplication is possible - movl(rbx, x_hi); - movl(rcx, y_hi); - movl(rax, rbx); - orl(rbx, rcx); // rbx, = 0 <=> x_hi = 0 and y_hi = 0 - jcc(Assembler::zero, quick); // if rbx, = 0 do quick multiply - // do full multiplication - // 1st step - mull(y_lo); // x_hi * y_lo - movl(rbx, rax); // save lo(x_hi * y_lo) in rbx, - // 2nd step - movl(rax, x_lo); - mull(rcx); // x_lo * y_hi - addl(rbx, rax); // add lo(x_lo * y_hi) to rbx, - // 3rd step - bind(quick); // note: rbx, = 0 if quick multiply! - movl(rax, x_lo); - mull(y_lo); // x_lo * y_lo - addl(rdx, rbx); // correct hi(x_lo * y_lo) -} - -void MacroAssembler::lneg(Register hi, Register lo) { - negl(lo); - adcl(hi, 0); - negl(hi); -} - -void MacroAssembler::lshl(Register hi, Register lo) { - // Java shift left long support (semantics as described in JVM spec., p.305) - // (basic idea for shift counts s >= n: x << s == (x << n) << (s - n)) - // shift value is in rcx ! - assert(hi != rcx, "must not use rcx"); - assert(lo != rcx, "must not use rcx"); - const Register s = rcx; // shift count - const int n = BitsPerWord; - Label L; - andl(s, 0x3f); // s := s & 0x3f (s < 0x40) - cmpl(s, n); // if (s < n) - jcc(Assembler::less, L); // else (s >= n) - movl(hi, lo); // x := x << n - xorl(lo, lo); - // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n! 
- bind(L); // s (mod n) < n - shldl(hi, lo); // x := x << s - shll(lo); -} - - -void MacroAssembler::lshr(Register hi, Register lo, bool sign_extension) { - // Java shift right long support (semantics as described in JVM spec., p.306 & p.310) - // (basic idea for shift counts s >= n: x >> s == (x >> n) >> (s - n)) - assert(hi != rcx, "must not use rcx"); - assert(lo != rcx, "must not use rcx"); - const Register s = rcx; // shift count - const int n = BitsPerWord; - Label L; - andl(s, 0x3f); // s := s & 0x3f (s < 0x40) - cmpl(s, n); // if (s < n) - jcc(Assembler::less, L); // else (s >= n) - movl(lo, hi); // x := x >> n - if (sign_extension) sarl(hi, 31); - else xorl(hi, hi); - // Note: subl(s, n) is not needed since the Intel shift instructions work rcx mod n! - bind(L); // s (mod n) < n - shrdl(lo, hi); // x := x >> s - if (sign_extension) sarl(hi); - else shrl(hi); -} - -void MacroAssembler::movoop(Register dst, jobject obj) { - mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); -} - -void MacroAssembler::movoop(Address dst, jobject obj, Register rscratch) { - assert(rscratch == noreg, "redundant"); - mov_literal32(dst, (int32_t)obj, oop_Relocation::spec_for_immediate()); -} - -void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { - mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate()); -} - -void MacroAssembler::mov_metadata(Address dst, Metadata* obj, Register rscratch) { - assert(rscratch == noreg, "redundant"); - mov_literal32(dst, (int32_t)obj, metadata_Relocation::spec_for_immediate()); -} - -void MacroAssembler::movptr(Register dst, AddressLiteral src) { - if (src.is_lval()) { - mov_literal32(dst, (intptr_t)src.target(), src.rspec()); - } else { - movl(dst, as_Address(src)); - } -} - -void MacroAssembler::movptr(ArrayAddress dst, Register src, Register rscratch) { - assert(rscratch == noreg, "redundant"); - movl(as_Address(dst, noreg), src); -} - -void MacroAssembler::movptr(Register dst, ArrayAddress 
src) { - movl(dst, as_Address(src, noreg)); -} - -void MacroAssembler::movptr(Address dst, intptr_t src, Register rscratch) { - assert(rscratch == noreg, "redundant"); - movl(dst, src); -} - -void MacroAssembler::pushoop(jobject obj, Register rscratch) { - assert(rscratch == noreg, "redundant"); - push_literal32((int32_t)obj, oop_Relocation::spec_for_immediate()); -} - -void MacroAssembler::pushklass(Metadata* obj, Register rscratch) { - assert(rscratch == noreg, "redundant"); - push_literal32((int32_t)obj, metadata_Relocation::spec_for_immediate()); -} - -void MacroAssembler::pushptr(AddressLiteral src, Register rscratch) { - assert(rscratch == noreg, "redundant"); - if (src.is_lval()) { - push_literal32((int32_t)src.target(), src.rspec()); - } else { - pushl(as_Address(src)); - } -} - -static void pass_arg0(MacroAssembler* masm, Register arg) { - masm->push(arg); -} - -static void pass_arg1(MacroAssembler* masm, Register arg) { - masm->push(arg); -} - -static void pass_arg2(MacroAssembler* masm, Register arg) { - masm->push(arg); -} - -static void pass_arg3(MacroAssembler* masm, Register arg) { - masm->push(arg); -} - -#ifndef PRODUCT -extern "C" void findpc(intptr_t x); -#endif - -void MacroAssembler::debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg) { - // In order to get locks to work, we need to fake a in_VM state - JavaThread* thread = JavaThread::current(); - JavaThreadState saved_state = thread->thread_state(); - thread->set_thread_state(_thread_in_vm); - if (ShowMessageBoxOnError) { - JavaThread* thread = JavaThread::current(); - JavaThreadState saved_state = thread->thread_state(); - thread->set_thread_state(_thread_in_vm); - if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { - ttyLocker ttyl; - BytecodeCounter::print(); - } - // To see where a verify_oop failed, get $ebx+40/X for this frame. - // This is the value of eip which points to where verify_oop will return. 
- if (os::message_box(msg, "Execution stopped, print registers?")) { - print_state32(rdi, rsi, rbp, rsp, rbx, rdx, rcx, rax, eip); - BREAKPOINT; - } - } - fatal("DEBUG MESSAGE: %s", msg); -} - -void MacroAssembler::print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip) { - ttyLocker ttyl; - DebuggingContext debugging{}; - tty->print_cr("eip = 0x%08x", eip); -#ifndef PRODUCT - if ((WizardMode || Verbose) && PrintMiscellaneous) { - tty->cr(); - findpc(eip); - tty->cr(); - } -#endif -#define PRINT_REG(rax) \ - { tty->print("%s = ", #rax); os::print_location(tty, rax); } - PRINT_REG(rax); - PRINT_REG(rbx); - PRINT_REG(rcx); - PRINT_REG(rdx); - PRINT_REG(rdi); - PRINT_REG(rsi); - PRINT_REG(rbp); - PRINT_REG(rsp); -#undef PRINT_REG - // Print some words near top of staack. - int* dump_sp = (int*) rsp; - for (int col1 = 0; col1 < 8; col1++) { - tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp); - os::print_location(tty, *dump_sp++); - } - for (int row = 0; row < 16; row++) { - tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp); - for (int col = 0; col < 8; col++) { - tty->print(" 0x%08x", *dump_sp++); - } - tty->cr(); - } - // Print some instructions around pc: - Disassembler::decode((address)eip-64, (address)eip); - tty->print_cr("--------"); - Disassembler::decode((address)eip, (address)eip+32); -} - -void MacroAssembler::stop(const char* msg) { - // push address of message - ExternalAddress message((address)msg); - pushptr(message.addr(), noreg); - { Label L; call(L, relocInfo::none); bind(L); } // push eip - pusha(); // push registers - call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32))); - hlt(); -} - -void MacroAssembler::warn(const char* msg) { - push_CPU_state(); - - // push address of message - ExternalAddress message((address)msg); - pushptr(message.addr(), noreg); - - 
call(RuntimeAddress(CAST_FROM_FN_PTR(address, warning))); - addl(rsp, wordSize); // discard argument - pop_CPU_state(); -} - -void MacroAssembler::print_state() { - { Label L; call(L, relocInfo::none); bind(L); } // push eip - pusha(); // push registers - - push_CPU_state(); - call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::print_state32))); - pop_CPU_state(); - - popa(); - addl(rsp, wordSize); -} - -#else // _LP64 - -// 64 bit versions - Address MacroAssembler::as_Address(AddressLiteral adr) { // amd64 always does this as a pc-rel // we can be absolute or disp based on the instruction type @@ -1097,20 +708,16 @@ void MacroAssembler::object_move(OopMap* map, } } -#endif // _LP64 - -// Now versions that are common to 32/64 bit - void MacroAssembler::addptr(Register dst, int32_t imm32) { - LP64_ONLY(addq(dst, imm32)) NOT_LP64(addl(dst, imm32)); + addq(dst, imm32); } void MacroAssembler::addptr(Register dst, Register src) { - LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); + addq(dst, src); } void MacroAssembler::addptr(Address dst, Register src) { - LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); + addq(dst, src); } void MacroAssembler::addsd(XMMRegister dst, AddressLiteral src, Register rscratch) { @@ -1216,10 +823,9 @@ void MacroAssembler::andps(XMMRegister dst, AddressLiteral src, Register rscratc } void MacroAssembler::andptr(Register dst, int32_t imm32) { - LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32)); + andq(dst, imm32); } -#ifdef _LP64 void MacroAssembler::andq(Register dst, AddressLiteral src, Register rscratch) { assert(rscratch != noreg || always_reachable(src), "missing"); @@ -1230,7 +836,6 @@ void MacroAssembler::andq(Register dst, AddressLiteral src, Register rscratch) { andq(dst, Address(rscratch, 0)); } } -#endif void MacroAssembler::atomic_incl(Address counter_addr) { lock(); @@ -1248,7 +853,6 @@ void MacroAssembler::atomic_incl(AddressLiteral counter_addr, Register rscratch) } } -#ifdef _LP64 void 
MacroAssembler::atomic_incq(Address counter_addr) { lock(); incrementq(counter_addr); @@ -1264,7 +868,6 @@ void MacroAssembler::atomic_incq(AddressLiteral counter_addr, Register rscratch) atomic_incq(Address(rscratch, 0)); } } -#endif // Writes to stack successive pages until offset reached to check for // stack overflow + shadow pages. This clobbers tmp. @@ -1296,8 +899,7 @@ void MacroAssembler::bang_stack_size(Register size, Register tmp) { void MacroAssembler::reserved_stack_check() { // testing if reserved zone needs to be enabled Label no_reserved_zone_enabling; - Register thread = NOT_LP64(rsi) LP64_ONLY(r15_thread); - NOT_LP64(get_thread(rsi);) + Register thread = r15_thread; cmpptr(rsp, Address(thread, JavaThread::reserved_stack_activation_offset())); jcc(Assembler::below, no_reserved_zone_enabling); @@ -1340,24 +942,19 @@ void MacroAssembler::call(AddressLiteral entry, Register rscratch) { void MacroAssembler::ic_call(address entry, jint method_index) { RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); -#ifdef _LP64 // Needs full 64-bit immediate for later patching. mov64(rax, (int64_t)Universe::non_oop_word()); -#else - movptr(rax, (intptr_t)Universe::non_oop_word()); -#endif call(AddressLiteral(entry, rh)); } int MacroAssembler::ic_check_size() { - return - LP64_ONLY(UseCompactObjectHeaders ? 17 : 14) NOT_LP64(12); + return UseCompactObjectHeaders ? 17 : 14; } int MacroAssembler::ic_check(int end_alignment) { - Register receiver = LP64_ONLY(j_rarg0) NOT_LP64(rcx); + Register receiver = j_rarg0; Register data = rax; - Register temp = LP64_ONLY(rscratch1) NOT_LP64(rbx); + Register temp = rscratch1; // The UEP of a code blob ensures that the VEP is padded. 
However, the padding of the UEP is placed // before the inline cache check, so we don't have to execute any nop instructions when dispatching @@ -1367,13 +964,10 @@ int MacroAssembler::ic_check(int end_alignment) { int uep_offset = offset(); -#ifdef _LP64 if (UseCompactObjectHeaders) { load_narrow_klass_compact(temp, receiver); cmpl(temp, Address(data, CompiledICData::speculated_klass_offset())); - } else -#endif - if (UseCompressedClassPointers) { + } else if (UseCompressedClassPointers) { movl(temp, Address(receiver, oopDesc::klass_offset_in_bytes())); cmpl(temp, Address(data, CompiledICData::speculated_klass_offset())); } else { @@ -1438,7 +1032,7 @@ void MacroAssembler::call_VM(Register oop_result, bind(C); - LP64_ONLY(assert_different_registers(arg_1, c_rarg2)); + assert_different_registers(arg_1, c_rarg2); pass_arg2(this, arg_2); pass_arg1(this, arg_1); @@ -1460,8 +1054,8 @@ void MacroAssembler::call_VM(Register oop_result, bind(C); - LP64_ONLY(assert_different_registers(arg_1, c_rarg2, c_rarg3)); - LP64_ONLY(assert_different_registers(arg_2, c_rarg3)); + assert_different_registers(arg_1, c_rarg2, c_rarg3); + assert_different_registers(arg_2, c_rarg3); pass_arg3(this, arg_3); pass_arg2(this, arg_2); pass_arg1(this, arg_1); @@ -1476,8 +1070,7 @@ void MacroAssembler::call_VM(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { - Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg); - call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); + call_VM_base(oop_result, r15_thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); } void MacroAssembler::call_VM(Register oop_result, @@ -1496,7 +1089,7 @@ void MacroAssembler::call_VM(Register oop_result, Register arg_2, bool check_exceptions) { - LP64_ONLY(assert_different_registers(arg_1, c_rarg2)); + assert_different_registers(arg_1, c_rarg2); pass_arg2(this, arg_2); pass_arg1(this, arg_1); call_VM(oop_result, 
last_java_sp, entry_point, 2, check_exceptions); @@ -1509,8 +1102,8 @@ void MacroAssembler::call_VM(Register oop_result, Register arg_2, Register arg_3, bool check_exceptions) { - LP64_ONLY(assert_different_registers(arg_1, c_rarg2, c_rarg3)); - LP64_ONLY(assert_different_registers(arg_2, c_rarg3)); + assert_different_registers(arg_1, c_rarg2, c_rarg3); + assert_different_registers(arg_2, c_rarg3); pass_arg3(this, arg_3); pass_arg2(this, arg_2); pass_arg1(this, arg_1); @@ -1522,8 +1115,7 @@ void MacroAssembler::super_call_VM(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { - Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg); - MacroAssembler::call_VM_base(oop_result, thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); + MacroAssembler::call_VM_base(oop_result, r15_thread, last_java_sp, entry_point, number_of_arguments, check_exceptions); } void MacroAssembler::super_call_VM(Register oop_result, @@ -1542,7 +1134,7 @@ void MacroAssembler::super_call_VM(Register oop_result, Register arg_2, bool check_exceptions) { - LP64_ONLY(assert_different_registers(arg_1, c_rarg2)); + assert_different_registers(arg_1, c_rarg2); pass_arg2(this, arg_2); pass_arg1(this, arg_1); super_call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); @@ -1555,8 +1147,8 @@ void MacroAssembler::super_call_VM(Register oop_result, Register arg_2, Register arg_3, bool check_exceptions) { - LP64_ONLY(assert_different_registers(arg_1, c_rarg2, c_rarg3)); - LP64_ONLY(assert_different_registers(arg_2, c_rarg3)); + assert_different_registers(arg_1, c_rarg2, c_rarg3); + assert_different_registers(arg_2, c_rarg3); pass_arg3(this, arg_3); pass_arg2(this, arg_2); pass_arg1(this, arg_1); @@ -1571,12 +1163,7 @@ void MacroAssembler::call_VM_base(Register oop_result, bool check_exceptions) { // determine java_thread register if (!java_thread->is_valid()) { -#ifdef _LP64 java_thread = r15_thread; -#else - java_thread = rdi; - 
get_thread(java_thread); -#endif // LP64 } // determine last_java_sp register if (!last_java_sp->is_valid()) { @@ -1584,11 +1171,13 @@ void MacroAssembler::call_VM_base(Register oop_result, } // debugging support assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); - LP64_ONLY(assert(java_thread == r15_thread, "unexpected register")); + assert(java_thread == r15_thread, "unexpected register"); #ifdef ASSERT // TraceBytecodes does not use r12 but saves it over the call, so don't verify // r12 is the heapbase. - LP64_ONLY(if (UseCompressedOops && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?");) + if (UseCompressedOops && !TraceBytecodes) { + verify_heapbase("call_VM_base: heap base corrupted?"); + } #endif // ASSERT assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); @@ -1596,8 +1185,7 @@ void MacroAssembler::call_VM_base(Register oop_result, // push java thread (becomes first argument of C function) - NOT_LP64(push(java_thread); number_of_arguments++); - LP64_ONLY(mov(c_rarg0, r15_thread)); + mov(c_rarg0, r15_thread); // set last Java frame before call assert(last_java_sp != rbp, "can't use ebp/rbp"); @@ -1611,8 +1199,7 @@ void MacroAssembler::call_VM_base(Register oop_result, // restore the thread (cannot use the pushed argument since arguments // may be overwritten by C code generated by an optimizing compiler); // however can use the register value directly if it is callee saved. 
- if (LP64_ONLY(true ||) java_thread == rdi || java_thread == rsi) { - // rdi & rsi (also r15) are callee saved -> nothing to do + // rdi & rsi (also r15) are callee saved -> nothing to do #ifdef ASSERT guarantee(java_thread != rax, "change this code"); push(rax); @@ -1625,9 +1212,7 @@ void MacroAssembler::call_VM_base(Register oop_result, } pop(rax); #endif - } else { - get_thread(java_thread); - } + // reset last Java frame // Only interpreter should have to clear fp reset_last_Java_frame(java_thread, true); @@ -1639,10 +1224,7 @@ void MacroAssembler::call_VM_base(Register oop_result, if (check_exceptions) { // check for pending exceptions (java_thread is set upon return) cmpptr(Address(java_thread, Thread::pending_exception_offset()), NULL_WORD); -#ifndef _LP64 - jump_cc(Assembler::notEqual, - RuntimeAddress(StubRoutines::forward_exception_entry())); -#else + // This used to conditionally jump to forward_exception however it is // possible if we relocate that the branch will not reach. So we must jump // around so we can always reach @@ -1651,7 +1233,6 @@ void MacroAssembler::call_VM_base(Register oop_result, jcc(Assembler::equal, ok); jump(RuntimeAddress(StubRoutines::forward_exception_entry())); bind(ok); -#endif // LP64 } // get oop result if there is one and reset the value in the thread @@ -1672,12 +1253,8 @@ void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, in // so the only extra space is the return address that call_VM created. // This hopefully explains the calculations here. 
-#ifdef _LP64 // We've pushed one address, correct last_Java_sp lea(rax, Address(rsp, wordSize)); -#else - lea(rax, Address(rsp, (1 + number_of_arguments) * wordSize)); -#endif // LP64 call_VM_base(oop_result, noreg, rax, entry_point, number_of_arguments, check_exceptions); @@ -1698,16 +1275,15 @@ void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { } void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { - - LP64_ONLY(assert_different_registers(arg_0, c_rarg1)); + assert_different_registers(arg_0, c_rarg1); pass_arg1(this, arg_1); pass_arg0(this, arg_0); call_VM_leaf(entry_point, 2); } void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { - LP64_ONLY(assert_different_registers(arg_0, c_rarg1, c_rarg2)); - LP64_ONLY(assert_different_registers(arg_1, c_rarg2)); + assert_different_registers(arg_0, c_rarg1, c_rarg2); + assert_different_registers(arg_1, c_rarg2); pass_arg2(this, arg_2); pass_arg1(this, arg_1); pass_arg0(this, arg_0); @@ -1715,9 +1291,9 @@ void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register } void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { - LP64_ONLY(assert_different_registers(arg_0, c_rarg1, c_rarg2, c_rarg3)); - LP64_ONLY(assert_different_registers(arg_1, c_rarg2, c_rarg3)); - LP64_ONLY(assert_different_registers(arg_2, c_rarg3)); + assert_different_registers(arg_0, c_rarg1, c_rarg2, c_rarg3); + assert_different_registers(arg_1, c_rarg2, c_rarg3); + assert_different_registers(arg_2, c_rarg3); pass_arg3(this, arg_3); pass_arg2(this, arg_2); pass_arg1(this, arg_1); @@ -1731,15 +1307,15 @@ void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { } void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { - LP64_ONLY(assert_different_registers(arg_0, c_rarg1)); + 
assert_different_registers(arg_0, c_rarg1); pass_arg1(this, arg_1); pass_arg0(this, arg_0); MacroAssembler::call_VM_leaf_base(entry_point, 2); } void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { - LP64_ONLY(assert_different_registers(arg_0, c_rarg1, c_rarg2)); - LP64_ONLY(assert_different_registers(arg_1, c_rarg2)); + assert_different_registers(arg_0, c_rarg1, c_rarg2); + assert_different_registers(arg_1, c_rarg2); pass_arg2(this, arg_2); pass_arg1(this, arg_1); pass_arg0(this, arg_0); @@ -1747,9 +1323,9 @@ void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Reg } void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { - LP64_ONLY(assert_different_registers(arg_0, c_rarg1, c_rarg2, c_rarg3)); - LP64_ONLY(assert_different_registers(arg_1, c_rarg2, c_rarg3)); - LP64_ONLY(assert_different_registers(arg_2, c_rarg3)); + assert_different_registers(arg_0, c_rarg1, c_rarg2, c_rarg3); + assert_different_registers(arg_1, c_rarg2, c_rarg3); + assert_different_registers(arg_2, c_rarg3); pass_arg3(this, arg_3); pass_arg2(this, arg_2); pass_arg1(this, arg_1); @@ -1862,7 +1438,6 @@ void MacroAssembler::cmp8(AddressLiteral src1, int imm, Register rscratch) { } void MacroAssembler::cmpptr(Register src1, AddressLiteral src2, Register rscratch) { -#ifdef _LP64 assert(rscratch != noreg || always_reachable(src2), "missing"); if (src2.is_lval()) { @@ -1874,26 +1449,13 @@ void MacroAssembler::cmpptr(Register src1, AddressLiteral src2, Register rscratc lea(rscratch, src2); Assembler::cmpq(src1, Address(rscratch, 0)); } -#else - assert(rscratch == noreg, "not needed"); - if (src2.is_lval()) { - cmp_literal32(src1, (int32_t)src2.target(), src2.rspec()); - } else { - cmpl(src1, as_Address(src2)); - } -#endif // _LP64 } void MacroAssembler::cmpptr(Address src1, AddressLiteral src2, Register rscratch) { assert(src2.is_lval(), "not a 
mem-mem compare"); -#ifdef _LP64 // moves src2's literal address movptr(rscratch, src2); Assembler::cmpq(src1, rscratch); -#else - assert(rscratch == noreg, "not needed"); - cmp_literal32(src1, (int32_t)src2.target(), src2.rspec()); -#endif // _LP64 } void MacroAssembler::cmpoop(Register src1, Register src2) { @@ -1904,12 +1466,10 @@ void MacroAssembler::cmpoop(Register src1, Address src2) { cmpptr(src1, src2); } -#ifdef _LP64 void MacroAssembler::cmpoop(Register src1, jobject src2, Register rscratch) { movoop(rscratch, src2); cmpptr(src1, rscratch); } -#endif void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr, Register rscratch) { assert(rscratch != noreg || always_reachable(adr), "missing"); @@ -1925,7 +1485,7 @@ void MacroAssembler::locked_cmpxchgptr(Register reg, AddressLiteral adr, Registe } void MacroAssembler::cmpxchgptr(Register reg, Address adr) { - LP64_ONLY(cmpxchgq(reg, adr)) NOT_LP64(cmpxchgl(reg, adr)); + cmpxchgq(reg, adr); } void MacroAssembler::comisd(XMMRegister dst, AddressLiteral src, Register rscratch) { @@ -2088,115 +1648,6 @@ void MacroAssembler::fat_nop() { } } -#ifndef _LP64 -void MacroAssembler::fcmp(Register tmp) { - fcmp(tmp, 1, true, true); -} - -void MacroAssembler::fcmp(Register tmp, int index, bool pop_left, bool pop_right) { - assert(!pop_right || pop_left, "usage error"); - if (VM_Version::supports_cmov()) { - assert(tmp == noreg, "unneeded temp"); - if (pop_left) { - fucomip(index); - } else { - fucomi(index); - } - if (pop_right) { - fpop(); - } - } else { - assert(tmp != noreg, "need temp"); - if (pop_left) { - if (pop_right) { - fcompp(); - } else { - fcomp(index); - } - } else { - fcom(index); - } - // convert FPU condition into eflags condition via rax, - save_rax(tmp); - fwait(); fnstsw_ax(); - sahf(); - restore_rax(tmp); - } - // condition codes set as follows: - // - // CF (corresponds to C0) if x < y - // PF (corresponds to C2) if unordered - // ZF (corresponds to C3) if x = y -} - -void 
MacroAssembler::fcmp2int(Register dst, bool unordered_is_less) { - fcmp2int(dst, unordered_is_less, 1, true, true); -} - -void MacroAssembler::fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right) { - fcmp(VM_Version::supports_cmov() ? noreg : dst, index, pop_left, pop_right); - Label L; - if (unordered_is_less) { - movl(dst, -1); - jcc(Assembler::parity, L); - jcc(Assembler::below , L); - movl(dst, 0); - jcc(Assembler::equal , L); - increment(dst); - } else { // unordered is greater - movl(dst, 1); - jcc(Assembler::parity, L); - jcc(Assembler::above , L); - movl(dst, 0); - jcc(Assembler::equal , L); - decrementl(dst); - } - bind(L); -} - -void MacroAssembler::fld_d(AddressLiteral src) { - fld_d(as_Address(src)); -} - -void MacroAssembler::fld_s(AddressLiteral src) { - fld_s(as_Address(src)); -} - -void MacroAssembler::fldcw(AddressLiteral src) { - fldcw(as_Address(src)); -} - -void MacroAssembler::fpop() { - ffree(); - fincstp(); -} - -void MacroAssembler::fremr(Register tmp) { - save_rax(tmp); - { Label L; - bind(L); - fprem(); - fwait(); fnstsw_ax(); - sahf(); - jcc(Assembler::parity, L); - } - restore_rax(tmp); - // Result is in ST0. 
- // Note: fxch & fpop to get rid of ST1 - // (otherwise FPU stack could overflow eventually) - fxch(1); - fpop(); -} - -void MacroAssembler::empty_FPU_stack() { - if (VM_Version::supports_mmx()) { - emms(); - } else { - for (int i = 8; i-- > 0; ) ffree(i); - } -} -#endif // !LP64 - void MacroAssembler::mulpd(XMMRegister dst, AddressLiteral src, Register rscratch) { assert(rscratch != noreg || always_reachable(src), "missing"); if (reachable(src)) { @@ -2208,51 +1659,19 @@ void MacroAssembler::mulpd(XMMRegister dst, AddressLiteral src, Register rscratc } void MacroAssembler::load_float(Address src) { -#ifdef _LP64 movflt(xmm0, src); -#else - if (UseSSE >= 1) { - movflt(xmm0, src); - } else { - fld_s(src); - } -#endif // LP64 } void MacroAssembler::store_float(Address dst) { -#ifdef _LP64 movflt(dst, xmm0); -#else - if (UseSSE >= 1) { - movflt(dst, xmm0); - } else { - fstp_s(dst); - } -#endif // LP64 } void MacroAssembler::load_double(Address src) { -#ifdef _LP64 movdbl(xmm0, src); -#else - if (UseSSE >= 2) { - movdbl(xmm0, src); - } else { - fld_d(src); - } -#endif // LP64 } void MacroAssembler::store_double(Address dst) { -#ifdef _LP64 movdbl(dst, xmm0); -#else - if (UseSSE >= 2) { - movdbl(dst, xmm0); - } else { - fstp_d(dst); - } -#endif // LP64 } // dst = c = a * b + c @@ -2388,15 +1807,8 @@ void MacroAssembler::ldmxcsr(AddressLiteral src, Register rscratch) { } int MacroAssembler::load_signed_byte(Register dst, Address src) { - int off; - if (LP64_ONLY(true ||) VM_Version::is_P6()) { - off = offset(); - movsbl(dst, src); // movsxb - } else { - off = load_unsigned_byte(dst, src); - shll(dst, 24); - sarl(dst, 24); - } + int off = offset(); + movsbl(dst, src); // movsxb return off; } @@ -2405,33 +1817,19 @@ int MacroAssembler::load_signed_byte(Register dst, Address src) { // manual, which means 16 bits, that usage is found nowhere in HotSpot code. // The term "word" in HotSpot means a 32- or 64-bit machine word. 
int MacroAssembler::load_signed_short(Register dst, Address src) { - int off; - if (LP64_ONLY(true ||) VM_Version::is_P6()) { - // This is dubious to me since it seems safe to do a signed 16 => 64 bit - // version but this is what 64bit has always done. This seems to imply - // that users are only using 32bits worth. - off = offset(); - movswl(dst, src); // movsxw - } else { - off = load_unsigned_short(dst, src); - shll(dst, 16); - sarl(dst, 16); - } + int off = offset(); + // This is dubious to me since it seems safe to do a signed 16 => 64 bit + // version but this is what 64bit has always done. This seems to imply + // that users are only using 32bits worth. + movswl(dst, src); // movsxw return off; } int MacroAssembler::load_unsigned_byte(Register dst, Address src) { // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, // and "3.9 Partial Register Penalties", p. 22). - int off; - if (LP64_ONLY(true || ) VM_Version::is_P6() || src.uses(dst)) { - off = offset(); - movzbl(dst, src); // movzxb - } else { - xorl(dst, dst); - off = offset(); - movb(dst, src); - } + int off = offset(); + movzbl(dst, src); // movzxb return off; } @@ -2439,29 +1837,14 @@ int MacroAssembler::load_unsigned_byte(Register dst, Address src) { int MacroAssembler::load_unsigned_short(Register dst, Address src) { // According to Intel Doc. AP-526, "Zero-Extension of Short", p.16, // and "3.9 Partial Register Penalties", p. 22). 
- int off; - if (LP64_ONLY(true ||) VM_Version::is_P6() || src.uses(dst)) { - off = offset(); - movzwl(dst, src); // movzxw - } else { - xorl(dst, dst); - off = offset(); - movw(dst, src); - } + int off = offset(); + movzwl(dst, src); // movzxw return off; } void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { switch (size_in_bytes) { -#ifndef _LP64 - case 8: - assert(dst2 != noreg, "second dest register required"); - movl(dst, src); - movl(dst2, src.plus_disp(BytesPerInt)); - break; -#else case 8: movq(dst, src); break; -#endif case 4: movl(dst, src); break; case 2: is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break; case 1: is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break; @@ -2471,15 +1854,7 @@ void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_ void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { switch (size_in_bytes) { -#ifndef _LP64 - case 8: - assert(src2 != noreg, "second source register required"); - movl(dst, src); - movl(dst.plus_disp(BytesPerInt), src2); - break; -#else case 8: movq(dst, src); break; -#endif case 4: movl(dst, src); break; case 2: movw(dst, src); break; case 1: movb(dst, src); break; @@ -2598,16 +1973,15 @@ void MacroAssembler::movflt(XMMRegister dst, AddressLiteral src, Register rscrat } void MacroAssembler::movptr(Register dst, Register src) { - LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); + movq(dst, src); } void MacroAssembler::movptr(Register dst, Address src) { - LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); + movq(dst, src); } // src should NEVER be a real pointer. 
Use AddressLiteral for true pointers void MacroAssembler::movptr(Register dst, intptr_t src) { -#ifdef _LP64 if (is_uimm32(src)) { movl(dst, checked_cast(src)); } else if (is_simm32(src)) { @@ -2615,17 +1989,14 @@ void MacroAssembler::movptr(Register dst, intptr_t src) { } else { mov64(dst, src); } -#else - movl(dst, src); -#endif } void MacroAssembler::movptr(Address dst, Register src) { - LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src)); + movq(dst, src); } void MacroAssembler::movptr(Address dst, int32_t src) { - LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); + movslq(dst, src); } void MacroAssembler::movdqu(Address dst, XMMRegister src) { @@ -2926,9 +2297,7 @@ void MacroAssembler::unimplemented(const char* what) { stop(buf); } -#ifdef _LP64 #define XSTATE_BV 0x200 -#endif void MacroAssembler::pop_CPU_state() { pop_FPU_state(); @@ -2936,17 +2305,13 @@ void MacroAssembler::pop_CPU_state() { } void MacroAssembler::pop_FPU_state() { -#ifndef _LP64 - frstor(Address(rsp, 0)); -#else fxrstor(Address(rsp, 0)); -#endif addptr(rsp, FPUStateSizeInWords * wordSize); } void MacroAssembler::pop_IU_state() { popa(); - LP64_ONLY(addq(rsp, 8)); + addq(rsp, 8); popf(); } @@ -2959,110 +2324,59 @@ void MacroAssembler::push_CPU_state() { void MacroAssembler::push_FPU_state() { subptr(rsp, FPUStateSizeInWords * wordSize); -#ifndef _LP64 - fnsave(Address(rsp, 0)); - fwait(); -#else fxsave(Address(rsp, 0)); -#endif // LP64 } void MacroAssembler::push_IU_state() { // Push flags first because pusha kills them pushf(); // Make sure rsp stays 16-byte aligned - LP64_ONLY(subq(rsp, 8)); + subq(rsp, 8); pusha(); } void MacroAssembler::push_cont_fastpath() { if (!Continuations::enabled()) return; -#ifndef _LP64 - Register rthread = rax; - Register rrealsp = rbx; - push(rthread); - push(rrealsp); - - get_thread(rthread); - - // The code below wants the original RSP. - // Move it back after the pushes above. 
- movptr(rrealsp, rsp); - addptr(rrealsp, 2*wordSize); -#else Register rthread = r15_thread; Register rrealsp = rsp; -#endif Label done; cmpptr(rrealsp, Address(rthread, JavaThread::cont_fastpath_offset())); jccb(Assembler::belowEqual, done); movptr(Address(rthread, JavaThread::cont_fastpath_offset()), rrealsp); bind(done); - -#ifndef _LP64 - pop(rrealsp); - pop(rthread); -#endif } void MacroAssembler::pop_cont_fastpath() { if (!Continuations::enabled()) return; -#ifndef _LP64 - Register rthread = rax; - Register rrealsp = rbx; - push(rthread); - push(rrealsp); - - get_thread(rthread); - - // The code below wants the original RSP. - // Move it back after the pushes above. - movptr(rrealsp, rsp); - addptr(rrealsp, 2*wordSize); -#else Register rthread = r15_thread; Register rrealsp = rsp; -#endif Label done; cmpptr(rrealsp, Address(rthread, JavaThread::cont_fastpath_offset())); jccb(Assembler::below, done); movptr(Address(rthread, JavaThread::cont_fastpath_offset()), 0); bind(done); - -#ifndef _LP64 - pop(rrealsp); - pop(rthread); -#endif } void MacroAssembler::inc_held_monitor_count() { -#ifdef _LP64 incrementq(Address(r15_thread, JavaThread::held_monitor_count_offset())); -#endif } void MacroAssembler::dec_held_monitor_count() { -#ifdef _LP64 decrementq(Address(r15_thread, JavaThread::held_monitor_count_offset())); -#endif } #ifdef ASSERT void MacroAssembler::stop_if_in_cont(Register cont, const char* name) { -#ifdef _LP64 Label no_cont; movptr(cont, Address(r15_thread, JavaThread::cont_entry_offset())); testl(cont, cont); jcc(Assembler::zero, no_cont); stop(name); bind(no_cont); -#else - Unimplemented(); -#endif } #endif @@ -3143,7 +2457,6 @@ void MacroAssembler::set_last_Java_frame(Register java_thread, movptr(Address(java_thread, JavaThread::last_Java_sp_offset()), last_java_sp); } -#ifdef _LP64 void MacroAssembler::set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &L, @@ -3152,32 +2465,21 @@ void 
MacroAssembler::set_last_Java_frame(Register last_java_sp, movptr(Address(r15_thread, JavaThread::last_Java_pc_offset()), scratch); set_last_Java_frame(r15_thread, last_java_sp, last_java_fp, nullptr, scratch); } -#endif void MacroAssembler::shlptr(Register dst, int imm8) { - LP64_ONLY(shlq(dst, imm8)) NOT_LP64(shll(dst, imm8)); + shlq(dst, imm8); } void MacroAssembler::shrptr(Register dst, int imm8) { - LP64_ONLY(shrq(dst, imm8)) NOT_LP64(shrl(dst, imm8)); + shrq(dst, imm8); } void MacroAssembler::sign_extend_byte(Register reg) { - if (LP64_ONLY(true ||) (VM_Version::is_P6() && reg->has_byte_register())) { - movsbl(reg, reg); // movsxb - } else { - shll(reg, 24); - sarl(reg, 24); - } + movsbl(reg, reg); // movsxb } void MacroAssembler::sign_extend_short(Register reg) { - if (LP64_ONLY(true ||) VM_Version::is_P6()) { - movswl(reg, reg); // movsxw - } else { - shll(reg, 16); - sarl(reg, 16); - } + movswl(reg, reg); // movsxw } void MacroAssembler::testl(Address dst, int32_t imm32) { @@ -3201,8 +2503,6 @@ void MacroAssembler::testl(Register dst, AddressLiteral src) { testl(dst, as_Address(src)); } -#ifdef _LP64 - void MacroAssembler::testq(Address dst, int32_t imm32) { if (imm32 >= 0) { testl(dst, imm32); @@ -3219,8 +2519,6 @@ void MacroAssembler::testq(Register dst, int32_t imm32) { } } -#endif - void MacroAssembler::pcmpeqb(XMMRegister dst, XMMRegister src) { assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15"); Assembler::pcmpeqb(dst, src); @@ -4053,16 +3351,16 @@ void MacroAssembler::resolve_global_jobject(Register value, } void MacroAssembler::subptr(Register dst, int32_t imm32) { - LP64_ONLY(subq(dst, imm32)) NOT_LP64(subl(dst, imm32)); + subq(dst, imm32); } // Force generation of a 4 byte immediate value even if it fits into 8bit void MacroAssembler::subptr_imm32(Register dst, int32_t imm32) { - LP64_ONLY(subq_imm32(dst, imm32)) NOT_LP64(subl_imm32(dst, imm32)); + subq_imm32(dst, imm32); }
void MacroAssembler::subptr(Register dst, Register src) { - LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); + subq(dst, src); } // C++ bool manipulation @@ -4080,7 +3378,7 @@ void MacroAssembler::testbool(Register dst) { } void MacroAssembler::testptr(Register dst, Register src) { - LP64_ONLY(testq(dst, src)) NOT_LP64(testl(dst, src)); + testq(dst, src); } // Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. @@ -4096,20 +3394,14 @@ void MacroAssembler::tlab_allocate(Register thread, Register obj, RegSet MacroAssembler::call_clobbered_gp_registers() { RegSet regs; -#ifdef _LP64 regs += RegSet::of(rax, rcx, rdx); #ifndef _WINDOWS regs += RegSet::of(rsi, rdi); #endif regs += RegSet::range(r8, r11); -#else - regs += RegSet::of(rax, rcx, rdx); -#endif -#ifdef _LP64 if (UseAPX) { regs += RegSet::range(r16, as_Register(Register::number_of_registers - 1)); } -#endif return regs; } @@ -4126,30 +3418,15 @@ XMMRegSet MacroAssembler::call_clobbered_xmm_registers() { #endif } -static int FPUSaveAreaSize = align_up(108, StackAlignmentInBytes); // 108 bytes needed for FPU state by fsave/frstor - -#ifndef _LP64 -static bool use_x87_registers() { return UseSSE < 2; } -#endif -static bool use_xmm_registers() { return UseSSE >= 1; } - // C1 only ever uses the first double/float of the XMM register. -static int xmm_save_size() { return UseSSE >= 2 ? 
sizeof(double) : sizeof(float); } +static int xmm_save_size() { return sizeof(double); } static void save_xmm_register(MacroAssembler* masm, int offset, XMMRegister reg) { - if (UseSSE == 1) { - masm->movflt(Address(rsp, offset), reg); - } else { - masm->movdbl(Address(rsp, offset), reg); - } + masm->movdbl(Address(rsp, offset), reg); } static void restore_xmm_register(MacroAssembler* masm, int offset, XMMRegister reg) { - if (UseSSE == 1) { - masm->movflt(reg, Address(rsp, offset)); - } else { - masm->movdbl(reg, Address(rsp, offset)); - } + masm->movdbl(reg, Address(rsp, offset)); } static int register_section_sizes(RegSet gp_registers, XMMRegSet xmm_registers, @@ -4158,12 +3435,8 @@ static int register_section_sizes(RegSet gp_registers, XMMRegSet xmm_registers, gp_area_size = align_up(gp_registers.size() * Register::max_slots_per_register * VMRegImpl::stack_slot_size, StackAlignmentInBytes); -#ifdef _LP64 fp_area_size = 0; -#else - fp_area_size = (save_fpu && use_x87_registers()) ? FPUSaveAreaSize : 0; -#endif - xmm_area_size = (save_fpu && use_xmm_registers()) ? xmm_registers.size() * xmm_save_size() : 0; + xmm_area_size = save_fpu ? 
xmm_registers.size() * xmm_save_size() : 0; return gp_area_size + fp_area_size + xmm_area_size; } @@ -4182,13 +3455,7 @@ void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude, bool s push_set(gp_registers_to_push, 0); -#ifndef _LP64 - if (save_fpu && use_x87_registers()) { - fnsave(Address(rsp, gp_area_size)); - fwait(); - } -#endif - if (save_fpu && use_xmm_registers()) { + if (save_fpu) { push_set(call_clobbered_xmm_registers(), gp_area_size + fp_area_size); } @@ -4206,14 +3473,9 @@ void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude, bool re int total_save_size = register_section_sizes(gp_registers_to_pop, call_clobbered_xmm_registers(), restore_fpu, gp_area_size, fp_area_size, xmm_area_size); - if (restore_fpu && use_xmm_registers()) { + if (restore_fpu) { pop_set(call_clobbered_xmm_registers(), gp_area_size + fp_area_size); } -#ifndef _LP64 - if (restore_fpu && use_x87_registers()) { - frstor(Address(rsp, gp_area_size)); - } -#endif pop_set(gp_registers_to_pop, 0); @@ -4313,27 +3575,12 @@ void MacroAssembler::zero_memory(Register address, Register length_in_bytes, int shrptr(index, 2); // use 2 instructions to avoid partial flag stall shrptr(index, 1); } -#ifndef _LP64 - // index could have not been a multiple of 8 (i.e., bit 2 was set) - { - Label even; - // note: if index was a multiple of 8, then it cannot - // be 0 now otherwise it must have been 0 before - // => if it is even, we don't need to check for 0 again - jcc(Assembler::carryClear, even); - // clear topmost word (no jump would be needed if conditional assignment worked here) - movptr(Address(address, index, Address::times_8, offset_in_bytes - 0*BytesPerWord), temp); - // index could be 0 now, must check again - jcc(Assembler::zero, done); - bind(even); - } -#endif // !_LP64 + // initialize remaining object fields: index is a multiple of 2 now { Label loop; bind(loop); movptr(Address(address, index, Address::times_8, offset_in_bytes - 1*BytesPerWord), temp); - 
NOT_LP64(movptr(Address(address, index, Address::times_8, offset_in_bytes - 2*BytesPerWord), temp);) decrement(index); jcc(Assembler::notZero, loop); } @@ -4710,9 +3957,8 @@ void MacroAssembler::check_klass_subtype_slow_path_linear(Register sub_klass, #ifndef PRODUCT uint* pst_counter = &SharedRuntime::_partial_subtype_ctr; ExternalAddress pst_counter_addr((address) pst_counter); - NOT_LP64( incrementl(pst_counter_addr) ); - LP64_ONLY( lea(rcx, pst_counter_addr) ); - LP64_ONLY( incrementl(Address(rcx, 0)) ); + lea(rcx, pst_counter_addr); + incrementl(Address(rcx, 0)); #endif //PRODUCT // We will consult the secondary-super array. @@ -4758,22 +4004,6 @@ void MacroAssembler::check_klass_subtype_slow_path_linear(Register sub_klass, bind(L_fallthrough); } -#ifndef _LP64 - -// 32-bit x86 only: always use the linear search. -void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, - Register super_klass, - Register temp_reg, - Register temp2_reg, - Label* L_success, - Label* L_failure, - bool set_cond_codes) { - check_klass_subtype_slow_path_linear - (sub_klass, super_klass, temp_reg, temp2_reg, L_success, L_failure, set_cond_codes); -} - -#else // _LP64 - void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, Register super_klass, Register temp_reg, @@ -5357,8 +4587,6 @@ void MacroAssembler::verify_secondary_supers_table(Register r_sub_klass, #undef LOOKUP_SECONDARY_SUPERS_TABLE_REGISTERS -#endif // LP64 - void MacroAssembler::clinit_barrier(Register klass, Register thread, Label* L_fast_path, Label* L_slow_path) { assert(L_fast_path != nullptr || L_slow_path != nullptr, "at least one is required"); @@ -5413,9 +4641,7 @@ void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, if (!VerifyOops) return; BLOCK_COMMENT("verify_oop {"); -#ifdef _LP64 push(rscratch1); -#endif push(rax); // save rax push(reg); // pass register argument @@ -5473,9 +4699,7 @@ Address MacroAssembler::argument_address(RegisterOrConstant 
arg_slot, void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) { if (!VerifyOops) return; -#ifdef _LP64 push(rscratch1); -#endif push(rax); // save rax, // addr may contain rsp so we will have to adjust it based on the push // we just did (and on 64 bit we do two pushes) @@ -5483,7 +4707,7 @@ void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* f // stores rax into addr which is backwards of what was intended. if (addr.uses(rsp)) { lea(rax, addr); - pushptr(Address(rax, LP64_ONLY(2 *) BytesPerWord)); + pushptr(Address(rax, 2 * BytesPerWord)); } else { pushptr(addr); } @@ -5510,11 +4734,9 @@ void MacroAssembler::verify_tlab() { if (UseTLAB && VerifyOops) { Label next, ok; Register t1 = rsi; - Register thread_reg = NOT_LP64(rbx) LP64_ONLY(r15_thread); + Register thread_reg = r15_thread; push(t1); - NOT_LP64(push(thread_reg)); - NOT_LP64(get_thread(thread_reg)); movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); @@ -5530,7 +4752,6 @@ void MacroAssembler::verify_tlab() { should_not_reach_here(); bind(ok); - NOT_LP64(pop(thread_reg)); pop(t1); } #endif @@ -5811,84 +5032,6 @@ void MacroAssembler::print_CPU_state() { } -#ifndef _LP64 -static bool _verify_FPU(int stack_depth, char* s, CPU_State* state) { - static int counter = 0; - FPU_State* fs = &state->_fpu_state; - counter++; - // For leaf calls, only verify that the top few elements remain empty. - // We only need 1 empty at the top for C2 code. 
- if( stack_depth < 0 ) { - if( fs->tag_for_st(7) != 3 ) { - printf("FPR7 not empty\n"); - state->print(); - assert(false, "error"); - return false; - } - return true; // All other stack states do not matter - } - - assert((fs->_control_word._value & 0xffff) == StubRoutines::x86::fpu_cntrl_wrd_std(), - "bad FPU control word"); - - // compute stack depth - int i = 0; - while (i < FPU_State::number_of_registers && fs->tag_for_st(i) < 3) i++; - int d = i; - while (i < FPU_State::number_of_registers && fs->tag_for_st(i) == 3) i++; - // verify findings - if (i != FPU_State::number_of_registers) { - // stack not contiguous - printf("%s: stack not contiguous at ST%d\n", s, i); - state->print(); - assert(false, "error"); - return false; - } - // check if computed stack depth corresponds to expected stack depth - if (stack_depth < 0) { - // expected stack depth is -stack_depth or less - if (d > -stack_depth) { - // too many elements on the stack - printf("%s: <= %d stack elements expected but found %d\n", s, -stack_depth, d); - state->print(); - assert(false, "error"); - return false; - } - } else { - // expected stack depth is stack_depth - if (d != stack_depth) { - // wrong stack depth - printf("%s: %d stack elements expected but found %d\n", s, stack_depth, d); - state->print(); - assert(false, "error"); - return false; - } - } - // everything is cool - return true; -} - -void MacroAssembler::verify_FPU(int stack_depth, const char* s) { - if (!VerifyFPU) return; - push_CPU_state(); - push(rsp); // pass CPU state - ExternalAddress msg((address) s); - // pass message string s - pushptr(msg.addr(), noreg); - push(stack_depth); // pass stack depth - call(RuntimeAddress(CAST_FROM_FN_PTR(address, _verify_FPU))); - addptr(rsp, 3 * wordSize); // discard arguments - // check for error - { Label L; - testl(rax, rax); - jcc(Assembler::notZero, L); - int3(); // break if error condition - bind(L); - } - pop_CPU_state(); -} -#endif // _LP64 - void 
MacroAssembler::restore_cpu_control_state_after_jni(Register rscratch) { // Either restore the MXCSR register after returning from the JNI Call // or verify that it wasn't changed (with -Xcheck:jni flag). @@ -5901,14 +5044,6 @@ void MacroAssembler::restore_cpu_control_state_after_jni(Register rscratch) { } // Clear upper bits of YMM registers to avoid SSE <-> AVX transition penalty. vzeroupper(); - -#ifndef _LP64 - // Either restore the x87 floating pointer control word after returning - // from the JNI call or verify that it wasn't changed. - if (CheckJNICalls) { - call(RuntimeAddress(StubRoutines::x86::verify_fpu_cntrl_wrd_entry())); - } -#endif // _LP64 } // ((OopHandle)result).resolve(); @@ -5958,27 +5093,22 @@ void MacroAssembler::load_method_holder(Register holder, Register method) { movptr(holder, Address(holder, ConstantPool::pool_holder_offset())); // InstanceKlass* } -#ifdef _LP64 void MacroAssembler::load_narrow_klass_compact(Register dst, Register src) { assert(UseCompactObjectHeaders, "expect compact object headers"); movq(dst, Address(src, oopDesc::mark_offset_in_bytes())); shrq(dst, markWord::klass_shift); } -#endif void MacroAssembler::load_klass(Register dst, Register src, Register tmp) { assert_different_registers(src, tmp); assert_different_registers(dst, tmp); -#ifdef _LP64 if (UseCompactObjectHeaders) { load_narrow_klass_compact(dst, src); decode_klass_not_null(dst, tmp); } else if (UseCompressedClassPointers) { movl(dst, Address(src, oopDesc::klass_offset_in_bytes())); decode_klass_not_null(dst, tmp); - } else -#endif - { + } else { movptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); } } @@ -5987,17 +5117,15 @@ void MacroAssembler::store_klass(Register dst, Register src, Register tmp) { assert(!UseCompactObjectHeaders, "not with compact headers"); assert_different_registers(src, tmp); assert_different_registers(dst, tmp); -#ifdef _LP64 if (UseCompressedClassPointers) { encode_klass_not_null(src, tmp); movl(Address(dst, 
oopDesc::klass_offset_in_bytes()), src); - } else -#endif + } else { movptr(Address(dst, oopDesc::klass_offset_in_bytes()), src); + } } void MacroAssembler::cmp_klass(Register klass, Register obj, Register tmp) { -#ifdef _LP64 if (UseCompactObjectHeaders) { assert(tmp != noreg, "need tmp"); assert_different_registers(klass, obj, tmp); @@ -6005,15 +5133,12 @@ void MacroAssembler::cmp_klass(Register klass, Register obj, Register tmp) { cmpl(klass, tmp); } else if (UseCompressedClassPointers) { cmpl(klass, Address(obj, oopDesc::klass_offset_in_bytes())); - } else -#endif - { + } else { cmpptr(klass, Address(obj, oopDesc::klass_offset_in_bytes())); } } void MacroAssembler::cmp_klasses_from_objects(Register obj1, Register obj2, Register tmp1, Register tmp2) { -#ifdef _LP64 if (UseCompactObjectHeaders) { assert(tmp2 != noreg, "need tmp2"); assert_different_registers(obj1, obj2, tmp1, tmp2); @@ -6023,9 +5148,7 @@ void MacroAssembler::cmp_klasses_from_objects(Register obj1, Register obj2, Regi } else if (UseCompressedClassPointers) { movl(tmp1, Address(obj1, oopDesc::klass_offset_in_bytes())); cmpl(tmp1, Address(obj2, oopDesc::klass_offset_in_bytes())); - } else -#endif - { + } else { movptr(tmp1, Address(obj1, oopDesc::klass_offset_in_bytes())); cmpptr(tmp1, Address(obj2, oopDesc::klass_offset_in_bytes())); } @@ -6076,7 +5199,6 @@ void MacroAssembler::store_heap_oop_null(Address dst) { access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg); } -#ifdef _LP64 void MacroAssembler::store_klass_gap(Register dst, Register src) { assert(!UseCompactObjectHeaders, "Don't use with compact headers"); if (UseCompressedClassPointers) { @@ -6398,8 +5520,6 @@ void MacroAssembler::reinit_heapbase() { } } -#endif // _LP64 - #if COMPILER2_OR_JVMCI // clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM/ZMM registers @@ -6578,8 +5698,6 @@ void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, XMMReg cmpptr(cnt, InitArrayShortSize/BytesPerLong); 
jccb(Assembler::greater, LONG); - NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM - decrement(cnt); jccb(Assembler::negative, DONE); // Zero length @@ -6600,7 +5718,6 @@ void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, XMMReg } else if (UseXMMForObjInit) { xmm_clear_mem(base, cnt, tmp, xtmp, mask); } else { - NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM rep_stos(); } @@ -6618,7 +5735,7 @@ void MacroAssembler::generate_fill(BasicType t, bool aligned, Label L_exit; Label L_fill_2_bytes, L_fill_4_bytes; -#if defined(COMPILER2) && defined(_LP64) +#if defined(COMPILER2) if(MaxVectorSize >=32 && VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()) { @@ -6679,39 +5796,7 @@ void MacroAssembler::generate_fill(BasicType t, bool aligned, subptr(count, 1<<(shift-1)); BIND(L_skip_align2); } - if (UseSSE < 2) { - Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; - // Fill 32-byte chunks - subptr(count, 8 << shift); - jcc(Assembler::less, L_check_fill_8_bytes); - align(16); - - BIND(L_fill_32_bytes_loop); - - for (int i = 0; i < 32; i += 4) { - movl(Address(to, i), value); - } - - addptr(to, 32); - subptr(count, 8 << shift); - jcc(Assembler::greaterEqual, L_fill_32_bytes_loop); - BIND(L_check_fill_8_bytes); - addptr(count, 8 << shift); - jccb(Assembler::zero, L_exit); - jmpb(L_fill_8_bytes); - - // - // length is too short, just fill qwords - // - BIND(L_fill_8_bytes_loop); - movl(Address(to, 0), value); - movl(Address(to, 4), value); - addptr(to, 8); - BIND(L_fill_8_bytes); - subptr(count, 1 << (shift + 1)); - jcc(Assembler::greaterEqual, L_fill_8_bytes_loop); - // fall through to fill 4 bytes - } else { + { Label L_fill_32_bytes; if (!UseUnalignedLoadStores) { // align to 8 bytes, we know we are 4 byte aligned to start @@ -6723,7 +5808,6 @@ void MacroAssembler::generate_fill(BasicType t, bool aligned, } BIND(L_fill_32_bytes); { - assert( UseSSE 
>= 2, "supported cpu only" ); Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes; movdl(xtmp, value); if (UseAVX >= 2 && UseUnalignedLoadStores) { @@ -7031,7 +6115,6 @@ void MacroAssembler::encode_iso_array(Register src, Register dst, Register len, bind(L_done); } -#ifdef _LP64 /** * Helper for multiply_to_len(). */ @@ -8206,7 +7289,6 @@ void MacroAssembler::mul_add(Register out, Register in, Register offs, pop(tmp2); pop(tmp1); } -#endif /** * Emits code to update CRC-32 with a byte value according to constants in table @@ -8429,7 +7511,6 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Regi notl(crc); // ~c } -#ifdef _LP64 // Helper function for AVX 512 CRC32 // Fold 512-bit data chunks void MacroAssembler::fold512bit_crc32_avx512(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, @@ -8947,155 +8028,7 @@ void MacroAssembler::crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp bind(L_exit); } -#else -void MacroAssembler::crc32c_ipl_alg4(Register in_out, uint32_t n, - Register tmp1, Register tmp2, Register tmp3, - XMMRegister xtmp1, XMMRegister xtmp2) { - lea(tmp3, ExternalAddress(StubRoutines::crc32c_table_addr())); - if (n > 0) { - addl(tmp3, n * 256 * 8); - } - // Q1 = TABLEExt[n][B & 0xFF]; - movl(tmp1, in_out); - andl(tmp1, 0x000000FF); - shll(tmp1, 3); - addl(tmp1, tmp3); - movq(xtmp1, Address(tmp1, 0)); - - // Q2 = TABLEExt[n][B >> 8 & 0xFF]; - movl(tmp2, in_out); - shrl(tmp2, 8); - andl(tmp2, 0x000000FF); - shll(tmp2, 3); - addl(tmp2, tmp3); - movq(xtmp2, Address(tmp2, 0)); - - psllq(xtmp2, 8); - pxor(xtmp1, xtmp2); - - // Q3 = TABLEExt[n][B >> 16 & 0xFF]; - movl(tmp2, in_out); - shrl(tmp2, 16); - andl(tmp2, 0x000000FF); - shll(tmp2, 3); - addl(tmp2, tmp3); - movq(xtmp2, Address(tmp2, 0)); - - psllq(xtmp2, 16); - pxor(xtmp1, xtmp2); - - // Q4 = TABLEExt[n][B >> 24 & 0xFF]; - shrl(in_out, 24); - andl(in_out, 0x000000FF); - shll(in_out, 3); - addl(in_out, tmp3); - 
movq(xtmp2, Address(in_out, 0)); - - psllq(xtmp2, 24); - pxor(xtmp1, xtmp2); // Result in CXMM - // return Q1 ^ Q2 << 8 ^ Q3 << 16 ^ Q4 << 24; -} - -void MacroAssembler::crc32c_pclmulqdq(XMMRegister w_xtmp1, - Register in_out, - uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported, - XMMRegister w_xtmp2, - Register tmp1, - Register n_tmp2, Register n_tmp3) { - if (is_pclmulqdq_supported) { - movdl(w_xtmp1, in_out); - - movl(tmp1, const_or_pre_comp_const_index); - movdl(w_xtmp2, tmp1); - pclmulqdq(w_xtmp1, w_xtmp2, 0); - // Keep result in XMM since GPR is 32 bit in length - } else { - crc32c_ipl_alg4(in_out, const_or_pre_comp_const_index, tmp1, n_tmp2, n_tmp3, w_xtmp1, w_xtmp2); - } -} - -void MacroAssembler::crc32c_rec_alt2(uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, Register in_out, Register in1, Register in2, - XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, - Register tmp1, Register tmp2, - Register n_tmp3) { - crc32c_pclmulqdq(w_xtmp1, in_out, const_or_pre_comp_const_index_u1, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3); - crc32c_pclmulqdq(w_xtmp2, in1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, w_xtmp3, tmp1, tmp2, n_tmp3); - - psllq(w_xtmp1, 1); - movdl(tmp1, w_xtmp1); - psrlq(w_xtmp1, 32); - movdl(in_out, w_xtmp1); - - xorl(tmp2, tmp2); - crc32(tmp2, tmp1, 4); - xorl(in_out, tmp2); - - psllq(w_xtmp2, 1); - movdl(tmp1, w_xtmp2); - psrlq(w_xtmp2, 32); - movdl(in1, w_xtmp2); - - xorl(tmp2, tmp2); - crc32(tmp2, tmp1, 4); - xorl(in1, tmp2); - xorl(in_out, in1); - xorl(in_out, in2); -} - -void MacroAssembler::crc32c_proc_chunk(uint32_t size, uint32_t const_or_pre_comp_const_index_u1, uint32_t const_or_pre_comp_const_index_u2, bool is_pclmulqdq_supported, - Register in_out1, Register in_out2, Register in_out3, - Register tmp1, Register tmp2, Register tmp3, - XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, - Register tmp4, 
Register tmp5, - Register n_tmp6) { - Label L_processPartitions; - Label L_processPartition; - Label L_exit; - - bind(L_processPartitions); - cmpl(in_out1, 3 * size); - jcc(Assembler::less, L_exit); - xorl(tmp1, tmp1); - xorl(tmp2, tmp2); - movl(tmp3, in_out2); - addl(tmp3, size); - - bind(L_processPartition); - crc32(in_out3, Address(in_out2, 0), 4); - crc32(tmp1, Address(in_out2, size), 4); - crc32(tmp2, Address(in_out2, size*2), 4); - crc32(in_out3, Address(in_out2, 0+4), 4); - crc32(tmp1, Address(in_out2, size+4), 4); - crc32(tmp2, Address(in_out2, size*2+4), 4); - addl(in_out2, 8); - cmpl(in_out2, tmp3); - jcc(Assembler::less, L_processPartition); - - push(tmp3); - push(in_out1); - push(in_out2); - tmp4 = tmp3; - tmp5 = in_out1; - n_tmp6 = in_out2; - - crc32c_rec_alt2(const_or_pre_comp_const_index_u1, const_or_pre_comp_const_index_u2, is_pclmulqdq_supported, in_out3, tmp1, tmp2, - w_xtmp1, w_xtmp2, w_xtmp3, - tmp4, tmp5, - n_tmp6); - - pop(in_out2); - pop(in_out1); - pop(tmp3); - - addl(in_out2, 2 * size); - subl(in_out1, 3 * size); - jmp(L_processPartitions); - - bind(L_exit); -} -#endif //LP64 -#ifdef _LP64 // Algorithm 2: Pipelined usage of the CRC32 instruction. // Input: A buffer I of L bytes. // Output: the CRC32C value of the buffer. 
@@ -9187,84 +8120,6 @@ void MacroAssembler::crc32c_ipl_alg2_alt2(Register in_out, Register in1, Registe BIND(L_exit); } -#else -void MacroAssembler::crc32c_ipl_alg2_alt2(Register in_out, Register in1, Register in2, - Register tmp1, Register tmp2, Register tmp3, - Register tmp4, Register tmp5, Register tmp6, - XMMRegister w_xtmp1, XMMRegister w_xtmp2, XMMRegister w_xtmp3, - bool is_pclmulqdq_supported) { - uint32_t const_or_pre_comp_const_index[CRC32C_NUM_PRECOMPUTED_CONSTANTS]; - Label L_wordByWord; - Label L_byteByByteProlog; - Label L_byteByByte; - Label L_exit; - - if (is_pclmulqdq_supported) { - const_or_pre_comp_const_index[1] = *(uint32_t *)StubRoutines::_crc32c_table_addr; - const_or_pre_comp_const_index[0] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 1); - - const_or_pre_comp_const_index[3] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 2); - const_or_pre_comp_const_index[2] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 3); - - const_or_pre_comp_const_index[5] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 4); - const_or_pre_comp_const_index[4] = *((uint32_t *)StubRoutines::_crc32c_table_addr + 5); - } else { - const_or_pre_comp_const_index[0] = 1; - const_or_pre_comp_const_index[1] = 0; - - const_or_pre_comp_const_index[2] = 3; - const_or_pre_comp_const_index[3] = 2; - - const_or_pre_comp_const_index[4] = 5; - const_or_pre_comp_const_index[5] = 4; - } - crc32c_proc_chunk(CRC32C_HIGH, const_or_pre_comp_const_index[0], const_or_pre_comp_const_index[1], is_pclmulqdq_supported, - in2, in1, in_out, - tmp1, tmp2, tmp3, - w_xtmp1, w_xtmp2, w_xtmp3, - tmp4, tmp5, - tmp6); - crc32c_proc_chunk(CRC32C_MIDDLE, const_or_pre_comp_const_index[2], const_or_pre_comp_const_index[3], is_pclmulqdq_supported, - in2, in1, in_out, - tmp1, tmp2, tmp3, - w_xtmp1, w_xtmp2, w_xtmp3, - tmp4, tmp5, - tmp6); - crc32c_proc_chunk(CRC32C_LOW, const_or_pre_comp_const_index[4], const_or_pre_comp_const_index[5], is_pclmulqdq_supported, - in2, in1, in_out, - tmp1, tmp2, 
tmp3, - w_xtmp1, w_xtmp2, w_xtmp3, - tmp4, tmp5, - tmp6); - movl(tmp1, in2); - andl(tmp1, 0x00000007); - negl(tmp1); - addl(tmp1, in2); - addl(tmp1, in1); - - BIND(L_wordByWord); - cmpl(in1, tmp1); - jcc(Assembler::greaterEqual, L_byteByByteProlog); - crc32(in_out, Address(in1,0), 4); - addl(in1, 4); - jmp(L_wordByWord); - - BIND(L_byteByByteProlog); - andl(in2, 0x00000007); - movl(tmp2, 1); - - BIND(L_byteByByte); - cmpl(tmp2, in2); - jccb(Assembler::greater, L_exit); - movb(tmp1, Address(in1, 0)); - crc32(in_out, tmp1, 1); - incl(in1); - incl(tmp2); - jmp(L_byteByByte); - - BIND(L_exit); -} -#endif // LP64 #undef BIND #undef BLOCK_COMMENT @@ -10258,7 +9113,6 @@ void MacroAssembler::fill64(Register dst, int disp, XMMRegister xmm, bool use64b fill64(Address(dst, disp), xmm, use64byteVector); } -#ifdef _LP64 void MacroAssembler::generate_fill_avx3(BasicType type, Register to, Register value, Register count, Register rtmp, XMMRegister xtmp) { Label L_exit; @@ -10435,11 +9289,9 @@ void MacroAssembler::generate_fill_avx3(BasicType type, Register to, Register va } bind(L_exit); } -#endif #endif //COMPILER2_OR_JVMCI -#ifdef _LP64 void MacroAssembler::convert_f2i(Register dst, XMMRegister src) { Label done; cvttss2sil(dst, src); @@ -10603,8 +9455,6 @@ void MacroAssembler::cache_wbsync(bool is_pre) } } -#endif // _LP64 - Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) { switch (cond) { // Note some conditions are synonyms for others @@ -10633,29 +9483,25 @@ void MacroAssembler::get_thread(Register thread) { if (thread != rax) { push(rax); } - LP64_ONLY(push(rdi);) - LP64_ONLY(push(rsi);) + push(rdi); + push(rsi); push(rdx); push(rcx); -#ifdef _LP64 push(r8); push(r9); push(r10); push(r11); -#endif MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, Thread::current), 0); -#ifdef _LP64 pop(r11); pop(r10); pop(r9); pop(r8); -#endif pop(rcx); pop(rdx); - LP64_ONLY(pop(rsi);) - LP64_ONLY(pop(rdi);) + pop(rsi); + pop(rdi); if (thread != 
rax) { mov(thread, rax); pop(rax); @@ -10790,7 +9636,6 @@ void MacroAssembler::lightweight_unlock(Register obj, Register reg_rax, Register bind(unlocked); } -#ifdef _LP64 // Saves legacy GPRs state on stack. void MacroAssembler::save_legacy_gprs() { subq(rsp, 16 * wordSize); @@ -10839,4 +9684,3 @@ void MacroAssembler::setcc(Assembler::Condition comparison, Register dst) { movzbl(dst, dst); } } -#endif diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index c6e5b2a115f03..b17ce7634b3aa 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -148,10 +148,10 @@ class MacroAssembler: public Assembler { // Support for inc/dec with optimal instruction selection depending on value - void increment(Register reg, int value = 1) { LP64_ONLY(incrementq(reg, value)) NOT_LP64(incrementl(reg, value)) ; } - void decrement(Register reg, int value = 1) { LP64_ONLY(decrementq(reg, value)) NOT_LP64(decrementl(reg, value)) ; } - void increment(Address dst, int value = 1) { LP64_ONLY(incrementq(dst, value)) NOT_LP64(incrementl(dst, value)) ; } - void decrement(Address dst, int value = 1) { LP64_ONLY(decrementq(dst, value)) NOT_LP64(decrementl(dst, value)) ; } + void increment(Register reg, int value = 1) { incrementq(reg, value); } + void decrement(Register reg, int value = 1) { decrementq(reg, value); } + void increment(Address dst, int value = 1) { incrementq(dst, value); } + void decrement(Address dst, int value = 1) { decrementq(dst, value); } void decrementl(Address dst, int value = 1); void decrementl(Register reg, int value = 1); @@ -228,7 +228,6 @@ class MacroAssembler: public Assembler { // The pointer will be loaded into the thread register. 
void get_thread(Register thread); -#ifdef _LP64 // Support for argument shuffling // bias in bytes @@ -244,7 +243,6 @@ class MacroAssembler: public Assembler { VMRegPair dst, bool is_receiver, int* receiver_offset); -#endif // _LP64 // Support for VM calls // @@ -336,12 +334,10 @@ class MacroAssembler: public Assembler { address last_java_pc, Register rscratch); -#ifdef _LP64 void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register scratch); -#endif void reset_last_Java_frame(Register thread, bool clear_fp); @@ -371,9 +367,7 @@ class MacroAssembler: public Assembler { void load_method_holder(Register holder, Register method); // oop manipulations -#ifdef _LP64 void load_narrow_klass_compact(Register dst, Register src); -#endif void load_klass(Register dst, Register src, Register tmp); void store_klass(Register dst, Register src, Register tmp); @@ -401,7 +395,6 @@ class MacroAssembler: public Assembler { // stored using routines that take a jobject. void store_heap_oop_null(Address dst); -#ifdef _LP64 void store_klass_gap(Register dst, Register src); // This dummy is to prevent a call to store_heap_oop from @@ -436,8 +429,6 @@ class MacroAssembler: public Assembler { DEBUG_ONLY(void verify_heapbase(const char* msg);) -#endif // _LP64 - // Int division/remainder for Java // (as idivl, but checks for special case as described in JVM spec.) // returns idivl instruction offset for implicit exception handling @@ -477,39 +468,6 @@ class MacroAssembler: public Assembler { // Division by power of 2, rounding towards 0 void division_with_shift(Register reg, int shift_value); -#ifndef _LP64 - // Compares the top-most stack entries on the FPU stack and sets the eflags as follows: - // - // CF (corresponds to C0) if x < y - // PF (corresponds to C2) if unordered - // ZF (corresponds to C3) if x = y - // - // The arguments are in reversed order on the stack (i.e., top of stack is first argument). 
- // tmp is a temporary register, if none is available use noreg (only matters for non-P6 code) - void fcmp(Register tmp); - // Variant of the above which allows y to be further down the stack - // and which only pops x and y if specified. If pop_right is - // specified then pop_left must also be specified. - void fcmp(Register tmp, int index, bool pop_left, bool pop_right); - - // Floating-point comparison for Java - // Compares the top-most stack entries on the FPU stack and stores the result in dst. - // The arguments are in reversed order on the stack (i.e., top of stack is first argument). - // (semantics as described in JVM spec.) - void fcmp2int(Register dst, bool unordered_is_less); - // Variant of the above which allows y to be further down the stack - // and which only pops x and y if specified. If pop_right is - // specified then pop_left must also be specified. - void fcmp2int(Register dst, bool unordered_is_less, int index, bool pop_left, bool pop_right); - - // Floating-point remainder for Java (ST0 = ST0 fremr ST1, ST1 is empty afterwards) - // tmp is a temporary register, if none is available use noreg - void fremr(Register tmp); - - // only if +VerifyFPU - void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); -#endif // !LP64 - // dst = c = a * b + c void fmad(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c); void fmaf(XMMRegister dst, XMMRegister a, XMMRegister b, XMMRegister c); @@ -524,34 +482,18 @@ class MacroAssembler: public Assembler { void cmpss2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less); void cmpsd2int(XMMRegister opr1, XMMRegister opr2, Register dst, bool unordered_is_less); - // branch to L if FPU flag C2 is set/not set - // tmp is a temporary register, if none is available use noreg - void jC2 (Register tmp, Label& L); - void jnC2(Register tmp, Label& L); - - // Load float value from 'address'. If UseSSE >= 1, the value is loaded into - // register xmm0. 
Otherwise, the value is loaded onto the FPU stack. + // Load float value from 'address'. The value is loaded into register xmm0. void load_float(Address src); - // Store float value to 'address'. If UseSSE >= 1, the value is stored - // from register xmm0. Otherwise, the value is stored from the FPU stack. + // Store float value to 'address'. The value is stored from register xmm0. void store_float(Address dst); - // Load double value from 'address'. If UseSSE >= 2, the value is loaded into - // register xmm0. Otherwise, the value is loaded onto the FPU stack. + // Load double value from 'address'. The value is loaded into register xmm0. void load_double(Address src); - // Store double value to 'address'. If UseSSE >= 2, the value is stored - // from register xmm0. Otherwise, the value is stored from the FPU stack. + // Store double value to 'address'. The value is stored from register xmm0. void store_double(Address dst); -#ifndef _LP64 - // Pop ST (ffree & fincstp combined) - void fpop(); - - void empty_FPU_stack(); -#endif // !_LP64 - void push_IU_state(); void pop_IU_state(); @@ -666,7 +608,6 @@ class MacroAssembler: public Assembler { Label* L_failure, bool set_cond_codes = false); -#ifdef _LP64 // The 64-bit version, which may do a hashed subclass lookup. 
void check_klass_subtype_slow_path(Register sub_klass, Register super_klass, @@ -676,7 +617,6 @@ class MacroAssembler: public Assembler { Register temp4_reg, Label* L_success, Label* L_failure); -#endif // Three parts of a hashed subclass lookup: a simple linear search, // a table lookup, and a fallback that does linear probing in the @@ -713,7 +653,6 @@ class MacroAssembler: public Assembler { Register result, u1 super_klass_slot); -#ifdef _LP64 using Assembler::salq; void salq(Register dest, Register count); using Assembler::rorq; @@ -741,7 +680,6 @@ class MacroAssembler: public Assembler { Register temp1, Register temp2, Register temp3); -#endif void repne_scanq(Register addr, Register value, Register count, Register limit, Label* L_success, @@ -851,10 +789,10 @@ class MacroAssembler: public Assembler { // Arithmetics - void addptr(Address dst, int32_t src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)) ; } + void addptr(Address dst, int32_t src) { addq(dst, src); } void addptr(Address dst, Register src); - void addptr(Register dst, Address src) { LP64_ONLY(addq(dst, src)) NOT_LP64(addl(dst, src)); } + void addptr(Register dst, Address src) { addq(dst, src); } void addptr(Register dst, int32_t src); void addptr(Register dst, Register src); void addptr(Register dst, RegisterOrConstant src) { @@ -863,12 +801,10 @@ class MacroAssembler: public Assembler { } void andptr(Register dst, int32_t src); - void andptr(Register src1, Register src2) { LP64_ONLY(andq(src1, src2)) NOT_LP64(andl(src1, src2)) ; } + void andptr(Register src1, Register src2) { andq(src1, src2); } -#ifdef _LP64 using Assembler::andq; void andq(Register dst, AddressLiteral src, Register rscratch = noreg); -#endif void cmp8(AddressLiteral src1, int imm, Register rscratch = noreg); @@ -881,12 +817,6 @@ class MacroAssembler: public Assembler { void cmp32(Register src1, Address src2); -#ifndef _LP64 - void cmpklass(Address dst, Metadata* obj); - void cmpklass(Register dst, Metadata* obj); - void 
cmpoop(Address dst, jobject obj); -#endif // _LP64 - void cmpoop(Register src1, Register src2); void cmpoop(Register src1, Address src2); void cmpoop(Register dst, jobject obj, Register rscratch); @@ -896,12 +826,11 @@ class MacroAssembler: public Assembler { void cmpptr(Register src1, AddressLiteral src2, Register rscratch = noreg); - void cmpptr(Register src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } - void cmpptr(Register src1, Address src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } - // void cmpptr(Address src1, Register src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } + void cmpptr(Register src1, Register src2) { cmpq(src1, src2); } + void cmpptr(Register src1, Address src2) { cmpq(src1, src2); } - void cmpptr(Register src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } - void cmpptr(Address src1, int32_t src2) { LP64_ONLY(cmpq(src1, src2)) NOT_LP64(cmpl(src1, src2)) ; } + void cmpptr(Register src1, int32_t src2) { cmpq(src1, src2); } + void cmpptr(Address src1, int32_t src2) { cmpq(src1, src2); } // cmp64 to avoild hiding cmpq void cmp64(Register src1, AddressLiteral src, Register rscratch = noreg); @@ -910,26 +839,26 @@ class MacroAssembler: public Assembler { void locked_cmpxchgptr(Register reg, AddressLiteral adr, Register rscratch = noreg); - void imulptr(Register dst, Register src) { LP64_ONLY(imulq(dst, src)) NOT_LP64(imull(dst, src)); } - void imulptr(Register dst, Register src, int imm32) { LP64_ONLY(imulq(dst, src, imm32)) NOT_LP64(imull(dst, src, imm32)); } + void imulptr(Register dst, Register src) { imulq(dst, src); } + void imulptr(Register dst, Register src, int imm32) { imulq(dst, src, imm32); } - void negptr(Register dst) { LP64_ONLY(negq(dst)) NOT_LP64(negl(dst)); } + void negptr(Register dst) { negq(dst); } - void notptr(Register dst) { LP64_ONLY(notq(dst)) NOT_LP64(notl(dst)); } + void notptr(Register dst) { notq(dst); } void shlptr(Register 
dst, int32_t shift); - void shlptr(Register dst) { LP64_ONLY(shlq(dst)) NOT_LP64(shll(dst)); } + void shlptr(Register dst) { shlq(dst); } void shrptr(Register dst, int32_t shift); - void shrptr(Register dst) { LP64_ONLY(shrq(dst)) NOT_LP64(shrl(dst)); } + void shrptr(Register dst) { shrq(dst); } - void sarptr(Register dst) { LP64_ONLY(sarq(dst)) NOT_LP64(sarl(dst)); } - void sarptr(Register dst, int32_t src) { LP64_ONLY(sarq(dst, src)) NOT_LP64(sarl(dst, src)); } + void sarptr(Register dst) { sarq(dst); } + void sarptr(Register dst, int32_t src) { sarq(dst, src); } - void subptr(Address dst, int32_t src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); } + void subptr(Address dst, int32_t src) { subq(dst, src); } - void subptr(Register dst, Address src) { LP64_ONLY(subq(dst, src)) NOT_LP64(subl(dst, src)); } + void subptr(Register dst, Address src) { subq(dst, src); } void subptr(Register dst, int32_t src); // Force generation of a 4 byte immediate value even if it fits into 8bit void subptr_imm32(Register dst, int32_t src); @@ -939,13 +868,13 @@ class MacroAssembler: public Assembler { else subptr(dst, src.as_register()); } - void sbbptr(Address dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); } - void sbbptr(Register dst, int32_t src) { LP64_ONLY(sbbq(dst, src)) NOT_LP64(sbbl(dst, src)); } + void sbbptr(Address dst, int32_t src) { sbbq(dst, src); } + void sbbptr(Register dst, int32_t src) { sbbq(dst, src); } - void xchgptr(Register src1, Register src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; } - void xchgptr(Register src1, Address src2) { LP64_ONLY(xchgq(src1, src2)) NOT_LP64(xchgl(src1, src2)) ; } + void xchgptr(Register src1, Register src2) { xchgq(src1, src2); } + void xchgptr(Register src1, Address src2) { xchgq(src1, src2); } - void xaddptr(Address src1, Register src2) { LP64_ONLY(xaddq(src1, src2)) NOT_LP64(xaddl(src1, src2)) ; } + void xaddptr(Address src1, Register src2) { xaddq(src1, src2); } @@ -955,12 
+884,10 @@ class MacroAssembler: public Assembler { // Unconditional atomic increment. void atomic_incl(Address counter_addr); void atomic_incl(AddressLiteral counter_addr, Register rscratch = noreg); -#ifdef _LP64 void atomic_incq(Address counter_addr); void atomic_incq(AddressLiteral counter_addr, Register rscratch = noreg); -#endif - void atomic_incptr(AddressLiteral counter_addr, Register rscratch = noreg) { LP64_ONLY(atomic_incq(counter_addr, rscratch)) NOT_LP64(atomic_incl(counter_addr, rscratch)) ; } - void atomic_incptr(Address counter_addr) { LP64_ONLY(atomic_incq(counter_addr)) NOT_LP64(atomic_incl(counter_addr)) ; } + void atomic_incptr(AddressLiteral counter_addr, Register rscratch = noreg) { atomic_incq(counter_addr, rscratch); } + void atomic_incptr(Address counter_addr) { atomic_incq(counter_addr); } using Assembler::lea; void lea(Register dst, AddressLiteral adr); @@ -978,18 +905,18 @@ class MacroAssembler: public Assembler { void testq(Address dst, int32_t imm32); void testq(Register dst, int32_t imm32); - void orptr(Register dst, Address src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); } - void orptr(Register dst, Register src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); } - void orptr(Register dst, int32_t src) { LP64_ONLY(orq(dst, src)) NOT_LP64(orl(dst, src)); } - void orptr(Address dst, int32_t imm32) { LP64_ONLY(orq(dst, imm32)) NOT_LP64(orl(dst, imm32)); } + void orptr(Register dst, Address src) { orq(dst, src); } + void orptr(Register dst, Register src) { orq(dst, src); } + void orptr(Register dst, int32_t src) { orq(dst, src); } + void orptr(Address dst, int32_t imm32) { orq(dst, imm32); } - void testptr(Register src, int32_t imm32) { LP64_ONLY(testq(src, imm32)) NOT_LP64(testl(src, imm32)); } - void testptr(Register src1, Address src2) { LP64_ONLY(testq(src1, src2)) NOT_LP64(testl(src1, src2)); } - void testptr(Address src, int32_t imm32) { LP64_ONLY(testq(src, imm32)) NOT_LP64(testl(src, imm32)); } + void testptr(Register 
src, int32_t imm32) { testq(src, imm32); } + void testptr(Register src1, Address src2) { testq(src1, src2); } + void testptr(Address src, int32_t imm32) { testq(src, imm32); } void testptr(Register src1, Register src2); - void xorptr(Register dst, Register src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); } - void xorptr(Register dst, Address src) { LP64_ONLY(xorq(dst, src)) NOT_LP64(xorl(dst, src)); } + void xorptr(Register dst, Register src) { xorq(dst, src); } + void xorptr(Register dst, Address src) { xorq(dst, src); } // Calls @@ -1114,31 +1041,9 @@ class MacroAssembler: public Assembler { void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); } void comisd(XMMRegister dst, AddressLiteral src, Register rscratch = noreg); -#ifndef _LP64 - void fadd_s(Address src) { Assembler::fadd_s(src); } - void fadd_s(AddressLiteral src) { Assembler::fadd_s(as_Address(src)); } - - void fldcw(Address src) { Assembler::fldcw(src); } - void fldcw(AddressLiteral src); - - void fld_s(int index) { Assembler::fld_s(index); } - void fld_s(Address src) { Assembler::fld_s(src); } - void fld_s(AddressLiteral src); - - void fld_d(Address src) { Assembler::fld_d(src); } - void fld_d(AddressLiteral src); - - void fld_x(Address src) { Assembler::fld_x(src); } - void fld_x(AddressLiteral src) { Assembler::fld_x(as_Address(src)); } - - void fmul_s(Address src) { Assembler::fmul_s(src); } - void fmul_s(AddressLiteral src) { Assembler::fmul_s(as_Address(src)); } -#endif // !_LP64 - void ldmxcsr(Address src) { Assembler::ldmxcsr(src); } void ldmxcsr(AddressLiteral src, Register rscratch = noreg); -#ifdef _LP64 private: void sha256_AVX2_one_round_compute( Register reg_old_h, @@ -1188,7 +1093,6 @@ class MacroAssembler: public Assembler { Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block, XMMRegister shuf_mask); void sha512_update_ni_x1(Register arg_hash, Register arg_msg, Register ofs, Register limit, bool multi_block); -#endif 
// _LP64 void fast_md5(Register buf, Address state, Address ofs, Address limit, bool multi_block); @@ -1198,68 +1102,15 @@ class MacroAssembler: public Assembler { Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block); -#ifdef _LP64 void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block, XMMRegister shuf_mask); -#else - void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, - XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, - Register buf, Register state, Register ofs, Register limit, Register rsp, - bool multi_block); -#endif void fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx, Register rdx, Register tmp); -#ifndef _LP64 - private: - // Initialized in macroAssembler_x86_constants.cpp - static address ONES; - static address L_2IL0FLOATPACKET_0; - static address PI4_INV; - static address PI4X3; - static address PI4X4; - - public: - void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, - XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, - Register rax, Register rcx, Register rdx, Register tmp1); - - void fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, - XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, - Register rax, Register rcx, Register rdx, Register tmp); - - void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, - XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx, - Register rdx, Register tmp); - - void 
fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, - XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, - Register rax, Register rbx, Register rdx); - - void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, - XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, - Register rax, Register rcx, Register rdx, Register tmp); - - void libm_sincos_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx, - Register edx, Register ebx, Register esi, Register edi, - Register ebp, Register esp); - - void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx, - Register esi, Register edi, Register ebp, Register esp); - - void libm_tancot_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx, - Register edx, Register ebx, Register esi, Register edi, - Register ebp, Register esp); - - void fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, - XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, - Register rax, Register rcx, Register rdx, Register tmp); -#endif // !_LP64 - private: // these are private because users should be doing movflt/movdbl @@ -2016,8 +1867,8 @@ class MacroAssembler: public Assembler { void cmov( Condition cc, Register dst, Register src) { cmovptr(cc, dst, src); } - void cmovptr(Condition cc, Register dst, Address src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); } - void cmovptr(Condition cc, Register dst, Register src) { LP64_ONLY(cmovq(cc, dst, src)) NOT_LP64(cmov32(cc, dst, src)); } + void cmovptr(Condition cc, Register dst, Address src) { cmovq(cc, dst, src); } + void cmovptr(Condition cc, Register dst, Register src) { cmovq(cc, dst, src); } void movoop(Register dst, jobject obj); void movoop(Address dst, jobject obj, Register rscratch); @@ -2056,15 +1907,15 @@ class MacroAssembler: public Assembler { // Can push value or effective address void 
pushptr(AddressLiteral src, Register rscratch); - void pushptr(Address src) { LP64_ONLY(pushq(src)) NOT_LP64(pushl(src)); } - void popptr(Address src) { LP64_ONLY(popq(src)) NOT_LP64(popl(src)); } + void pushptr(Address src) { pushq(src); } + void popptr(Address src) { popq(src); } void pushoop(jobject obj, Register rscratch); void pushklass(Metadata* obj, Register rscratch); // sign extend as need a l to ptr sized element - void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); } - void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); } + void movl2ptr(Register dst, Address src) { movslq(dst, src); } + void movl2ptr(Register dst, Register src) { movslq(dst, src); } public: @@ -2087,7 +1938,6 @@ class MacroAssembler: public Assembler { XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3, XMMRegister tmp4, Register tmp5, Register result, bool ascii); -#ifdef _LP64 void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2); void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, Register y, Register y_idx, Register z, @@ -2128,32 +1978,23 @@ class MacroAssembler: public Assembler { void vectorized_mismatch(Register obja, Register objb, Register length, Register log2_array_indxscale, Register result, Register tmp1, Register tmp2, XMMRegister vec1, XMMRegister vec2, XMMRegister vec3); -#endif // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic. 
void update_byte_crc32(Register crc, Register val, Register table); void kernel_crc32(Register crc, Register buf, Register len, Register table, Register tmp); -#ifdef _LP64 void kernel_crc32_avx512(Register crc, Register buf, Register len, Register table, Register tmp1, Register tmp2); void kernel_crc32_avx512_256B(Register crc, Register buf, Register len, Register key, Register pos, Register tmp1, Register tmp2, Label& L_barrett, Label& L_16B_reduction_loop, Label& L_get_last_two_xmms, Label& L_128_done, Label& L_cleanup); -#endif // _LP64 // CRC32C code for java.util.zip.CRC32C::updateBytes() intrinsic // Note on a naming convention: // Prefix w = register only used on a Westmere+ architecture // Prefix n = register only used on a Nehalem architecture -#ifdef _LP64 void crc32c_ipl_alg4(Register in_out, uint32_t n, Register tmp1, Register tmp2, Register tmp3); -#else - void crc32c_ipl_alg4(Register in_out, uint32_t n, - Register tmp1, Register tmp2, Register tmp3, - XMMRegister xtmp1, XMMRegister xtmp2); -#endif void crc32c_pclmulqdq(XMMRegister w_xtmp1, Register in_out, uint32_t const_or_pre_comp_const_index, bool is_pclmulqdq_supported, @@ -2178,10 +2019,8 @@ class MacroAssembler: public Assembler { // Fold 128-bit data chunk void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset); void fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf); -#ifdef _LP64 // Fold 512-bit data chunk void fold512bit_crc32_avx512(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, Register pos, int offset); -#endif // _LP64 // Fold 8-bit data void fold_8bit_crc32(Register crc, Register table, Register tmp); void fold_8bit_crc32(XMMRegister crc, Register table, XMMRegister xtmp, Register tmp); @@ -2215,7 +2054,6 @@ class MacroAssembler: public Assembler { void fill64(Register dst, int dis, XMMRegister xmm, bool use64byteVector = false); -#ifdef _LP64 void convert_f2i(Register dst, XMMRegister 
src); void convert_d2i(Register dst, XMMRegister src); void convert_f2l(Register dst, XMMRegister src); @@ -2230,7 +2068,6 @@ class MacroAssembler: public Assembler { void generate_fill_avx3(BasicType type, Register to, Register value, Register count, Register rtmp, XMMRegister xtmp); #endif // COMPILER2_OR_JVMCI -#endif // _LP64 void vallones(XMMRegister dst, int vector_len); @@ -2239,11 +2076,9 @@ class MacroAssembler: public Assembler { void lightweight_lock(Register basic_lock, Register obj, Register reg_rax, Register thread, Register tmp, Label& slow); void lightweight_unlock(Register obj, Register reg_rax, Register thread, Register tmp, Label& slow); -#ifdef _LP64 void save_legacy_gprs(); void restore_legacy_gprs(); void setcc(Assembler::Condition comparison, Register dst); -#endif }; #endif // CPU_X86_MACROASSEMBLER_X86_HPP diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_32_constants.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_32_constants.cpp deleted file mode 100644 index e177c7d94624b..0000000000000 --- a/src/hotspot/cpu/x86/macroAssembler_x86_32_constants.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). 
- * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "precompiled.hpp" -#include "macroAssembler_x86.hpp" - -ATTRIBUTE_ALIGNED(16) static const juint _ONES[] = { - 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xbff00000UL -}; -address MacroAssembler::ONES = (address)_ONES; - -ATTRIBUTE_ALIGNED(16) static const juint _PI4_INV[] = { - 0x6dc9c883UL, 0x3ff45f30UL -}; -address MacroAssembler::PI4_INV = (address)_PI4_INV; - -ATTRIBUTE_ALIGNED(16) static const juint _PI4X3[] = { - 0x54443000UL, 0xbfe921fbUL, 0x3b39a000UL, 0x3d373dcbUL, 0xe0e68948UL, - 0xba845c06UL -}; -address MacroAssembler::PI4X3 = (address)_PI4X3; - -ATTRIBUTE_ALIGNED(16) static const juint _PI4X4[] = { - 0x54400000UL, 0xbfe921fbUL, 0x1a600000UL, 0xbdc0b461UL, 0x2e000000UL, - 0xbb93198aUL, 0x252049c1UL, 0xb96b839aUL -}; -address MacroAssembler::PI4X4 = (address)_PI4X4; - -ATTRIBUTE_ALIGNED(16) static const juint _L_2IL0FLOATPACKET_0[] = { - 0xffffffffUL, 0x7fffffffUL, 0x00000000UL, 0x00000000UL -}; -address MacroAssembler::L_2IL0FLOATPACKET_0 = (address)_L_2IL0FLOATPACKET_0; diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_32_cos.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_32_cos.cpp deleted file mode 100644 index ce71bb50d8232..0000000000000 --- a/src/hotspot/cpu/x86/macroAssembler_x86_32_cos.cpp +++ /dev/null @@ -1,428 +0,0 @@ -/* -* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved. -* Intel Math Library (LIBM) Source Code -* -* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
-* -* This code is free software; you can redistribute it and/or modify it -* under the terms of the GNU General Public License version 2 only, as -* published by the Free Software Foundation. -* -* This code is distributed in the hope that it will be useful, but WITHOUT -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -* version 2 for more details (a copy is included in the LICENSE file that -* accompanied this code). -* -* You should have received a copy of the GNU General Public License version -* 2 along with this work; if not, write to the Free Software Foundation, -* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -* -* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -* or visit www.oracle.com if you need additional information or have any -* questions. -* -*/ - -#include "precompiled.hpp" -#include "asm/assembler.hpp" -#include "asm/assembler.inline.hpp" -#include "macroAssembler_x86.hpp" -#include "runtime/stubRoutines.hpp" -#include "utilities/globalDefinitions.hpp" - -/******************************************************************************/ -// ALGORITHM DESCRIPTION - COS() -// --------------------- -// -// 1. RANGE REDUCTION -// -// We perform an initial range reduction from X to r with -// -// X =~= N * pi/32 + r -// -// so that |r| <= pi/64 + epsilon. We restrict inputs to those -// where |N| <= 932560. Beyond this, the range reduction is -// insufficiently accurate. For extremely small inputs, -// denormalization can occur internally, impacting performance. -// This means that the main path is actually only taken for -// 2^-252 <= |X| < 90112. -// -// To avoid branches, we perform the range reduction to full -// accuracy each time. -// -// X - N * (P_1 + P_2 + P_3) -// -// where P_1 and P_2 are 32-bit numbers (so multiplication by N -// is exact) and P_3 is a 53-bit number. 
Together, these -// approximate pi well enough for all cases in the restricted -// range. -// -// The main reduction sequence is: -// -// y = 32/pi * x -// N = integer(y) -// (computed by adding and subtracting off SHIFTER) -// -// m_1 = N * P_1 -// m_2 = N * P_2 -// r_1 = x - m_1 -// r = r_1 - m_2 -// (this r can be used for most of the calculation) -// -// c_1 = r_1 - r -// m_3 = N * P_3 -// c_2 = c_1 - m_2 -// c = c_2 - m_3 -// -// 2. MAIN ALGORITHM -// -// The algorithm uses a table lookup based on B = M * pi / 32 -// where M = N mod 64. The stored values are: -// sigma closest power of 2 to cos(B) -// C_hl 53-bit cos(B) - sigma -// S_hi + S_lo 2 * 53-bit sin(B) -// -// The computation is organized as follows: -// -// sin(B + r + c) = [sin(B) + sigma * r] + -// r * (cos(B) - sigma) + -// sin(B) * [cos(r + c) - 1] + -// cos(B) * [sin(r + c) - r] -// -// which is approximately: -// -// [S_hi + sigma * r] + -// C_hl * r + -// S_lo + S_hi * [(cos(r) - 1) - r * c] + -// (C_hl + sigma) * [(sin(r) - r) + c] -// -// and this is what is actually computed. We separate this sum -// into four parts: -// -// hi + med + pols + corr -// -// where -// -// hi = S_hi + sigma r -// med = C_hl * r -// pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r) -// corr = S_lo + c * ((C_hl + sigma) - S_hi * r) -// -// 3. POLYNOMIAL -// -// The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) * -// (sin(r) - r) can be rearranged freely, since it is quite -// small, so we exploit parallelism to the fullest. -// -// psc4 = SC_4 * r_1 -// msc4 = psc4 * r -// r2 = r * r -// msc2 = SC_2 * r2 -// r4 = r2 * r2 -// psc3 = SC_3 + msc4 -// psc1 = SC_1 + msc2 -// msc3 = r4 * psc3 -// sincospols = psc1 + msc3 -// pols = sincospols * -// -// -// 4. CORRECTION TERM -// -// This is where the "c" component of the range reduction is -// taken into account; recall that just "r" is used for most of -// the calculation. 
-// -// -c = m_3 - c_2 -// -d = S_hi * r - (C_hl + sigma) -// corr = -c * -d + S_lo -// -// 5. COMPENSATED SUMMATIONS -// -// The two successive compensated summations add up the high -// and medium parts, leaving just the low parts to add up at -// the end. -// -// rs = sigma * r -// res_int = S_hi + rs -// k_0 = S_hi - res_int -// k_2 = k_0 + rs -// med = C_hl * r -// res_hi = res_int + med -// k_1 = res_int - res_hi -// k_3 = k_1 + med -// -// 6. FINAL SUMMATION -// -// We now add up all the small parts: -// -// res_lo = pols(hi) + pols(lo) + corr + k_1 + k_3 -// -// Now the overall result is just: -// -// res_hi + res_lo -// -// 7. SMALL ARGUMENTS -// -// Inputs with |X| < 2^-252 are treated specially as -// 1 - |x|. -// -// Special cases: -// cos(NaN) = quiet NaN, and raise invalid exception -// cos(INF) = NaN and raise invalid exception -// cos(0) = 1 -// -/******************************************************************************/ - -// The 32 bit code is at most SSE2 compliant - -ATTRIBUTE_ALIGNED(16) static const juint _static_const_table_cos[] = -{ - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, 0xbf73b92eUL, - 0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL, - 0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, - 0xc0000000UL, 0xbc626d19UL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, - 0xbfa60beaUL, 0x2ed59f06UL, 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, - 0x00000000UL, 0x3ff00000UL, 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, - 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, 0x00000000UL, 0x3ff00000UL, - 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, 0x20000000UL, - 0x3c5e0d89UL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, 0xbfc59267UL, - 0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL, - 0x3ff00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, - 0x20000000UL, 0x3c68076aUL, 0x00000000UL, 
0x3ff00000UL, 0x99fcef32UL, - 0x3fca8279UL, 0x667f3bcdUL, 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, - 0x00000000UL, 0x3fe00000UL, 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, - 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, 0x00000000UL, 0x3fe00000UL, - 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, 0xe0000000UL, - 0x3c39f630UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, 0xbf9d4a2cUL, - 0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL, - 0x3fe00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0x3fed906bUL, - 0x20000000UL, 0x3c7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x76acf82dUL, - 0x3fa4a031UL, 0x56c62ddaUL, 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, - 0x00000000UL, 0x3fd00000UL, 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, - 0x3fef6297UL, 0x20000000UL, 0x3c756217UL, 0x00000000UL, 0x3fd00000UL, - 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, 0x40000000UL, - 0xbc887df6UL, 0x00000000UL, 0x3fc00000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, - 0x40000000UL, 0xbc887df6UL, 0x00000000UL, 0xbfc00000UL, 0x0e5967d5UL, - 0x3fac1d1fUL, 0xcff75cb0UL, 0x3fef6297UL, 0x20000000UL, 0x3c756217UL, - 0x00000000UL, 0xbfd00000UL, 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, - 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, 0x00000000UL, 0xbfd00000UL, - 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, 0x3fed906bUL, 0x20000000UL, - 0x3c7457e6UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, 0x3f9d4a2cUL, - 0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL, - 0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, - 0xe0000000UL, 0x3c39f630UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, - 0xbfc133ccUL, 0x6b151741UL, 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, - 0x00000000UL, 0xbfe00000UL, 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, - 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, 0x00000000UL, 0xbfe00000UL, - 0x53aba2fdUL, 
0x3fcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, 0x20000000UL, - 0x3c68076aUL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, 0x3fc59267UL, - 0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL, - 0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, - 0x20000000UL, 0x3c5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, - 0x3fb37ca1UL, 0xa6aea963UL, 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, - 0x00000000UL, 0xbff00000UL, 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, - 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, 0x00000000UL, 0xbff00000UL, - 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, 0xc0000000UL, - 0xbc626d19UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, 0x3f73b92eUL, - 0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL, - 0xbff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, - 0x3f73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL, - 0x00000000UL, 0xbff00000UL, 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, - 0xbfc8f8b8UL, 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0xbff00000UL, - 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, - 0x3c75d28dUL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, 0x3fb37ca1UL, - 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, 0x3c672cedUL, 0x00000000UL, - 0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0xbfde2b5dUL, - 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, - 0x3fc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL, - 0x00000000UL, 0xbff00000UL, 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, - 0xbfe44cf3UL, 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0xbff00000UL, - 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, - 0x3c8bdd34UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, 0xbfc133ccUL, - 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, 0x3c82c5e1UL, 0x00000000UL, - 0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0xbfea9b66UL, 
- 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, - 0x3f9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL, - 0x00000000UL, 0xbfe00000UL, 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, - 0xbfed906bUL, 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0xbfe00000UL, - 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, - 0xbc8760b1UL, 0x00000000UL, 0xbfd00000UL, 0x0e5967d5UL, 0x3fac1d1fUL, - 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, 0xbc756217UL, 0x00000000UL, - 0xbfd00000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0xbfefd88dUL, - 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0xbfc00000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, - 0xbfefd88dUL, 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0x3fc00000UL, - 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, - 0xbc756217UL, 0x00000000UL, 0x3fd00000UL, 0x76acf82dUL, 0x3fa4a031UL, - 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, 0xbc8760b1UL, 0x00000000UL, - 0x3fd00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0xbfed906bUL, - 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, - 0xbf9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL, - 0x00000000UL, 0x3fe00000UL, 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, - 0xbfea9b66UL, 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0x3fe00000UL, - 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, - 0x3c82c5e1UL, 0x00000000UL, 0x3fe00000UL, 0x99fcef32UL, 0x3fca8279UL, - 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 0x3c8bdd34UL, 0x00000000UL, - 0x3fe00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0xbfe44cf3UL, - 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, - 0xbfc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL, - 0x00000000UL, 0x3ff00000UL, 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, - 0xbfde2b5dUL, 0x20000000UL, 0xbc5e0d89UL, 
0x00000000UL, 0x3ff00000UL, - 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, - 0x3c672cedUL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, 0xbfa60beaUL, - 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, 0x3c75d28dUL, 0x00000000UL, - 0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0xbfc8f8b8UL, - 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, - 0xbf73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL, - 0x00000000UL, 0x3ff00000UL, 0x55555555UL, 0xbfc55555UL, 0x00000000UL, - 0xbfe00000UL, 0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL, - 0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL, 0xa556c734UL, - 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL, 0x1a600000UL, 0x3d90b461UL, - 0x1a600000UL, 0x3d90b461UL, 0x54400000UL, 0x3fb921fbUL, 0x00000000UL, - 0x00000000UL, 0x2e037073UL, 0x3b63198aUL, 0x00000000UL, 0x00000000UL, - 0x6dc9c883UL, 0x40245f30UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x43380000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3ff00000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL -}; -//registers, -// input: (rbp + 8) -// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 -// eax, ecx, edx, ebx (tmp) - -// Code generated by Intel C compiler for LIBM library - -void MacroAssembler::fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, - XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, - Register eax, Register ecx, Register edx, Register tmp) { - Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; - Label start; - - assert_different_registers(tmp, eax, ecx, edx); - - address static_const_table_cos = (address)_static_const_table_cos; - - bind(start); - subl(rsp, 120); - movl(Address(rsp, 56), tmp); - lea(tmp, 
ExternalAddress(static_const_table_cos)); - movsd(xmm0, Address(rsp, 128)); - pextrw(eax, xmm0, 3); - andl(eax, 32767); - subl(eax, 12336); - cmpl(eax, 4293); - jcc(Assembler::above, L_2TAG_PACKET_0_0_2); - movsd(xmm1, Address(tmp, 2160)); - mulsd(xmm1, xmm0); - movdqu(xmm5, Address(tmp, 2240)); - movsd(xmm4, Address(tmp, 2224)); - pand(xmm4, xmm0); - por(xmm5, xmm4); - movsd(xmm3, Address(tmp, 2128)); - movdqu(xmm2, Address(tmp, 2112)); - addpd(xmm1, xmm5); - cvttsd2sil(edx, xmm1); - cvtsi2sdl(xmm1, edx); - mulsd(xmm3, xmm1); - unpcklpd(xmm1, xmm1); - addl(edx, 1865232); - movdqu(xmm4, xmm0); - andl(edx, 63); - movdqu(xmm5, Address(tmp, 2096)); - lea(eax, Address(tmp, 0)); - shll(edx, 5); - addl(eax, edx); - mulpd(xmm2, xmm1); - subsd(xmm0, xmm3); - mulsd(xmm1, Address(tmp, 2144)); - subsd(xmm4, xmm3); - movsd(xmm7, Address(eax, 8)); - unpcklpd(xmm0, xmm0); - movapd(xmm3, xmm4); - subsd(xmm4, xmm2); - mulpd(xmm5, xmm0); - subpd(xmm0, xmm2); - movdqu(xmm6, Address(tmp, 2064)); - mulsd(xmm7, xmm4); - subsd(xmm3, xmm4); - mulpd(xmm5, xmm0); - mulpd(xmm0, xmm0); - subsd(xmm3, xmm2); - movdqu(xmm2, Address(eax, 0)); - subsd(xmm1, xmm3); - movsd(xmm3, Address(eax, 24)); - addsd(xmm2, xmm3); - subsd(xmm7, xmm2); - mulsd(xmm2, xmm4); - mulpd(xmm6, xmm0); - mulsd(xmm3, xmm4); - mulpd(xmm2, xmm0); - mulpd(xmm0, xmm0); - addpd(xmm5, Address(tmp, 2080)); - mulsd(xmm4, Address(eax, 0)); - addpd(xmm6, Address(tmp, 2048)); - mulpd(xmm5, xmm0); - movapd(xmm0, xmm3); - addsd(xmm3, Address(eax, 8)); - mulpd(xmm1, xmm7); - movapd(xmm7, xmm4); - addsd(xmm4, xmm3); - addpd(xmm6, xmm5); - movsd(xmm5, Address(eax, 8)); - subsd(xmm5, xmm3); - subsd(xmm3, xmm4); - addsd(xmm1, Address(eax, 16)); - mulpd(xmm6, xmm2); - addsd(xmm5, xmm0); - addsd(xmm3, xmm7); - addsd(xmm1, xmm5); - addsd(xmm1, xmm3); - addsd(xmm1, xmm6); - unpckhpd(xmm6, xmm6); - addsd(xmm1, xmm6); - addsd(xmm4, xmm1); - movsd(Address(rsp, 0), xmm4); - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_1_0_2); - - 
bind(L_2TAG_PACKET_0_0_2); - jcc(Assembler::greater, L_2TAG_PACKET_2_0_2); - pextrw(eax, xmm0, 3); - andl(eax, 32767); - pinsrw(xmm0, eax, 3); - movsd(xmm1, Address(tmp, 2192)); - subsd(xmm1, xmm0); - movsd(Address(rsp, 0), xmm1); - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_1_0_2); - - bind(L_2TAG_PACKET_2_0_2); - movl(eax, Address(rsp, 132)); - andl(eax, 2146435072); - cmpl(eax, 2146435072); - jcc(Assembler::equal, L_2TAG_PACKET_3_0_2); - subl(rsp, 32); - movsd(Address(rsp, 0), xmm0); - lea(eax, Address(rsp, 40)); - movl(Address(rsp, 8), eax); - movl(eax, 1); - movl(Address(rsp, 12), eax); - call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_sin_cos_huge()))); - addl(rsp, 32); - fld_d(Address(rsp, 8)); - jmp(L_2TAG_PACKET_1_0_2); - - bind(L_2TAG_PACKET_3_0_2); - fld_d(Address(rsp, 128)); - fmul_d(Address(tmp, 2208)); - - bind(L_2TAG_PACKET_1_0_2); - movl(tmp, Address(rsp, 56)); -} diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_32_exp.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_32_exp.cpp deleted file mode 100644 index a490510b959d3..0000000000000 --- a/src/hotspot/cpu/x86/macroAssembler_x86_32_exp.cpp +++ /dev/null @@ -1,330 +0,0 @@ -/* -* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved. -* Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. -* Intel Math Library (LIBM) Source Code -* -* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -* -* This code is free software; you can redistribute it and/or modify it -* under the terms of the GNU General Public License version 2 only, as -* published by the Free Software Foundation. -* -* This code is distributed in the hope that it will be useful, but WITHOUT -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -* version 2 for more details (a copy is included in the LICENSE file that -* accompanied this code). 
-* -* You should have received a copy of the GNU General Public License version -* 2 along with this work; if not, write to the Free Software Foundation, -* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -* -* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -* or visit www.oracle.com if you need additional information or have any -* questions. -* -*/ - -#include "precompiled.hpp" -#include "asm/assembler.hpp" -#include "asm/assembler.inline.hpp" -#include "macroAssembler_x86.hpp" -#include "runtime/stubRoutines.hpp" -#include "utilities/globalDefinitions.hpp" - -/******************************************************************************/ -// ALGORITHM DESCRIPTION - EXP() -// --------------------- -// -// Description: -// Let K = 64 (table size). -// x x/log(2) n -// e = 2 = 2 * T[j] * (1 + P(y)) -// where -// x = m*log(2)/K + y, y in [-log(2)/K..log(2)/K] -// m = n*K + j, m,n,j - signed integer, j in [-K/2..K/2] -// j/K -// values of 2 are tabulated as T[j] = T_hi[j] ( 1 + T_lo[j]). -// -// P(y) is a minimax polynomial approximation of exp(x)-1 -// on small interval [-log(2)/K..log(2)/K] (were calculated by Maple V). -// -// To avoid problems with arithmetic overflow and underflow, -// n n1 n2 -// value of 2 is safely computed as 2 * 2 where n1 in [-BIAS/2..BIAS/2] -// where BIAS is a value of exponent bias. 
-// -// Special cases: -// exp(NaN) = NaN -// exp(+INF) = +INF -// exp(-INF) = 0 -// exp(x) = 1 for subnormals -// for finite argument, only exp(0)=1 is exact -// For IEEE double -// if x > 709.782712893383973096 then exp(x) overflow -// if x < -745.133219101941108420 then exp(x) underflow -// -/******************************************************************************/ - -// The 32 bit code is at most SSE2 compliant - -ATTRIBUTE_ALIGNED(16) static const juint _static_const_table[] = -{ - 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL, 0xffffffc0UL, - 0x00000000UL, 0xffffffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL, - 0x0000ffc0UL, 0x00000000UL, 0x00000000UL, 0x43380000UL, 0x00000000UL, - 0x43380000UL, 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 0x40571547UL, - 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL, 0xbc9e3b3aUL, - 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xfffffffeUL, 0x3fdfffffUL, - 0xfffffffeUL, 0x3fdfffffUL, 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, - 0x3fa55555UL, 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x0e03754dUL, - 0x3cad7bbfUL, 0x3e778060UL, 0x00002c9aUL, 0x3567f613UL, 0x3c8cd252UL, - 0xd3158574UL, 0x000059b0UL, 0x61e6c861UL, 0x3c60f74eUL, 0x18759bc8UL, - 0x00008745UL, 0x5d837b6cUL, 0x3c979aa6UL, 0x6cf9890fUL, 0x0000b558UL, - 0x702f9cd1UL, 0x3c3ebe3dUL, 0x32d3d1a2UL, 0x0000e3ecUL, 0x1e63bcd8UL, - 0x3ca3516eUL, 0xd0125b50UL, 0x00011301UL, 0x26f0387bUL, 0x3ca4c554UL, - 0xaea92ddfUL, 0x0001429aUL, 0x62523fb6UL, 0x3ca95153UL, 0x3c7d517aUL, - 0x000172b8UL, 0x3f1353bfUL, 0x3c8b898cUL, 0xeb6fcb75UL, 0x0001a35bUL, - 0x3e3a2f5fUL, 0x3c9aecf7UL, 0x3168b9aaUL, 0x0001d487UL, 0x44a6c38dUL, - 0x3c8a6f41UL, 0x88628cd6UL, 0x0002063bUL, 0xe3a8a894UL, 0x3c968efdUL, - 0x6e756238UL, 0x0002387aUL, 0x981fe7f2UL, 0x3c80472bUL, 0x65e27cddUL, - 0x00026b45UL, 0x6d09ab31UL, 0x3c82f7e1UL, 0xf51fdee1UL, 0x00029e9dUL, - 0x720c0ab3UL, 0x3c8b3782UL, 0xa6e4030bUL, 0x0002d285UL, 
0x4db0abb6UL, - 0x3c834d75UL, 0x0a31b715UL, 0x000306feUL, 0x5dd3f84aUL, 0x3c8fdd39UL, - 0xb26416ffUL, 0x00033c08UL, 0xcc187d29UL, 0x3ca12f8cUL, 0x373aa9caUL, - 0x000371a7UL, 0x738b5e8bUL, 0x3ca7d229UL, 0x34e59ff6UL, 0x0003a7dbUL, - 0xa72a4c6dUL, 0x3c859f48UL, 0x4c123422UL, 0x0003dea6UL, 0x259d9205UL, - 0x3ca8b846UL, 0x21f72e29UL, 0x0004160aUL, 0x60c2ac12UL, 0x3c4363edUL, - 0x6061892dUL, 0x00044e08UL, 0xdaa10379UL, 0x3c6ecce1UL, 0xb5c13cd0UL, - 0x000486a2UL, 0xbb7aafb0UL, 0x3c7690ceUL, 0xd5362a27UL, 0x0004bfdaUL, - 0x9b282a09UL, 0x3ca083ccUL, 0x769d2ca6UL, 0x0004f9b2UL, 0xc1aae707UL, - 0x3ca509b0UL, 0x569d4f81UL, 0x0005342bUL, 0x18fdd78eUL, 0x3c933505UL, - 0x36b527daUL, 0x00056f47UL, 0xe21c5409UL, 0x3c9063e1UL, 0xdd485429UL, - 0x0005ab07UL, 0x2b64c035UL, 0x3c9432e6UL, 0x15ad2148UL, 0x0005e76fUL, - 0x99f08c0aUL, 0x3ca01284UL, 0xb03a5584UL, 0x0006247eUL, 0x0073dc06UL, - 0x3c99f087UL, 0x82552224UL, 0x00066238UL, 0x0da05571UL, 0x3c998d4dUL, - 0x667f3bccUL, 0x0006a09eUL, 0x86ce4786UL, 0x3ca52bb9UL, 0x3c651a2eUL, - 0x0006dfb2UL, 0x206f0dabUL, 0x3ca32092UL, 0xe8ec5f73UL, 0x00071f75UL, - 0x8e17a7a6UL, 0x3ca06122UL, 0x564267c8UL, 0x00075febUL, 0x461e9f86UL, - 0x3ca244acUL, 0x73eb0186UL, 0x0007a114UL, 0xabd66c55UL, 0x3c65ebe1UL, - 0x36cf4e62UL, 0x0007e2f3UL, 0xbbff67d0UL, 0x3c96fe9fUL, 0x994cce12UL, - 0x00082589UL, 0x14c801dfUL, 0x3c951f14UL, 0x9b4492ecUL, 0x000868d9UL, - 0xc1f0eab4UL, 0x3c8db72fUL, 0x422aa0dbUL, 0x0008ace5UL, 0x59f35f44UL, - 0x3c7bf683UL, 0x99157736UL, 0x0008f1aeUL, 0x9c06283cUL, 0x3ca360baUL, - 0xb0cdc5e4UL, 0x00093737UL, 0x20f962aaUL, 0x3c95e8d1UL, 0x9fde4e4fUL, - 0x00097d82UL, 0x2b91ce27UL, 0x3c71affcUL, 0x82a3f090UL, 0x0009c491UL, - 0x589a2ebdUL, 0x3c9b6d34UL, 0x7b5de564UL, 0x000a0c66UL, 0x9ab89880UL, - 0x3c95277cUL, 0xb23e255cUL, 0x000a5503UL, 0x6e735ab3UL, 0x3c846984UL, - 0x5579fdbfUL, 0x000a9e6bUL, 0x92cb3387UL, 0x3c8c1a77UL, 0x995ad3adUL, - 0x000ae89fUL, 0xdc2d1d96UL, 0x3ca22466UL, 0xb84f15faUL, 0x000b33a2UL, - 0xb19505aeUL, 0x3ca1112eUL, 
0xf2fb5e46UL, 0x000b7f76UL, 0x0a5fddcdUL, - 0x3c74ffd7UL, 0x904bc1d2UL, 0x000bcc1eUL, 0x30af0cb3UL, 0x3c736eaeUL, - 0xdd85529cUL, 0x000c199bUL, 0xd10959acUL, 0x3c84e08fUL, 0x2e57d14bUL, - 0x000c67f1UL, 0x6c921968UL, 0x3c676b2cUL, 0xdcef9069UL, 0x000cb720UL, - 0x36df99b3UL, 0x3c937009UL, 0x4a07897bUL, 0x000d072dUL, 0xa63d07a7UL, - 0x3c74a385UL, 0xdcfba487UL, 0x000d5818UL, 0xd5c192acUL, 0x3c8e5a50UL, - 0x03db3285UL, 0x000da9e6UL, 0x1c4a9792UL, 0x3c98bb73UL, 0x337b9b5eUL, - 0x000dfc97UL, 0x603a88d3UL, 0x3c74b604UL, 0xe78b3ff6UL, 0x000e502eUL, - 0x92094926UL, 0x3c916f27UL, 0xa2a490d9UL, 0x000ea4afUL, 0x41aa2008UL, - 0x3c8ec3bcUL, 0xee615a27UL, 0x000efa1bUL, 0x31d185eeUL, 0x3c8a64a9UL, - 0x5b6e4540UL, 0x000f5076UL, 0x4d91cd9dUL, 0x3c77893bUL, 0x819e90d8UL, - 0x000fa7c1UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x7ff00000UL, - 0x00000000UL, 0x00000000UL, 0xffffffffUL, 0x7fefffffUL, 0x00000000UL, - 0x00100000UL -}; - -//registers, -// input: (rbp + 8) -// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 -// rax, rdx, rcx, rbx (tmp) - -// Code generated by Intel C compiler for LIBM library - -void MacroAssembler::fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, - XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, - Register eax, Register ecx, Register edx, Register tmp) { - Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; - Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; - Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2; - Label L_2TAG_PACKET_12_0_2; - - assert_different_registers(tmp, eax, ecx, edx); - address static_const_table = (address)_static_const_table; - - subl(rsp, 120); - movl(Address(rsp, 64), tmp); - lea(tmp, ExternalAddress(static_const_table)); - movsd(xmm0, Address(rsp, 128)); - unpcklpd(xmm0, xmm0); - movdqu(xmm1, Address(tmp, 64)); // 0x652b82feUL, 0x40571547UL, 0x652b82feUL, 
0x40571547UL - movdqu(xmm6, Address(tmp, 48)); // 0x00000000UL, 0x43380000UL, 0x00000000UL, 0x43380000UL - movdqu(xmm2, Address(tmp, 80)); // 0xfefa0000UL, 0x3f862e42UL, 0xfefa0000UL, 0x3f862e42UL - movdqu(xmm3, Address(tmp, 96)); // 0xbc9e3b3aUL, 0x3d1cf79aUL, 0xbc9e3b3aUL, 0x3d1cf79aUL - pextrw(eax, xmm0, 3); - andl(eax, 32767); - movl(edx, 16527); - subl(edx, eax); - subl(eax, 15504); - orl(edx, eax); - cmpl(edx, INT_MIN); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); - mulpd(xmm1, xmm0); - addpd(xmm1, xmm6); - movapd(xmm7, xmm1); - subpd(xmm1, xmm6); - mulpd(xmm2, xmm1); - movdqu(xmm4, Address(tmp, 128)); // 0xe3289860UL, 0x3f56c15cUL, 0x555b9e25UL, 0x3fa55555UL - mulpd(xmm3, xmm1); - movdqu(xmm5, Address(tmp, 144)); // 0xc090cf0fUL, 0x3f811115UL, 0x55548ba1UL, 0x3fc55555UL - subpd(xmm0, xmm2); - movdl(eax, xmm7); - movl(ecx, eax); - andl(ecx, 63); - shll(ecx, 4); - sarl(eax, 6); - movl(edx, eax); - movdqu(xmm6, Address(tmp, 16)); // 0xffffffc0UL, 0x00000000UL, 0xffffffc0UL, 0x00000000UL - pand(xmm7, xmm6); - movdqu(xmm6, Address(tmp, 32)); // 0x0000ffc0UL, 0x00000000UL, 0x0000ffc0UL, 0x00000000UL - paddq(xmm7, xmm6); - psllq(xmm7, 46); - subpd(xmm0, xmm3); - movdqu(xmm2, Address(tmp, ecx, Address::times_1, 160)); - mulpd(xmm4, xmm0); - movapd(xmm6, xmm0); - movapd(xmm1, xmm0); - mulpd(xmm6, xmm6); - mulpd(xmm0, xmm6); - addpd(xmm5, xmm4); - mulsd(xmm0, xmm6); - mulpd(xmm6, Address(tmp, 112)); // 0xfffffffeUL, 0x3fdfffffUL, 0xfffffffeUL, 0x3fdfffffUL - addsd(xmm1, xmm2); - unpckhpd(xmm2, xmm2); - mulpd(xmm0, xmm5); - addsd(xmm1, xmm0); - por(xmm2, xmm7); - unpckhpd(xmm0, xmm0); - addsd(xmm0, xmm1); - addsd(xmm0, xmm6); - addl(edx, 894); - cmpl(edx, 1916); - jcc(Assembler::above, L_2TAG_PACKET_1_0_2); - mulsd(xmm0, xmm2); - addsd(xmm0, xmm2); - jmp(L_2TAG_PACKET_2_0_2); - - bind(L_2TAG_PACKET_1_0_2); - fnstcw(Address(rsp, 24)); - movzwl(edx, Address(rsp, 24)); - orl(edx, 768); - movw(Address(rsp, 28), edx); - fldcw(Address(rsp, 28)); - movl(edx, eax); - 
sarl(eax, 1); - subl(edx, eax); - movdqu(xmm6, Address(tmp, 0)); // 0x00000000UL, 0xfff00000UL, 0x00000000UL, 0xfff00000UL - pandn(xmm6, xmm2); - addl(eax, 1023); - movdl(xmm3, eax); - psllq(xmm3, 52); - por(xmm6, xmm3); - addl(edx, 1023); - movdl(xmm4, edx); - psllq(xmm4, 52); - movsd(Address(rsp, 8), xmm0); - fld_d(Address(rsp, 8)); - movsd(Address(rsp, 16), xmm6); - fld_d(Address(rsp, 16)); - fmula(1); - faddp(1); - movsd(Address(rsp, 8), xmm4); - fld_d(Address(rsp, 8)); - fmulp(1); - fstp_d(Address(rsp, 8)); - movsd(xmm0, Address(rsp, 8)); - fldcw(Address(rsp, 24)); - pextrw(ecx, xmm0, 3); - andl(ecx, 32752); - cmpl(ecx, 32752); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2); - cmpl(ecx, 0); - jcc(Assembler::equal, L_2TAG_PACKET_4_0_2); - jmp(L_2TAG_PACKET_2_0_2); - cmpl(ecx, INT_MIN); - jcc(Assembler::below, L_2TAG_PACKET_3_0_2); - cmpl(ecx, -1064950997); - jcc(Assembler::below, L_2TAG_PACKET_2_0_2); - jcc(Assembler::above, L_2TAG_PACKET_4_0_2); - movl(edx, Address(rsp, 128)); - cmpl(edx, -17155601); - jcc(Assembler::below, L_2TAG_PACKET_2_0_2); - jmp(L_2TAG_PACKET_4_0_2); - - bind(L_2TAG_PACKET_3_0_2); - movl(edx, 14); - jmp(L_2TAG_PACKET_5_0_2); - - bind(L_2TAG_PACKET_4_0_2); - movl(edx, 15); - - bind(L_2TAG_PACKET_5_0_2); - movsd(Address(rsp, 0), xmm0); - movsd(xmm0, Address(rsp, 128)); - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_6_0_2); - - bind(L_2TAG_PACKET_7_0_2); - cmpl(eax, 2146435072); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_8_0_2); - movl(eax, Address(rsp, 132)); - cmpl(eax, INT_MIN); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_9_0_2); - movsd(xmm0, Address(tmp, 1208)); // 0xffffffffUL, 0x7fefffffUL - mulsd(xmm0, xmm0); - movl(edx, 14); - jmp(L_2TAG_PACKET_5_0_2); - - bind(L_2TAG_PACKET_9_0_2); - movsd(xmm0, Address(tmp, 1216)); - mulsd(xmm0, xmm0); - movl(edx, 15); - jmp(L_2TAG_PACKET_5_0_2); - - bind(L_2TAG_PACKET_8_0_2); - movl(edx, Address(rsp, 128)); - cmpl(eax, 2146435072); - jcc(Assembler::above, L_2TAG_PACKET_10_0_2); - cmpl(edx, 0); 
- jcc(Assembler::notEqual, L_2TAG_PACKET_10_0_2); - movl(eax, Address(rsp, 132)); - cmpl(eax, 2146435072); - jcc(Assembler::notEqual, L_2TAG_PACKET_11_0_2); - movsd(xmm0, Address(tmp, 1192)); // 0x00000000UL, 0x7ff00000UL - jmp(L_2TAG_PACKET_2_0_2); - - bind(L_2TAG_PACKET_11_0_2); - movsd(xmm0, Address(tmp, 1200)); // 0x00000000UL, 0x00000000UL - jmp(L_2TAG_PACKET_2_0_2); - - bind(L_2TAG_PACKET_10_0_2); - movsd(xmm0, Address(rsp, 128)); - addsd(xmm0, xmm0); - jmp(L_2TAG_PACKET_2_0_2); - - bind(L_2TAG_PACKET_0_0_2); - movl(eax, Address(rsp, 132)); - andl(eax, 2147483647); - cmpl(eax, 1083179008); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_7_0_2); - movsd(xmm0, Address(rsp, 128)); - addsd(xmm0, Address(tmp, 1184)); // 0x00000000UL, 0x3ff00000UL - jmp(L_2TAG_PACKET_2_0_2); - - bind(L_2TAG_PACKET_2_0_2); - movsd(Address(rsp, 48), xmm0); - fld_d(Address(rsp, 48)); - - bind(L_2TAG_PACKET_6_0_2); - movl(tmp, Address(rsp, 64)); -} diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_32_log.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_32_log.cpp deleted file mode 100644 index 515717e2179ca..0000000000000 --- a/src/hotspot/cpu/x86/macroAssembler_x86_32_log.cpp +++ /dev/null @@ -1,345 +0,0 @@ -/* -* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved. -* Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. -* Intel Math Library (LIBM) Source Code -* -* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -* -* This code is free software; you can redistribute it and/or modify it -* under the terms of the GNU General Public License version 2 only, as -* published by the Free Software Foundation. -* -* This code is distributed in the hope that it will be useful, but WITHOUT -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -* version 2 for more details (a copy is included in the LICENSE file that -* accompanied this code). 
-* -* You should have received a copy of the GNU General Public License version -* 2 along with this work; if not, write to the Free Software Foundation, -* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -* -* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -* or visit www.oracle.com if you need additional information or have any -* questions. -* -*/ - -#include "precompiled.hpp" -#include "asm/assembler.hpp" -#include "asm/assembler.inline.hpp" -#include "macroAssembler_x86.hpp" -#include "utilities/globalDefinitions.hpp" - -/******************************************************************************/ -// ALGORITHM DESCRIPTION - LOG() -// --------------------- -// -// x=2^k * mx, mx in [1,2) -// -// Get B~1/mx based on the output of rcpss instruction (B0) -// B = int((B0*2^7+0.5))/2^7 -// -// Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts) -// -// Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6) and -// p(r) is a degree 7 polynomial -// -log(B) read from data table (high, low parts) -// Result is formed from high and low parts -// -// Special cases: -// log(NaN) = quiet NaN, and raise invalid exception -// log(+INF) = that INF -// log(0) = -INF with divide-by-zero exception raised -// log(1) = +0 -// log(x) = NaN with invalid exception raised if x < -0, including -INF -// -/******************************************************************************/ - -// The 32 bit code is at most SSE2 compliant -// -ATTRIBUTE_ALIGNED(16) static const juint _static_const_table_log[] = -{ - 0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL, - 0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL, - 0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL, - 0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL, - 0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL, - 0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 
0x27e8a400UL, 0x3fe47a15UL, - 0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL, - 0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL, - 0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL, - 0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL, - 0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL, - 0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL, - 0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL, - 0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL, - 0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL, - 0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL, - 0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL, - 0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL, - 0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL, - 0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL, - 0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL, - 0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL, - 0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL, - 0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL, - 0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL, - 0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL, - 0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL, - 0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL, - 0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL, - 0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL, - 0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL, - 0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL, - 0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL, - 0x3fda93edUL, 
0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL, - 0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL, - 0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL, - 0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL, - 0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL, - 0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL, - 0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL, - 0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL, - 0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL, - 0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL, - 0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL, - 0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL, - 0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL, - 0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL, - 0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL, - 0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL, - 0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL, - 0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL, - 0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL, - 0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL, - 0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL, - 0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL, - 0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL, - 0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL, - 0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL, - 0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL, - 0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL, - 0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL, 
- 0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL, - 0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL, - 0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL, - 0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL, - 0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL, - 0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL, - 0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL, - 0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL, - 0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL, - 0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL, - 0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL, - 0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL, - 0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL, - 0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL, - 0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL, - 0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL, - 0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL, - 0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL, - 0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL, - 0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL, - 0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL, - 0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL, - 0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL, - 0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL, - 0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL, - 0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL, - 0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL, - 0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 
0xbd20b6e8UL, 0xd1466000UL, - 0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL, - 0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL, - 0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL, - 0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL, - 0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL, - 0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL, - 0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL, - 0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL, - 0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL, - 0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL, - 0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL, - 0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL, - 0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL, - 0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x80000000UL, 0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL, - 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL, - 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL, - 0x00000000UL, 0xbfe00000UL, 0x00000000UL, 0xffffe000UL, 0x00000000UL, - 0xffffe000UL -}; - -//registers, -// input: xmm0 -// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 -// rax, rdx, rcx, rbx (tmp) -void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, - XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, - Register eax, Register ecx, Register edx, Register tmp) { - Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; - Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; - Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2; - Label L_2TAG_PACKET_10_0_2; - - assert_different_registers(tmp, eax, 
ecx, edx); - address static_const_table = (address)_static_const_table_log; - - subl(rsp, 104); - movl(Address(rsp, 40), tmp); - lea(tmp, ExternalAddress(static_const_table)); - xorpd(xmm2, xmm2); - movl(eax, 16368); - pinsrw(xmm2, eax, 3); - xorpd(xmm3, xmm3); - movl(edx, 30704); - pinsrw(xmm3, edx, 3); - movsd(xmm0, Address(rsp, 112)); - movapd(xmm1, xmm0); - movl(ecx, 32768); - movdl(xmm4, ecx); - movsd(xmm5, Address(tmp, 2128)); // 0x00000000UL, 0xffffe000UL - pextrw(eax, xmm0, 3); - por(xmm0, xmm2); - psllq(xmm0, 5); - movl(ecx, 16352); - psrlq(xmm0, 34); - rcpss(xmm0, xmm0); - psllq(xmm1, 12); - pshufd(xmm6, xmm5, 228); - psrlq(xmm1, 12); - subl(eax, 16); - cmpl(eax, 32736); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); - - bind(L_2TAG_PACKET_1_0_2); - paddd(xmm0, xmm4); - por(xmm1, xmm3); - movdl(edx, xmm0); - psllq(xmm0, 29); - pand(xmm5, xmm1); - pand(xmm0, xmm6); - subsd(xmm1, xmm5); - mulpd(xmm5, xmm0); - andl(eax, 32752); - subl(eax, ecx); - cvtsi2sdl(xmm7, eax); - mulsd(xmm1, xmm0); - movsd(xmm6, Address(tmp, 2064)); // 0xfefa3800UL, 0x3fa62e42UL - movdqu(xmm3, Address(tmp, 2080)); // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL - subsd(xmm5, xmm2); - andl(edx, 16711680); - shrl(edx, 12); - movdqu(xmm0, Address(tmp, edx)); - movdqu(xmm4, Address(tmp, 2096)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL - addsd(xmm1, xmm5); - movdqu(xmm2, Address(tmp, 2112)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL - mulsd(xmm6, xmm7); - pshufd(xmm5, xmm1, 68); - mulsd(xmm7, Address(tmp, 2072)); // 0x93c76730UL, 0x3ceef357UL, 0x92492492UL, 0x3fc24924UL - mulsd(xmm3, xmm1); - addsd(xmm0, xmm6); - mulpd(xmm4, xmm5); - mulpd(xmm5, xmm5); - pshufd(xmm6, xmm0, 228); - addsd(xmm0, xmm1); - addpd(xmm4, xmm2); - mulpd(xmm3, xmm5); - subsd(xmm6, xmm0); - mulsd(xmm4, xmm1); - pshufd(xmm2, xmm0, 238); - addsd(xmm1, xmm6); - mulsd(xmm5, xmm5); - addsd(xmm7, xmm2); - addpd(xmm4, xmm3); - addsd(xmm1, xmm7); - mulpd(xmm4, xmm5); - 
addsd(xmm1, xmm4); - pshufd(xmm5, xmm4, 238); - addsd(xmm1, xmm5); - addsd(xmm0, xmm1); - jmp(L_2TAG_PACKET_2_0_2); - - bind(L_2TAG_PACKET_0_0_2); - movsd(xmm0, Address(rsp, 112)); - movdqu(xmm1, xmm0); - addl(eax, 16); - cmpl(eax, 32768); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2); - cmpl(eax, 16); - jcc(Assembler::below, L_2TAG_PACKET_4_0_2); - - bind(L_2TAG_PACKET_5_0_2); - addsd(xmm0, xmm0); - jmp(L_2TAG_PACKET_2_0_2); - - bind(L_2TAG_PACKET_6_0_2); - jcc(Assembler::above, L_2TAG_PACKET_5_0_2); - cmpl(edx, 0); - jcc(Assembler::above, L_2TAG_PACKET_5_0_2); - jmp(L_2TAG_PACKET_7_0_2); - - bind(L_2TAG_PACKET_3_0_2); - movdl(edx, xmm1); - psrlq(xmm1, 32); - movdl(ecx, xmm1); - addl(ecx, ecx); - cmpl(ecx, -2097152); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2); - orl(edx, ecx); - cmpl(edx, 0); - jcc(Assembler::equal, L_2TAG_PACKET_8_0_2); - - bind(L_2TAG_PACKET_7_0_2); - xorpd(xmm1, xmm1); - xorpd(xmm0, xmm0); - movl(eax, 32752); - pinsrw(xmm1, eax, 3); - movl(edx, 3); - mulsd(xmm0, xmm1); - - bind(L_2TAG_PACKET_9_0_2); - movsd(Address(rsp, 0), xmm0); - movsd(xmm0, Address(rsp, 112)); - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_10_0_2); - - bind(L_2TAG_PACKET_8_0_2); - xorpd(xmm1, xmm1); - xorpd(xmm0, xmm0); - movl(eax, 49136); - pinsrw(xmm0, eax, 3); - divsd(xmm0, xmm1); - movl(edx, 2); - jmp(L_2TAG_PACKET_9_0_2); - - bind(L_2TAG_PACKET_4_0_2); - movdl(edx, xmm1); - psrlq(xmm1, 32); - movdl(ecx, xmm1); - orl(edx, ecx); - cmpl(edx, 0); - jcc(Assembler::equal, L_2TAG_PACKET_8_0_2); - xorpd(xmm1, xmm1); - movl(eax, 18416); - pinsrw(xmm1, eax, 3); - mulsd(xmm0, xmm1); - movapd(xmm1, xmm0); - pextrw(eax, xmm0, 3); - por(xmm0, xmm2); - psllq(xmm0, 5); - movl(ecx, 18416); - psrlq(xmm0, 34); - rcpss(xmm0, xmm0); - psllq(xmm1, 12); - pshufd(xmm6, xmm5, 228); - psrlq(xmm1, 12); - jmp(L_2TAG_PACKET_1_0_2); - - bind(L_2TAG_PACKET_2_0_2); - movsd(Address(rsp, 24), xmm0); - fld_d(Address(rsp, 24)); - - bind(L_2TAG_PACKET_10_0_2); - movl(tmp, Address(rsp, 40)); -} 
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_32_log10.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_32_log10.cpp deleted file mode 100644 index fa8c3b4623518..0000000000000 --- a/src/hotspot/cpu/x86/macroAssembler_x86_32_log10.cpp +++ /dev/null @@ -1,358 +0,0 @@ -/* -* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved. -* Intel Math Library (LIBM) Source Code -* -* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -* -* This code is free software; you can redistribute it and/or modify it -* under the terms of the GNU General Public License version 2 only, as -* published by the Free Software Foundation. -* -* This code is distributed in the hope that it will be useful, but WITHOUT -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -* version 2 for more details (a copy is included in the LICENSE file that -* accompanied this code). -* -* You should have received a copy of the GNU General Public License version -* 2 along with this work; if not, write to the Free Software Foundation, -* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -* -* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -* or visit www.oracle.com if you need additional information or have any -* questions. 
-* -*/ - -#include "precompiled.hpp" -#include "asm/assembler.hpp" -#include "asm/assembler.inline.hpp" -#include "macroAssembler_x86.hpp" -#include "runtime/stubRoutines.hpp" -#include "utilities/globalDefinitions.hpp" - -/******************************************************************************/ -// ALGORITHM DESCRIPTION - LOG10() -// --------------------- -// -// Let x=2^k * mx, mx in [1,2) -// -// Get B~1/mx based on the output of rcpss instruction (B0) -// B = int((B0*LH*2^7+0.5))/2^7 -// LH is a short approximation for log10(e) -// -// Reduced argument: r=B*mx-LH (computed accurately in high and low parts) -// -// Result: k*log10(2) - log(B) + p(r) -// p(r) is a degree 7 polynomial -// -log(B) read from data table (high, low parts) -// Result is formed from high and low parts -// -// Special cases: -// log10(0) = -INF with divide-by-zero exception raised -// log10(1) = +0 -// log10(x) = NaN with invalid exception raised if x < -0, including -INF -// log10(+INF) = +INF -// -/******************************************************************************/ - -// The 32 bit code is at most SSE2 compliant - -ATTRIBUTE_ALIGNED(16) static const juint _static_const_table_log10[] = -{ - 0x509f7800UL, 0x3fd34413UL, 0x1f12b358UL, 0x3d1fef31UL, 0x80333400UL, - 0x3fd32418UL, 0xc671d9d0UL, 0xbcf542bfUL, 0x51195000UL, 0x3fd30442UL, - 0x78a4b0c3UL, 0x3d18216aUL, 0x6fc79400UL, 0x3fd2e490UL, 0x80fa389dUL, - 0xbc902869UL, 0x89d04000UL, 0x3fd2c502UL, 0x75c2f564UL, 0x3d040754UL, - 0x4ddd1c00UL, 0x3fd2a598UL, 0xd219b2c3UL, 0xbcfa1d84UL, 0x6baa7c00UL, - 0x3fd28651UL, 0xfd9abec1UL, 0x3d1be6d3UL, 0x94028800UL, 0x3fd2672dUL, - 0xe289a455UL, 0xbd1ede5eUL, 0x78b86400UL, 0x3fd2482cUL, 0x6734d179UL, - 0x3d1fe79bUL, 0xcca3c800UL, 0x3fd2294dUL, 0x981a40b8UL, 0xbced34eaUL, - 0x439c5000UL, 0x3fd20a91UL, 0xcc392737UL, 0xbd1a9cc3UL, 0x92752c00UL, - 0x3fd1ebf6UL, 0x03c9afe7UL, 0x3d1e98f8UL, 0x6ef8dc00UL, 0x3fd1cd7dUL, - 0x71dae7f4UL, 0x3d08a86cUL, 0x8fe4dc00UL, 0x3fd1af25UL, 0xee9185a1UL, - 
0xbcff3412UL, 0xace59400UL, 0x3fd190eeUL, 0xc2cab353UL, 0x3cf17ed9UL, - 0x7e925000UL, 0x3fd172d8UL, 0x6952c1b2UL, 0x3cf1521cUL, 0xbe694400UL, - 0x3fd154e2UL, 0xcacb79caUL, 0xbd0bdc78UL, 0x26cbac00UL, 0x3fd1370dUL, - 0xf71f4de1UL, 0xbd01f8beUL, 0x72fa0800UL, 0x3fd11957UL, 0x55bf910bUL, - 0x3c946e2bUL, 0x5f106000UL, 0x3fd0fbc1UL, 0x39e639c1UL, 0x3d14a84bUL, - 0xa802a800UL, 0x3fd0de4aUL, 0xd3f31d5dUL, 0xbd178385UL, 0x0b992000UL, - 0x3fd0c0f3UL, 0x3843106fUL, 0xbd1f602fUL, 0x486ce800UL, 0x3fd0a3baUL, - 0x8819497cUL, 0x3cef987aUL, 0x1de49400UL, 0x3fd086a0UL, 0x1caa0467UL, - 0x3d0faec7UL, 0x4c30cc00UL, 0x3fd069a4UL, 0xa4424372UL, 0xbd1618fcUL, - 0x94490000UL, 0x3fd04cc6UL, 0x946517d2UL, 0xbd18384bUL, 0xb7e84000UL, - 0x3fd03006UL, 0xe0109c37UL, 0xbd19a6acUL, 0x798a0c00UL, 0x3fd01364UL, - 0x5121e864UL, 0xbd164cf7UL, 0x38ce8000UL, 0x3fcfedbfUL, 0x46214d1aUL, - 0xbcbbc402UL, 0xc8e62000UL, 0x3fcfb4efUL, 0xdab93203UL, 0x3d1e0176UL, - 0x2cb02800UL, 0x3fcf7c5aUL, 0x2a2ea8e4UL, 0xbcfec86aUL, 0xeeeaa000UL, - 0x3fcf43fdUL, 0xc18e49a4UL, 0x3cf110a8UL, 0x9bb6e800UL, 0x3fcf0bdaUL, - 0x923cc9c0UL, 0xbd15ce99UL, 0xc093f000UL, 0x3fced3efUL, 0x4d4b51e9UL, - 0x3d1a04c7UL, 0xec58f800UL, 0x3fce9c3cUL, 0x163cad59UL, 0x3cac8260UL, - 0x9a907000UL, 0x3fce2d7dUL, 0x3fa93646UL, 0x3ce4a1c0UL, 0x37311000UL, - 0x3fcdbf99UL, 0x32abd1fdUL, 0x3d07ea9dUL, 0x6744b800UL, 0x3fcd528cUL, - 0x4dcbdfd4UL, 0xbd1b08e2UL, 0xe36de800UL, 0x3fcce653UL, 0x0b7b7f7fUL, - 0xbd1b8f03UL, 0x77506800UL, 0x3fcc7aecUL, 0xa821c9fbUL, 0x3d13c163UL, - 0x00ff8800UL, 0x3fcc1053UL, 0x536bca76UL, 0xbd074ee5UL, 0x70719800UL, - 0x3fcba684UL, 0xd7da9b6bUL, 0xbd1fbf16UL, 0xc6f8d800UL, 0x3fcb3d7dUL, - 0xe2220bb3UL, 0x3d1a295dUL, 0x16c15800UL, 0x3fcad53cUL, 0xe724911eUL, - 0xbcf55822UL, 0x82533800UL, 0x3fca6dbcUL, 0x6d982371UL, 0x3cac567cUL, - 0x3c19e800UL, 0x3fca06fcUL, 0x84d17d80UL, 0x3d1da204UL, 0x85ef8000UL, - 0x3fc9a0f8UL, 0x54466a6aUL, 0xbd002204UL, 0xb0ac2000UL, 0x3fc93baeUL, - 0xd601fd65UL, 0x3d18840cUL, 0x1bb9b000UL, 0x3fc8d71cUL, 
0x7bf58766UL, - 0xbd14f897UL, 0x34aae800UL, 0x3fc8733eUL, 0x3af6ac24UL, 0xbd0f5c45UL, - 0x76d68000UL, 0x3fc81012UL, 0x4303e1a1UL, 0xbd1f9a80UL, 0x6af57800UL, - 0x3fc7ad96UL, 0x43fbcb46UL, 0x3cf4c33eUL, 0xa6c51000UL, 0x3fc74bc7UL, - 0x70f0eac5UL, 0xbd192e3bUL, 0xccab9800UL, 0x3fc6eaa3UL, 0xc0093dfeUL, - 0xbd0faf15UL, 0x8b60b800UL, 0x3fc68a28UL, 0xde78d5fdUL, 0xbc9ea4eeUL, - 0x9d987000UL, 0x3fc62a53UL, 0x962bea6eUL, 0xbd194084UL, 0xc9b0e800UL, - 0x3fc5cb22UL, 0x888dd999UL, 0x3d1fe201UL, 0xe1634800UL, 0x3fc56c93UL, - 0x16ada7adUL, 0x3d1b1188UL, 0xc176c000UL, 0x3fc50ea4UL, 0x4159b5b5UL, - 0xbcf09c08UL, 0x51766000UL, 0x3fc4b153UL, 0x84393d23UL, 0xbcf6a89cUL, - 0x83695000UL, 0x3fc4549dUL, 0x9f0b8bbbUL, 0x3d1c4b8cUL, 0x538d5800UL, - 0x3fc3f881UL, 0xf49df747UL, 0x3cf89b99UL, 0xc8138000UL, 0x3fc39cfcUL, - 0xd503b834UL, 0xbd13b99fUL, 0xf0df0800UL, 0x3fc3420dUL, 0xf011b386UL, - 0xbd05d8beUL, 0xe7466800UL, 0x3fc2e7b2UL, 0xf39c7bc2UL, 0xbd1bb94eUL, - 0xcdd62800UL, 0x3fc28de9UL, 0x05e6d69bUL, 0xbd10ed05UL, 0xd015d800UL, - 0x3fc234b0UL, 0xe29b6c9dUL, 0xbd1ff967UL, 0x224ea800UL, 0x3fc1dc06UL, - 0x727711fcUL, 0xbcffb30dUL, 0x01540000UL, 0x3fc183e8UL, 0x39786c5aUL, - 0x3cc23f57UL, 0xb24d9800UL, 0x3fc12c54UL, 0xc905a342UL, 0x3d003a1dUL, - 0x82835800UL, 0x3fc0d54aUL, 0x9b9920c0UL, 0x3d03b25aUL, 0xc72ac000UL, - 0x3fc07ec7UL, 0x46f26a24UL, 0x3cf0fa41UL, 0xdd35d800UL, 0x3fc028caUL, - 0x41d9d6dcUL, 0x3d034a65UL, 0x52474000UL, 0x3fbfa6a4UL, 0x44f66449UL, - 0x3d19cad3UL, 0x2da3d000UL, 0x3fbefcb8UL, 0x67832999UL, 0x3d18400fUL, - 0x32a10000UL, 0x3fbe53ceUL, 0x9c0e3b1aUL, 0xbcff62fdUL, 0x556b7000UL, - 0x3fbdabe3UL, 0x02976913UL, 0xbcf8243bUL, 0x97e88000UL, 0x3fbd04f4UL, - 0xec793797UL, 0x3d1c0578UL, 0x09647000UL, 0x3fbc5effUL, 0x05fc0565UL, - 0xbd1d799eUL, 0xc6426000UL, 0x3fbbb9ffUL, 0x4625f5edUL, 0x3d1f5723UL, - 0xf7afd000UL, 0x3fbb15f3UL, 0xdd5aae61UL, 0xbd1a7e1eUL, 0xd358b000UL, - 0x3fba72d8UL, 0x3314e4d3UL, 0x3d17bc91UL, 0x9b1f5000UL, 0x3fb9d0abUL, - 0x9a4d514bUL, 0x3cf18c9bUL, 
0x9cd4e000UL, 0x3fb92f69UL, 0x7e4496abUL, - 0x3cf1f96dUL, 0x31f4f000UL, 0x3fb88f10UL, 0xf56479e7UL, 0x3d165818UL, - 0xbf628000UL, 0x3fb7ef9cUL, 0x26bf486dUL, 0xbd1113a6UL, 0xb526b000UL, - 0x3fb7510cUL, 0x1a1c3384UL, 0x3ca9898dUL, 0x8e31e000UL, 0x3fb6b35dUL, - 0xb3875361UL, 0xbd0661acUL, 0xd01de000UL, 0x3fb6168cUL, 0x2a7cacfaUL, - 0xbd1bdf10UL, 0x0af23000UL, 0x3fb57a98UL, 0xff868816UL, 0x3cf046d0UL, - 0xd8ea0000UL, 0x3fb4df7cUL, 0x1515fbe7UL, 0xbd1fd529UL, 0xde3b2000UL, - 0x3fb44538UL, 0x6e59a132UL, 0x3d1faeeeUL, 0xc8df9000UL, 0x3fb3abc9UL, - 0xf1322361UL, 0xbd198807UL, 0x505f1000UL, 0x3fb3132dUL, 0x0888e6abUL, - 0x3d1e5380UL, 0x359bd000UL, 0x3fb27b61UL, 0xdfbcbb22UL, 0xbcfe2724UL, - 0x429ee000UL, 0x3fb1e463UL, 0x6eb4c58cUL, 0xbcfe4dd6UL, 0x4a673000UL, - 0x3fb14e31UL, 0x4ce1ac9bUL, 0x3d1ba691UL, 0x28b96000UL, 0x3fb0b8c9UL, - 0x8c7813b8UL, 0xbd0b3872UL, 0xc1f08000UL, 0x3fb02428UL, 0xc2bc8c2cUL, - 0x3cb5ea6bUL, 0x05a1a000UL, 0x3faf209cUL, 0x72e8f18eUL, 0xbce8df84UL, - 0xc0b5e000UL, 0x3fadfa6dUL, 0x9fdef436UL, 0x3d087364UL, 0xaf416000UL, - 0x3facd5c2UL, 0x1068c3a9UL, 0x3d0827e7UL, 0xdb356000UL, 0x3fabb296UL, - 0x120a34d3UL, 0x3d101a9fUL, 0x5dfea000UL, 0x3faa90e6UL, 0xdaded264UL, - 0xbd14c392UL, 0x6034c000UL, 0x3fa970adUL, 0x1c9d06a9UL, 0xbd1b705eUL, - 0x194c6000UL, 0x3fa851e8UL, 0x83996ad9UL, 0xbd0117bcUL, 0xcf4ac000UL, - 0x3fa73492UL, 0xb1a94a62UL, 0xbca5ea42UL, 0xd67b4000UL, 0x3fa618a9UL, - 0x75aed8caUL, 0xbd07119bUL, 0x9126c000UL, 0x3fa4fe29UL, 0x5291d533UL, - 0x3d12658fUL, 0x6f4d4000UL, 0x3fa3e50eUL, 0xcd2c5cd9UL, 0x3d1d5c70UL, - 0xee608000UL, 0x3fa2cd54UL, 0xd1008489UL, 0x3d1a4802UL, 0x9900e000UL, - 0x3fa1b6f9UL, 0x54fb5598UL, 0xbd16593fUL, 0x06bb6000UL, 0x3fa0a1f9UL, - 0x64ef57b4UL, 0xbd17636bUL, 0xb7940000UL, 0x3f9f1c9fUL, 0xee6a4737UL, - 0x3cb5d479UL, 0x91aa0000UL, 0x3f9cf7f5UL, 0x3a16373cUL, 0x3d087114UL, - 0x156b8000UL, 0x3f9ad5edUL, 0x836c554aUL, 0x3c6900b0UL, 0xd4764000UL, - 0x3f98b67fUL, 0xed12f17bUL, 0xbcffc974UL, 0x77dec000UL, 0x3f9699a7UL, - 
0x232ce7eaUL, 0x3d1e35bbUL, 0xbfbf4000UL, 0x3f947f5dUL, 0xd84ffa6eUL, - 0x3d0e0a49UL, 0x82c7c000UL, 0x3f92679cUL, 0x8d170e90UL, 0xbd14d9f2UL, - 0xadd20000UL, 0x3f90525dUL, 0x86d9f88eUL, 0x3cdeb986UL, 0x86f10000UL, - 0x3f8c7f36UL, 0xb9e0a517UL, 0x3ce29faaUL, 0xb75c8000UL, 0x3f885e9eUL, - 0x542568cbUL, 0xbd1f7bdbUL, 0x46b30000UL, 0x3f8442e8UL, 0xb954e7d9UL, - 0x3d1e5287UL, 0xb7e60000UL, 0x3f802c07UL, 0x22da0b17UL, 0xbd19fb27UL, - 0x6c8b0000UL, 0x3f7833e3UL, 0x821271efUL, 0xbd190f96UL, 0x29910000UL, - 0x3f701936UL, 0xbc3491a5UL, 0xbd1bcf45UL, 0x354a0000UL, 0x3f600fe3UL, - 0xc0ff520aUL, 0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL, 0x3cdfef31UL, - 0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL, 0xc0089309UL, 0x385593b1UL, - 0xc025c917UL, 0xdc963467UL, 0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL, - 0xdc77b115UL, 0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL, - 0xffffe000UL, 0x00000000UL, 0x3fdbc000UL, 0xbf2e4108UL, 0x3f5a7a6cUL -}; -//registers, -// input: xmm0 -// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 -// rax, rdx, rcx, rbx (tmp) - -void MacroAssembler::fast_log10(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, - XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, - Register eax, Register ecx, Register edx, Register tmp) { - - Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; - Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; - Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2; - - assert_different_registers(tmp, eax, ecx, edx); - - address static_const_table_log10 = (address)_static_const_table_log10; - - subl(rsp, 104); - movl(Address(rsp, 40), tmp); - lea(tmp, ExternalAddress(static_const_table_log10)); - xorpd(xmm2, xmm2); - movl(eax, 16368); - pinsrw(xmm2, eax, 3); - movl(ecx, 1054736384); - movdl(xmm7, ecx); - xorpd(xmm3, xmm3); - movl(edx, 
30704); - pinsrw(xmm3, edx, 3); - movsd(xmm0, Address(rsp, 112)); - movdqu(xmm1, xmm0); - movl(edx, 32768); - movdl(xmm4, edx); - movdqu(xmm5, Address(tmp, 2128)); //0x3ffc6a02UL, 0x7f9d3aa1UL, 0x4016ab9fUL, 0xdc77b115UL - pextrw(eax, xmm0, 3); - por(xmm0, xmm2); - movl(ecx, 16352); - psllq(xmm0, 5); - movsd(xmm2, Address(tmp, 2144)); //0xbff27af2UL, 0xf8000000UL, 0xffffffffUL, 0x00000000UL - psrlq(xmm0, 34); - rcpss(xmm0, xmm0); - psllq(xmm1, 12); - pshufd(xmm6, xmm5, 78); - psrlq(xmm1, 12); - subl(eax, 16); - cmpl(eax, 32736); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); - - bind(L_2TAG_PACKET_1_0_2); - mulss(xmm0, xmm7); - por(xmm1, xmm3); - andpd(xmm5, xmm1); - paddd(xmm0, xmm4); - subsd(xmm1, xmm5); - movdl(edx, xmm0); - psllq(xmm0, 29); - andpd(xmm0, xmm6); - andl(eax, 32752); - subl(eax, ecx); - cvtsi2sdl(xmm7, eax); - mulpd(xmm5, xmm0); - mulsd(xmm1, xmm0); - movsd(xmm6, Address(tmp, 2064)); //0xbd19d71cUL, 0x00000000UL, 0x00000000UL, 0x00000000UL - movdqu(xmm3, Address(tmp, 2080)); //0x00000000UL, 0x509f7800UL, 0x3f934413UL, 0x1f12b358UL - subsd(xmm5, xmm2); - andl(edx, 16711680); - shrl(edx, 12); - movdqu(xmm0, Address(tmp, edx, Address::times_1, -1504)); - movdqu(xmm4, Address(tmp, 2096)); //0x3cdfef31UL, 0xc1a5f12eUL, 0x40358874UL, 0x64d4ef0dUL - addsd(xmm1, xmm5); - movdqu(xmm2, Address(tmp, 2112)); //0xc0089309UL, 0x385593b1UL, 0xc025c917UL, 0xdc963467UL - mulsd(xmm6, xmm7); - pshufd(xmm5, xmm1, 68); - mulsd(xmm7, Address(tmp, 2072)); //0x00000000UL, 0x00000000UL, 0x00000000UL, 0x509f7800UL - mulsd(xmm3, xmm1); - addsd(xmm0, xmm6); - mulpd(xmm4, xmm5); - movsd(xmm6, Address(tmp, 2152)); //0xffffffffUL, 0x00000000UL, 0xffffe000UL, 0x00000000UL - mulpd(xmm5, xmm5); - addpd(xmm4, xmm2); - mulpd(xmm3, xmm5); - pshufd(xmm2, xmm0, 228); - addsd(xmm0, xmm1); - mulsd(xmm4, xmm1); - subsd(xmm2, xmm0); - mulsd(xmm6, xmm1); - addsd(xmm1, xmm2); - pshufd(xmm2, xmm0, 238); - mulsd(xmm5, xmm5); - addsd(xmm7, xmm2); - addsd(xmm1, xmm6); - addpd(xmm4, xmm3); - 
addsd(xmm1, xmm7); - mulpd(xmm4, xmm5); - addsd(xmm1, xmm4); - pshufd(xmm5, xmm4, 238); - addsd(xmm1, xmm5); - addsd(xmm0, xmm1); - jmp(L_2TAG_PACKET_2_0_2); - - bind(L_2TAG_PACKET_0_0_2); - movsd(xmm0, Address(rsp, 112)); //0xbcfa1d84UL, 0x6baa7c00UL, 0x3fd28651UL, 0xfd9abec1UL - movdqu(xmm1, xmm0); - addl(eax, 16); - cmpl(eax, 32768); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2); - cmpl(eax, 16); - jcc(Assembler::below, L_2TAG_PACKET_4_0_2); - - bind(L_2TAG_PACKET_5_0_2); - addsd(xmm0, xmm0); - jmp(L_2TAG_PACKET_2_0_2); - - bind(L_2TAG_PACKET_6_0_2); - jcc(Assembler::above, L_2TAG_PACKET_5_0_2); - cmpl(edx, 0); - jcc(Assembler::above, L_2TAG_PACKET_5_0_2); - jmp(L_2TAG_PACKET_7_0_2); - - bind(L_2TAG_PACKET_3_0_2); - movdl(edx, xmm1); - psrlq(xmm1, 32); - movdl(ecx, xmm1); - addl(ecx, ecx); - cmpl(ecx, -2097152); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2); - orl(edx, ecx); - cmpl(edx, 0); - jcc(Assembler::equal, L_2TAG_PACKET_8_0_2); - - bind(L_2TAG_PACKET_7_0_2); - xorpd(xmm1, xmm1); - xorpd(xmm0, xmm0); - movl(eax, 32752); - pinsrw(xmm1, eax, 3); - movl(edx, 9); - mulsd(xmm0, xmm1); - - bind(L_2TAG_PACKET_9_0_2); - movsd(Address(rsp, 0), xmm0); - movsd(xmm0, Address(rsp, 112)); //0xbcfa1d84UL, 0x6baa7c00UL, 0x3fd28651UL, 0xfd9abec1UL - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_10_0_2); - - bind(L_2TAG_PACKET_8_0_2); - xorpd(xmm1, xmm1); - xorpd(xmm0, xmm0); - movl(eax, 49136); - pinsrw(xmm0, eax, 3); - divsd(xmm0, xmm1); - movl(edx, 8); - jmp(L_2TAG_PACKET_9_0_2); - - bind(L_2TAG_PACKET_4_0_2); - movdl(edx, xmm1); - psrlq(xmm1, 32); - movdl(ecx, xmm1); - orl(edx, ecx); - cmpl(edx, 0); - jcc(Assembler::equal, L_2TAG_PACKET_8_0_2); - xorpd(xmm1, xmm1); - movl(eax, 18416); - pinsrw(xmm1, eax, 3); - mulsd(xmm0, xmm1); - xorpd(xmm2, xmm2); - movl(eax, 16368); - pinsrw(xmm2, eax, 3); - movdqu(xmm1, xmm0); - pextrw(eax, xmm0, 3); - por(xmm0, xmm2); - movl(ecx, 18416); - psllq(xmm0, 5); - movsd(xmm2, Address(tmp, 2144)); //0xbff27af2UL, 0xf8000000UL, 
0xffffffffUL, 0x00000000UL - psrlq(xmm0, 34); - rcpss(xmm0, xmm0); - psllq(xmm1, 12); - pshufd(xmm6, xmm5, 78); - psrlq(xmm1, 12); - jmp(L_2TAG_PACKET_1_0_2); - - bind(L_2TAG_PACKET_2_0_2); - movsd(Address(rsp, 24), xmm0); - fld_d(Address(rsp, 24)); - - bind(L_2TAG_PACKET_10_0_2); - movl(tmp, Address(rsp, 40)); - -} diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_32_pow.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_32_pow.cpp deleted file mode 100644 index 7afad2fcc73b2..0000000000000 --- a/src/hotspot/cpu/x86/macroAssembler_x86_32_pow.cpp +++ /dev/null @@ -1,1856 +0,0 @@ -/* -* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved. -* Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved. -* Intel Math Library (LIBM) Source Code -* -* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -* -* This code is free software; you can redistribute it and/or modify it -* under the terms of the GNU General Public License version 2 only, as -* published by the Free Software Foundation. -* -* This code is distributed in the hope that it will be useful, but WITHOUT -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -* version 2 for more details (a copy is included in the LICENSE file that -* accompanied this code). -* -* You should have received a copy of the GNU General Public License version -* 2 along with this work; if not, write to the Free Software Foundation, -* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -* -* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -* or visit www.oracle.com if you need additional information or have any -* questions. 
-* -*/ - -#include "precompiled.hpp" -#include "asm/assembler.hpp" -#include "asm/assembler.inline.hpp" -#include "macroAssembler_x86.hpp" -#include "runtime/stubRoutines.hpp" -#include "utilities/globalDefinitions.hpp" - -/******************************************************************************/ -// ALGORITHM DESCRIPTION - POW() -// --------------------- -// -// Let x=2^k * mx, mx in [1,2) -// -// log2(x) calculation: -// -// Get B~1/mx based on the output of rcpps instruction (B0) -// B = int((B0*LH*2^9+0.5))/2^9 -// LH is a short approximation for log2(e) -// -// Reduced argument, scaled by LH: -// r=B*mx-LH (computed accurately in high and low parts) -// -// log2(x) result: k - log2(B) + p(r) -// p(r) is a degree 8 polynomial -// -log2(B) read from data table (high, low parts) -// log2(x) is formed from high and low parts -// For |x| in [1-1/32, 1+1/16), a slower but more accurate computation -// based om the same table design is performed. -// -// Main path is taken if | floor(log2(|log2(|x|)|) + floor(log2|y|) | < 8, -// to filter out all potential OF/UF cases. -// exp2(y*log2(x)) is computed using an 8-bit index table and a degree 5 -// polynomial -// -// Special cases: -// pow(-0,y) = -INF and raises the divide-by-zero exception for y an odd -// integer < 0. -// pow(-0,y) = +INF and raises the divide-by-zero exception for y < 0 and -// not an odd integer. -// pow(-0,y) = -0 for y an odd integer > 0. -// pow(-0,y) = +0 for y > 0 and not an odd integer. -// pow(-1,-INF) = NaN. -// pow(+1,y) = NaN for any y, even a NaN. -// pow(x,-0) = 1 for any x, even a NaN. -// pow(x,y) = a NaN and raises the invalid exception for finite x < 0 and -// finite non-integer y. -// pow(x,-INF) = +INF for |x|<1. -// pow(x,-INF) = +0 for |x|>1. -// pow(x,+INF) = +0 for |x|<1. -// pow(x,+INF) = +INF for |x|>1. -// pow(-INF,y) = -0 for y an odd integer < 0. -// pow(-INF,y) = +0 for y < 0 and not an odd integer. -// pow(-INF,y) = -INF for y an odd integer > 0. 
-// pow(-INF,y) = +INF for y > 0 and not an odd integer. -// pow(+INF,y) = +0 for y <0. -// pow(+INF,y) = +INF for y >0. -// -/******************************************************************************/ - -// The 32 bit code is at most SSE2 compliant -ATTRIBUTE_ALIGNED(16) static const juint _static_const_table_pow[] = -{ - 0x00000000UL, 0xbfd61a00UL, 0x00000000UL, 0xbf5dabe1UL, 0xf8000000UL, - 0xffffffffUL, 0x00000000UL, 0xfffff800UL, 0x00000000UL, 0x3ff00000UL, - 0x00000000UL, 0x00000000UL, 0x20000000UL, 0x3feff00aUL, 0x96621f95UL, - 0x3e5b1856UL, 0xe0000000UL, 0x3fefe019UL, 0xe5916f9eUL, 0xbe325278UL, - 0x00000000UL, 0x3fefd02fUL, 0x859a1062UL, 0x3e595fb7UL, 0xc0000000UL, - 0x3fefc049UL, 0xb245f18fUL, 0xbe529c38UL, 0xe0000000UL, 0x3fefb069UL, - 0xad2880a7UL, 0xbe501230UL, 0x60000000UL, 0x3fefa08fUL, 0xc8e72420UL, - 0x3e597bd1UL, 0x80000000UL, 0x3fef90baUL, 0xc30c4500UL, 0xbe5d6c75UL, - 0xe0000000UL, 0x3fef80eaUL, 0x02c63f43UL, 0x3e2e1318UL, 0xc0000000UL, - 0x3fef7120UL, 0xb3d4ccccUL, 0xbe44c52aUL, 0x00000000UL, 0x3fef615cUL, - 0xdbd91397UL, 0xbe4e7d6cUL, 0xa0000000UL, 0x3fef519cUL, 0x65c5cd68UL, - 0xbe522dc8UL, 0xa0000000UL, 0x3fef41e2UL, 0x46d1306cUL, 0xbe5a840eUL, - 0xe0000000UL, 0x3fef322dUL, 0xd2980e94UL, 0x3e5071afUL, 0xa0000000UL, - 0x3fef227eUL, 0x773abadeUL, 0xbe5891e5UL, 0xa0000000UL, 0x3fef12d4UL, - 0xdc6bf46bUL, 0xbe5cccbeUL, 0xe0000000UL, 0x3fef032fUL, 0xbc7247faUL, - 0xbe2bab83UL, 0x80000000UL, 0x3feef390UL, 0xbcaa1e46UL, 0xbe53bb3bUL, - 0x60000000UL, 0x3feee3f6UL, 0x5f6c682dUL, 0xbe54c619UL, 0x80000000UL, - 0x3feed461UL, 0x5141e368UL, 0xbe4b6d86UL, 0xe0000000UL, 0x3feec4d1UL, - 0xec678f76UL, 0xbe369af6UL, 0x80000000UL, 0x3feeb547UL, 0x41301f55UL, - 0xbe2d4312UL, 0x60000000UL, 0x3feea5c2UL, 0x676da6bdUL, 0xbe4d8dd0UL, - 0x60000000UL, 0x3fee9642UL, 0x57a891c4UL, 0x3e51f991UL, 0xa0000000UL, - 0x3fee86c7UL, 0xe4eb491eUL, 0x3e579bf9UL, 0x20000000UL, 0x3fee7752UL, - 0xfddc4a2cUL, 0xbe3356e6UL, 0xc0000000UL, 0x3fee67e1UL, 0xd75b5bf1UL, - 0xbe449531UL, 
0x80000000UL, 0x3fee5876UL, 0xbd423b8eUL, 0x3df54fe4UL, - 0x60000000UL, 0x3fee4910UL, 0x330e51b9UL, 0x3e54289cUL, 0x80000000UL, - 0x3fee39afUL, 0x8651a95fUL, 0xbe55aad6UL, 0xa0000000UL, 0x3fee2a53UL, - 0x5e98c708UL, 0xbe2fc4a9UL, 0xe0000000UL, 0x3fee1afcUL, 0x0989328dUL, - 0x3e23958cUL, 0x40000000UL, 0x3fee0babUL, 0xee642abdUL, 0xbe425dd8UL, - 0xa0000000UL, 0x3fedfc5eUL, 0xc394d236UL, 0x3e526362UL, 0x20000000UL, - 0x3feded17UL, 0xe104aa8eUL, 0x3e4ce247UL, 0xc0000000UL, 0x3fedddd4UL, - 0x265a9be4UL, 0xbe5bb77aUL, 0x40000000UL, 0x3fedce97UL, 0x0ecac52fUL, - 0x3e4a7cb1UL, 0xe0000000UL, 0x3fedbf5eUL, 0x124cb3b8UL, 0x3e257024UL, - 0x80000000UL, 0x3fedb02bUL, 0xe6d4febeUL, 0xbe2033eeUL, 0x20000000UL, - 0x3feda0fdUL, 0x39cca00eUL, 0xbe3ddabcUL, 0xc0000000UL, 0x3fed91d3UL, - 0xef8a552aUL, 0xbe543390UL, 0x40000000UL, 0x3fed82afUL, 0xb8e85204UL, - 0x3e513850UL, 0xe0000000UL, 0x3fed738fUL, 0x3d59fe08UL, 0xbe5db728UL, - 0x40000000UL, 0x3fed6475UL, 0x3aa7ead1UL, 0x3e58804bUL, 0xc0000000UL, - 0x3fed555fUL, 0xf8a35ba9UL, 0xbe5298b0UL, 0x00000000UL, 0x3fed464fUL, - 0x9a88dd15UL, 0x3e5a8cdbUL, 0x40000000UL, 0x3fed3743UL, 0xb0b0a190UL, - 0x3e598635UL, 0x80000000UL, 0x3fed283cUL, 0xe2113295UL, 0xbe5c1119UL, - 0x80000000UL, 0x3fed193aUL, 0xafbf1728UL, 0xbe492e9cUL, 0x60000000UL, - 0x3fed0a3dUL, 0xe4a4ccf3UL, 0x3e19b90eUL, 0x20000000UL, 0x3fecfb45UL, - 0xba3cbeb8UL, 0x3e406b50UL, 0xc0000000UL, 0x3fecec51UL, 0x110f7dddUL, - 0x3e0d6806UL, 0x40000000UL, 0x3fecdd63UL, 0x7dd7d508UL, 0xbe5a8943UL, - 0x80000000UL, 0x3fecce79UL, 0x9b60f271UL, 0xbe50676aUL, 0x80000000UL, - 0x3fecbf94UL, 0x0b9ad660UL, 0x3e59174fUL, 0x60000000UL, 0x3fecb0b4UL, - 0x00823d9cUL, 0x3e5bbf72UL, 0x20000000UL, 0x3feca1d9UL, 0x38a6ec89UL, - 0xbe4d38f9UL, 0x80000000UL, 0x3fec9302UL, 0x3a0b7d8eUL, 0x3e53dbfdUL, - 0xc0000000UL, 0x3fec8430UL, 0xc6826b34UL, 0xbe27c5c9UL, 0xc0000000UL, - 0x3fec7563UL, 0x0c706381UL, 0xbe593653UL, 0x60000000UL, 0x3fec669bUL, - 0x7df34ec7UL, 0x3e461ab5UL, 0xe0000000UL, 0x3fec57d7UL, 0x40e5e7e8UL, 
- 0xbe5c3daeUL, 0x00000000UL, 0x3fec4919UL, 0x5602770fUL, 0xbe55219dUL, - 0xc0000000UL, 0x3fec3a5eUL, 0xec7911ebUL, 0x3e5a5d25UL, 0x60000000UL, - 0x3fec2ba9UL, 0xb39ea225UL, 0xbe53c00bUL, 0x80000000UL, 0x3fec1cf8UL, - 0x967a212eUL, 0x3e5a8ddfUL, 0x60000000UL, 0x3fec0e4cUL, 0x580798bdUL, - 0x3e5f53abUL, 0x00000000UL, 0x3febffa5UL, 0xb8282df6UL, 0xbe46b874UL, - 0x20000000UL, 0x3febf102UL, 0xe33a6729UL, 0x3e54963fUL, 0x00000000UL, - 0x3febe264UL, 0x3b53e88aUL, 0xbe3adce1UL, 0x60000000UL, 0x3febd3caUL, - 0xc2585084UL, 0x3e5cde9fUL, 0x80000000UL, 0x3febc535UL, 0xa335c5eeUL, - 0xbe39fd9cUL, 0x20000000UL, 0x3febb6a5UL, 0x7325b04dUL, 0x3e42ba15UL, - 0x60000000UL, 0x3feba819UL, 0x1564540fUL, 0x3e3a9f35UL, 0x40000000UL, - 0x3feb9992UL, 0x83fff592UL, 0xbe5465ceUL, 0xa0000000UL, 0x3feb8b0fUL, - 0xb9da63d3UL, 0xbe4b1a0aUL, 0x80000000UL, 0x3feb7c91UL, 0x6d6f1ea4UL, - 0x3e557657UL, 0x00000000UL, 0x3feb6e18UL, 0x5e80a1bfUL, 0x3e4ddbb6UL, - 0x00000000UL, 0x3feb5fa3UL, 0x1c9eacb5UL, 0x3e592877UL, 0xa0000000UL, - 0x3feb5132UL, 0x6d40beb3UL, 0xbe51858cUL, 0xa0000000UL, 0x3feb42c6UL, - 0xd740c67bUL, 0x3e427ad2UL, 0x40000000UL, 0x3feb345fUL, 0xa3e0cceeUL, - 0xbe5c2fc4UL, 0x40000000UL, 0x3feb25fcUL, 0x8e752b50UL, 0xbe3da3c2UL, - 0xc0000000UL, 0x3feb179dUL, 0xa892e7deUL, 0x3e1fb481UL, 0xc0000000UL, - 0x3feb0943UL, 0x21ed71e9UL, 0xbe365206UL, 0x20000000UL, 0x3feafaeeUL, - 0x0e1380a3UL, 0x3e5c5b7bUL, 0x20000000UL, 0x3feaec9dUL, 0x3c3d640eUL, - 0xbe5dbbd0UL, 0x60000000UL, 0x3feade50UL, 0x8f97a715UL, 0x3e3a8ec5UL, - 0x20000000UL, 0x3fead008UL, 0x23ab2839UL, 0x3e2fe98aUL, 0x40000000UL, - 0x3feac1c4UL, 0xf4bbd50fUL, 0x3e54d8f6UL, 0xe0000000UL, 0x3feab384UL, - 0x14757c4dUL, 0xbe48774cUL, 0xc0000000UL, 0x3feaa549UL, 0x7c7b0eeaUL, - 0x3e5b51bbUL, 0x20000000UL, 0x3fea9713UL, 0xf56f7013UL, 0x3e386200UL, - 0xe0000000UL, 0x3fea88e0UL, 0xbe428ebeUL, 0xbe514af5UL, 0xe0000000UL, - 0x3fea7ab2UL, 0x8d0e4496UL, 0x3e4f9165UL, 0x60000000UL, 0x3fea6c89UL, - 0xdbacc5d5UL, 0xbe5c063bUL, 0x20000000UL, 
0x3fea5e64UL, 0x3f19d970UL, - 0xbe5a0c8cUL, 0x20000000UL, 0x3fea5043UL, 0x09ea3e6bUL, 0x3e5065dcUL, - 0x80000000UL, 0x3fea4226UL, 0x78df246cUL, 0x3e5e05f6UL, 0x40000000UL, - 0x3fea340eUL, 0x4057d4a0UL, 0x3e431b2bUL, 0x40000000UL, 0x3fea25faUL, - 0x82867bb5UL, 0x3e4b76beUL, 0xa0000000UL, 0x3fea17eaUL, 0x9436f40aUL, - 0xbe5aad39UL, 0x20000000UL, 0x3fea09dfUL, 0x4b5253b3UL, 0x3e46380bUL, - 0x00000000UL, 0x3fe9fbd8UL, 0x8fc52466UL, 0xbe386f9bUL, 0x20000000UL, - 0x3fe9edd5UL, 0x22d3f344UL, 0xbe538347UL, 0x60000000UL, 0x3fe9dfd6UL, - 0x1ac33522UL, 0x3e5dbc53UL, 0x00000000UL, 0x3fe9d1dcUL, 0xeabdff1dUL, - 0x3e40fc0cUL, 0xe0000000UL, 0x3fe9c3e5UL, 0xafd30e73UL, 0xbe585e63UL, - 0xe0000000UL, 0x3fe9b5f3UL, 0xa52f226aUL, 0xbe43e8f9UL, 0x20000000UL, - 0x3fe9a806UL, 0xecb8698dUL, 0xbe515b36UL, 0x80000000UL, 0x3fe99a1cUL, - 0xf2b4e89dUL, 0x3e48b62bUL, 0x20000000UL, 0x3fe98c37UL, 0x7c9a88fbUL, - 0x3e44414cUL, 0x00000000UL, 0x3fe97e56UL, 0xda015741UL, 0xbe5d13baUL, - 0xe0000000UL, 0x3fe97078UL, 0x5fdace06UL, 0x3e51b947UL, 0x00000000UL, - 0x3fe962a0UL, 0x956ca094UL, 0x3e518785UL, 0x40000000UL, 0x3fe954cbUL, - 0x01164c1dUL, 0x3e5d5b57UL, 0xc0000000UL, 0x3fe946faUL, 0xe63b3767UL, - 0xbe4f84e7UL, 0x40000000UL, 0x3fe9392eUL, 0xe57cc2a9UL, 0x3e34eda3UL, - 0xe0000000UL, 0x3fe92b65UL, 0x8c75b544UL, 0x3e5766a0UL, 0xc0000000UL, - 0x3fe91da1UL, 0x37d1d087UL, 0xbe5e2ab1UL, 0x80000000UL, 0x3fe90fe1UL, - 0xa953dc20UL, 0x3e5fa1f3UL, 0x80000000UL, 0x3fe90225UL, 0xdbd3f369UL, - 0x3e47d6dbUL, 0xa0000000UL, 0x3fe8f46dUL, 0x1c9be989UL, 0xbe5e2b0aUL, - 0xa0000000UL, 0x3fe8e6b9UL, 0x3c93d76aUL, 0x3e5c8618UL, 0xe0000000UL, - 0x3fe8d909UL, 0x2182fc9aUL, 0xbe41aa9eUL, 0x20000000UL, 0x3fe8cb5eUL, - 0xe6b3539dUL, 0xbe530d19UL, 0x60000000UL, 0x3fe8bdb6UL, 0x49e58cc3UL, - 0xbe3bb374UL, 0xa0000000UL, 0x3fe8b012UL, 0xa7cfeb8fUL, 0x3e56c412UL, - 0x00000000UL, 0x3fe8a273UL, 0x8d52bc19UL, 0x3e1429b8UL, 0x60000000UL, - 0x3fe894d7UL, 0x4dc32c6cUL, 0xbe48604cUL, 0xc0000000UL, 0x3fe8873fUL, - 0x0c868e56UL, 
0xbe564ee5UL, 0x00000000UL, 0x3fe879acUL, 0x56aee828UL, - 0x3e5e2fd8UL, 0x60000000UL, 0x3fe86c1cUL, 0x7ceab8ecUL, 0x3e493365UL, - 0xc0000000UL, 0x3fe85e90UL, 0x78d4dadcUL, 0xbe4f7f25UL, 0x00000000UL, - 0x3fe85109UL, 0x0ccd8280UL, 0x3e31e7a2UL, 0x40000000UL, 0x3fe84385UL, - 0x34ba4e15UL, 0x3e328077UL, 0x80000000UL, 0x3fe83605UL, 0xa670975aUL, - 0xbe53eee5UL, 0xa0000000UL, 0x3fe82889UL, 0xf61b77b2UL, 0xbe43a20aUL, - 0xa0000000UL, 0x3fe81b11UL, 0x13e6643bUL, 0x3e5e5fe5UL, 0xc0000000UL, - 0x3fe80d9dUL, 0x82cc94e8UL, 0xbe5ff1f9UL, 0xa0000000UL, 0x3fe8002dUL, - 0x8a0c9c5dUL, 0xbe42b0e7UL, 0x60000000UL, 0x3fe7f2c1UL, 0x22a16f01UL, - 0x3e5d9ea0UL, 0x20000000UL, 0x3fe7e559UL, 0xc38cd451UL, 0x3e506963UL, - 0xc0000000UL, 0x3fe7d7f4UL, 0x9902bc71UL, 0x3e4503d7UL, 0x40000000UL, - 0x3fe7ca94UL, 0xdef2a3c0UL, 0x3e3d98edUL, 0xa0000000UL, 0x3fe7bd37UL, - 0xed49abb0UL, 0x3e24c1ffUL, 0xe0000000UL, 0x3fe7afdeUL, 0xe3b0be70UL, - 0xbe40c467UL, 0x00000000UL, 0x3fe7a28aUL, 0xaf9f193cUL, 0xbe5dff6cUL, - 0xe0000000UL, 0x3fe79538UL, 0xb74cf6b6UL, 0xbe258ed0UL, 0xa0000000UL, - 0x3fe787ebUL, 0x1d9127c7UL, 0x3e345fb0UL, 0x40000000UL, 0x3fe77aa2UL, - 0x1028c21dUL, 0xbe4619bdUL, 0xa0000000UL, 0x3fe76d5cUL, 0x7cb0b5e4UL, - 0x3e40f1a2UL, 0xe0000000UL, 0x3fe7601aUL, 0x2b1bc4adUL, 0xbe32e8bbUL, - 0xe0000000UL, 0x3fe752dcUL, 0x6839f64eUL, 0x3e41f57bUL, 0xc0000000UL, - 0x3fe745a2UL, 0xc4121f7eUL, 0xbe52c40aUL, 0x60000000UL, 0x3fe7386cUL, - 0xd6852d72UL, 0xbe5c4e6bUL, 0xc0000000UL, 0x3fe72b39UL, 0x91d690f7UL, - 0xbe57f88fUL, 0xe0000000UL, 0x3fe71e0aUL, 0x627a2159UL, 0xbe4425d5UL, - 0xc0000000UL, 0x3fe710dfUL, 0x50a54033UL, 0x3e422b7eUL, 0x60000000UL, - 0x3fe703b8UL, 0x3b0b5f91UL, 0x3e5d3857UL, 0xe0000000UL, 0x3fe6f694UL, - 0x84d628a2UL, 0xbe51f090UL, 0x00000000UL, 0x3fe6e975UL, 0x306d8894UL, - 0xbe414d83UL, 0xe0000000UL, 0x3fe6dc58UL, 0x30bf24aaUL, 0xbe4650caUL, - 0x80000000UL, 0x3fe6cf40UL, 0xd4628d69UL, 0xbe5db007UL, 0xc0000000UL, - 0x3fe6c22bUL, 0xa2aae57bUL, 0xbe31d279UL, 0xc0000000UL, 0x3fe6b51aUL, 
- 0x860edf7eUL, 0xbe2d4c4aUL, 0x80000000UL, 0x3fe6a80dUL, 0xf3559341UL, - 0xbe5f7e98UL, 0xe0000000UL, 0x3fe69b03UL, 0xa885899eUL, 0xbe5c2011UL, - 0xe0000000UL, 0x3fe68dfdUL, 0x2bdc6d37UL, 0x3e224a82UL, 0xa0000000UL, - 0x3fe680fbUL, 0xc12ad1b9UL, 0xbe40cf56UL, 0x00000000UL, 0x3fe673fdUL, - 0x1bcdf659UL, 0xbdf52f2dUL, 0x00000000UL, 0x3fe66702UL, 0x5df10408UL, - 0x3e5663e0UL, 0xc0000000UL, 0x3fe65a0aUL, 0xa4070568UL, 0xbe40b12fUL, - 0x00000000UL, 0x3fe64d17UL, 0x71c54c47UL, 0x3e5f5e8bUL, 0x00000000UL, - 0x3fe64027UL, 0xbd4b7e83UL, 0x3e42ead6UL, 0xa0000000UL, 0x3fe6333aUL, - 0x61598bd2UL, 0xbe4c48d4UL, 0xc0000000UL, 0x3fe62651UL, 0x6f538d61UL, - 0x3e548401UL, 0xa0000000UL, 0x3fe6196cUL, 0x14344120UL, 0xbe529af6UL, - 0x00000000UL, 0x3fe60c8bUL, 0x5982c587UL, 0xbe3e1e4fUL, 0x00000000UL, - 0x3fe5ffadUL, 0xfe51d4eaUL, 0xbe4c897aUL, 0x80000000UL, 0x3fe5f2d2UL, - 0xfd46ebe1UL, 0x3e552e00UL, 0xa0000000UL, 0x3fe5e5fbUL, 0xa4695699UL, - 0x3e5ed471UL, 0x60000000UL, 0x3fe5d928UL, 0x80d118aeUL, 0x3e456b61UL, - 0xa0000000UL, 0x3fe5cc58UL, 0x304c330bUL, 0x3e54dc29UL, 0x80000000UL, - 0x3fe5bf8cUL, 0x0af2dedfUL, 0xbe3aa9bdUL, 0xe0000000UL, 0x3fe5b2c3UL, - 0x15fc9258UL, 0xbe479a37UL, 0xc0000000UL, 0x3fe5a5feUL, 0x9292c7eaUL, - 0x3e188650UL, 0x20000000UL, 0x3fe5993dUL, 0x33b4d380UL, 0x3e5d6d93UL, - 0x20000000UL, 0x3fe58c7fUL, 0x02fd16c7UL, 0x3e2fe961UL, 0xa0000000UL, - 0x3fe57fc4UL, 0x4a05edb6UL, 0xbe4d55b4UL, 0xa0000000UL, 0x3fe5730dUL, - 0x3d443abbUL, 0xbe5e6954UL, 0x00000000UL, 0x3fe5665aUL, 0x024acfeaUL, - 0x3e50e61bUL, 0x00000000UL, 0x3fe559aaUL, 0xcc9edd09UL, 0xbe325403UL, - 0x60000000UL, 0x3fe54cfdUL, 0x1fe26950UL, 0x3e5d500eUL, 0x60000000UL, - 0x3fe54054UL, 0x6c5ae164UL, 0xbe4a79b4UL, 0xc0000000UL, 0x3fe533aeUL, - 0x154b0287UL, 0xbe401571UL, 0xa0000000UL, 0x3fe5270cUL, 0x0673f401UL, - 0xbe56e56bUL, 0xe0000000UL, 0x3fe51a6dUL, 0x751b639cUL, 0x3e235269UL, - 0xa0000000UL, 0x3fe50dd2UL, 0x7c7b2bedUL, 0x3ddec887UL, 0xc0000000UL, - 0x3fe5013aUL, 0xafab4e17UL, 0x3e5e7575UL, 
0x60000000UL, 0x3fe4f4a6UL, - 0x2e308668UL, 0x3e59aed6UL, 0x80000000UL, 0x3fe4e815UL, 0xf33e2a76UL, - 0xbe51f184UL, 0xe0000000UL, 0x3fe4db87UL, 0x839f3e3eUL, 0x3e57db01UL, - 0xc0000000UL, 0x3fe4cefdUL, 0xa9eda7bbUL, 0x3e535e0fUL, 0x00000000UL, - 0x3fe4c277UL, 0x2a8f66a5UL, 0x3e5ce451UL, 0xc0000000UL, 0x3fe4b5f3UL, - 0x05192456UL, 0xbe4e8518UL, 0xc0000000UL, 0x3fe4a973UL, 0x4aa7cd1dUL, - 0x3e46784aUL, 0x40000000UL, 0x3fe49cf7UL, 0x8e23025eUL, 0xbe5749f2UL, - 0x00000000UL, 0x3fe4907eUL, 0x18d30215UL, 0x3e360f39UL, 0x20000000UL, - 0x3fe48408UL, 0x63dcf2f3UL, 0x3e5e00feUL, 0xc0000000UL, 0x3fe47795UL, - 0x46182d09UL, 0xbe5173d9UL, 0xa0000000UL, 0x3fe46b26UL, 0x8f0e62aaUL, - 0xbe48f281UL, 0xe0000000UL, 0x3fe45ebaUL, 0x5775c40cUL, 0xbe56aad4UL, - 0x60000000UL, 0x3fe45252UL, 0x0fe25f69UL, 0x3e48bd71UL, 0x40000000UL, - 0x3fe445edUL, 0xe9989ec5UL, 0x3e590d97UL, 0x80000000UL, 0x3fe4398bUL, - 0xb3d9ffe3UL, 0x3e479dbcUL, 0x20000000UL, 0x3fe42d2dUL, 0x388e4d2eUL, - 0xbe5eed80UL, 0xe0000000UL, 0x3fe420d1UL, 0x6f797c18UL, 0x3e554b4cUL, - 0x20000000UL, 0x3fe4147aUL, 0x31048bb4UL, 0xbe5b1112UL, 0x80000000UL, - 0x3fe40825UL, 0x2efba4f9UL, 0x3e48ebc7UL, 0x40000000UL, 0x3fe3fbd4UL, - 0x50201119UL, 0x3e40b701UL, 0x40000000UL, 0x3fe3ef86UL, 0x0a4db32cUL, - 0x3e551de8UL, 0xa0000000UL, 0x3fe3e33bUL, 0x0c9c148bUL, 0xbe50c1f6UL, - 0x20000000UL, 0x3fe3d6f4UL, 0xc9129447UL, 0x3e533fa0UL, 0x00000000UL, - 0x3fe3cab0UL, 0xaae5b5a0UL, 0xbe22b68eUL, 0x20000000UL, 0x3fe3be6fUL, - 0x02305e8aUL, 0xbe54fc08UL, 0x60000000UL, 0x3fe3b231UL, 0x7f908258UL, - 0x3e57dc05UL, 0x00000000UL, 0x3fe3a5f7UL, 0x1a09af78UL, 0x3e08038bUL, - 0xe0000000UL, 0x3fe399bfUL, 0x490643c1UL, 0xbe5dbe42UL, 0xe0000000UL, - 0x3fe38d8bUL, 0x5e8ad724UL, 0xbe3c2b72UL, 0x20000000UL, 0x3fe3815bUL, - 0xc67196b6UL, 0x3e1713cfUL, 0xa0000000UL, 0x3fe3752dUL, 0x6182e429UL, - 0xbe3ec14cUL, 0x40000000UL, 0x3fe36903UL, 0xab6eb1aeUL, 0x3e5a2cc5UL, - 0x40000000UL, 0x3fe35cdcUL, 0xfe5dc064UL, 0xbe5c5878UL, 0x40000000UL, - 0x3fe350b8UL, 
0x0ba6b9e4UL, 0x3e51619bUL, 0x80000000UL, 0x3fe34497UL, - 0x857761aaUL, 0x3e5fff53UL, 0x00000000UL, 0x3fe3387aUL, 0xf872d68cUL, - 0x3e484f4dUL, 0xa0000000UL, 0x3fe32c5fUL, 0x087e97c2UL, 0x3e52842eUL, - 0x80000000UL, 0x3fe32048UL, 0x73d6d0c0UL, 0xbe503edfUL, 0x80000000UL, - 0x3fe31434UL, 0x0c1456a1UL, 0xbe5f72adUL, 0xa0000000UL, 0x3fe30823UL, - 0x83a1a4d5UL, 0xbe5e65ccUL, 0xe0000000UL, 0x3fe2fc15UL, 0x855a7390UL, - 0xbe506438UL, 0x40000000UL, 0x3fe2f00bUL, 0xa2898287UL, 0x3e3d22a2UL, - 0xe0000000UL, 0x3fe2e403UL, 0x8b56f66fUL, 0xbe5aa5fdUL, 0x80000000UL, - 0x3fe2d7ffUL, 0x52db119aUL, 0x3e3a2e3dUL, 0x60000000UL, 0x3fe2cbfeUL, - 0xe2ddd4c0UL, 0xbe586469UL, 0x40000000UL, 0x3fe2c000UL, 0x6b01bf10UL, - 0x3e352b9dUL, 0x40000000UL, 0x3fe2b405UL, 0xb07a1cdfUL, 0x3e5c5cdaUL, - 0x80000000UL, 0x3fe2a80dUL, 0xc7b5f868UL, 0xbe5668b3UL, 0xc0000000UL, - 0x3fe29c18UL, 0x185edf62UL, 0xbe563d66UL, 0x00000000UL, 0x3fe29027UL, - 0xf729e1ccUL, 0x3e59a9a0UL, 0x80000000UL, 0x3fe28438UL, 0x6433c727UL, - 0xbe43cc89UL, 0x00000000UL, 0x3fe2784dUL, 0x41782631UL, 0xbe30750cUL, - 0xa0000000UL, 0x3fe26c64UL, 0x914911b7UL, 0xbe58290eUL, 0x40000000UL, - 0x3fe2607fUL, 0x3dcc73e1UL, 0xbe4269cdUL, 0x00000000UL, 0x3fe2549dUL, - 0x2751bf70UL, 0xbe5a6998UL, 0xc0000000UL, 0x3fe248bdUL, 0x4248b9fbUL, - 0xbe4ddb00UL, 0x80000000UL, 0x3fe23ce1UL, 0xf35cf82fUL, 0x3e561b71UL, - 0x60000000UL, 0x3fe23108UL, 0x8e481a2dUL, 0x3e518fb9UL, 0x60000000UL, - 0x3fe22532UL, 0x5ab96edcUL, 0xbe5fafc5UL, 0x40000000UL, 0x3fe2195fUL, - 0x80943911UL, 0xbe07f819UL, 0x40000000UL, 0x3fe20d8fUL, 0x386f2d6cUL, - 0xbe54ba8bUL, 0x40000000UL, 0x3fe201c2UL, 0xf29664acUL, 0xbe5eb815UL, - 0x20000000UL, 0x3fe1f5f8UL, 0x64f03390UL, 0x3e5e320cUL, 0x20000000UL, - 0x3fe1ea31UL, 0x747ff696UL, 0x3e5ef0a5UL, 0x40000000UL, 0x3fe1de6dUL, - 0x3e9ceb51UL, 0xbe5f8d27UL, 0x20000000UL, 0x3fe1d2acUL, 0x4ae0b55eUL, - 0x3e5faa21UL, 0x20000000UL, 0x3fe1c6eeUL, 0x28569a5eUL, 0x3e598a4fUL, - 0x20000000UL, 0x3fe1bb33UL, 0x54b33e07UL, 0x3e46130aUL, 0x20000000UL, 
- 0x3fe1af7bUL, 0x024f1078UL, 0xbe4dbf93UL, 0x00000000UL, 0x3fe1a3c6UL, - 0xb0783bfaUL, 0x3e419248UL, 0xe0000000UL, 0x3fe19813UL, 0x2f02b836UL, - 0x3e4e02b7UL, 0xc0000000UL, 0x3fe18c64UL, 0x28dec9d4UL, 0x3e09064fUL, - 0x80000000UL, 0x3fe180b8UL, 0x45cbf406UL, 0x3e5b1f46UL, 0x40000000UL, - 0x3fe1750fUL, 0x03d9964cUL, 0x3e5b0a79UL, 0x00000000UL, 0x3fe16969UL, - 0x8b5b882bUL, 0xbe238086UL, 0xa0000000UL, 0x3fe15dc5UL, 0x73bad6f8UL, - 0xbdf1fca4UL, 0x20000000UL, 0x3fe15225UL, 0x5385769cUL, 0x3e5e8d76UL, - 0xa0000000UL, 0x3fe14687UL, 0x1676dc6bUL, 0x3e571d08UL, 0x20000000UL, - 0x3fe13aedUL, 0xa8c41c7fUL, 0xbe598a25UL, 0x60000000UL, 0x3fe12f55UL, - 0xc4e1aaf0UL, 0x3e435277UL, 0xa0000000UL, 0x3fe123c0UL, 0x403638e1UL, - 0xbe21aa7cUL, 0xc0000000UL, 0x3fe1182eUL, 0x557a092bUL, 0xbdd0116bUL, - 0xc0000000UL, 0x3fe10c9fUL, 0x7d779f66UL, 0x3e4a61baUL, 0xc0000000UL, - 0x3fe10113UL, 0x2b09c645UL, 0xbe5d586eUL, 0x20000000UL, 0x3fe0ea04UL, - 0xea2cad46UL, 0x3e5aa97cUL, 0x20000000UL, 0x3fe0d300UL, 0x23190e54UL, - 0x3e50f1a7UL, 0xa0000000UL, 0x3fe0bc07UL, 0x1379a5a6UL, 0xbe51619dUL, - 0x60000000UL, 0x3fe0a51aUL, 0x926a3d4aUL, 0x3e5cf019UL, 0xa0000000UL, - 0x3fe08e38UL, 0xa8c24358UL, 0x3e35241eUL, 0x20000000UL, 0x3fe07762UL, - 0x24317e7aUL, 0x3e512cfaUL, 0x00000000UL, 0x3fe06097UL, 0xfd9cf274UL, - 0xbe55bef3UL, 0x00000000UL, 0x3fe049d7UL, 0x3689b49dUL, 0xbe36d26dUL, - 0x40000000UL, 0x3fe03322UL, 0xf72ef6c4UL, 0xbe54cd08UL, 0xa0000000UL, - 0x3fe01c78UL, 0x23702d2dUL, 0xbe5900bfUL, 0x00000000UL, 0x3fe005daUL, - 0x3f59c14cUL, 0x3e57d80bUL, 0x40000000UL, 0x3fdfde8dUL, 0xad67766dUL, - 0xbe57fad4UL, 0x40000000UL, 0x3fdfb17cUL, 0x644f4ae7UL, 0x3e1ee43bUL, - 0x40000000UL, 0x3fdf8481UL, 0x903234d2UL, 0x3e501a86UL, 0x40000000UL, - 0x3fdf579cUL, 0xafe9e509UL, 0xbe267c3eUL, 0x00000000UL, 0x3fdf2acdUL, - 0xb7dfda0bUL, 0xbe48149bUL, 0x40000000UL, 0x3fdefe13UL, 0x3b94305eUL, - 0x3e5f4ea7UL, 0x80000000UL, 0x3fded16fUL, 0x5d95da61UL, 0xbe55c198UL, - 0x00000000UL, 0x3fdea4e1UL, 0x406960c9UL, 
0xbdd99a19UL, 0x00000000UL, - 0x3fde7868UL, 0xd22f3539UL, 0x3e470c78UL, 0x80000000UL, 0x3fde4c04UL, - 0x83eec535UL, 0xbe3e1232UL, 0x40000000UL, 0x3fde1fb6UL, 0x3dfbffcbUL, - 0xbe4b7d71UL, 0x40000000UL, 0x3fddf37dUL, 0x7e1be4e0UL, 0xbe5b8f8fUL, - 0x40000000UL, 0x3fddc759UL, 0x46dae887UL, 0xbe350458UL, 0x80000000UL, - 0x3fdd9b4aUL, 0xed6ecc49UL, 0xbe5f0045UL, 0x80000000UL, 0x3fdd6f50UL, - 0x2e9e883cUL, 0x3e2915daUL, 0x80000000UL, 0x3fdd436bUL, 0xf0bccb32UL, - 0x3e4a68c9UL, 0x80000000UL, 0x3fdd179bUL, 0x9bbfc779UL, 0xbe54a26aUL, - 0x00000000UL, 0x3fdcebe0UL, 0x7cea33abUL, 0x3e43c6b7UL, 0x40000000UL, - 0x3fdcc039UL, 0xe740fd06UL, 0x3e5526c2UL, 0x40000000UL, 0x3fdc94a7UL, - 0x9eadeb1aUL, 0xbe396d8dUL, 0xc0000000UL, 0x3fdc6929UL, 0xf0a8f95aUL, - 0xbe5c0ab2UL, 0x80000000UL, 0x3fdc3dc0UL, 0x6ee2693bUL, 0x3e0992e6UL, - 0xc0000000UL, 0x3fdc126bUL, 0x5ac6b581UL, 0xbe2834b6UL, 0x40000000UL, - 0x3fdbe72bUL, 0x8cc226ffUL, 0x3e3596a6UL, 0x00000000UL, 0x3fdbbbffUL, - 0xf92a74bbUL, 0x3e3c5813UL, 0x00000000UL, 0x3fdb90e7UL, 0x479664c0UL, - 0xbe50d644UL, 0x00000000UL, 0x3fdb65e3UL, 0x5004975bUL, 0xbe55258fUL, - 0x00000000UL, 0x3fdb3af3UL, 0xe4b23194UL, 0xbe588407UL, 0xc0000000UL, - 0x3fdb1016UL, 0xe65d4d0aUL, 0x3e527c26UL, 0x80000000UL, 0x3fdae54eUL, - 0x814fddd6UL, 0x3e5962a2UL, 0x40000000UL, 0x3fdaba9aUL, 0xe19d0913UL, - 0xbe562f4eUL, 0x80000000UL, 0x3fda8ff9UL, 0x43cfd006UL, 0xbe4cfdebUL, - 0x40000000UL, 0x3fda656cUL, 0x686f0a4eUL, 0x3e5e47a8UL, 0xc0000000UL, - 0x3fda3af2UL, 0x7200d410UL, 0x3e5e1199UL, 0xc0000000UL, 0x3fda108cUL, - 0xabd2266eUL, 0x3e5ee4d1UL, 0x40000000UL, 0x3fd9e63aUL, 0x396f8f2cUL, - 0x3e4dbffbUL, 0x00000000UL, 0x3fd9bbfbUL, 0xe32b25ddUL, 0x3e5c3a54UL, - 0x40000000UL, 0x3fd991cfUL, 0x431e4035UL, 0xbe457925UL, 0x80000000UL, - 0x3fd967b6UL, 0x7bed3dd3UL, 0x3e40c61dUL, 0x00000000UL, 0x3fd93db1UL, - 0xd7449365UL, 0x3e306419UL, 0x80000000UL, 0x3fd913beUL, 0x1746e791UL, - 0x3e56fcfcUL, 0x40000000UL, 0x3fd8e9dfUL, 0xf3a9028bUL, 0xbe5041b9UL, - 0xc0000000UL, 
0x3fd8c012UL, 0x56840c50UL, 0xbe26e20aUL, 0x40000000UL, - 0x3fd89659UL, 0x19763102UL, 0xbe51f466UL, 0x80000000UL, 0x3fd86cb2UL, - 0x7032de7cUL, 0xbe4d298aUL, 0x80000000UL, 0x3fd8431eUL, 0xdeb39fabUL, - 0xbe4361ebUL, 0x40000000UL, 0x3fd8199dUL, 0x5d01cbe0UL, 0xbe5425b3UL, - 0x80000000UL, 0x3fd7f02eUL, 0x3ce99aa9UL, 0x3e146fa8UL, 0x80000000UL, - 0x3fd7c6d2UL, 0xd1a262b9UL, 0xbe5a1a69UL, 0xc0000000UL, 0x3fd79d88UL, - 0x8606c236UL, 0x3e423a08UL, 0x80000000UL, 0x3fd77451UL, 0x8fd1e1b7UL, - 0x3e5a6a63UL, 0xc0000000UL, 0x3fd74b2cUL, 0xe491456aUL, 0x3e42c1caUL, - 0x40000000UL, 0x3fd7221aUL, 0x4499a6d7UL, 0x3e36a69aUL, 0x00000000UL, - 0x3fd6f91aUL, 0x5237df94UL, 0xbe0f8f02UL, 0x00000000UL, 0x3fd6d02cUL, - 0xb6482c6eUL, 0xbe5abcf7UL, 0x00000000UL, 0x3fd6a750UL, 0x1919fd61UL, - 0xbe57ade2UL, 0x00000000UL, 0x3fd67e86UL, 0xaa7a994dUL, 0xbe3f3fbdUL, - 0x00000000UL, 0x3fd655ceUL, 0x67db014cUL, 0x3e33c550UL, 0x00000000UL, - 0x3fd62d28UL, 0xa82856b7UL, 0xbe1409d1UL, 0xc0000000UL, 0x3fd60493UL, - 0x1e6a300dUL, 0x3e55d899UL, 0x80000000UL, 0x3fd5dc11UL, 0x1222bd5cUL, - 0xbe35bfc0UL, 0xc0000000UL, 0x3fd5b3a0UL, 0x6e8dc2d3UL, 0x3e5d4d79UL, - 0x00000000UL, 0x3fd58b42UL, 0xe0e4ace6UL, 0xbe517303UL, 0x80000000UL, - 0x3fd562f4UL, 0xb306e0a8UL, 0x3e5edf0fUL, 0xc0000000UL, 0x3fd53ab8UL, - 0x6574bc54UL, 0x3e5ee859UL, 0x80000000UL, 0x3fd5128eUL, 0xea902207UL, - 0x3e5f6188UL, 0xc0000000UL, 0x3fd4ea75UL, 0x9f911d79UL, 0x3e511735UL, - 0x80000000UL, 0x3fd4c26eUL, 0xf9c77397UL, 0xbe5b1643UL, 0x40000000UL, - 0x3fd49a78UL, 0x15fc9258UL, 0x3e479a37UL, 0x80000000UL, 0x3fd47293UL, - 0xd5a04dd9UL, 0xbe426e56UL, 0xc0000000UL, 0x3fd44abfUL, 0xe04042f5UL, - 0x3e56f7c6UL, 0x40000000UL, 0x3fd422fdUL, 0x1d8bf2c8UL, 0x3e5d8810UL, - 0x00000000UL, 0x3fd3fb4cUL, 0x88a8ddeeUL, 0xbe311454UL, 0xc0000000UL, - 0x3fd3d3abUL, 0x3e3b5e47UL, 0xbe5d1b72UL, 0x40000000UL, 0x3fd3ac1cUL, - 0xc2ab5d59UL, 0x3e31b02bUL, 0xc0000000UL, 0x3fd3849dUL, 0xd4e34b9eUL, - 0x3e51cb2fUL, 0x40000000UL, 0x3fd35d30UL, 0x177204fbUL, 0xbe2b8cd7UL, 
- 0x80000000UL, 0x3fd335d3UL, 0xfcd38c82UL, 0xbe4356e1UL, 0x80000000UL, - 0x3fd30e87UL, 0x64f54accUL, 0xbe4e6224UL, 0x00000000UL, 0x3fd2e74cUL, - 0xaa7975d9UL, 0x3e5dc0feUL, 0x80000000UL, 0x3fd2c021UL, 0x516dab3fUL, - 0xbe50ffa3UL, 0x40000000UL, 0x3fd29907UL, 0x2bfb7313UL, 0x3e5674a2UL, - 0xc0000000UL, 0x3fd271fdUL, 0x0549fc99UL, 0x3e385d29UL, 0xc0000000UL, - 0x3fd24b04UL, 0x55b63073UL, 0xbe500c6dUL, 0x00000000UL, 0x3fd2241cUL, - 0x3f91953aUL, 0x3e389977UL, 0xc0000000UL, 0x3fd1fd43UL, 0xa1543f71UL, - 0xbe3487abUL, 0xc0000000UL, 0x3fd1d67bUL, 0x4ec8867cUL, 0x3df6a2dcUL, - 0x00000000UL, 0x3fd1afc4UL, 0x4328e3bbUL, 0x3e41d9c0UL, 0x80000000UL, - 0x3fd1891cUL, 0x2e1cda84UL, 0x3e3bdd87UL, 0x40000000UL, 0x3fd16285UL, - 0x4b5331aeUL, 0xbe53128eUL, 0x00000000UL, 0x3fd13bfeUL, 0xb9aec164UL, - 0xbe52ac98UL, 0xc0000000UL, 0x3fd11586UL, 0xd91e1316UL, 0xbe350630UL, - 0x80000000UL, 0x3fd0ef1fUL, 0x7cacc12cUL, 0x3e3f5219UL, 0x40000000UL, - 0x3fd0c8c8UL, 0xbce277b7UL, 0x3e3d30c0UL, 0x00000000UL, 0x3fd0a281UL, - 0x2a63447dUL, 0xbe541377UL, 0x80000000UL, 0x3fd07c49UL, 0xfac483b5UL, - 0xbe5772ecUL, 0xc0000000UL, 0x3fd05621UL, 0x36b8a570UL, 0xbe4fd4bdUL, - 0xc0000000UL, 0x3fd03009UL, 0xbae505f7UL, 0xbe450388UL, 0x80000000UL, - 0x3fd00a01UL, 0x3e35aeadUL, 0xbe5430fcUL, 0x80000000UL, 0x3fcfc811UL, - 0x707475acUL, 0x3e38806eUL, 0x80000000UL, 0x3fcf7c3fUL, 0xc91817fcUL, - 0xbe40cceaUL, 0x80000000UL, 0x3fcf308cUL, 0xae05d5e9UL, 0xbe4919b8UL, - 0x80000000UL, 0x3fcee4f8UL, 0xae6cc9e6UL, 0xbe530b94UL, 0x00000000UL, - 0x3fce9983UL, 0x1efe3e8eUL, 0x3e57747eUL, 0x00000000UL, 0x3fce4e2dUL, - 0xda78d9bfUL, 0xbe59a608UL, 0x00000000UL, 0x3fce02f5UL, 0x8abe2c2eUL, - 0x3e4a35adUL, 0x00000000UL, 0x3fcdb7dcUL, 0x1495450dUL, 0xbe0872ccUL, - 0x80000000UL, 0x3fcd6ce1UL, 0x86ee0ba0UL, 0xbe4f59a0UL, 0x00000000UL, - 0x3fcd2205UL, 0xe81ca888UL, 0x3e5402c3UL, 0x00000000UL, 0x3fccd747UL, - 0x3b4424b9UL, 0x3e5dfdc3UL, 0x80000000UL, 0x3fcc8ca7UL, 0xd305b56cUL, - 0x3e202da6UL, 0x00000000UL, 0x3fcc4226UL, 
0x399a6910UL, 0xbe482a1cUL, - 0x80000000UL, 0x3fcbf7c2UL, 0x747f7938UL, 0xbe587372UL, 0x80000000UL, - 0x3fcbad7cUL, 0x6fc246a0UL, 0x3e50d83dUL, 0x00000000UL, 0x3fcb6355UL, - 0xee9e9be5UL, 0xbe5c35bdUL, 0x80000000UL, 0x3fcb194aUL, 0x8416c0bcUL, - 0x3e546d4fUL, 0x00000000UL, 0x3fcacf5eUL, 0x49f7f08fUL, 0x3e56da76UL, - 0x00000000UL, 0x3fca858fUL, 0x5dc30de2UL, 0x3e5f390cUL, 0x00000000UL, - 0x3fca3bdeUL, 0x950583b6UL, 0xbe5e4169UL, 0x80000000UL, 0x3fc9f249UL, - 0x33631553UL, 0x3e52aeb1UL, 0x00000000UL, 0x3fc9a8d3UL, 0xde8795a6UL, - 0xbe59a504UL, 0x00000000UL, 0x3fc95f79UL, 0x076bf41eUL, 0x3e5122feUL, - 0x80000000UL, 0x3fc9163cUL, 0x2914c8e7UL, 0x3e3dd064UL, 0x00000000UL, - 0x3fc8cd1dUL, 0x3a30eca3UL, 0xbe21b4aaUL, 0x80000000UL, 0x3fc8841aUL, - 0xb2a96650UL, 0xbe575444UL, 0x80000000UL, 0x3fc83b34UL, 0x2376c0cbUL, - 0xbe2a74c7UL, 0x80000000UL, 0x3fc7f26bUL, 0xd8a0b653UL, 0xbe5181b6UL, - 0x00000000UL, 0x3fc7a9bfUL, 0x32257882UL, 0xbe4a78b4UL, 0x00000000UL, - 0x3fc7612fUL, 0x1eee8bd9UL, 0xbe1bfe9dUL, 0x80000000UL, 0x3fc718bbUL, - 0x0c603cc4UL, 0x3e36fdc9UL, 0x80000000UL, 0x3fc6d064UL, 0x3728b8cfUL, - 0xbe1e542eUL, 0x80000000UL, 0x3fc68829UL, 0xc79a4067UL, 0x3e5c380fUL, - 0x00000000UL, 0x3fc6400bUL, 0xf69eac69UL, 0x3e550a84UL, 0x80000000UL, - 0x3fc5f808UL, 0xb7a780a4UL, 0x3e5d9224UL, 0x80000000UL, 0x3fc5b022UL, - 0xad9dfb1eUL, 0xbe55242fUL, 0x00000000UL, 0x3fc56858UL, 0x659b18beUL, - 0xbe4bfda3UL, 0x80000000UL, 0x3fc520a9UL, 0x66ee3631UL, 0xbe57d769UL, - 0x80000000UL, 0x3fc4d916UL, 0x1ec62819UL, 0x3e2427f7UL, 0x80000000UL, - 0x3fc4919fUL, 0xdec25369UL, 0xbe435431UL, 0x00000000UL, 0x3fc44a44UL, - 0xa8acfc4bUL, 0xbe3c62e8UL, 0x00000000UL, 0x3fc40304UL, 0xcf1d3eabUL, - 0xbdfba29fUL, 0x80000000UL, 0x3fc3bbdfUL, 0x79aba3eaUL, 0xbdf1b7c8UL, - 0x80000000UL, 0x3fc374d6UL, 0xb8d186daUL, 0xbe5130cfUL, 0x80000000UL, - 0x3fc32de8UL, 0x9d74f152UL, 0x3e2285b6UL, 0x00000000UL, 0x3fc2e716UL, - 0x50ae7ca9UL, 0xbe503920UL, 0x80000000UL, 0x3fc2a05eUL, 0x6caed92eUL, - 0xbe533924UL, 
0x00000000UL, 0x3fc259c2UL, 0x9cb5034eUL, 0xbe510e31UL, - 0x80000000UL, 0x3fc21340UL, 0x12c4d378UL, 0xbe540b43UL, 0x80000000UL, - 0x3fc1ccd9UL, 0xcc418706UL, 0x3e59887aUL, 0x00000000UL, 0x3fc1868eUL, - 0x921f4106UL, 0xbe528e67UL, 0x80000000UL, 0x3fc1405cUL, 0x3969441eUL, - 0x3e5d8051UL, 0x00000000UL, 0x3fc0fa46UL, 0xd941ef5bUL, 0x3e5f9079UL, - 0x80000000UL, 0x3fc0b44aUL, 0x5a3e81b2UL, 0xbe567691UL, 0x00000000UL, - 0x3fc06e69UL, 0x9d66afe7UL, 0xbe4d43fbUL, 0x00000000UL, 0x3fc028a2UL, - 0x0a92a162UL, 0xbe52f394UL, 0x00000000UL, 0x3fbfc5eaUL, 0x209897e5UL, - 0x3e529e37UL, 0x00000000UL, 0x3fbf3ac5UL, 0x8458bd7bUL, 0x3e582831UL, - 0x00000000UL, 0x3fbeafd5UL, 0xb8d8b4b8UL, 0xbe486b4aUL, 0x00000000UL, - 0x3fbe2518UL, 0xe0a3b7b6UL, 0x3e5bafd2UL, 0x00000000UL, 0x3fbd9a90UL, - 0x2bf2710eUL, 0x3e383b2bUL, 0x00000000UL, 0x3fbd103cUL, 0x73eb6ab7UL, - 0xbe56d78dUL, 0x00000000UL, 0x3fbc861bUL, 0x32ceaff5UL, 0xbe32dc5aUL, - 0x00000000UL, 0x3fbbfc2eUL, 0xbee04cb7UL, 0xbe4a71a4UL, 0x00000000UL, - 0x3fbb7274UL, 0x35ae9577UL, 0x3e38142fUL, 0x00000000UL, 0x3fbae8eeUL, - 0xcbaddab4UL, 0xbe5490f0UL, 0x00000000UL, 0x3fba5f9aUL, 0x95ce1114UL, - 0x3e597c71UL, 0x00000000UL, 0x3fb9d67aUL, 0x6d7c0f78UL, 0x3e3abc2dUL, - 0x00000000UL, 0x3fb94d8dUL, 0x2841a782UL, 0xbe566cbcUL, 0x00000000UL, - 0x3fb8c4d2UL, 0x6ed429c6UL, 0xbe3cfff9UL, 0x00000000UL, 0x3fb83c4aUL, - 0xe4a49fbbUL, 0xbe552964UL, 0x00000000UL, 0x3fb7b3f4UL, 0x2193d81eUL, - 0xbe42fa72UL, 0x00000000UL, 0x3fb72bd0UL, 0xdd70c122UL, 0x3e527a8cUL, - 0x00000000UL, 0x3fb6a3dfUL, 0x03108a54UL, 0xbe450393UL, 0x00000000UL, - 0x3fb61c1fUL, 0x30ff7954UL, 0x3e565840UL, 0x00000000UL, 0x3fb59492UL, - 0xdedd460cUL, 0xbe5422b5UL, 0x00000000UL, 0x3fb50d36UL, 0x950f9f45UL, - 0xbe5313f6UL, 0x00000000UL, 0x3fb4860bUL, 0x582cdcb1UL, 0x3e506d39UL, - 0x00000000UL, 0x3fb3ff12UL, 0x7216d3a6UL, 0x3e4aa719UL, 0x00000000UL, - 0x3fb3784aUL, 0x57a423fdUL, 0x3e5a9b9fUL, 0x00000000UL, 0x3fb2f1b4UL, - 0x7a138b41UL, 0xbe50b418UL, 0x00000000UL, 0x3fb26b4eUL, 0x2fbfd7eaUL, 
- 0x3e23a53eUL, 0x00000000UL, 0x3fb1e519UL, 0x18913ccbUL, 0x3e465fc1UL, - 0x00000000UL, 0x3fb15f15UL, 0x7ea24e21UL, 0x3e042843UL, 0x00000000UL, - 0x3fb0d941UL, 0x7c6d9c77UL, 0x3e59f61eUL, 0x00000000UL, 0x3fb0539eUL, - 0x114efd44UL, 0x3e4ccab7UL, 0x00000000UL, 0x3faf9c56UL, 0x1777f657UL, - 0x3e552f65UL, 0x00000000UL, 0x3fae91d2UL, 0xc317b86aUL, 0xbe5a61e0UL, - 0x00000000UL, 0x3fad87acUL, 0xb7664efbUL, 0xbe41f64eUL, 0x00000000UL, - 0x3fac7de6UL, 0x5d3d03a9UL, 0x3e0807a0UL, 0x00000000UL, 0x3fab7480UL, - 0x743c38ebUL, 0xbe3726e1UL, 0x00000000UL, 0x3faa6b78UL, 0x06a253f1UL, - 0x3e5ad636UL, 0x00000000UL, 0x3fa962d0UL, 0xa35f541bUL, 0x3e5a187aUL, - 0x00000000UL, 0x3fa85a88UL, 0x4b86e446UL, 0xbe508150UL, 0x00000000UL, - 0x3fa7529cUL, 0x2589cacfUL, 0x3e52938aUL, 0x00000000UL, 0x3fa64b10UL, - 0xaf6b11f2UL, 0xbe3454cdUL, 0x00000000UL, 0x3fa543e2UL, 0x97506fefUL, - 0xbe5fdec5UL, 0x00000000UL, 0x3fa43d10UL, 0xe75f7dd9UL, 0xbe388dd3UL, - 0x00000000UL, 0x3fa3369cUL, 0xa4139632UL, 0xbdea5177UL, 0x00000000UL, - 0x3fa23086UL, 0x352d6f1eUL, 0xbe565ad6UL, 0x00000000UL, 0x3fa12accUL, - 0x77449eb7UL, 0xbe50d5c7UL, 0x00000000UL, 0x3fa0256eUL, 0x7478da78UL, - 0x3e404724UL, 0x00000000UL, 0x3f9e40dcUL, 0xf59cef7fUL, 0xbe539d0aUL, - 0x00000000UL, 0x3f9c3790UL, 0x1511d43cUL, 0x3e53c2c8UL, 0x00000000UL, - 0x3f9a2f00UL, 0x9b8bff3cUL, 0xbe43b3e1UL, 0x00000000UL, 0x3f982724UL, - 0xad1e22a5UL, 0x3e46f0bdUL, 0x00000000UL, 0x3f962000UL, 0x130d9356UL, - 0x3e475ba0UL, 0x00000000UL, 0x3f941994UL, 0x8f86f883UL, 0xbe513d0bUL, - 0x00000000UL, 0x3f9213dcUL, 0x914d0dc8UL, 0xbe534335UL, 0x00000000UL, - 0x3f900ed8UL, 0x2d73e5e7UL, 0xbe22ba75UL, 0x00000000UL, 0x3f8c1510UL, - 0xc5b7d70eUL, 0x3e599c5dUL, 0x00000000UL, 0x3f880de0UL, 0x8a27857eUL, - 0xbe3d28c8UL, 0x00000000UL, 0x3f840810UL, 0xda767328UL, 0x3e531b3dUL, - 0x00000000UL, 0x3f8003b0UL, 0x77bacaf3UL, 0xbe5f04e3UL, 0x00000000UL, - 0x3f780150UL, 0xdf4b0720UL, 0x3e5a8bffUL, 0x00000000UL, 0x3f6ffc40UL, - 0x34c48e71UL, 0xbe3fcd99UL, 0x00000000UL, 
0x3f5ff6c0UL, 0x1ad218afUL, - 0xbe4c78a7UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x80000000UL, - 0x00000000UL, 0xfffff800UL, 0x00000000UL, 0xfffff800UL, 0x00000000UL, - 0x3ff72000UL, 0x161bb241UL, 0xbf5dabe1UL, 0x6dc96112UL, 0xbf836578UL, - 0xee241472UL, 0xbf9b0301UL, 0x9f95985aUL, 0xbfb528dbUL, 0xb3841d2aUL, - 0xbfd619b6UL, 0x518775e3UL, 0x3f9004f2UL, 0xac8349bbUL, 0x3fa76c9bUL, - 0x486ececcUL, 0x3fc4635eUL, 0x161bb241UL, 0xbf5dabe1UL, 0x9f95985aUL, - 0xbfb528dbUL, 0xf8b5787dUL, 0x3ef2531eUL, 0x486ececbUL, 0x3fc4635eUL, - 0x412055ccUL, 0xbdd61bb2UL, 0x00000000UL, 0xfffffff8UL, 0x00000000UL, - 0xffffffffUL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x3b700000UL, - 0xfa5abcbfUL, 0x3ff00b1aUL, 0xa7609f71UL, 0xbc84f6b2UL, 0xa9fb3335UL, - 0x3ff0163dUL, 0x9ab8cdb7UL, 0x3c9b6129UL, 0x143b0281UL, 0x3ff02168UL, - 0x0fc54eb6UL, 0xbc82bf31UL, 0x3e778061UL, 0x3ff02c9aUL, 0x535b085dUL, - 0xbc719083UL, 0x2e11bbccUL, 0x3ff037d4UL, 0xeeade11aUL, 0x3c656811UL, - 0xe86e7f85UL, 0x3ff04315UL, 0x1977c96eUL, 0xbc90a31cUL, 0x72f654b1UL, - 0x3ff04e5fUL, 0x3aa0d08cUL, 0x3c84c379UL, 0xd3158574UL, 0x3ff059b0UL, - 0xa475b465UL, 0x3c8d73e2UL, 0x0e3c1f89UL, 0x3ff0650aUL, 0x5799c397UL, - 0xbc95cb7bUL, 0x29ddf6deUL, 0x3ff0706bUL, 0xe2b13c27UL, 0xbc8c91dfUL, - 0x2b72a836UL, 0x3ff07bd4UL, 0x54458700UL, 0x3c832334UL, 0x18759bc8UL, - 0x3ff08745UL, 0x4bb284ffUL, 0x3c6186beUL, 0xf66607e0UL, 0x3ff092bdUL, - 0x800a3fd1UL, 0xbc968063UL, 0xcac6f383UL, 0x3ff09e3eUL, 0x18316136UL, - 0x3c914878UL, 0x9b1f3919UL, 0x3ff0a9c7UL, 0x873d1d38UL, 0x3c85d16cUL, - 0x6cf9890fUL, 0x3ff0b558UL, 0x4adc610bUL, 0x3c98a62eUL, 0x45e46c85UL, - 0x3ff0c0f1UL, 0x06d21cefUL, 0x3c94f989UL, 0x2b7247f7UL, 0x3ff0cc92UL, - 0x16e24f71UL, 0x3c901edcUL, 0x23395decUL, 0x3ff0d83bUL, 0xe43f316aUL, - 0xbc9bc14dUL, 0x32d3d1a2UL, 0x3ff0e3ecUL, 0x27c57b52UL, 0x3c403a17UL, - 0x5fdfa9c5UL, 0x3ff0efa5UL, 0xbc54021bUL, 0xbc949db9UL, 0xaffed31bUL, - 0x3ff0fb66UL, 0xc44ebd7bUL, 0xbc6b9bedUL, 0x28d7233eUL, 0x3ff10730UL, - 0x1692fdd5UL, 
0x3c8d46ebUL, 0xd0125b51UL, 0x3ff11301UL, 0x39449b3aUL, - 0xbc96c510UL, 0xab5e2ab6UL, 0x3ff11edbUL, 0xf703fb72UL, 0xbc9ca454UL, - 0xc06c31ccUL, 0x3ff12abdUL, 0xb36ca5c7UL, 0xbc51b514UL, 0x14f204abUL, - 0x3ff136a8UL, 0xba48dcf0UL, 0xbc67108fUL, 0xaea92de0UL, 0x3ff1429aUL, - 0x9af1369eUL, 0xbc932fbfUL, 0x934f312eUL, 0x3ff14e95UL, 0x39bf44abUL, - 0xbc8b91e8UL, 0xc8a58e51UL, 0x3ff15a98UL, 0xb9eeab0aUL, 0x3c82406aUL, - 0x5471c3c2UL, 0x3ff166a4UL, 0x82ea1a32UL, 0x3c58f23bUL, 0x3c7d517bUL, - 0x3ff172b8UL, 0xb9d78a76UL, 0xbc819041UL, 0x8695bbc0UL, 0x3ff17ed4UL, - 0xe2ac5a64UL, 0x3c709e3fUL, 0x388c8deaUL, 0x3ff18af9UL, 0xd1970f6cUL, - 0xbc911023UL, 0x58375d2fUL, 0x3ff19726UL, 0x85f17e08UL, 0x3c94aaddUL, - 0xeb6fcb75UL, 0x3ff1a35bUL, 0x7b4968e4UL, 0x3c8e5b4cUL, 0xf8138a1cUL, - 0x3ff1af99UL, 0xa4b69280UL, 0x3c97bf85UL, 0x84045cd4UL, 0x3ff1bbe0UL, - 0x352ef607UL, 0xbc995386UL, 0x95281c6bUL, 0x3ff1c82fUL, 0x8010f8c9UL, - 0x3c900977UL, 0x3168b9aaUL, 0x3ff1d487UL, 0x00a2643cUL, 0x3c9e016eUL, - 0x5eb44027UL, 0x3ff1e0e7UL, 0x088cb6deUL, 0xbc96fdd8UL, 0x22fcd91dUL, - 0x3ff1ed50UL, 0x027bb78cUL, 0xbc91df98UL, 0x8438ce4dUL, 0x3ff1f9c1UL, - 0xa097af5cUL, 0xbc9bf524UL, 0x88628cd6UL, 0x3ff2063bUL, 0x814a8495UL, - 0x3c8dc775UL, 0x3578a819UL, 0x3ff212beUL, 0x2cfcaac9UL, 0x3c93592dUL, - 0x917ddc96UL, 0x3ff21f49UL, 0x9494a5eeUL, 0x3c82a97eUL, 0xa27912d1UL, - 0x3ff22bddUL, 0x5577d69fUL, 0x3c8d34fbUL, 0x6e756238UL, 0x3ff2387aUL, - 0xb6c70573UL, 0x3c99b07eUL, 0xfb82140aUL, 0x3ff2451fUL, 0x911ca996UL, - 0x3c8acfccUL, 0x4fb2a63fUL, 0x3ff251ceUL, 0xbef4f4a4UL, 0x3c8ac155UL, - 0x711ece75UL, 0x3ff25e85UL, 0x4ac31b2cUL, 0x3c93e1a2UL, 0x65e27cddUL, - 0x3ff26b45UL, 0x9940e9d9UL, 0x3c82bd33UL, 0x341ddf29UL, 0x3ff2780eUL, - 0x05f9e76cUL, 0x3c9e067cUL, 0xe1f56381UL, 0x3ff284dfUL, 0x8c3f0d7eUL, - 0xbc9a4c3aUL, 0x7591bb70UL, 0x3ff291baUL, 0x28401cbdUL, 0xbc82cc72UL, - 0xf51fdee1UL, 0x3ff29e9dUL, 0xafad1255UL, 0x3c8612e8UL, 0x66d10f13UL, - 0x3ff2ab8aUL, 0x191690a7UL, 0xbc995743UL, 0xd0dad990UL, 0x3ff2b87fUL, 
- 0xd6381aa4UL, 0xbc410adcUL, 0x39771b2fUL, 0x3ff2c57eUL, 0xa6eb5124UL, - 0xbc950145UL, 0xa6e4030bUL, 0x3ff2d285UL, 0x54db41d5UL, 0x3c900247UL, - 0x1f641589UL, 0x3ff2df96UL, 0xfbbce198UL, 0x3c9d16cfUL, 0xa93e2f56UL, - 0x3ff2ecafUL, 0x45d52383UL, 0x3c71ca0fUL, 0x4abd886bUL, 0x3ff2f9d2UL, - 0x532bda93UL, 0xbc653c55UL, 0x0a31b715UL, 0x3ff306feUL, 0xd23182e4UL, - 0x3c86f46aUL, 0xedeeb2fdUL, 0x3ff31432UL, 0xf3f3fcd1UL, 0x3c8959a3UL, - 0xfc4cd831UL, 0x3ff32170UL, 0x8e18047cUL, 0x3c8a9ce7UL, 0x3ba8ea32UL, - 0x3ff32eb8UL, 0x3cb4f318UL, 0xbc9c45e8UL, 0xb26416ffUL, 0x3ff33c08UL, - 0x843659a6UL, 0x3c932721UL, 0x66e3fa2dUL, 0x3ff34962UL, 0x930881a4UL, - 0xbc835a75UL, 0x5f929ff1UL, 0x3ff356c5UL, 0x5c4e4628UL, 0xbc8b5ceeUL, - 0xa2de883bUL, 0x3ff36431UL, 0xa06cb85eUL, 0xbc8c3144UL, 0x373aa9cbUL, - 0x3ff371a7UL, 0xbf42eae2UL, 0xbc963aeaUL, 0x231e754aUL, 0x3ff37f26UL, - 0x9eceb23cUL, 0xbc99f5caUL, 0x6d05d866UL, 0x3ff38caeUL, 0x3c9904bdUL, - 0xbc9e958dUL, 0x1b7140efUL, 0x3ff39a40UL, 0xfc8e2934UL, 0xbc99a9a5UL, - 0x34e59ff7UL, 0x3ff3a7dbUL, 0xd661f5e3UL, 0xbc75e436UL, 0xbfec6cf4UL, - 0x3ff3b57fUL, 0xe26fff18UL, 0x3c954c66UL, 0xc313a8e5UL, 0x3ff3c32dUL, - 0x375d29c3UL, 0xbc9efff8UL, 0x44ede173UL, 0x3ff3d0e5UL, 0x8c284c71UL, - 0x3c7fe8d0UL, 0x4c123422UL, 0x3ff3dea6UL, 0x11f09ebcUL, 0x3c8ada09UL, - 0xdf1c5175UL, 0x3ff3ec70UL, 0x7b8c9bcaUL, 0xbc8af663UL, 0x04ac801cUL, - 0x3ff3fa45UL, 0xf956f9f3UL, 0xbc97d023UL, 0xc367a024UL, 0x3ff40822UL, - 0xb6f4d048UL, 0x3c8bddf8UL, 0x21f72e2aUL, 0x3ff4160aUL, 0x1c309278UL, - 0xbc5ef369UL, 0x2709468aUL, 0x3ff423fbUL, 0xc0b314ddUL, 0xbc98462dUL, - 0xd950a897UL, 0x3ff431f5UL, 0xe35f7999UL, 0xbc81c7ddUL, 0x3f84b9d4UL, - 0x3ff43ffaUL, 0x9704c003UL, 0x3c8880beUL, 0x6061892dUL, 0x3ff44e08UL, - 0x04ef80d0UL, 0x3c489b7aUL, 0x42a7d232UL, 0x3ff45c20UL, 0x82fb1f8eUL, - 0xbc686419UL, 0xed1d0057UL, 0x3ff46a41UL, 0xd1648a76UL, 0x3c9c944bUL, - 0x668b3237UL, 0x3ff4786dUL, 0xed445733UL, 0xbc9c20f0UL, 0xb5c13cd0UL, - 0x3ff486a2UL, 0xb69062f0UL, 0x3c73c1a3UL, 
0xe192aed2UL, 0x3ff494e1UL, - 0x5e499ea0UL, 0xbc83b289UL, 0xf0d7d3deUL, 0x3ff4a32aUL, 0xf3d1be56UL, - 0x3c99cb62UL, 0xea6db7d7UL, 0x3ff4b17dUL, 0x7f2897f0UL, 0xbc8125b8UL, - 0xd5362a27UL, 0x3ff4bfdaUL, 0xafec42e2UL, 0x3c7d4397UL, 0xb817c114UL, - 0x3ff4ce41UL, 0x690abd5dUL, 0x3c905e29UL, 0x99fddd0dUL, 0x3ff4dcb2UL, - 0xbc6a7833UL, 0x3c98ecdbUL, 0x81d8abffUL, 0x3ff4eb2dUL, 0x2e5d7a52UL, - 0xbc95257dUL, 0x769d2ca7UL, 0x3ff4f9b2UL, 0xd25957e3UL, 0xbc94b309UL, - 0x7f4531eeUL, 0x3ff50841UL, 0x49b7465fUL, 0x3c7a249bUL, 0xa2cf6642UL, - 0x3ff516daUL, 0x69bd93efUL, 0xbc8f7685UL, 0xe83f4eefUL, 0x3ff5257dUL, - 0x43efef71UL, 0xbc7c998dUL, 0x569d4f82UL, 0x3ff5342bUL, 0x1db13cadUL, - 0xbc807abeUL, 0xf4f6ad27UL, 0x3ff542e2UL, 0x192d5f7eUL, 0x3c87926dUL, - 0xca5d920fUL, 0x3ff551a4UL, 0xefede59bUL, 0xbc8d689cUL, 0xdde910d2UL, - 0x3ff56070UL, 0x168eebf0UL, 0xbc90fb6eUL, 0x36b527daUL, 0x3ff56f47UL, - 0x011d93adUL, 0x3c99bb2cUL, 0xdbe2c4cfUL, 0x3ff57e27UL, 0x8a57b9c4UL, - 0xbc90b98cUL, 0xd497c7fdUL, 0x3ff58d12UL, 0x5b9a1de8UL, 0x3c8295e1UL, - 0x27ff07ccUL, 0x3ff59c08UL, 0xe467e60fUL, 0xbc97e2ceUL, 0xdd485429UL, - 0x3ff5ab07UL, 0x054647adUL, 0x3c96324cUL, 0xfba87a03UL, 0x3ff5ba11UL, - 0x4c233e1aUL, 0xbc9b77a1UL, 0x8a5946b7UL, 0x3ff5c926UL, 0x816986a2UL, - 0x3c3c4b1bUL, 0x90998b93UL, 0x3ff5d845UL, 0xa8b45643UL, 0xbc9cd6a7UL, - 0x15ad2148UL, 0x3ff5e76fUL, 0x3080e65eUL, 0x3c9ba6f9UL, 0x20dceb71UL, - 0x3ff5f6a3UL, 0xe3cdcf92UL, 0xbc89eaddUL, 0xb976dc09UL, 0x3ff605e1UL, - 0x9b56de47UL, 0xbc93e242UL, 0xe6cdf6f4UL, 0x3ff6152aUL, 0x4ab84c27UL, - 0x3c9e4b3eUL, 0xb03a5585UL, 0x3ff6247eUL, 0x7e40b497UL, 0xbc9383c1UL, - 0x1d1929fdUL, 0x3ff633ddUL, 0xbeb964e5UL, 0x3c984710UL, 0x34ccc320UL, - 0x3ff64346UL, 0x759d8933UL, 0xbc8c483cUL, 0xfebc8fb7UL, 0x3ff652b9UL, - 0xc9a73e09UL, 0xbc9ae3d5UL, 0x82552225UL, 0x3ff66238UL, 0x87591c34UL, - 0xbc9bb609UL, 0xc70833f6UL, 0x3ff671c1UL, 0x586c6134UL, 0xbc8e8732UL, - 0xd44ca973UL, 0x3ff68155UL, 0x44f73e65UL, 0x3c6038aeUL, 0xb19e9538UL, - 0x3ff690f4UL, 
0x9aeb445dUL, 0x3c8804bdUL, 0x667f3bcdUL, 0x3ff6a09eUL, - 0x13b26456UL, 0xbc9bdd34UL, 0xfa75173eUL, 0x3ff6b052UL, 0x2c9a9d0eUL, - 0x3c7a38f5UL, 0x750bdabfUL, 0x3ff6c012UL, 0x67ff0b0dUL, 0xbc728956UL, - 0xddd47645UL, 0x3ff6cfdcUL, 0xb6f17309UL, 0x3c9c7aa9UL, 0x3c651a2fUL, - 0x3ff6dfb2UL, 0x683c88abUL, 0xbc6bbe3aUL, 0x98593ae5UL, 0x3ff6ef92UL, - 0x9e1ac8b2UL, 0xbc90b974UL, 0xf9519484UL, 0x3ff6ff7dUL, 0x25860ef6UL, - 0xbc883c0fUL, 0x66f42e87UL, 0x3ff70f74UL, 0xd45aa65fUL, 0x3c59d644UL, - 0xe8ec5f74UL, 0x3ff71f75UL, 0x86887a99UL, 0xbc816e47UL, 0x86ead08aUL, - 0x3ff72f82UL, 0x2cd62c72UL, 0xbc920aa0UL, 0x48a58174UL, 0x3ff73f9aUL, - 0x6c65d53cUL, 0xbc90a8d9UL, 0x35d7cbfdUL, 0x3ff74fbdUL, 0x618a6e1cUL, - 0x3c9047fdUL, 0x564267c9UL, 0x3ff75febUL, 0x57316dd3UL, 0xbc902459UL, - 0xb1ab6e09UL, 0x3ff77024UL, 0x169147f8UL, 0x3c9b7877UL, 0x4fde5d3fUL, - 0x3ff78069UL, 0x0a02162dUL, 0x3c9866b8UL, 0x38ac1cf6UL, 0x3ff790b9UL, - 0x62aadd3eUL, 0x3c9349a8UL, 0x73eb0187UL, 0x3ff7a114UL, 0xee04992fUL, - 0xbc841577UL, 0x0976cfdbUL, 0x3ff7b17bUL, 0x8468dc88UL, 0xbc9bebb5UL, - 0x0130c132UL, 0x3ff7c1edUL, 0xd1164dd6UL, 0x3c9f124cUL, 0x62ff86f0UL, - 0x3ff7d26aUL, 0xfb72b8b4UL, 0x3c91bddbUL, 0x36cf4e62UL, 0x3ff7e2f3UL, - 0xba15797eUL, 0x3c705d02UL, 0x8491c491UL, 0x3ff7f387UL, 0xcf9311aeUL, - 0xbc807f11UL, 0x543e1a12UL, 0x3ff80427UL, 0x626d972bUL, 0xbc927c86UL, - 0xadd106d9UL, 0x3ff814d2UL, 0x0d151d4dUL, 0x3c946437UL, 0x994cce13UL, - 0x3ff82589UL, 0xd41532d8UL, 0xbc9d4c1dUL, 0x1eb941f7UL, 0x3ff8364cUL, - 0x31df2bd5UL, 0x3c999b9aUL, 0x4623c7adUL, 0x3ff8471aUL, 0xa341cdfbUL, - 0xbc88d684UL, 0x179f5b21UL, 0x3ff857f4UL, 0xf8b216d0UL, 0xbc5ba748UL, - 0x9b4492edUL, 0x3ff868d9UL, 0x9bd4f6baUL, 0xbc9fc6f8UL, 0xd931a436UL, - 0x3ff879caUL, 0xd2db47bdUL, 0x3c85d2d7UL, 0xd98a6699UL, 0x3ff88ac7UL, - 0xf37cb53aUL, 0x3c9994c2UL, 0xa478580fUL, 0x3ff89bd0UL, 0x4475202aUL, - 0x3c9d5395UL, 0x422aa0dbUL, 0x3ff8ace5UL, 0x56864b27UL, 0x3c96e9f1UL, - 0xbad61778UL, 0x3ff8be05UL, 0xfc43446eUL, 0x3c9ecb5eUL, 0x16b5448cUL, 
- 0x3ff8cf32UL, 0x32e9e3aaUL, 0xbc70d55eUL, 0x5e0866d9UL, 0x3ff8e06aUL, - 0x6fc9b2e6UL, 0xbc97114aUL, 0x99157736UL, 0x3ff8f1aeUL, 0xa2e3976cUL, - 0x3c85cc13UL, 0xd0282c8aUL, 0x3ff902feUL, 0x85fe3fd2UL, 0x3c9592caUL, - 0x0b91ffc6UL, 0x3ff9145bUL, 0x2e582524UL, 0xbc9dd679UL, 0x53aa2fe2UL, - 0x3ff925c3UL, 0xa639db7fUL, 0xbc83455fUL, 0xb0cdc5e5UL, 0x3ff93737UL, - 0x81b57ebcUL, 0xbc675fc7UL, 0x2b5f98e5UL, 0x3ff948b8UL, 0x797d2d99UL, - 0xbc8dc3d6UL, 0xcbc8520fUL, 0x3ff95a44UL, 0x96a5f039UL, 0xbc764b7cUL, - 0x9a7670b3UL, 0x3ff96bddUL, 0x7f19c896UL, 0xbc5ba596UL, 0x9fde4e50UL, - 0x3ff97d82UL, 0x7c1b85d1UL, 0xbc9d185bUL, 0xe47a22a2UL, 0x3ff98f33UL, - 0xa24c78ecUL, 0x3c7cabdaUL, 0x70ca07baUL, 0x3ff9a0f1UL, 0x91cee632UL, - 0xbc9173bdUL, 0x4d53fe0dUL, 0x3ff9b2bbUL, 0x4df6d518UL, 0xbc9dd84eUL, - 0x82a3f090UL, 0x3ff9c491UL, 0xb071f2beUL, 0x3c7c7c46UL, 0x194bb8d5UL, - 0x3ff9d674UL, 0xa3dd8233UL, 0xbc9516beUL, 0x19e32323UL, 0x3ff9e863UL, - 0x78e64c6eUL, 0x3c7824caUL, 0x8d07f29eUL, 0x3ff9fa5eUL, 0xaaf1faceUL, - 0xbc84a9ceUL, 0x7b5de565UL, 0x3ffa0c66UL, 0x5d1cd533UL, 0xbc935949UL, - 0xed8eb8bbUL, 0x3ffa1e7aUL, 0xee8be70eUL, 0x3c9c6618UL, 0xec4a2d33UL, - 0x3ffa309bUL, 0x7ddc36abUL, 0x3c96305cUL, 0x80460ad8UL, 0x3ffa42c9UL, - 0x589fb120UL, 0xbc9aa780UL, 0xb23e255dUL, 0x3ffa5503UL, 0xdb8d41e1UL, - 0xbc9d2f6eUL, 0x8af46052UL, 0x3ffa674aUL, 0x30670366UL, 0x3c650f56UL, - 0x1330b358UL, 0x3ffa799eUL, 0xcac563c7UL, 0x3c9bcb7eUL, 0x53c12e59UL, - 0x3ffa8bfeUL, 0xb2ba15a9UL, 0xbc94f867UL, 0x5579fdbfUL, 0x3ffa9e6bUL, - 0x0ef7fd31UL, 0x3c90fac9UL, 0x21356ebaUL, 0x3ffab0e5UL, 0xdae94545UL, - 0x3c889c31UL, 0xbfd3f37aUL, 0x3ffac36bUL, 0xcae76cd0UL, 0xbc8f9234UL, - 0x3a3c2774UL, 0x3ffad5ffUL, 0xb6b1b8e5UL, 0x3c97ef3bUL, 0x995ad3adUL, - 0x3ffae89fUL, 0x345dcc81UL, 0x3c97a1cdUL, 0xe622f2ffUL, 0x3ffafb4cUL, - 0x0f315ecdUL, 0xbc94b2fcUL, 0x298db666UL, 0x3ffb0e07UL, 0x4c80e425UL, - 0xbc9bdef5UL, 0x6c9a8952UL, 0x3ffb20ceUL, 0x4a0756ccUL, 0x3c94dd02UL, - 0xb84f15fbUL, 0x3ffb33a2UL, 0x3084d708UL, 
0xbc62805eUL, 0x15b749b1UL, - 0x3ffb4684UL, 0xe9df7c90UL, 0xbc7f763dUL, 0x8de5593aUL, 0x3ffb5972UL, - 0xbbba6de3UL, 0xbc9c71dfUL, 0x29f1c52aUL, 0x3ffb6c6eUL, 0x52883f6eUL, - 0x3c92a8f3UL, 0xf2fb5e47UL, 0x3ffb7f76UL, 0x7e54ac3bUL, 0xbc75584fUL, - 0xf22749e4UL, 0x3ffb928cUL, 0x54cb65c6UL, 0xbc9b7216UL, 0x30a1064aUL, - 0x3ffba5b0UL, 0x0e54292eUL, 0xbc9efcd3UL, 0xb79a6f1fUL, 0x3ffbb8e0UL, - 0xc9696205UL, 0xbc3f52d1UL, 0x904bc1d2UL, 0x3ffbcc1eUL, 0x7a2d9e84UL, - 0x3c823dd0UL, 0xc3f3a207UL, 0x3ffbdf69UL, 0x60ea5b53UL, 0xbc3c2623UL, - 0x5bd71e09UL, 0x3ffbf2c2UL, 0x3f6b9c73UL, 0xbc9efdcaUL, 0x6141b33dUL, - 0x3ffc0628UL, 0xa1fbca34UL, 0xbc8d8a5aUL, 0xdd85529cUL, 0x3ffc199bUL, - 0x895048ddUL, 0x3c811065UL, 0xd9fa652cUL, 0x3ffc2d1cUL, 0x17c8a5d7UL, - 0xbc96e516UL, 0x5fffd07aUL, 0x3ffc40abUL, 0xe083c60aUL, 0x3c9b4537UL, - 0x78fafb22UL, 0x3ffc5447UL, 0x2493b5afUL, 0x3c912f07UL, 0x2e57d14bUL, - 0x3ffc67f1UL, 0xff483cadUL, 0x3c92884dUL, 0x8988c933UL, 0x3ffc7ba8UL, - 0xbe255559UL, 0xbc8e76bbUL, 0x9406e7b5UL, 0x3ffc8f6dUL, 0x48805c44UL, - 0x3c71acbcUL, 0x5751c4dbUL, 0x3ffca340UL, 0xd10d08f5UL, 0xbc87f2beUL, - 0xdcef9069UL, 0x3ffcb720UL, 0xd1e949dbUL, 0x3c7503cbUL, 0x2e6d1675UL, - 0x3ffccb0fUL, 0x86009092UL, 0xbc7d220fUL, 0x555dc3faUL, 0x3ffcdf0bUL, - 0x53829d72UL, 0xbc8dd83bUL, 0x5b5bab74UL, 0x3ffcf315UL, 0xb86dff57UL, - 0xbc9a08e9UL, 0x4a07897cUL, 0x3ffd072dUL, 0x43797a9cUL, 0xbc9cbc37UL, - 0x2b08c968UL, 0x3ffd1b53UL, 0x219a36eeUL, 0x3c955636UL, 0x080d89f2UL, - 0x3ffd2f87UL, 0x719d8578UL, 0xbc9d487bUL, 0xeacaa1d6UL, 0x3ffd43c8UL, - 0xbf5a1614UL, 0x3c93db53UL, 0xdcfba487UL, 0x3ffd5818UL, 0xd75b3707UL, - 0x3c82ed02UL, 0xe862e6d3UL, 0x3ffd6c76UL, 0x4a8165a0UL, 0x3c5fe87aUL, - 0x16c98398UL, 0x3ffd80e3UL, 0x8beddfe8UL, 0xbc911ec1UL, 0x71ff6075UL, - 0x3ffd955dUL, 0xbb9af6beUL, 0x3c9a052dUL, 0x03db3285UL, 0x3ffda9e6UL, - 0x696db532UL, 0x3c9c2300UL, 0xd63a8315UL, 0x3ffdbe7cUL, 0x926b8be4UL, - 0xbc9b76f1UL, 0xf301b460UL, 0x3ffdd321UL, 0x78f018c3UL, 0x3c92da57UL, - 0x641c0658UL, 
0x3ffde7d5UL, 0x8e79ba8fUL, 0xbc9ca552UL, 0x337b9b5fUL, - 0x3ffdfc97UL, 0x4f184b5cUL, 0xbc91a5cdUL, 0x6b197d17UL, 0x3ffe1167UL, - 0xbd5c7f44UL, 0xbc72b529UL, 0x14f5a129UL, 0x3ffe2646UL, 0x817a1496UL, - 0xbc97b627UL, 0x3b16ee12UL, 0x3ffe3b33UL, 0x31fdc68bUL, 0xbc99f4a4UL, - 0xe78b3ff6UL, 0x3ffe502eUL, 0x80a9cc8fUL, 0x3c839e89UL, 0x24676d76UL, - 0x3ffe6539UL, 0x7522b735UL, 0xbc863ff8UL, 0xfbc74c83UL, 0x3ffe7a51UL, - 0xca0c8de2UL, 0x3c92d522UL, 0x77cdb740UL, 0x3ffe8f79UL, 0x80b054b1UL, - 0xbc910894UL, 0xa2a490daUL, 0x3ffea4afUL, 0x179c2893UL, 0xbc9e9c23UL, - 0x867cca6eUL, 0x3ffeb9f4UL, 0x2293e4f2UL, 0x3c94832fUL, 0x2d8e67f1UL, - 0x3ffecf48UL, 0xb411ad8cUL, 0xbc9c93f3UL, 0xa2188510UL, 0x3ffee4aaUL, - 0xa487568dUL, 0x3c91c68dUL, 0xee615a27UL, 0x3ffefa1bUL, 0x86a4b6b0UL, - 0x3c9dc7f4UL, 0x1cb6412aUL, 0x3fff0f9cUL, 0x65181d45UL, 0xbc932200UL, - 0x376bba97UL, 0x3fff252bUL, 0xbf0d8e43UL, 0x3c93a1a5UL, 0x48dd7274UL, - 0x3fff3ac9UL, 0x3ed837deUL, 0xbc795a5aUL, 0x5b6e4540UL, 0x3fff5076UL, - 0x2dd8a18bUL, 0x3c99d3e1UL, 0x798844f8UL, 0x3fff6632UL, 0x3539343eUL, - 0x3c9fa37bUL, 0xad9cbe14UL, 0x3fff7bfdUL, 0xd006350aUL, 0xbc9dbb12UL, - 0x02243c89UL, 0x3fff91d8UL, 0xa779f689UL, 0xbc612ea8UL, 0x819e90d8UL, - 0x3fffa7c1UL, 0xf3a5931eUL, 0x3c874853UL, 0x3692d514UL, 0x3fffbdbaUL, - 0x15098eb6UL, 0xbc796773UL, 0x2b8f71f1UL, 0x3fffd3c2UL, 0x966579e7UL, - 0x3c62eb74UL, 0x6b2a23d9UL, 0x3fffe9d9UL, 0x7442fde3UL, 0x3c74a603UL, - 0xe78a6731UL, 0x3f55d87fUL, 0xd704a0c0UL, 0x3fac6b08UL, 0x6fba4e77UL, - 0x3f83b2abUL, 0xff82c58fUL, 0x3fcebfbdUL, 0xfefa39efUL, 0x3fe62e42UL, - 0x00000000UL, 0x00000000UL, 0xfefa39efUL, 0x3fe62e42UL, 0xfefa39efUL, - 0xbfe62e42UL, 0xf8000000UL, 0xffffffffUL, 0xf8000000UL, 0xffffffffUL, - 0x00000000UL, 0x80000000UL, 0x00000000UL, 0x00000000UL - -}; - -ATTRIBUTE_ALIGNED(8) static const double _DOUBLE2 = 2.0; -ATTRIBUTE_ALIGNED(8) static const double _DOUBLE0 = 0.0; -ATTRIBUTE_ALIGNED(8) static const double _DOUBLE0DOT5 = 0.5; - -//registers, -// input: xmm0, xmm1 -// 
scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7 -// eax, edx, ecx, ebx - -// Code generated by Intel C compiler for LIBM library - -void MacroAssembler::fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) { - Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; - Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2; - Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2, L_2TAG_PACKET_10_0_2, L_2TAG_PACKET_11_0_2; - Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, L_2TAG_PACKET_14_0_2, L_2TAG_PACKET_15_0_2; - Label L_2TAG_PACKET_16_0_2, L_2TAG_PACKET_17_0_2, L_2TAG_PACKET_18_0_2, L_2TAG_PACKET_19_0_2; - Label L_2TAG_PACKET_20_0_2, L_2TAG_PACKET_21_0_2, L_2TAG_PACKET_22_0_2, L_2TAG_PACKET_23_0_2; - Label L_2TAG_PACKET_24_0_2, L_2TAG_PACKET_25_0_2, L_2TAG_PACKET_26_0_2, L_2TAG_PACKET_27_0_2; - Label L_2TAG_PACKET_28_0_2, L_2TAG_PACKET_29_0_2, L_2TAG_PACKET_30_0_2, L_2TAG_PACKET_31_0_2; - Label L_2TAG_PACKET_32_0_2, L_2TAG_PACKET_33_0_2, L_2TAG_PACKET_34_0_2, L_2TAG_PACKET_35_0_2; - Label L_2TAG_PACKET_36_0_2, L_2TAG_PACKET_37_0_2, L_2TAG_PACKET_38_0_2, L_2TAG_PACKET_39_0_2; - Label L_2TAG_PACKET_40_0_2, L_2TAG_PACKET_41_0_2, L_2TAG_PACKET_42_0_2, L_2TAG_PACKET_43_0_2; - Label L_2TAG_PACKET_44_0_2, L_2TAG_PACKET_45_0_2, L_2TAG_PACKET_46_0_2, L_2TAG_PACKET_47_0_2; - Label L_2TAG_PACKET_48_0_2, L_2TAG_PACKET_49_0_2, L_2TAG_PACKET_50_0_2, L_2TAG_PACKET_51_0_2; - Label L_2TAG_PACKET_52_0_2, L_2TAG_PACKET_53_0_2, L_2TAG_PACKET_54_0_2, L_2TAG_PACKET_55_0_2; - Label L_2TAG_PACKET_56_0_2, L_2TAG_PACKET_57_0_2, L_2TAG_PACKET_58_0_2, start; - Label L_NOT_DOUBLE2, L_NOT_DOUBLE0DOT5; - - assert_different_registers(tmp, eax, ecx, edx); - - address static_const_table_pow = (address)_static_const_table_pow; - address DOUBLE2 = (address) &_DOUBLE2; - address 
DOUBLE0 = (address) &_DOUBLE0; - address DOUBLE0DOT5 = (address) &_DOUBLE0DOT5; - - subl(rsp, 120); - movl(Address(rsp, 64), tmp); - lea(tmp, ExternalAddress(static_const_table_pow)); - movsd(xmm0, Address(rsp, 128)); - movsd(xmm1, Address(rsp, 136)); - - // Special case: pow(x, 2.0) => x * x - ucomisd(xmm1, ExternalAddress(DOUBLE2)); - jccb(Assembler::notEqual, L_NOT_DOUBLE2); - jccb(Assembler::parity, L_NOT_DOUBLE2); - mulsd(xmm0, xmm0); - jmp(L_2TAG_PACKET_21_0_2); - - bind(L_NOT_DOUBLE2); - // Special case: pow(x, 0.5) => sqrt(x) - ucomisd(xmm1, ExternalAddress(DOUBLE0DOT5)); // For pow(x, y), check whether y == 0.5 - jccb(Assembler::notEqual, L_NOT_DOUBLE0DOT5); - jccb(Assembler::parity, L_NOT_DOUBLE0DOT5); - ucomisd(xmm0, ExternalAddress(DOUBLE0)); - // According to the API specs, pow(-0.0, 0.5) = 0.0 and sqrt(-0.0) = -0.0. - // So pow(-0.0, 0.5) shouldn't be replaced with sqrt(-0.0). - // -0.0/+0.0 are both excluded since floating-point comparison doesn't distinguish -0.0 from +0.0. 
- jccb(Assembler::belowEqual, L_NOT_DOUBLE0DOT5); // pow(x, 0.5) => sqrt(x) only for x > 0.0 - sqrtsd(xmm0, xmm0); - jmp(L_2TAG_PACKET_21_0_2); - - bind(L_NOT_DOUBLE0DOT5); - xorpd(xmm2, xmm2); - movl(eax, 16368); - pinsrw(xmm2, eax, 3); - movl(ecx, 1069088768); - movdl(xmm7, ecx); - movsd(Address(rsp, 16), xmm1); - xorpd(xmm1, xmm1); - movl(edx, 30704); - pinsrw(xmm1, edx, 3); - movsd(Address(rsp, 8), xmm0); - movdqu(xmm3, xmm0); - movl(edx, 8192); - movdl(xmm4, edx); - movdqu(xmm6, Address(tmp, 8240)); - pextrw(eax, xmm0, 3); - por(xmm0, xmm2); - psllq(xmm0, 5); - movsd(xmm2, Address(tmp, 8256)); - psrlq(xmm0, 34); - movl(edx, eax); - andl(edx, 32752); - subl(edx, 16368); - movl(ecx, edx); - sarl(edx, 31); - addl(ecx, edx); - xorl(ecx, edx); - rcpss(xmm0, xmm0); - psllq(xmm3, 12); - addl(ecx, 16); - bsrl(ecx, ecx); - psrlq(xmm3, 12); - movl(Address(rsp, 24), rsi); - subl(eax, 16); - cmpl(eax, 32736); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2); - movl(rsi, 0); - - bind(L_2TAG_PACKET_1_0_2); - mulss(xmm0, xmm7); - movl(edx, -1); - subl(ecx, 4); - shll(edx); - movdl(xmm5, edx); - por(xmm3, xmm1); - subl(eax, 16351); - cmpl(eax, 1); - jcc(Assembler::belowEqual, L_2TAG_PACKET_2_0_2); - paddd(xmm0, xmm4); - psllq(xmm5, 32); - movdl(edx, xmm0); - psllq(xmm0, 29); - pand(xmm5, xmm3); - - bind(L_2TAG_PACKET_3_0_2); - pand(xmm0, xmm6); - subsd(xmm3, xmm5); - subl(eax, 1); - sarl(eax, 4); - cvtsi2sdl(xmm7, eax); - mulpd(xmm5, xmm0); - - bind(L_2TAG_PACKET_4_0_2); - mulsd(xmm3, xmm0); - movdqu(xmm1, Address(tmp, 8272)); - subsd(xmm5, xmm2); - movdqu(xmm4, Address(tmp, 8288)); - movl(ecx, eax); - sarl(eax, 31); - addl(ecx, eax); - xorl(eax, ecx); - addl(eax, 1); - bsrl(eax, eax); - unpcklpd(xmm5, xmm3); - movdqu(xmm6, Address(tmp, 8304)); - addsd(xmm3, xmm5); - andl(edx, 16760832); - shrl(edx, 10); - addpd(xmm5, Address(tmp, edx, Address::times_1, -3616)); - movdqu(xmm0, Address(tmp, 8320)); - pshufd(xmm2, xmm3, 68); - mulsd(xmm3, xmm3); - mulpd(xmm1, xmm2); - 
mulpd(xmm4, xmm2); - addsd(xmm5, xmm7); - mulsd(xmm2, xmm3); - addpd(xmm6, xmm1); - mulsd(xmm3, xmm3); - addpd(xmm0, xmm4); - movsd(xmm1, Address(rsp, 16)); - movzwl(ecx, Address(rsp, 22)); - pshufd(xmm7, xmm5, 238); - movsd(xmm4, Address(tmp, 8368)); - mulpd(xmm6, xmm2); - pshufd(xmm3, xmm3, 68); - mulpd(xmm0, xmm2); - shll(eax, 4); - subl(eax, 15872); - andl(ecx, 32752); - addl(eax, ecx); - mulpd(xmm3, xmm6); - cmpl(eax, 624); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2); - xorpd(xmm6, xmm6); - movl(edx, 17080); - pinsrw(xmm6, edx, 3); - movdqu(xmm2, xmm1); - pand(xmm4, xmm1); - subsd(xmm1, xmm4); - mulsd(xmm4, xmm5); - addsd(xmm0, xmm7); - mulsd(xmm1, xmm5); - movdqu(xmm7, xmm6); - addsd(xmm6, xmm4); - addpd(xmm3, xmm0); - movdl(edx, xmm6); - subsd(xmm6, xmm7); - pshufd(xmm0, xmm3, 238); - subsd(xmm4, xmm6); - addsd(xmm0, xmm3); - movl(ecx, edx); - andl(edx, 255); - addl(edx, edx); - movdqu(xmm5, Address(tmp, edx, Address::times_8, 8384)); - addsd(xmm4, xmm1); - mulsd(xmm2, xmm0); - movdqu(xmm7, Address(tmp, 12480)); - movdqu(xmm3, Address(tmp, 12496)); - shll(ecx, 12); - xorl(ecx, rsi); - andl(ecx, -1048576); - movdl(xmm6, ecx); - addsd(xmm2, xmm4); - movsd(xmm1, Address(tmp, 12512)); - pshufd(xmm0, xmm2, 68); - pshufd(xmm4, xmm2, 68); - mulpd(xmm0, xmm0); - movl(rsi, Address(rsp, 24)); - mulpd(xmm7, xmm4); - pshufd(xmm6, xmm6, 17); - mulsd(xmm1, xmm2); - mulsd(xmm0, xmm0); - paddd(xmm5, xmm6); - addpd(xmm3, xmm7); - mulsd(xmm1, xmm5); - pshufd(xmm6, xmm5, 238); - mulpd(xmm0, xmm3); - addsd(xmm1, xmm6); - pshufd(xmm3, xmm0, 238); - mulsd(xmm0, xmm5); - mulsd(xmm3, xmm5); - addsd(xmm0, xmm1); - addsd(xmm0, xmm3); - addsd(xmm0, xmm5); - movsd(Address(rsp, 0), xmm0); - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_6_0_2); - - bind(L_2TAG_PACKET_7_0_2); - movsd(xmm0, Address(rsp, 128)); - movsd(xmm1, Address(rsp, 136)); - mulsd(xmm0, xmm1); - movsd(Address(rsp, 0), xmm0); - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_6_0_2); - - bind(L_2TAG_PACKET_0_0_2); - 
addl(eax, 16); - movl(edx, 32752); - andl(edx, eax); - cmpl(edx, 32752); - jcc(Assembler::equal, L_2TAG_PACKET_8_0_2); - testl(eax, 32768); - jcc(Assembler::notEqual, L_2TAG_PACKET_9_0_2); - - bind(L_2TAG_PACKET_10_0_2); - movl(ecx, Address(rsp, 16)); - xorl(edx, edx); - testl(ecx, ecx); - movl(ecx, 1); - cmovl(Assembler::notEqual, edx, ecx); - orl(edx, Address(rsp, 20)); - cmpl(edx, 1072693248); - jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); - movsd(xmm0, Address(rsp, 8)); - movsd(xmm3, Address(rsp, 8)); - movdl(edx, xmm3); - psrlq(xmm3, 32); - movdl(ecx, xmm3); - orl(edx, ecx); - cmpl(edx, 0); - jcc(Assembler::equal, L_2TAG_PACKET_11_0_2); - xorpd(xmm3, xmm3); - movl(eax, 18416); - pinsrw(xmm3, eax, 3); - mulsd(xmm0, xmm3); - xorpd(xmm2, xmm2); - movl(eax, 16368); - pinsrw(xmm2, eax, 3); - movdqu(xmm3, xmm0); - pextrw(eax, xmm0, 3); - por(xmm0, xmm2); - movl(ecx, 18416); - psllq(xmm0, 5); - movsd(xmm2, Address(tmp, 8256)); - psrlq(xmm0, 34); - rcpss(xmm0, xmm0); - psllq(xmm3, 12); - movdqu(xmm6, Address(tmp, 8240)); - psrlq(xmm3, 12); - mulss(xmm0, xmm7); - movl(edx, -1024); - movdl(xmm5, edx); - por(xmm3, xmm1); - paddd(xmm0, xmm4); - psllq(xmm5, 32); - movdl(edx, xmm0); - psllq(xmm0, 29); - pand(xmm5, xmm3); - movl(rsi, 0); - pand(xmm0, xmm6); - subsd(xmm3, xmm5); - andl(eax, 32752); - subl(eax, 18416); - sarl(eax, 4); - cvtsi2sdl(xmm7, eax); - mulpd(xmm5, xmm0); - jmp(L_2TAG_PACKET_4_0_2); - - bind(L_2TAG_PACKET_12_0_2); - movl(ecx, Address(rsp, 16)); - xorl(edx, edx); - testl(ecx, ecx); - movl(ecx, 1); - cmovl(Assembler::notEqual, edx, ecx); - orl(edx, Address(rsp, 20)); - cmpl(edx, 1072693248); - jcc(Assembler::equal, L_2TAG_PACKET_7_0_2); - movsd(xmm0, Address(rsp, 8)); - movsd(xmm3, Address(rsp, 8)); - movdl(edx, xmm3); - psrlq(xmm3, 32); - movdl(ecx, xmm3); - orl(edx, ecx); - cmpl(edx, 0); - jcc(Assembler::equal, L_2TAG_PACKET_11_0_2); - xorpd(xmm3, xmm3); - movl(eax, 18416); - pinsrw(xmm3, eax, 3); - mulsd(xmm0, xmm3); - xorpd(xmm2, xmm2); - movl(eax, 
16368); - pinsrw(xmm2, eax, 3); - movdqu(xmm3, xmm0); - pextrw(eax, xmm0, 3); - por(xmm0, xmm2); - movl(ecx, 18416); - psllq(xmm0, 5); - movsd(xmm2, Address(tmp, 8256)); - psrlq(xmm0, 34); - rcpss(xmm0, xmm0); - psllq(xmm3, 12); - movdqu(xmm6, Address(tmp, 8240)); - psrlq(xmm3, 12); - mulss(xmm0, xmm7); - movl(edx, -1024); - movdl(xmm5, edx); - por(xmm3, xmm1); - paddd(xmm0, xmm4); - psllq(xmm5, 32); - movdl(edx, xmm0); - psllq(xmm0, 29); - pand(xmm5, xmm3); - movl(rsi, INT_MIN); - pand(xmm0, xmm6); - subsd(xmm3, xmm5); - andl(eax, 32752); - subl(eax, 18416); - sarl(eax, 4); - cvtsi2sdl(xmm7, eax); - mulpd(xmm5, xmm0); - jmp(L_2TAG_PACKET_4_0_2); - - bind(L_2TAG_PACKET_5_0_2); - cmpl(eax, 0); - jcc(Assembler::less, L_2TAG_PACKET_13_0_2); - cmpl(eax, 752); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_14_0_2); - - bind(L_2TAG_PACKET_15_0_2); - addsd(xmm0, xmm7); - movsd(xmm2, Address(tmp, 12544)); - addpd(xmm3, xmm0); - xorpd(xmm6, xmm6); - movl(eax, 17080); - pinsrw(xmm6, eax, 3); - pshufd(xmm0, xmm3, 238); - addsd(xmm0, xmm3); - movdqu(xmm3, xmm5); - addsd(xmm5, xmm0); - movdqu(xmm4, xmm2); - subsd(xmm3, xmm5); - movdqu(xmm7, xmm5); - pand(xmm5, xmm2); - movdqu(xmm2, xmm1); - pand(xmm4, xmm1); - subsd(xmm7, xmm5); - addsd(xmm0, xmm3); - subsd(xmm1, xmm4); - mulsd(xmm4, xmm5); - addsd(xmm0, xmm7); - mulsd(xmm2, xmm0); - movdqu(xmm7, xmm6); - mulsd(xmm1, xmm5); - addsd(xmm6, xmm4); - movdl(eax, xmm6); - subsd(xmm6, xmm7); - addsd(xmm2, xmm1); - movdqu(xmm7, Address(tmp, 12480)); - movdqu(xmm3, Address(tmp, 12496)); - subsd(xmm4, xmm6); - pextrw(edx, xmm6, 3); - movl(ecx, eax); - andl(eax, 255); - addl(eax, eax); - movdqu(xmm5, Address(tmp, eax, Address::times_8, 8384)); - addsd(xmm2, xmm4); - sarl(ecx, 8); - movl(eax, ecx); - sarl(ecx, 1); - subl(eax, ecx); - shll(ecx, 20); - xorl(ecx, rsi); - movdl(xmm6, ecx); - movsd(xmm1, Address(tmp, 12512)); - andl(edx, 32767); - cmpl(edx, 16529); - jcc(Assembler::above, L_2TAG_PACKET_14_0_2); - pshufd(xmm0, xmm2, 68); - 
pshufd(xmm4, xmm2, 68); - mulpd(xmm0, xmm0); - mulpd(xmm7, xmm4); - pshufd(xmm6, xmm6, 17); - mulsd(xmm1, xmm2); - mulsd(xmm0, xmm0); - paddd(xmm5, xmm6); - addpd(xmm3, xmm7); - mulsd(xmm1, xmm5); - pshufd(xmm6, xmm5, 238); - mulpd(xmm0, xmm3); - addsd(xmm1, xmm6); - pshufd(xmm3, xmm0, 238); - mulsd(xmm0, xmm5); - mulsd(xmm3, xmm5); - shll(eax, 4); - xorpd(xmm4, xmm4); - addl(eax, 16368); - pinsrw(xmm4, eax, 3); - addsd(xmm0, xmm1); - movl(rsi, Address(rsp, 24)); - addsd(xmm0, xmm3); - movdqu(xmm1, xmm0); - addsd(xmm0, xmm5); - mulsd(xmm0, xmm4); - pextrw(eax, xmm0, 3); - andl(eax, 32752); - jcc(Assembler::equal, L_2TAG_PACKET_16_0_2); - cmpl(eax, 32752); - jcc(Assembler::equal, L_2TAG_PACKET_17_0_2); - - bind(L_2TAG_PACKET_18_0_2); - movsd(Address(rsp, 0), xmm0); - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_6_0_2); - - bind(L_2TAG_PACKET_8_0_2); - movsd(xmm1, Address(rsp, 16)); - movsd(xmm0, Address(rsp, 8)); - movdqu(xmm2, xmm0); - movdl(eax, xmm2); - psrlq(xmm2, 20); - movdl(edx, xmm2); - orl(eax, edx); - jcc(Assembler::equal, L_2TAG_PACKET_19_0_2); - addsd(xmm0, xmm0); - movdl(eax, xmm1); - psrlq(xmm1, 32); - movdl(edx, xmm1); - movl(ecx, edx); - addl(edx, edx); - orl(eax, edx); - jcc(Assembler::equal, L_2TAG_PACKET_20_0_2); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_20_0_2); - xorpd(xmm0, xmm0); - movl(eax, 16368); - pinsrw(xmm0, eax, 3); - movl(edx, 29); - jmp(L_2TAG_PACKET_21_0_2); - - bind(L_2TAG_PACKET_22_0_2); - movsd(xmm0, Address(rsp, 16)); - addpd(xmm0, xmm0); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_19_0_2); - movdl(eax, xmm1); - movdqu(xmm2, xmm1); - psrlq(xmm1, 32); - movdl(edx, xmm1); - movl(ecx, edx); - addl(edx, edx); - orl(eax, edx); - jcc(Assembler::equal, L_2TAG_PACKET_23_0_2); - pextrw(eax, xmm2, 3); - andl(eax, 32752); - cmpl(eax, 32752); - jcc(Assembler::notEqual, L_2TAG_PACKET_24_0_2); - movdl(eax, xmm2); - psrlq(xmm2, 20); - movdl(edx, xmm2); - orl(eax, edx); - jcc(Assembler::notEqual, L_2TAG_PACKET_22_0_2); - - 
bind(L_2TAG_PACKET_24_0_2); - pextrw(eax, xmm0, 3); - testl(eax, 32768); - jcc(Assembler::notEqual, L_2TAG_PACKET_25_0_2); - testl(ecx, INT_MIN); - jcc(Assembler::notEqual, L_2TAG_PACKET_26_0_2); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_27_0_2); - movsd(xmm1, Address(rsp, 16)); - movdl(eax, xmm1); - testl(eax, 1); - jcc(Assembler::notEqual, L_2TAG_PACKET_28_0_2); - testl(eax, 2); - jcc(Assembler::notEqual, L_2TAG_PACKET_29_0_2); - jmp(L_2TAG_PACKET_28_0_2); - - bind(L_2TAG_PACKET_25_0_2); - shrl(ecx, 20); - andl(ecx, 2047); - cmpl(ecx, 1075); - jcc(Assembler::above, L_2TAG_PACKET_28_0_2); - jcc(Assembler::equal, L_2TAG_PACKET_30_0_2); - cmpl(ecx, 1074); - jcc(Assembler::above, L_2TAG_PACKET_27_0_2); - cmpl(ecx, 1023); - jcc(Assembler::below, L_2TAG_PACKET_28_0_2); - movsd(xmm1, Address(rsp, 16)); - movl(eax, 17208); - xorpd(xmm3, xmm3); - pinsrw(xmm3, eax, 3); - movdqu(xmm4, xmm3); - addsd(xmm3, xmm1); - subsd(xmm4, xmm3); - addsd(xmm1, xmm4); - pextrw(eax, xmm1, 3); - andl(eax, 32752); - jcc(Assembler::notEqual, L_2TAG_PACKET_28_0_2); - movdl(eax, xmm3); - andl(eax, 1); - jcc(Assembler::equal, L_2TAG_PACKET_28_0_2); - - bind(L_2TAG_PACKET_29_0_2); - movsd(xmm1, Address(rsp, 16)); - pextrw(eax, xmm1, 3); - andl(eax, 32768); - jcc(Assembler::equal, L_2TAG_PACKET_18_0_2); - xorpd(xmm0, xmm0); - movl(eax, 32768); - pinsrw(xmm0, eax, 3); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_28_0_2); - movsd(xmm1, Address(rsp, 16)); - pextrw(eax, xmm1, 3); - andl(eax, 32768); - jcc(Assembler::notEqual, L_2TAG_PACKET_26_0_2); - - bind(L_2TAG_PACKET_31_0_2); - xorpd(xmm0, xmm0); - movl(eax, 32752); - pinsrw(xmm0, eax, 3); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_30_0_2); - movsd(xmm1, Address(rsp, 16)); - movdl(eax, xmm1); - andl(eax, 1); - jcc(Assembler::equal, L_2TAG_PACKET_28_0_2); - jmp(L_2TAG_PACKET_29_0_2); - - bind(L_2TAG_PACKET_32_0_2); - movdl(eax, xmm1); - psrlq(xmm1, 20); - movdl(edx, xmm1); - orl(eax, edx); - jcc(Assembler::equal, 
L_2TAG_PACKET_33_0_2); - movsd(xmm0, Address(rsp, 16)); - addsd(xmm0, xmm0); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_33_0_2); - movsd(xmm0, Address(rsp, 8)); - pextrw(eax, xmm0, 3); - cmpl(eax, 49136); - jcc(Assembler::notEqual, L_2TAG_PACKET_34_0_2); - movdl(ecx, xmm0); - psrlq(xmm0, 20); - movdl(edx, xmm0); - orl(ecx, edx); - jcc(Assembler::notEqual, L_2TAG_PACKET_34_0_2); - xorpd(xmm0, xmm0); - movl(eax, 32760); - pinsrw(xmm0, eax, 3); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_34_0_2); - movsd(xmm1, Address(rsp, 16)); - andl(eax, 32752); - subl(eax, 16368); - pextrw(edx, xmm1, 3); - xorpd(xmm0, xmm0); - xorl(eax, edx); - andl(eax, 32768); - jcc(Assembler::notEqual, L_2TAG_PACKET_18_0_2); - movl(ecx, 32752); - pinsrw(xmm0, ecx, 3); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_35_0_2); - movdl(eax, xmm1); - cmpl(edx, 17184); - jcc(Assembler::above, L_2TAG_PACKET_36_0_2); - testl(eax, 1); - jcc(Assembler::notEqual, L_2TAG_PACKET_37_0_2); - testl(eax, 2); - jcc(Assembler::equal, L_2TAG_PACKET_38_0_2); - jmp(L_2TAG_PACKET_39_0_2); - - bind(L_2TAG_PACKET_36_0_2); - testl(eax, 1); - jcc(Assembler::equal, L_2TAG_PACKET_38_0_2); - jmp(L_2TAG_PACKET_39_0_2); - - bind(L_2TAG_PACKET_9_0_2); - movsd(xmm2, Address(rsp, 8)); - movdl(eax, xmm2); - psrlq(xmm2, 31); - movdl(ecx, xmm2); - orl(eax, ecx); - jcc(Assembler::equal, L_2TAG_PACKET_11_0_2); - movsd(xmm1, Address(rsp, 16)); - pextrw(edx, xmm1, 3); - movdl(eax, xmm1); - movdqu(xmm2, xmm1); - psrlq(xmm2, 32); - movdl(ecx, xmm2); - addl(ecx, ecx); - orl(ecx, eax); - jcc(Assembler::equal, L_2TAG_PACKET_40_0_2); - andl(edx, 32752); - cmpl(edx, 32752); - jcc(Assembler::equal, L_2TAG_PACKET_32_0_2); - cmpl(edx, 17200); - jcc(Assembler::above, L_2TAG_PACKET_38_0_2); - cmpl(edx, 17184); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_35_0_2); - cmpl(edx, 16368); - jcc(Assembler::below, L_2TAG_PACKET_37_0_2); - movl(eax, 17208); - xorpd(xmm2, xmm2); - pinsrw(xmm2, eax, 3); - movdqu(xmm4, xmm2); - addsd(xmm2, 
xmm1); - subsd(xmm4, xmm2); - addsd(xmm1, xmm4); - pextrw(eax, xmm1, 3); - andl(eax, 32767); - jcc(Assembler::notEqual, L_2TAG_PACKET_37_0_2); - movdl(eax, xmm2); - andl(eax, 1); - jcc(Assembler::equal, L_2TAG_PACKET_38_0_2); - - bind(L_2TAG_PACKET_39_0_2); - xorpd(xmm1, xmm1); - movl(edx, 30704); - pinsrw(xmm1, edx, 3); - movsd(xmm2, Address(tmp, 8256)); - movsd(xmm4, Address(rsp, 8)); - pextrw(eax, xmm4, 3); - movl(edx, 8192); - movdl(xmm4, edx); - andl(eax, 32767); - subl(eax, 16); - jcc(Assembler::less, L_2TAG_PACKET_12_0_2); - movl(edx, eax); - andl(edx, 32752); - subl(edx, 16368); - movl(ecx, edx); - sarl(edx, 31); - addl(ecx, edx); - xorl(ecx, edx); - addl(ecx, 16); - bsrl(ecx, ecx); - movl(rsi, INT_MIN); - jmp(L_2TAG_PACKET_1_0_2); - - bind(L_2TAG_PACKET_37_0_2); - xorpd(xmm1, xmm1); - movl(eax, 32752); - pinsrw(xmm1, eax, 3); - xorpd(xmm0, xmm0); - mulsd(xmm0, xmm1); - movl(edx, 28); - jmp(L_2TAG_PACKET_21_0_2); - - bind(L_2TAG_PACKET_38_0_2); - xorpd(xmm1, xmm1); - movl(edx, 30704); - pinsrw(xmm1, edx, 3); - movsd(xmm2, Address(tmp, 8256)); - movsd(xmm4, Address(rsp, 8)); - pextrw(eax, xmm4, 3); - movl(edx, 8192); - movdl(xmm4, edx); - andl(eax, 32767); - subl(eax, 16); - jcc(Assembler::less, L_2TAG_PACKET_10_0_2); - movl(edx, eax); - andl(edx, 32752); - subl(edx, 16368); - movl(ecx, edx); - sarl(edx, 31); - addl(ecx, edx); - xorl(ecx, edx); - addl(ecx, 16); - bsrl(ecx, ecx); - movl(rsi, 0); - jmp(L_2TAG_PACKET_1_0_2); - - bind(L_2TAG_PACKET_23_0_2); - xorpd(xmm0, xmm0); - movl(eax, 16368); - pinsrw(xmm0, eax, 3); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_26_0_2); - xorpd(xmm0, xmm0); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_13_0_2); - addl(eax, 384); - cmpl(eax, 0); - jcc(Assembler::less, L_2TAG_PACKET_41_0_2); - mulsd(xmm5, xmm1); - addsd(xmm0, xmm7); - shrl(rsi, 31); - addpd(xmm3, xmm0); - pshufd(xmm0, xmm3, 238); - addsd(xmm3, xmm0); - movsd(xmm4, Address(tmp, rsi, Address::times_8, 12528)); - mulsd(xmm1, xmm3); - xorpd(xmm0, 
xmm0); - movl(eax, 16368); - shll(rsi, 15); - orl(eax, rsi); - pinsrw(xmm0, eax, 3); - addsd(xmm5, xmm1); - movl(rsi, Address(rsp, 24)); - mulsd(xmm5, xmm4); - addsd(xmm0, xmm5); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_41_0_2); - movl(rsi, Address(rsp, 24)); - xorpd(xmm0, xmm0); - movl(eax, 16368); - pinsrw(xmm0, eax, 3); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_40_0_2); - xorpd(xmm0, xmm0); - movl(eax, 16368); - pinsrw(xmm0, eax, 3); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_42_0_2); - xorpd(xmm0, xmm0); - movl(eax, 16368); - pinsrw(xmm0, eax, 3); - movl(edx, 26); - jmp(L_2TAG_PACKET_21_0_2); - - bind(L_2TAG_PACKET_11_0_2); - movsd(xmm1, Address(rsp, 16)); - movdqu(xmm2, xmm1); - pextrw(eax, xmm1, 3); - andl(eax, 32752); - cmpl(eax, 32752); - jcc(Assembler::notEqual, L_2TAG_PACKET_43_0_2); - movdl(eax, xmm2); - psrlq(xmm2, 20); - movdl(edx, xmm2); - orl(eax, edx); - jcc(Assembler::notEqual, L_2TAG_PACKET_22_0_2); - - bind(L_2TAG_PACKET_43_0_2); - movdl(eax, xmm1); - psrlq(xmm1, 32); - movdl(edx, xmm1); - movl(ecx, edx); - addl(edx, edx); - orl(eax, edx); - jcc(Assembler::equal, L_2TAG_PACKET_42_0_2); - shrl(edx, 21); - cmpl(edx, 1075); - jcc(Assembler::above, L_2TAG_PACKET_44_0_2); - jcc(Assembler::equal, L_2TAG_PACKET_45_0_2); - cmpl(edx, 1023); - jcc(Assembler::below, L_2TAG_PACKET_44_0_2); - movsd(xmm1, Address(rsp, 16)); - movl(eax, 17208); - xorpd(xmm3, xmm3); - pinsrw(xmm3, eax, 3); - movdqu(xmm4, xmm3); - addsd(xmm3, xmm1); - subsd(xmm4, xmm3); - addsd(xmm1, xmm4); - pextrw(eax, xmm1, 3); - andl(eax, 32752); - jcc(Assembler::notEqual, L_2TAG_PACKET_44_0_2); - movdl(eax, xmm3); - andl(eax, 1); - jcc(Assembler::equal, L_2TAG_PACKET_44_0_2); - - bind(L_2TAG_PACKET_46_0_2); - movsd(xmm0, Address(rsp, 8)); - testl(ecx, INT_MIN); - jcc(Assembler::notEqual, L_2TAG_PACKET_47_0_2); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_45_0_2); - movsd(xmm1, Address(rsp, 16)); - movdl(eax, xmm1); - testl(eax, 1); - 
jcc(Assembler::notEqual, L_2TAG_PACKET_46_0_2); - - bind(L_2TAG_PACKET_44_0_2); - testl(ecx, INT_MIN); - jcc(Assembler::equal, L_2TAG_PACKET_26_0_2); - xorpd(xmm0, xmm0); - - bind(L_2TAG_PACKET_47_0_2); - movl(eax, 16368); - xorpd(xmm1, xmm1); - pinsrw(xmm1, eax, 3); - divsd(xmm1, xmm0); - movdqu(xmm0, xmm1); - movl(edx, 27); - jmp(L_2TAG_PACKET_21_0_2); - - bind(L_2TAG_PACKET_14_0_2); - movsd(xmm2, Address(rsp, 8)); - movsd(xmm6, Address(rsp, 16)); - pextrw(eax, xmm2, 3); - pextrw(edx, xmm6, 3); - movl(ecx, 32752); - andl(ecx, edx); - cmpl(ecx, 32752); - jcc(Assembler::equal, L_2TAG_PACKET_48_0_2); - andl(eax, 32752); - subl(eax, 16368); - xorl(edx, eax); - testl(edx, 32768); - jcc(Assembler::notEqual, L_2TAG_PACKET_49_0_2); - - bind(L_2TAG_PACKET_50_0_2); - movl(eax, 32736); - pinsrw(xmm0, eax, 3); - shrl(rsi, 16); - orl(eax, rsi); - pinsrw(xmm1, eax, 3); - movl(rsi, Address(rsp, 24)); - mulsd(xmm0, xmm1); - - bind(L_2TAG_PACKET_17_0_2); - movl(edx, 24); - - bind(L_2TAG_PACKET_21_0_2); - movsd(Address(rsp, 0), xmm0); - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_6_0_2); - - bind(L_2TAG_PACKET_49_0_2); - movl(eax, 16); - pinsrw(xmm0, eax, 3); - mulsd(xmm0, xmm0); - testl(rsi, INT_MIN); - jcc(Assembler::equal, L_2TAG_PACKET_51_0_2); - movsd(xmm2, Address(tmp, 12560)); - xorpd(xmm0, xmm2); - - bind(L_2TAG_PACKET_51_0_2); - movl(rsi, Address(rsp, 24)); - movl(edx, 25); - jmp(L_2TAG_PACKET_21_0_2); - - bind(L_2TAG_PACKET_16_0_2); - pextrw(ecx, xmm5, 3); - pextrw(edx, xmm4, 3); - movl(eax, -1); - andl(ecx, 32752); - subl(ecx, 16368); - andl(edx, 32752); - addl(edx, ecx); - movl(ecx, -31); - sarl(edx, 4); - subl(ecx, edx); - jcc(Assembler::lessEqual, L_2TAG_PACKET_52_0_2); - cmpl(ecx, 20); - jcc(Assembler::above, L_2TAG_PACKET_53_0_2); - shll(eax); - - bind(L_2TAG_PACKET_52_0_2); - movdl(xmm0, eax); - psllq(xmm0, 32); - pand(xmm0, xmm5); - subsd(xmm5, xmm0); - addsd(xmm5, xmm1); - mulsd(xmm0, xmm4); - mulsd(xmm5, xmm4); - addsd(xmm0, xmm5); - - 
bind(L_2TAG_PACKET_53_0_2); - movl(edx, 25); - jmp(L_2TAG_PACKET_21_0_2); - - bind(L_2TAG_PACKET_2_0_2); - movzwl(ecx, Address(rsp, 22)); - movl(edx, INT_MIN); - movdl(xmm1, edx); - xorpd(xmm7, xmm7); - paddd(xmm0, xmm4); - psllq(xmm5, 32); - movdl(edx, xmm0); - psllq(xmm0, 29); - paddq(xmm1, xmm3); - pand(xmm5, xmm1); - andl(ecx, 32752); - cmpl(ecx, 16560); - jcc(Assembler::below, L_2TAG_PACKET_3_0_2); - pand(xmm0, xmm6); - subsd(xmm3, xmm5); - addl(eax, 16351); - shrl(eax, 4); - subl(eax, 1022); - cvtsi2sdl(xmm7, eax); - mulpd(xmm5, xmm0); - movsd(xmm4, Address(tmp, 0)); - mulsd(xmm3, xmm0); - movsd(xmm6, Address(tmp, 0)); - subsd(xmm5, xmm2); - movsd(xmm1, Address(tmp, 8)); - pshufd(xmm2, xmm3, 68); - unpcklpd(xmm5, xmm3); - addsd(xmm3, xmm5); - movsd(xmm0, Address(tmp, 8)); - andl(edx, 16760832); - shrl(edx, 10); - addpd(xmm7, Address(tmp, edx, Address::times_1, -3616)); - mulsd(xmm4, xmm5); - mulsd(xmm0, xmm5); - mulsd(xmm6, xmm2); - mulsd(xmm1, xmm2); - movdqu(xmm2, xmm5); - mulsd(xmm4, xmm5); - addsd(xmm5, xmm0); - movdqu(xmm0, xmm7); - addsd(xmm2, xmm3); - addsd(xmm7, xmm5); - mulsd(xmm6, xmm2); - subsd(xmm0, xmm7); - movdqu(xmm2, xmm7); - addsd(xmm7, xmm4); - addsd(xmm0, xmm5); - subsd(xmm2, xmm7); - addsd(xmm4, xmm2); - pshufd(xmm2, xmm5, 238); - movdqu(xmm5, xmm7); - addsd(xmm7, xmm2); - addsd(xmm4, xmm0); - movdqu(xmm0, Address(tmp, 8272)); - subsd(xmm5, xmm7); - addsd(xmm6, xmm4); - movdqu(xmm4, xmm7); - addsd(xmm5, xmm2); - addsd(xmm7, xmm1); - movdqu(xmm2, Address(tmp, 8336)); - subsd(xmm4, xmm7); - addsd(xmm6, xmm5); - addsd(xmm4, xmm1); - pshufd(xmm5, xmm7, 238); - movdqu(xmm1, xmm7); - addsd(xmm7, xmm5); - subsd(xmm1, xmm7); - addsd(xmm1, xmm5); - movdqu(xmm5, Address(tmp, 8352)); - pshufd(xmm3, xmm3, 68); - addsd(xmm6, xmm4); - addsd(xmm6, xmm1); - movdqu(xmm1, Address(tmp, 8304)); - mulpd(xmm0, xmm3); - mulpd(xmm2, xmm3); - pshufd(xmm4, xmm3, 68); - mulpd(xmm3, xmm3); - addpd(xmm0, xmm1); - addpd(xmm5, xmm2); - mulsd(xmm4, xmm3); - movsd(xmm2, 
Address(tmp, 16)); - mulpd(xmm3, xmm3); - movsd(xmm1, Address(rsp, 16)); - movzwl(ecx, Address(rsp, 22)); - mulpd(xmm0, xmm4); - pextrw(eax, xmm7, 3); - mulpd(xmm5, xmm4); - mulpd(xmm0, xmm3); - movsd(xmm4, Address(tmp, 8376)); - pand(xmm2, xmm7); - addsd(xmm5, xmm6); - subsd(xmm7, xmm2); - addpd(xmm5, xmm0); - andl(eax, 32752); - subl(eax, 16368); - andl(ecx, 32752); - cmpl(ecx, 32752); - jcc(Assembler::equal, L_2TAG_PACKET_48_0_2); - addl(ecx, eax); - cmpl(ecx, 16576); - jcc(Assembler::aboveEqual, L_2TAG_PACKET_54_0_2); - pshufd(xmm0, xmm5, 238); - pand(xmm4, xmm1); - movdqu(xmm3, xmm1); - addsd(xmm5, xmm0); - subsd(xmm1, xmm4); - xorpd(xmm6, xmm6); - movl(edx, 17080); - pinsrw(xmm6, edx, 3); - addsd(xmm7, xmm5); - mulsd(xmm4, xmm2); - mulsd(xmm1, xmm2); - movdqu(xmm5, xmm6); - mulsd(xmm3, xmm7); - addsd(xmm6, xmm4); - addsd(xmm1, xmm3); - movdqu(xmm7, Address(tmp, 12480)); - movdl(edx, xmm6); - subsd(xmm6, xmm5); - movdqu(xmm3, Address(tmp, 12496)); - movsd(xmm2, Address(tmp, 12512)); - subsd(xmm4, xmm6); - movl(ecx, edx); - andl(edx, 255); - addl(edx, edx); - movdqu(xmm5, Address(tmp, edx, Address::times_8, 8384)); - addsd(xmm4, xmm1); - pextrw(edx, xmm6, 3); - shrl(ecx, 8); - movl(eax, ecx); - shrl(ecx, 1); - subl(eax, ecx); - shll(ecx, 20); - movdl(xmm6, ecx); - pshufd(xmm0, xmm4, 68); - pshufd(xmm1, xmm4, 68); - mulpd(xmm0, xmm0); - mulpd(xmm7, xmm1); - pshufd(xmm6, xmm6, 17); - mulsd(xmm2, xmm4); - andl(edx, 32767); - cmpl(edx, 16529); - jcc(Assembler::above, L_2TAG_PACKET_14_0_2); - mulsd(xmm0, xmm0); - paddd(xmm5, xmm6); - addpd(xmm3, xmm7); - mulsd(xmm2, xmm5); - pshufd(xmm6, xmm5, 238); - mulpd(xmm0, xmm3); - addsd(xmm2, xmm6); - pshufd(xmm3, xmm0, 238); - addl(eax, 1023); - shll(eax, 20); - orl(eax, rsi); - movdl(xmm4, eax); - mulsd(xmm0, xmm5); - mulsd(xmm3, xmm5); - addsd(xmm0, xmm2); - psllq(xmm4, 32); - addsd(xmm0, xmm3); - movdqu(xmm1, xmm0); - addsd(xmm0, xmm5); - movl(rsi, Address(rsp, 24)); - mulsd(xmm0, xmm4); - pextrw(eax, xmm0, 3); - 
andl(eax, 32752); - jcc(Assembler::equal, L_2TAG_PACKET_16_0_2); - cmpl(eax, 32752); - jcc(Assembler::equal, L_2TAG_PACKET_17_0_2); - - bind(L_2TAG_PACKET_55_0_2); - movsd(Address(rsp, 0), xmm0); - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_6_0_2); - - bind(L_2TAG_PACKET_48_0_2); - movl(rsi, Address(rsp, 24)); - - bind(L_2TAG_PACKET_56_0_2); - movsd(xmm0, Address(rsp, 8)); - movsd(xmm1, Address(rsp, 16)); - addsd(xmm1, xmm1); - xorpd(xmm2, xmm2); - movl(eax, 49136); - pinsrw(xmm2, eax, 3); - addsd(xmm2, xmm0); - pextrw(eax, xmm2, 3); - cmpl(eax, 0); - jcc(Assembler::notEqual, L_2TAG_PACKET_57_0_2); - xorpd(xmm0, xmm0); - movl(eax, 32760); - pinsrw(xmm0, eax, 3); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_57_0_2); - movdl(edx, xmm1); - movdqu(xmm3, xmm1); - psrlq(xmm3, 20); - movdl(ecx, xmm3); - orl(ecx, edx); - jcc(Assembler::equal, L_2TAG_PACKET_58_0_2); - addsd(xmm1, xmm1); - movdqu(xmm0, xmm1); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_58_0_2); - pextrw(eax, xmm0, 3); - andl(eax, 32752); - pextrw(edx, xmm1, 3); - xorpd(xmm0, xmm0); - subl(eax, 16368); - xorl(eax, edx); - testl(eax, 32768); - jcc(Assembler::notEqual, L_2TAG_PACKET_18_0_2); - movl(edx, 32752); - pinsrw(xmm0, edx, 3); - jmp(L_2TAG_PACKET_18_0_2); - - bind(L_2TAG_PACKET_54_0_2); - pextrw(eax, xmm1, 3); - pextrw(ecx, xmm2, 3); - xorl(eax, ecx); - testl(eax, 32768); - jcc(Assembler::equal, L_2TAG_PACKET_50_0_2); - jmp(L_2TAG_PACKET_49_0_2); - - bind(L_2TAG_PACKET_6_0_2); - movl(tmp, Address(rsp, 64)); - -} diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_32_sin.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_32_sin.cpp deleted file mode 100644 index 492d596f84b46..0000000000000 --- a/src/hotspot/cpu/x86/macroAssembler_x86_32_sin.cpp +++ /dev/null @@ -1,1743 +0,0 @@ -/* -* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved. -* Intel Math Library (LIBM) Source Code -* -* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
-* -* This code is free software; you can redistribute it and/or modify it -* under the terms of the GNU General Public License version 2 only, as -* published by the Free Software Foundation. -* -* This code is distributed in the hope that it will be useful, but WITHOUT -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -* version 2 for more details (a copy is included in the LICENSE file that -* accompanied this code). -* -* You should have received a copy of the GNU General Public License version -* 2 along with this work; if not, write to the Free Software Foundation, -* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -* -* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -* or visit www.oracle.com if you need additional information or have any -* questions. -* -*/ - -#include "precompiled.hpp" -#include "asm/assembler.hpp" -#include "asm/assembler.inline.hpp" -#include "macroAssembler_x86.hpp" -#include "runtime/stubRoutines.hpp" -#include "stubRoutines_x86.hpp" -#include "utilities/globalDefinitions.hpp" - -/******************************************************************************/ -// ALGORITHM DESCRIPTION - SIN() -// --------------------- -// -// 1. RANGE REDUCTION -// -// We perform an initial range reduction from X to r with -// -// X =~= N * pi/32 + r -// -// so that |r| <= pi/64 + epsilon. We restrict inputs to those -// where |N| <= 932560. Beyond this, the range reduction is -// insufficiently accurate. For extremely small inputs, -// denormalization can occur internally, impacting performance. -// This means that the main path is actually only taken for -// 2^-252 <= |X| < 90112. -// -// To avoid branches, we perform the range reduction to full -// accuracy each time. -// -// X - N * (P_1 + P_2 + P_3) -// -// where P_1 and P_2 are 32-bit numbers (so multiplication by N -// is exact) and P_3 is a 53-bit number. 
Together, these -// approximate pi well enough for all cases in the restricted -// range. -// -// The main reduction sequence is: -// -// y = 32/pi * x -// N = integer(y) -// (computed by adding and subtracting off SHIFTER) -// -// m_1 = N * P_1 -// m_2 = N * P_2 -// r_1 = x - m_1 -// r = r_1 - m_2 -// (this r can be used for most of the calculation) -// -// c_1 = r_1 - r -// m_3 = N * P_3 -// c_2 = c_1 - m_2 -// c = c_2 - m_3 -// -// 2. MAIN ALGORITHM -// -// The algorithm uses a table lookup based on B = M * pi / 32 -// where M = N mod 64. The stored values are: -// sigma closest power of 2 to cos(B) -// C_hl 53-bit cos(B) - sigma -// S_hi + S_lo 2 * 53-bit sin(B) -// -// The computation is organized as follows: -// -// sin(B + r + c) = [sin(B) + sigma * r] + -// r * (cos(B) - sigma) + -// sin(B) * [cos(r + c) - 1] + -// cos(B) * [sin(r + c) - r] -// -// which is approximately: -// -// [S_hi + sigma * r] + -// C_hl * r + -// S_lo + S_hi * [(cos(r) - 1) - r * c] + -// (C_hl + sigma) * [(sin(r) - r) + c] -// -// and this is what is actually computed. We separate this sum -// into four parts: -// -// hi + med + pols + corr -// -// where -// -// hi = S_hi + sigma r -// med = C_hl * r -// pols = S_hi * (cos(r) - 1) + (C_hl + sigma) * (sin(r) - r) -// corr = S_lo + c * ((C_hl + sigma) - S_hi * r) -// -// 3. POLYNOMIAL -// -// The polynomial S_hi * (cos(r) - 1) + (C_hl + sigma) * -// (sin(r) - r) can be rearranged freely, since it is quite -// small, so we exploit parallelism to the fullest. -// -// psc4 = SC_4 * r_1 -// msc4 = psc4 * r -// r2 = r * r -// msc2 = SC_2 * r2 -// r4 = r2 * r2 -// psc3 = SC_3 + msc4 -// psc1 = SC_1 + msc2 -// msc3 = r4 * psc3 -// sincospols = psc1 + msc3 -// pols = sincospols * -// -// -// 4. CORRECTION TERM -// -// This is where the "c" component of the range reduction is -// taken into account; recall that just "r" is used for most of -// the calculation. 
-// -// -c = m_3 - c_2 -// -d = S_hi * r - (C_hl + sigma) -// corr = -c * -d + S_lo -// -// 5. COMPENSATED SUMMATIONS -// -// The two successive compensated summations add up the high -// and medium parts, leaving just the low parts to add up at -// the end. -// -// rs = sigma * r -// res_int = S_hi + rs -// k_0 = S_hi - res_int -// k_2 = k_0 + rs -// med = C_hl * r -// res_hi = res_int + med -// k_1 = res_int - res_hi -// k_3 = k_1 + med -// -// 6. FINAL SUMMATION -// -// We now add up all the small parts: -// -// res_lo = pols(hi) + pols(lo) + corr + k_1 + k_3 -// -// Now the overall result is just: -// -// res_hi + res_lo -// -// 7. SMALL ARGUMENTS -// -// If |x| < SNN (SNN meaning the smallest normal number), we -// simply perform 0.1111111 cdots 1111 * x. For SNN <= |x|, we -// do 2^-55 * (2^55 * x - x). -// -// Special cases: -// sin(NaN) = quiet NaN, and raise invalid exception -// sin(INF) = NaN and raise invalid exception -// sin(+/-0) = +/-0 -// -/******************************************************************************/ - -// The 32 bit code is at most SSE2 compliant -ATTRIBUTE_ALIGNED(8) static const juint _zero_none[] = -{ - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL -}; - -ATTRIBUTE_ALIGNED(4) static const juint __4onpi_d[] = -{ - 0x6dc9c883UL, 0x3ff45f30UL -}; - -ATTRIBUTE_ALIGNED(4) static const juint _TWO_32H[] = -{ - 0x00000000UL, 0x41f80000UL -}; - -ATTRIBUTE_ALIGNED(4) static const juint _pi04_3d[] = -{ - 0x54442d00UL, 0x3fe921fbUL, 0x98cc5180UL, 0x3ce84698UL, 0xcbb5bf6cUL, - 0xb9dfc8f8UL -}; - -ATTRIBUTE_ALIGNED(4) static const juint _pi04_5d[] = -{ - 0x54400000UL, 0x3fe921fbUL, 0x1a600000UL, 0x3dc0b461UL, 0x2e000000UL, - 0x3b93198aUL, 0x25200000UL, 0x396b839aUL, 0x533e63a0UL, 0x37027044UL -}; - -ATTRIBUTE_ALIGNED(4) static const juint _SCALE[] = -{ - 0x00000000UL, 0x32600000UL -}; - -ATTRIBUTE_ALIGNED(4) static const juint _zeros[] = -{ - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x80000000UL -}; - -ATTRIBUTE_ALIGNED(4) static 
const juint _pi04_2d[] = -{ - 0x54400000UL, 0x3fe921fbUL, 0x1a626331UL, 0x3dc0b461UL -}; - -ATTRIBUTE_ALIGNED(4) static const juint _TWO_12H[] = -{ - 0x00000000UL, 0x40b80000UL -}; - -ATTRIBUTE_ALIGNED(2) static const jushort __4onpi_31l[] = -{ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x836e, 0xa2f9, - 0x40d8, 0x0000, 0x0000, 0x0000, 0x2a50, 0x9c88, 0x40b7, 0x0000, 0x0000, 0x0000, - 0xabe8, 0xfe13, 0x4099, 0x0000, 0x0000, 0x0000, 0x6ee0, 0xfa9a, 0x4079, 0x0000, - 0x0000, 0x0000, 0x9580, 0xdb62, 0x4058, 0x0000, 0x0000, 0x0000, 0x1c82, 0xc9e2, - 0x403d, 0x0000, 0x0000, 0x0000, 0xb1c0, 0xff28, 0x4019, 0x0000, 0x0000, 0x0000, - 0xef14, 0xaf7a, 0x3ffe, 0x0000, 0x0000, 0x0000, 0x48dc, 0xc36e, 0x3fdf, 0x0000, - 0x0000, 0x0000, 0x3740, 0xe909, 0x3fbe, 0x0000, 0x0000, 0x0000, 0x924a, 0xb801, - 0x3fa2, 0x0000, 0x0000, 0x0000, 0x3a32, 0xdd41, 0x3f83, 0x0000, 0x0000, 0x0000, - 0x8778, 0x873f, 0x3f62, 0x0000, 0x0000, 0x0000, 0x1298, 0xb1cb, 0x3f44, 0x0000, - 0x0000, 0x0000, 0xa208, 0x9cfb, 0x3f26, 0x0000, 0x0000, 0x0000, 0xbaec, 0xd7d4, - 0x3f06, 0x0000, 0x0000, 0x0000, 0xd338, 0x8909, 0x3ee7, 0x0000, 0x0000, 0x0000, - 0x68b8, 0xe04d, 0x3ec7, 0x0000, 0x0000, 0x0000, 0x4e64, 0xdf90, 0x3eaa, 0x0000, - 0x0000, 0x0000, 0xc1a8, 0xeb1c, 0x3e89, 0x0000, 0x0000, 0x0000, 0x2720, 0xce7d, - 0x3e6a, 0x0000, 0x0000, 0x0000, 0x77b8, 0x8bf1, 0x3e4b, 0x0000, 0x0000, 0x0000, - 0xec7e, 0xe4a0, 0x3e2e, 0x0000, 0x0000, 0x0000, 0xffbc, 0xf12f, 0x3e0f, 0x0000, - 0x0000, 0x0000, 0xfdc0, 0xb301, 0x3deb, 0x0000, 0x0000, 0x0000, 0xc5ac, 0x9788, - 0x3dd1, 0x0000, 0x0000, 0x0000, 0x47da, 0x829b, 0x3db2, 0x0000, 0x0000, 0x0000, - 0xd9e4, 0xa6cf, 0x3d93, 0x0000, 0x0000, 0x0000, 0x36e8, 0xf961, 0x3d73, 0x0000, - 0x0000, 0x0000, 0xf668, 0xf463, 0x3d54, 0x0000, 0x0000, 0x0000, 0x5168, 0xf2ff, - 0x3d35, 0x0000, 0x0000, 0x0000, 0x758e, 0xea4f, 0x3d17, 0x0000, 0x0000, 0x0000, - 0xf17a, 0xebe5, 0x3cf8, 0x0000, 0x0000, 0x0000, 0x9cfa, 0x9e83, 0x3cd9, 0x0000, - 0x0000, 0x0000, 0xa4ba, 
0xe294, 0x3cba, 0x0000, 0x0000, 0x0000, 0xd7ec, 0x9afe, - 0x3c9a, 0x0000, 0x0000, 0x0000, 0xae80, 0x8fc6, 0x3c79, 0x0000, 0x0000, 0x0000, - 0x3304, 0x8560, 0x3c5c, 0x0000, 0x0000, 0x0000, 0x6d70, 0xdf8f, 0x3c3b, 0x0000, - 0x0000, 0x0000, 0x3ef0, 0xafc3, 0x3c1e, 0x0000, 0x0000, 0x0000, 0xd0d8, 0x826b, - 0x3bfe, 0x0000, 0x0000, 0x0000, 0x1c80, 0xed4f, 0x3bdd, 0x0000, 0x0000, 0x0000, - 0x730c, 0xb0af, 0x3bc1, 0x0000, 0x0000, 0x0000, 0x6660, 0xc219, 0x3ba2, 0x0000, - 0x0000, 0x0000, 0x940c, 0xabe2, 0x3b83, 0x0000, 0x0000, 0x0000, 0xdffc, 0x8408, - 0x3b64, 0x0000, 0x0000, 0x0000, 0x6b98, 0xc402, 0x3b45, 0x0000, 0x0000, 0x0000, - 0x1818, 0x9cc4, 0x3b26, 0x0000, 0x0000, 0x0000, 0x5390, 0xaab6, 0x3b05, 0x0000, - 0x0000, 0x0000, 0xb070, 0xd464, 0x3ae9, 0x0000, 0x0000, 0x0000, 0x231a, 0x9ef0, - 0x3aca, 0x0000, 0x0000, 0x0000, 0x0670, 0xd1f1, 0x3aaa, 0x0000, 0x0000, 0x0000, - 0x7738, 0xd9f3, 0x3a8a, 0x0000, 0x0000, 0x0000, 0xa834, 0x8092, 0x3a6c, 0x0000, - 0x0000, 0x0000, 0xb45c, 0xce23, 0x3a4d, 0x0000, 0x0000, 0x0000, 0x36e8, 0xb0e5, - 0x3a2d, 0x0000, 0x0000, 0x0000, 0xd156, 0xaf44, 0x3a10, 0x0000, 0x0000, 0x0000, - 0x9f52, 0x8c82, 0x39f1, 0x0000, 0x0000, 0x0000, 0x829c, 0xff83, 0x39d1, 0x0000, - 0x0000, 0x0000, 0x7d06, 0xefc6, 0x39b3, 0x0000, 0x0000, 0x0000, 0x93e0, 0xb0b7, - 0x3992, 0x0000, 0x0000, 0x0000, 0xedde, 0xc193, 0x3975, 0x0000, 0x0000, 0x0000, - 0xbbc0, 0xcf49, 0x3952, 0x0000, 0x0000, 0x0000, 0xbdf0, 0xd63c, 0x3937, 0x0000, - 0x0000, 0x0000, 0x1f34, 0x9f3a, 0x3918, 0x0000, 0x0000, 0x0000, 0x3f8e, 0xe579, - 0x38f9, 0x0000, 0x0000, 0x0000, 0x90c8, 0xc3f8, 0x38d9, 0x0000, 0x0000, 0x0000, - 0x48c0, 0xf8f8, 0x38b7, 0x0000, 0x0000, 0x0000, 0xed56, 0xafa6, 0x389c, 0x0000, - 0x0000, 0x0000, 0x8218, 0xb969, 0x387d, 0x0000, 0x0000, 0x0000, 0x1852, 0xec57, - 0x385e, 0x0000, 0x0000, 0x0000, 0x670c, 0xd674, 0x383e, 0x0000, 0x0000, 0x0000, - 0xad40, 0xc2c4, 0x3820, 0x0000, 0x0000, 0x0000, 0x2e80, 0xa696, 0x3801, 0x0000, - 0x0000, 0x0000, 0xd800, 0xc467, 0x37dc, 0x0000, 0x0000, 
0x0000, 0x3c72, 0xc5ae, - 0x37c3, 0x0000, 0x0000, 0x0000, 0xb006, 0xac69, 0x37a4, 0x0000, 0x0000, 0x0000, - 0x34a0, 0x8cdf, 0x3782, 0x0000, 0x0000, 0x0000, 0x9ed2, 0xd25e, 0x3766, 0x0000, - 0x0000, 0x0000, 0x6fec, 0xaaaa, 0x3747, 0x0000, 0x0000, 0x0000, 0x6040, 0xfb5c, - 0x3726, 0x0000, 0x0000, 0x0000, 0x764c, 0xa3fc, 0x3708, 0x0000, 0x0000, 0x0000, - 0xb254, 0x954e, 0x36e9, 0x0000, 0x0000, 0x0000, 0x3e1c, 0xf5dc, 0x36ca, 0x0000, - 0x0000, 0x0000, 0x7b06, 0xc635, 0x36ac, 0x0000, 0x0000, 0x0000, 0xa8ba, 0xd738, - 0x368d, 0x0000, 0x0000, 0x0000, 0x06cc, 0xb24e, 0x366d, 0x0000, 0x0000, 0x0000, - 0x7108, 0xac76, 0x364f, 0x0000, 0x0000, 0x0000, 0x2324, 0xa7cb, 0x3630, 0x0000, - 0x0000, 0x0000, 0xac40, 0xef15, 0x360f, 0x0000, 0x0000, 0x0000, 0xae46, 0xd516, - 0x35f2, 0x0000, 0x0000, 0x0000, 0x615e, 0xe003, 0x35d3, 0x0000, 0x0000, 0x0000, - 0x0cf0, 0xefe7, 0x35b1, 0x0000, 0x0000, 0x0000, 0xfb50, 0xf98c, 0x3595, 0x0000, - 0x0000, 0x0000, 0x0abc, 0xf333, 0x3575, 0x0000, 0x0000, 0x0000, 0xdd60, 0xca3f, - 0x3555, 0x0000, 0x0000, 0x0000, 0x7eb6, 0xd87f, 0x3538, 0x0000, 0x0000, 0x0000, - 0x44f4, 0xb291, 0x3519, 0x0000, 0x0000, 0x0000, 0xff80, 0xc982, 0x34f6, 0x0000, - 0x0000, 0x0000, 0x9de0, 0xd9b8, 0x34db, 0x0000, 0x0000, 0x0000, 0xcd42, 0x9366, - 0x34bc, 0x0000, 0x0000, 0x0000, 0xbef0, 0xfaee, 0x349d, 0x0000, 0x0000, 0x0000, - 0xdac4, 0xb6f1, 0x347d, 0x0000, 0x0000, 0x0000, 0xf140, 0x94de, 0x345d, 0x0000, - 0x0000, 0x0000, 0xa218, 0x8b4b, 0x343e, 0x0000, 0x0000, 0x0000, 0x6380, 0xa135, - 0x341e, 0x0000, 0x0000, 0x0000, 0xb184, 0x8cb2, 0x3402, 0x0000, 0x0000, 0x0000, - 0x196e, 0xdc61, 0x33e3, 0x0000, 0x0000, 0x0000, 0x0c00, 0xde05, 0x33c4, 0x0000, - 0x0000, 0x0000, 0xef9a, 0xbd38, 0x33a5, 0x0000, 0x0000, 0x0000, 0xc1a0, 0xdf00, - 0x3385, 0x0000, 0x0000, 0x0000, 0x1090, 0x9973, 0x3365, 0x0000, 0x0000, 0x0000, - 0x4882, 0x8301, 0x3348, 0x0000, 0x0000, 0x0000, 0x7abe, 0xadc7, 0x3329, 0x0000, - 0x0000, 0x0000, 0x7cba, 0xec2b, 0x330a, 0x0000, 0x0000, 0x0000, 0xa520, 0x8f21, - 
0x32e9, 0x0000, 0x0000, 0x0000, 0x710c, 0x8d36, 0x32cc, 0x0000, 0x0000, 0x0000, - 0x5212, 0xc6ed, 0x32ad, 0x0000, 0x0000, 0x0000, 0x7308, 0xfd76, 0x328d, 0x0000, - 0x0000, 0x0000, 0x5014, 0xd548, 0x326f, 0x0000, 0x0000, 0x0000, 0xd3f2, 0xb499, - 0x3250, 0x0000, 0x0000, 0x0000, 0x7f74, 0xa606, 0x3230, 0x0000, 0x0000, 0x0000, - 0xf0a8, 0xd720, 0x3212, 0x0000, 0x0000, 0x0000, 0x185c, 0xe20f, 0x31f2, 0x0000, - 0x0000, 0x0000, 0xa5a8, 0x8738, 0x31d4, 0x0000, 0x0000, 0x0000, 0xdd74, 0xcafb, - 0x31b4, 0x0000, 0x0000, 0x0000, 0x98b6, 0xbd8e, 0x3196, 0x0000, 0x0000, 0x0000, - 0xe9de, 0x977f, 0x3177, 0x0000, 0x0000, 0x0000, 0x67c0, 0x818d, 0x3158, 0x0000, - 0x0000, 0x0000, 0xe52a, 0x9322, 0x3139, 0x0000, 0x0000, 0x0000, 0xe568, 0x9b6c, - 0x3119, 0x0000, 0x0000, 0x0000, 0x2358, 0xaa0a, 0x30fa, 0x0000, 0x0000, 0x0000, - 0xe480, 0xe13b, 0x30d9, 0x0000, 0x0000, 0x0000, 0x3024, 0x90a1, 0x30bd, 0x0000, - 0x0000, 0x0000, 0x9620, 0xda30, 0x309d, 0x0000, 0x0000, 0x0000, 0x898a, 0xb388, - 0x307f, 0x0000, 0x0000, 0x0000, 0xb24c, 0xc891, 0x3060, 0x0000, 0x0000, 0x0000, - 0x8056, 0xf98b, 0x3041, 0x0000, 0x0000, 0x0000, 0x72a4, 0xa1ea, 0x3021, 0x0000, - 0x0000, 0x0000, 0x6af8, 0x9488, 0x3001, 0x0000, 0x0000, 0x0000, 0xe00c, 0xdfcb, - 0x2fe4, 0x0000, 0x0000, 0x0000, 0xeeec, 0xc941, 0x2fc4, 0x0000, 0x0000, 0x0000, - 0x53e0, 0xe70f, 0x2fa4, 0x0000, 0x0000, 0x0000, 0x8f60, 0x9c07, 0x2f85, 0x0000, - 0x0000, 0x0000, 0xb328, 0xc3e7, 0x2f68, 0x0000, 0x0000, 0x0000, 0x9404, 0xf8c7, - 0x2f48, 0x0000, 0x0000, 0x0000, 0x38e0, 0xc99f, 0x2f29, 0x0000, 0x0000, 0x0000, - 0x9778, 0xd984, 0x2f09, 0x0000, 0x0000, 0x0000, 0xe700, 0xd142, 0x2eea, 0x0000, - 0x0000, 0x0000, 0xd904, 0x9443, 0x2ecd, 0x0000, 0x0000, 0x0000, 0xd4ba, 0xae7e, - 0x2eae, 0x0000, 0x0000, 0x0000, 0x8e5e, 0x8524, 0x2e8f, 0x0000, 0x0000, 0x0000, - 0xb550, 0xc9ed, 0x2e6e, 0x0000, 0x0000, 0x0000, 0x53b8, 0x8648, 0x2e51, 0x0000, - 0x0000, 0x0000, 0xdae4, 0x87f9, 0x2e32, 0x0000, 0x0000, 0x0000, 0x2942, 0xd966, - 0x2e13, 0x0000, 0x0000, 0x0000, 
0x4f28, 0xcf3c, 0x2df3, 0x0000, 0x0000, 0x0000, - 0xfa40, 0xc4ef, 0x2dd1, 0x0000, 0x0000, 0x0000, 0x4424, 0xbca7, 0x2db5, 0x0000, - 0x0000, 0x0000, 0x2e62, 0xcdc5, 0x2d97, 0x0000, 0x0000, 0x0000, 0xed88, 0x996b, - 0x2d78, 0x0000, 0x0000, 0x0000, 0x7c30, 0xd97d, 0x2d56, 0x0000, 0x0000, 0x0000, - 0xed26, 0xbf6e, 0x2d3a, 0x0000, 0x0000, 0x0000, 0x2918, 0x921b, 0x2d1a, 0x0000, - 0x0000, 0x0000, 0x4e24, 0xe84e, 0x2cfb, 0x0000, 0x0000, 0x0000, 0x6dc0, 0x92ec, - 0x2cdd, 0x0000, 0x0000, 0x0000, 0x4f2c, 0xacf8, 0x2cbd, 0x0000, 0x0000, 0x0000, - 0xc634, 0xf094, 0x2c9e, 0x0000, 0x0000, 0x0000, 0xdc70, 0xe5d3, 0x2c7e, 0x0000, - 0x0000, 0x0000, 0x2180, 0xa600, 0x2c5b, 0x0000, 0x0000, 0x0000, 0x8480, 0xd680, - 0x2c3c, 0x0000, 0x0000, 0x0000, 0x8b24, 0xd63b, 0x2c22, 0x0000, 0x0000, 0x0000, - 0x02e0, 0xaa47, 0x2c00, 0x0000, 0x0000, 0x0000, 0x9ad0, 0xee84, 0x2be3, 0x0000, - 0x0000, 0x0000, 0xf7dc, 0xf699, 0x2bc6, 0x0000, 0x0000, 0x0000, 0xddde, 0xe490, - 0x2ba7, 0x0000, 0x0000, 0x0000, 0x34a0, 0xb4fd, 0x2b85, 0x0000, 0x0000, 0x0000, - 0x91b4, 0x8ef6, 0x2b68, 0x0000, 0x0000, 0x0000, 0xa3e0, 0xa2a7, 0x2b47, 0x0000, - 0x0000, 0x0000, 0xcce4, 0x82b3, 0x2b2a, 0x0000, 0x0000, 0x0000, 0xe4be, 0x8207, - 0x2b0c, 0x0000, 0x0000, 0x0000, 0x1d92, 0xab43, 0x2aed, 0x0000, 0x0000, 0x0000, - 0xe818, 0xf9f6, 0x2acd, 0x0000, 0x0000, 0x0000, 0xff12, 0xba80, 0x2aaf, 0x0000, - 0x0000, 0x0000, 0x5254, 0x8529, 0x2a90, 0x0000, 0x0000, 0x0000, 0x1b88, 0xe032, - 0x2a71, 0x0000, 0x0000, 0x0000, 0x3248, 0xd86d, 0x2a50, 0x0000, 0x0000, 0x0000, - 0x3140, 0xc9d5, 0x2a2e, 0x0000, 0x0000, 0x0000, 0x14e6, 0xbd47, 0x2a14, 0x0000, - 0x0000, 0x0000, 0x5c10, 0xe544, 0x29f4, 0x0000, 0x0000, 0x0000, 0x9f50, 0x90b6, - 0x29d4, 0x0000, 0x0000, 0x0000, 0x9850, 0xab55, 0x29b6, 0x0000, 0x0000, 0x0000, - 0x2750, 0x9d07, 0x2998, 0x0000, 0x0000, 0x0000, 0x6700, 0x8bbb, 0x2973, 0x0000, - 0x0000, 0x0000, 0x5dba, 0xed31, 0x295a, 0x0000, 0x0000, 0x0000, 0x61dc, 0x85fe, - 0x293a, 0x0000, 0x0000, 0x0000, 0x9ba2, 0xd6b4, 0x291c, 0x0000, 
0x0000, 0x0000, - 0x2d30, 0xe3a5, 0x28fb, 0x0000, 0x0000, 0x0000, 0x6630, 0xb566, 0x28dd, 0x0000, - 0x0000, 0x0000, 0x5ad4, 0xa829, 0x28bf, 0x0000, 0x0000, 0x0000, 0x89d8, 0xe290, - 0x28a0, 0x0000, 0x0000, 0x0000, 0x3916, 0xc428, 0x2881, 0x0000, 0x0000, 0x0000, - 0x0490, 0xbea4, 0x2860, 0x0000, 0x0000, 0x0000, 0xee06, 0x80ee, 0x2843, 0x0000, - 0x0000, 0x0000, 0xfc00, 0xf327, 0x2820, 0x0000, 0x0000, 0x0000, 0xea40, 0xa871, - 0x2800, 0x0000, 0x0000, 0x0000, 0x63d8, 0x9c26, 0x27e4, 0x0000, 0x0000, 0x0000, - 0x07ba, 0xc0c9, 0x27c7, 0x0000, 0x0000, 0x0000, 0x3fa2, 0x9797, 0x27a8, 0x0000, - 0x0000, 0x0000, 0x21c6, 0xfeca, 0x2789, 0x0000, 0x0000, 0x0000, 0xde40, 0x860d, - 0x2768, 0x0000, 0x0000, 0x0000, 0x9cc8, 0x98ce, 0x2749, 0x0000, 0x0000, 0x0000, - 0x3778, 0xa31c, 0x272a, 0x0000, 0x0000, 0x0000, 0xe778, 0xf6e2, 0x270b, 0x0000, - 0x0000, 0x0000, 0x59b8, 0xf841, 0x26ed, 0x0000, 0x0000, 0x0000, 0x02e0, 0xad04, - 0x26cd, 0x0000, 0x0000, 0x0000, 0x5a92, 0x9380, 0x26b0, 0x0000, 0x0000, 0x0000, - 0xc740, 0x8886, 0x268d, 0x0000, 0x0000, 0x0000, 0x0680, 0xfaf8, 0x266c, 0x0000, - 0x0000, 0x0000, 0xfb60, 0x897f, 0x2653, 0x0000, 0x0000, 0x0000, 0x8760, 0xf903, - 0x2634, 0x0000, 0x0000, 0x0000, 0xad2a, 0xc2c8, 0x2615, 0x0000, 0x0000, 0x0000, - 0x2d86, 0x8aef, 0x25f6, 0x0000, 0x0000, 0x0000, 0x1ef4, 0xe627, 0x25d6, 0x0000, - 0x0000, 0x0000, 0x09e4, 0x8020, 0x25b7, 0x0000, 0x0000, 0x0000, 0x7548, 0xd227, - 0x2598, 0x0000, 0x0000, 0x0000, 0x75dc, 0xfb5b, 0x2579, 0x0000, 0x0000, 0x0000, - 0xea84, 0xc8b6, 0x255a, 0x0000, 0x0000, 0x0000, 0xe4d0, 0x8145, 0x253b, 0x0000, - 0x0000, 0x0000, 0x3640, 0x9768, 0x251c, 0x0000, 0x0000, 0x0000, 0x246a, 0xccec, - 0x24fe, 0x0000, 0x0000, 0x0000, 0x51d0, 0xa075, 0x24dd, 0x0000, 0x0000, 0x0000, - 0x4638, 0xa385, 0x24bf, 0x0000, 0x0000, 0x0000, 0xd788, 0xd776, 0x24a1, 0x0000, - 0x0000, 0x0000, 0x1370, 0x8997, 0x2482, 0x0000, 0x0000, 0x0000, 0x1e88, 0x9b67, - 0x2462, 0x0000, 0x0000, 0x0000, 0x6c08, 0xd975, 0x2444, 0x0000, 0x0000, 0x0000, - 0xfdb0, 
0xcfc0, 0x2422, 0x0000, 0x0000, 0x0000, 0x3100, 0xc026, 0x2406, 0x0000, - 0x0000, 0x0000, 0xc5b4, 0xae64, 0x23e6, 0x0000, 0x0000, 0x0000, 0x2280, 0xf687, - 0x23c3, 0x0000, 0x0000, 0x0000, 0x2de0, 0x9006, 0x23a9, 0x0000, 0x0000, 0x0000, - 0x24bc, 0xf631, 0x238a, 0x0000, 0x0000, 0x0000, 0xb8d4, 0xa975, 0x236b, 0x0000, - 0x0000, 0x0000, 0xd9a4, 0xb949, 0x234b, 0x0000, 0x0000, 0x0000, 0xb54e, 0xbd39, - 0x232d, 0x0000, 0x0000, 0x0000, 0x4aac, 0x9a52, 0x230e, 0x0000, 0x0000, 0x0000, - 0xbbbc, 0xd085, 0x22ef, 0x0000, 0x0000, 0x0000, 0xdf18, 0xc633, 0x22cf, 0x0000, - 0x0000, 0x0000, 0x16d0, 0xeca5, 0x22af, 0x0000, 0x0000, 0x0000, 0xf2a0, 0xdf6f, - 0x228e, 0x0000, 0x0000, 0x0000, 0x8c44, 0xe86b, 0x2272, 0x0000, 0x0000, 0x0000, - 0x35c0, 0xbbf4, 0x2253, 0x0000, 0x0000, 0x0000, 0x0c40, 0xdafb, 0x2230, 0x0000, - 0x0000, 0x0000, 0x92dc, 0x9935, 0x2216, 0x0000, 0x0000, 0x0000, 0x0ca0, 0xbda6, - 0x21f3, 0x0000, 0x0000, 0x0000, 0x5958, 0xa6fd, 0x21d6, 0x0000, 0x0000, 0x0000, - 0xa3dc, 0x9d7f, 0x21b9, 0x0000, 0x0000, 0x0000, 0x79dc, 0xfcb5, 0x2199, 0x0000, - 0x0000, 0x0000, 0xf264, 0xcebb, 0x217b, 0x0000, 0x0000, 0x0000, 0x0abe, 0x8308, - 0x215c, 0x0000, 0x0000, 0x0000, 0x30ae, 0xb463, 0x213d, 0x0000, 0x0000, 0x0000, - 0x6228, 0xb040, 0x211c, 0x0000, 0x0000, 0x0000, 0xc9b2, 0xf43b, 0x20ff, 0x0000, - 0x0000, 0x0000, 0x3d8e, 0xa4b3, 0x20e0, 0x0000, 0x0000, 0x0000, 0x84e6, 0x8dab, - 0x20c1, 0x0000, 0x0000, 0x0000, 0xa124, 0x9b74, 0x20a1, 0x0000, 0x0000, 0x0000, - 0xc276, 0xd497, 0x2083, 0x0000, 0x0000, 0x0000, 0x6354, 0xa466, 0x2063, 0x0000, - 0x0000, 0x0000, 0x8654, 0xaf0a, 0x2044, 0x0000, 0x0000, 0x0000, 0x1d20, 0xfa5c, - 0x2024, 0x0000, 0x0000, 0x0000, 0xbcd0, 0xf3f0, 0x2004, 0x0000, 0x0000, 0x0000, - 0xedf0, 0xf0b6, 0x1fe7, 0x0000, 0x0000, 0x0000, 0x45bc, 0x9182, 0x1fc9, 0x0000, - 0x0000, 0x0000, 0xe254, 0xdc85, 0x1faa, 0x0000, 0x0000, 0x0000, 0xb898, 0xe9b1, - 0x1f8a, 0x0000, 0x0000, 0x0000, 0x0ebe, 0xe6f0, 0x1f6c, 0x0000, 0x0000, 0x0000, - 0xa9b8, 0xf584, 0x1f4c, 0x0000, 0x0000, 
0x0000, 0x12e8, 0xdf6b, 0x1f2e, 0x0000, - 0x0000, 0x0000, 0x9f9e, 0xcd55, 0x1f0f, 0x0000, 0x0000, 0x0000, 0x05a0, 0xec3a, - 0x1eef, 0x0000, 0x0000, 0x0000, 0xd8e0, 0x96f8, 0x1ed1, 0x0000, 0x0000, 0x0000, - 0x3bd4, 0xccc6, 0x1eb1, 0x0000, 0x0000, 0x0000, 0x4910, 0xb87b, 0x1e93, 0x0000, - 0x0000, 0x0000, 0xbefc, 0xd40b, 0x1e73, 0x0000, 0x0000, 0x0000, 0x317e, 0xa406, - 0x1e55, 0x0000, 0x0000, 0x0000, 0x6bb2, 0xc2b2, 0x1e36, 0x0000, 0x0000, 0x0000, - 0xb87e, 0xbb78, 0x1e17, 0x0000, 0x0000, 0x0000, 0xa03c, 0xdbbd, 0x1df7, 0x0000, - 0x0000, 0x0000, 0x5b6c, 0xe3c8, 0x1dd9, 0x0000, 0x0000, 0x0000, 0x8968, 0xca8e, - 0x1dba, 0x0000, 0x0000, 0x0000, 0xc024, 0xe6ab, 0x1d9a, 0x0000, 0x0000, 0x0000, - 0x4110, 0xd4eb, 0x1d7a, 0x0000, 0x0000, 0x0000, 0xa168, 0xbdb5, 0x1d5d, 0x0000, - 0x0000, 0x0000, 0x012e, 0xa5fa, 0x1d3e, 0x0000, 0x0000, 0x0000, 0x6838, 0x9c1f, - 0x1d1e, 0x0000, 0x0000, 0x0000, 0xa158, 0xaa76, 0x1d00, 0x0000, 0x0000, 0x0000, - 0x090a, 0xbd95, 0x1ce1, 0x0000, 0x0000, 0x0000, 0xf73e, 0x8b6d, 0x1cc2, 0x0000, - 0x0000, 0x0000, 0x5fda, 0xbcbf, 0x1ca3, 0x0000, 0x0000, 0x0000, 0xdbe8, 0xb89f, - 0x1c84, 0x0000, 0x0000, 0x0000, 0x6e4c, 0x96c7, 0x1c64, 0x0000, 0x0000, 0x0000, - 0x19c2, 0xf2a4, 0x1c46, 0x0000, 0x0000, 0x0000, 0xb800, 0xf855, 0x1c1e, 0x0000, - 0x0000, 0x0000, 0x87fc, 0x85ff, 0x1c08, 0x0000, 0x0000, 0x0000, 0x1418, 0x839f, - 0x1be9, 0x0000, 0x0000, 0x0000, 0x6186, 0xd9d8, 0x1bca, 0x0000, 0x0000, 0x0000, - 0xf500, 0xabaa, 0x1ba6, 0x0000, 0x0000, 0x0000, 0x7b36, 0xdafe, 0x1b8c, 0x0000, - 0x0000, 0x0000, 0xf394, 0xe6d8, 0x1b6c, 0x0000, 0x0000, 0x0000, 0x6efc, 0x9e55, - 0x1b4e, 0x0000, 0x0000, 0x0000, 0x5e10, 0xc523, 0x1b2e, 0x0000, 0x0000, 0x0000, - 0x8210, 0xb6f9, 0x1b0d, 0x0000, 0x0000, 0x0000, 0x9ab0, 0x96e3, 0x1af1, 0x0000, - 0x0000, 0x0000, 0x3864, 0x92e7, 0x1ad1, 0x0000, 0x0000, 0x0000, 0x9878, 0xdc65, - 0x1ab1, 0x0000, 0x0000, 0x0000, 0xfa20, 0xd6cb, 0x1a94, 0x0000, 0x0000, 0x0000, - 0x6c00, 0xa4e4, 0x1a70, 0x0000, 0x0000, 0x0000, 0xab40, 0xb41b, 0x1a53, 
0x0000, - 0x0000, 0x0000, 0x43a4, 0x8ede, 0x1a37, 0x0000, 0x0000, 0x0000, 0x22e0, 0x9314, - 0x1a15, 0x0000, 0x0000, 0x0000, 0x6170, 0xb949, 0x19f8, 0x0000, 0x0000, 0x0000, - 0x6b00, 0xe056, 0x19d8, 0x0000, 0x0000, 0x0000, 0x9ba8, 0xa94c, 0x19b9, 0x0000, - 0x0000, 0x0000, 0xfaa0, 0xaa16, 0x199b, 0x0000, 0x0000, 0x0000, 0x899a, 0xf627, - 0x197d, 0x0000, 0x0000, 0x0000, 0x9f20, 0xfb70, 0x195d, 0x0000, 0x0000, 0x0000, - 0xa4b8, 0xc176, 0x193e, 0x0000, 0x0000, 0x0000, 0xb21c, 0x85c3, 0x1920, 0x0000, - 0x0000, 0x0000, 0x50d2, 0x9b19, 0x1901, 0x0000, 0x0000, 0x0000, 0xd4b0, 0xb708, - 0x18e0, 0x0000, 0x0000, 0x0000, 0xfb88, 0xf510, 0x18c1, 0x0000, 0x0000, 0x0000, - 0x31ec, 0xdc8d, 0x18a3, 0x0000, 0x0000, 0x0000, 0x3c00, 0xbff9, 0x1885, 0x0000, - 0x0000, 0x0000, 0x5020, 0xc30b, 0x1862, 0x0000, 0x0000, 0x0000, 0xd4f0, 0xda0c, - 0x1844, 0x0000, 0x0000, 0x0000, 0x20d2, 0x99a5, 0x1828, 0x0000, 0x0000, 0x0000, - 0x852e, 0xd159, 0x1809, 0x0000, 0x0000, 0x0000, 0x7cd8, 0x97a1, 0x17e9, 0x0000, - 0x0000, 0x0000, 0x423a, 0x997b, 0x17cb, 0x0000, 0x0000, 0x0000, 0xc1c0, 0xbe7d, - 0x17a8, 0x0000, 0x0000, 0x0000, 0xe8bc, 0xdcdd, 0x178d, 0x0000, 0x0000, 0x0000, - 0x8b28, 0xae06, 0x176e, 0x0000, 0x0000, 0x0000, 0x102e, 0xb8d4, 0x174f, 0x0000, - 0x0000, 0x0000, 0xaa00, 0xaa5c, 0x172f, 0x0000, 0x0000, 0x0000, 0x51f0, 0x9fc0, - 0x170e, 0x0000, 0x0000, 0x0000, 0xf858, 0xe181, 0x16f2, 0x0000, 0x0000, 0x0000, - 0x91a8, 0x8162, 0x16d3, 0x0000, 0x0000, 0x0000, 0x5f40, 0xcb6f, 0x16b1, 0x0000, - 0x0000, 0x0000, 0xbb50, 0xe55f, 0x1693, 0x0000, 0x0000, 0x0000, 0xacd2, 0xd895, - 0x1676, 0x0000, 0x0000, 0x0000, 0xef30, 0x97bf, 0x1654, 0x0000, 0x0000, 0x0000, - 0xf700, 0xb3d7, 0x1633, 0x0000, 0x0000, 0x0000, 0x3454, 0xa7b5, 0x1619, 0x0000, - 0x0000, 0x0000, 0x6b00, 0xa929, 0x15f6, 0x0000, 0x0000, 0x0000, 0x9f04, 0x89f7, - 0x15db, 0x0000, 0x0000, 0x0000, 0xad78, 0xd985, 0x15bc, 0x0000, 0x0000, 0x0000, - 0xa46a, 0xae3f, 0x159d, 0x0000, 0x0000, 0x0000, 0x63a0, 0xd0da, 0x157c, 0x0000, - 0x0000, 0x0000, 
0x5e90, 0x817d, 0x155e, 0x0000, 0x0000, 0x0000, 0x1494, 0xb13f, - 0x1540, 0x0000, 0x0000, 0x0000, 0x0090, 0x9c40, 0x1521, 0x0000, 0x0000, 0x0000, - 0xdd70, 0xcc86, 0x1500, 0x0000, 0x0000, 0x0000, 0x64f8, 0xdb6f, 0x14e1, 0x0000, - 0x0000, 0x0000, 0xe22c, 0xac17, 0x14c3, 0x0000, 0x0000, 0x0000, 0x60e0, 0xa9ad, - 0x14a3, 0x0000, 0x0000, 0x0000, 0x4640, 0xd658, 0x1481, 0x0000, 0x0000, 0x0000, - 0x6490, 0xa181, 0x1467, 0x0000, 0x0000, 0x0000, 0x1df4, 0xaaa2, 0x1447, 0x0000, - 0x0000, 0x0000, 0xb94a, 0x8f61, 0x1429, 0x0000, 0x0000, 0x0000, 0x5198, 0x9d83, - 0x1409, 0x0000, 0x0000, 0x0000, 0x0f7a, 0xa818, 0x13eb, 0x0000, 0x0000, 0x0000, - 0xc45e, 0xc06c, 0x13cc, 0x0000, 0x0000, 0x0000, 0x4ec0, 0xfa29, 0x13a8, 0x0000, - 0x0000, 0x0000, 0x6418, 0x8cad, 0x138c, 0x0000, 0x0000, 0x0000, 0xbcc8, 0xe7d1, - 0x136f, 0x0000, 0x0000, 0x0000, 0xc934, 0xf9b0, 0x134f, 0x0000, 0x0000, 0x0000, - 0x6ce0, 0x98df, 0x1331, 0x0000, 0x0000, 0x0000, 0x3516, 0xe5e9, 0x1312, 0x0000, - 0x0000, 0x0000, 0xc6c0, 0xef8b, 0x12ef, 0x0000, 0x0000, 0x0000, 0xaf02, 0x913d, - 0x12d4, 0x0000, 0x0000, 0x0000, 0xd230, 0xe1d5, 0x12b5, 0x0000, 0x0000, 0x0000, - 0xfba8, 0xc232, 0x1295, 0x0000, 0x0000, 0x0000, 0x7ba4, 0xabeb, 0x1277, 0x0000, - 0x0000, 0x0000, 0x6e5c, 0xc692, 0x1258, 0x0000, 0x0000, 0x0000, 0x76a2, 0x9756, - 0x1239, 0x0000, 0x0000, 0x0000, 0xe180, 0xe423, 0x1214, 0x0000, 0x0000, 0x0000, - 0x8c3c, 0x90f8, 0x11fb, 0x0000, 0x0000, 0x0000, 0x9f3c, 0x9fd2, 0x11dc, 0x0000, - 0x0000, 0x0000, 0x53e0, 0xb73e, 0x11bd, 0x0000, 0x0000, 0x0000, 0x45be, 0x88d6, - 0x119e, 0x0000, 0x0000, 0x0000, 0x111a, 0x8bc0, 0x117f, 0x0000, 0x0000, 0x0000, - 0xe26a, 0xd7ff, 0x1160, 0x0000, 0x0000, 0x0000, 0xfb60, 0xdd8d, 0x113f, 0x0000, - 0x0000, 0x0000, 0x9370, 0xc108, 0x1120, 0x0000, 0x0000, 0x0000, 0x9654, 0x8baf, - 0x1103, 0x0000, 0x0000, 0x0000, 0xd6ec, 0xd6b9, 0x10e4, 0x0000, 0x0000, 0x0000, - 0x23e4, 0xd7b7, 0x10c4, 0x0000, 0x0000, 0x0000, 0x1aa6, 0xa847, 0x10a6, 0x0000, - 0x0000, 0x0000, 0xbee6, 0x9fef, 0x1087, 0x0000, 
0x0000, 0x0000, 0x26d0, 0xa6eb, - 0x1066, 0x0000, 0x0000, 0x0000, 0x5b86, 0xa880, 0x1049, 0x0000, 0x0000, 0x0000, - 0x125c, 0xd971, 0x1029, 0x0000, 0x0000, 0x0000, 0x1f78, 0x9d18, 0x100a, 0x0000, - 0x0000, 0x0000, 0x0e84, 0xb15b, 0x0feb, 0x0000, 0x0000, 0x0000, 0xd0c0, 0xc150, - 0x0fcc, 0x0000, 0x0000, 0x0000, 0xa330, 0xc40c, 0x0fad, 0x0000, 0x0000, 0x0000, - 0x5202, 0xfc2c, 0x0f8f, 0x0000, 0x0000, 0x0000, 0x3f7c, 0xecf5, 0x0f6f, 0x0000, - 0x0000, 0x0000, 0xef44, 0xfdfd, 0x0f50, 0x0000, 0x0000, 0x0000, 0x3f6c, 0xab1b, - 0x0f31, 0x0000, 0x0000, 0x0000, 0xf658, 0x89ec, 0x0f11, 0x0000, 0x0000, 0x0000, - 0xbfc8, 0x9ba8, 0x0ef4, 0x0000, 0x0000, 0x0000, 0x3d40, 0xbe21, 0x0ed5, 0x0000, - 0x0000, 0x0000, 0xbbc4, 0xc70d, 0x0eb6, 0x0000, 0x0000, 0x0000, 0x5158, 0xdb16, - 0x0e96, 0x0000, 0x0000, 0x0000, 0xb5a8, 0xa8d8, 0x0e78, 0x0000, 0x0000, 0x0000, - 0xcccc, 0xb40e, 0x0e58, 0x0000, 0x0000, 0x0000, 0x448c, 0xcb62, 0x0e3a, 0x0000, - 0x0000, 0x0000, 0xf12a, 0x8aed, 0x0e1b, 0x0000, 0x0000, 0x0000, 0x79d0, 0xc59c, - 0x0dfb, 0x0000, 0x0000, 0x0000, 0x06b4, 0xcdc9, 0x0ddd, 0x0000, 0x0000, 0x0000, - 0xae70, 0xa979, 0x0dbe, 0x0000, 0x0000, 0x0000, 0x317c, 0xa8fb, 0x0d9e, 0x0000, - 0x0000, 0x0000, 0x5fe0, 0x8a50, 0x0d7d, 0x0000, 0x0000, 0x0000, 0x70b6, 0xfdfa, - 0x0d61, 0x0000, 0x0000, 0x0000, 0x1640, 0x9dc7, 0x0d41, 0x0000, 0x0000, 0x0000, - 0x9a9c, 0xdc50, 0x0d23, 0x0000, 0x0000, 0x0000, 0x4fcc, 0x9a9b, 0x0d04, 0x0000, - 0x0000, 0x0000, 0x7e48, 0x8f77, 0x0ce5, 0x0000, 0x0000, 0x0000, 0x84e4, 0xd4b9, - 0x0cc6, 0x0000, 0x0000, 0x0000, 0x84e0, 0xbd10, 0x0ca6, 0x0000, 0x0000, 0x0000, - 0x1b0a, 0xc8d9, 0x0c88, 0x0000, 0x0000, 0x0000, 0x6a48, 0xfc81, 0x0c68, 0x0000, - 0x0000, 0x0000, 0x070a, 0xbef6, 0x0c4a, 0x0000, 0x0000, 0x0000, 0x8a70, 0xf096, - 0x0c2b, 0x0000, 0x0000, 0x0000, 0xecc2, 0xc994, 0x0c0c, 0x0000, 0x0000, 0x0000, - 0x1540, 0x9537, 0x0bea, 0x0000, 0x0000, 0x0000, 0x1b02, 0xab5b, 0x0bce, 0x0000, - 0x0000, 0x0000, 0x5dc0, 0xb0c8, 0x0bad, 0x0000, 0x0000, 0x0000, 0xc928, 0xe034, 
- 0x0b8f, 0x0000, 0x0000, 0x0000, 0x2d12, 0xb4b0, 0x0b71, 0x0000, 0x0000, 0x0000, - 0x8fc2, 0xbb94, 0x0b52, 0x0000, 0x0000, 0x0000, 0xe236, 0xe22f, 0x0b33, 0x0000, - 0x0000, 0x0000, 0xb97c, 0xbe9e, 0x0b13, 0x0000, 0x0000, 0x0000, 0xe1a6, 0xe16d, - 0x0af5, 0x0000, 0x0000, 0x0000, 0xd330, 0xbaf0, 0x0ad6, 0x0000, 0x0000, 0x0000, - 0xc0bc, 0xbbd0, 0x0ab7, 0x0000, 0x0000, 0x0000, 0x8e66, 0xdd9b, 0x0a98, 0x0000, - 0x0000, 0x0000, 0xc95c, 0xf799, 0x0a79, 0x0000, 0x0000, 0x0000, 0xdac0, 0xbe4c, - 0x0a55, 0x0000, 0x0000, 0x0000, 0xafc0, 0xc378, 0x0a37, 0x0000, 0x0000, 0x0000, - 0xa880, 0xe341, 0x0a19, 0x0000, 0x0000, 0x0000, 0xc242, 0x81f6, 0x09fd, 0x0000, - 0x0000, 0x0000, 0x7470, 0xc777, 0x09de, 0x0000, 0x0000, 0x0000, 0x62bc, 0xb684, - 0x09be, 0x0000, 0x0000, 0x0000, 0x43ac, 0x8c58, 0x099f, 0x0000, 0x0000, 0x0000, - 0xcc3c, 0xf9ac, 0x0981, 0x0000, 0x0000, 0x0000, 0x1526, 0xb670, 0x0962, 0x0000, - 0x0000, 0x0000, 0xc9fe, 0xdf50, 0x0943, 0x0000, 0x0000, 0x0000, 0x6ae6, 0xc065, - 0x0924, 0x0000, 0x0000, 0x0000, 0xb114, 0xcf29, 0x0905, 0x0000, 0x0000, 0x0000, - 0xd388, 0x922a, 0x08e4, 0x0000, 0x0000, 0x0000, 0xcf54, 0xb926, 0x08c7, 0x0000, - 0x0000, 0x0000, 0x3826, 0xe855, 0x08a8, 0x0000, 0x0000, 0x0000, 0xe7c8, 0x829b, - 0x0888, 0x0000, 0x0000, 0x0000, 0x546c, 0xa903, 0x086a, 0x0000, 0x0000, 0x0000, - 0x8768, 0x99cc, 0x0849, 0x0000, 0x0000, 0x0000, 0x00ac, 0xf529, 0x082b, 0x0000, - 0x0000, 0x0000, 0x2658, 0x9f0b, 0x080c, 0x0000, 0x0000, 0x0000, 0xfe5c, 0x9e21, - 0x07ee, 0x0000, 0x0000, 0x0000, 0x6da2, 0x9910, 0x07cf, 0x0000, 0x0000, 0x0000, - 0x9220, 0xf9b3, 0x07b0, 0x0000, 0x0000, 0x0000, 0x3d90, 0xa541, 0x0791, 0x0000, - 0x0000, 0x0000, 0x6e4c, 0xe7cc, 0x0771, 0x0000, 0x0000, 0x0000, 0xa8fa, 0xe80a, - 0x0753, 0x0000, 0x0000, 0x0000, 0x4e14, 0xc3a7, 0x0734, 0x0000, 0x0000, 0x0000, - 0xf7e0, 0xbad9, 0x0712, 0x0000, 0x0000, 0x0000, 0xfea0, 0xeff2, 0x06f5, 0x0000, - 0x0000, 0x0000, 0xcef6, 0xbd48, 0x06d7, 0x0000, 0x0000, 0x0000, 0x7544, 0xf559, - 0x06b7, 0x0000, 0x0000, 
0x0000, 0x2388, 0xf655, 0x0698, 0x0000, 0x0000, 0x0000, - 0xe900, 0xad56, 0x0676, 0x0000, 0x0000, 0x0000, 0x2cc0, 0x8437, 0x0659, 0x0000, - 0x0000, 0x0000, 0x3068, 0xc544, 0x063b, 0x0000, 0x0000, 0x0000, 0xdc70, 0xe73c, - 0x061b, 0x0000, 0x0000, 0x0000, 0xee50, 0x9d49, 0x05fc, 0x0000, 0x0000, 0x0000, - 0x93d2, 0x81f6, 0x05df, 0x0000, 0x0000, 0x0000, 0x941c, 0xadff, 0x05bf, 0x0000, - 0x0000, 0x0000, 0x2ce2, 0x8e45, 0x05a1, 0x0000, 0x0000, 0x0000, 0x4a60, 0x95fd, - 0x0581, 0x0000, 0x0000, 0x0000, 0x79f8, 0xb83a, 0x0563, 0x0000, 0x0000, 0x0000, - 0xcb58, 0xa1f5, 0x0543, 0x0000, 0x0000, 0x0000, 0x2a3a, 0xdc36, 0x0525, 0x0000, - 0x0000, 0x0000, 0x14ee, 0x890e, 0x0506, 0x0000, 0x0000, 0x0000, 0x8f20, 0xc432, - 0x04e3, 0x0000, 0x0000, 0x0000, 0x8440, 0xb21d, 0x04c6, 0x0000, 0x0000, 0x0000, - 0x5430, 0xf698, 0x04a7, 0x0000, 0x0000, 0x0000, 0x04ae, 0x8b20, 0x048a, 0x0000, - 0x0000, 0x0000, 0x04d0, 0xe872, 0x046b, 0x0000, 0x0000, 0x0000, 0xc78e, 0x8893, - 0x044c, 0x0000, 0x0000, 0x0000, 0x0f78, 0x9895, 0x042b, 0x0000, 0x0000, 0x0000, - 0x11d4, 0xdf2e, 0x040d, 0x0000, 0x0000, 0x0000, 0xe84c, 0x89d5, 0x03ef, 0x0000, - 0x0000, 0x0000, 0xf7be, 0x8a67, 0x03d0, 0x0000, 0x0000, 0x0000, 0x95d0, 0xc906, - 0x03b1, 0x0000, 0x0000, 0x0000, 0x64ce, 0xd96c, 0x0392, 0x0000, 0x0000, 0x0000, - 0x97ba, 0xa16f, 0x0373, 0x0000, 0x0000, 0x0000, 0x463c, 0xc51a, 0x0354, 0x0000, - 0x0000, 0x0000, 0xef0a, 0xe93e, 0x0335, 0x0000, 0x0000, 0x0000, 0x526a, 0xa466, - 0x0316, 0x0000, 0x0000, 0x0000, 0x4140, 0xa94d, 0x02f5, 0x0000, 0x0000, 0x0000, - 0xb4ec, 0xce68, 0x02d8, 0x0000, 0x0000, 0x0000, 0x4fa2, 0x8490, 0x02b9, 0x0000, - 0x0000, 0x0000, 0x4e60, 0xca98, 0x0298, 0x0000, 0x0000, 0x0000, 0x08dc, 0xe09c, - 0x027a, 0x0000, 0x0000, 0x0000, 0x2b90, 0xc7e3, 0x025c, 0x0000, 0x0000, 0x0000, - 0x5a7c, 0xf8ef, 0x023c, 0x0000, 0x0000, 0x0000, 0x5022, 0x9d58, 0x021e, 0x0000, - 0x0000, 0x0000, 0x553a, 0xe242, 0x01ff, 0x0000, 0x0000, 0x0000, 0x7e6e, 0xb54d, - 0x01e0, 0x0000, 0x0000, 0x0000, 0xd2d4, 0xa88c, 0x01c1, 
0x0000, 0x0000, 0x0000, - 0x75b6, 0xfe6d, 0x01a2, 0x0000, 0x0000, 0x0000, 0x3bb2, 0xf04c, 0x0183, 0x0000, - 0x0000, 0x0000, 0xc2d0, 0xc046, 0x0163, 0x0000, 0x0000, 0x0000, 0x250c, 0xf9d6, - 0x0145, 0x0000, 0x0000, 0x0000, 0xb7b4, 0x8a0d, 0x0126, 0x0000, 0x0000, 0x0000, - 0x1a72, 0xe4f5, 0x0107, 0x0000, 0x0000, 0x0000, 0x825c, 0xa9b8, 0x00e8, 0x0000, - 0x0000, 0x0000, 0x6c90, 0xc9ad, 0x00c6, 0x0000, 0x0000, 0x0000, 0x4d00, 0xd1bb, - 0x00aa, 0x0000, 0x0000, 0x0000, 0xa4a0, 0xee01, 0x0087, 0x0000, 0x0000, 0x0000, - 0x89a8, 0xbe9f, 0x006b, 0x0000, 0x0000, 0x0000, 0x038e, 0xc80c, 0x004d, 0x0000, - 0x0000, 0x0000, 0xfe26, 0x8384, 0x002e, 0x0000, 0x0000, 0x0000, 0xcd90, 0xca57, - 0x000e, 0x0000 -}; - -void MacroAssembler::libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx, Register esi, Register edi, Register ebp, Register esp) { - Label B1_1, B1_2, B1_3, B1_4, B1_5, B1_6, B1_7, B1_8, B1_9, B1_10, B1_11, B1_12; - Label B1_13, B1_14, B1_15; - - assert_different_registers(ebx, eax, ecx, edx, esi, edi, ebp, esp); - - address zero_none = (address)_zero_none; - address _4onpi_d = (address)__4onpi_d; - address TWO_32H = (address)_TWO_32H; - address pi04_3d = (address)_pi04_3d; - address pi04_5d = (address)_pi04_5d; - address SCALE = (address)_SCALE; - address zeros = (address)_zeros; - address pi04_2d = (address)_pi04_2d; - address TWO_12H = (address)_TWO_12H; - address _4onpi_31l = (address)__4onpi_31l; - - bind(B1_1); - push(ebp); - movl(ebp, esp); - andl(esp, -16); - push(esi); - push(edi); - push(ebx); - subl(esp, 20); - movzwl(ebx, Address(ebp, 16)); - andl(ebx, 32767); - movl(eax, Address(ebp, 20)); - cmpl(ebx, 16413); - movl(esi, Address(ebp, 24)); - movl(Address(esp, 4), eax); - jcc(Assembler::greaterEqual, B1_8); - - bind(B1_2); - fld_x(Address(ebp, 8)); - fld_d(ExternalAddress(_4onpi_d)); //0x6dc9c883UL, 0x3ff45f30UL - fmul(1); - fstp_x(Address(esp, 8)); - movzwl(ecx, Address(esp, 16)); - negl(ecx); - addl(ecx, 30); - movl(eax, Address(esp, 12)); 
- shrl(eax); - cmpl(Address(esp, 4), 0); - jcc(Assembler::notEqual, B1_4); - - bind(B1_3); - lea(ecx, Address(eax, 1)); - andl(ecx, -2); - jmp(B1_5); - - bind(B1_4); - movl(ecx, eax); - addl(eax, Address(esp, 4)); - movl(edx, eax); - andl(edx, 1); - addl(ecx, edx); - - bind(B1_5); - fld_d(ExternalAddress(TWO_32H)); //0x00000000UL, 0x41f80000UL - cmpl(ebx, 16400); - movl(Address(esp, 0), ecx); - fild_s(Address(esp, 0)); - jcc(Assembler::greaterEqual, B1_7); - - bind(B1_6); - fld_d(ExternalAddress(pi04_3d)); //0x54442d00UL, 0x3fe921fbUL - fmul(1); - fsubp(3); - fxch(1); - fmul(2); - fld_s(2); - fadd(1); - fsubrp(1); - fld_s(0); - fxch(1); - fsuba(3); - fld_d(ExternalAddress(8 + pi04_3d)); //0x98cc5180UL, 0x3ce84698UL - fmul(3); - fsuba(2); - fxch(1); - fsub(2); - fsubrp(1); - faddp(3); - fld_d(ExternalAddress(16 + pi04_3d)); //0xcbb5bf6cUL, 0xb9dfc8f8UL - fmulp(2); - fld_s(1); - fsubr(1); - fsuba(1); - fxch(2); - fsubp(1); - faddp(2); - fxch(1); - jmp(B1_15); - - bind(B1_7); - fld_d(ExternalAddress(pi04_5d)); //0x54400000UL, 0x3fe921fbUL - fmul(1); - fsubp(3); - fxch(1); - fmul(2); - fld_s(2); - fadd(1); - fsubrp(1); - fld_s(0); - fxch(1); - fsuba(3); - fld_d(ExternalAddress(8 + pi04_5d)); //0x1a600000UL, 0x3dc0b461UL - fmul(3); - fsuba(2); - fxch(1); - fsub(2); - fsubrp(1); - faddp(3); - fld_d(ExternalAddress(16 + pi04_5d)); //0x2e000000UL, 0x3b93198aUL - fmul(2); - fld_s(0); - fsubr(2); - fsuba(2); - fxch(1); - fsubp(2); - fxch(1); - faddp(3); - fld_d(ExternalAddress(24 + pi04_5d)); //0x25200000UL, 0x396b839aUL - fmul(2); - fld_s(0); - fsubr(2); - fsuba(2); - fxch(1); - fsubp(2); - fxch(1); - faddp(3); - fld_d(ExternalAddress(32 + pi04_5d)); //0x533e63a0UL, 0x37027044UL - fmulp(2); - fld_s(1); - fsubr(1); - fsuba(1); - fxch(2); - fsubp(1); - faddp(2); - fxch(1); - jmp(B1_15); - - bind(B1_8); - fld_x(Address(ebp, 8)); - addl(ebx, -16417); - fmul_d(as_Address(ExternalAddress(SCALE))); //0x00000000UL, 0x32600000UL - movl(eax, -2078209981); - imull(ebx); - addl(edx, 
ebx); - movl(ecx, ebx); - sarl(edx, 4); - sarl(ecx, 31); - subl(edx, ecx); - movl(eax, edx); - shll(eax, 5); - fstp_x(Address(ebp, 8)); - fld_x(Address(ebp, 8)); - subl(eax, edx); - movl(Address(ebp, 8), 0); - subl(ebx, eax); - fld_x(Address(ebp, 8)); - cmpl(ebx, 17); - fsuba(1); - jcc(Assembler::less, B1_10); - - bind(B1_9); - lea(eax, Address(noreg, edx, Address::times_8)); - lea(ecx, Address(eax, edx, Address::times_4)); - incl(edx); - fld_x(Address(_4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1)); - fmul(2); - fld_x(Address(12 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1)); - fmul(2); - fld_s(0); - fadd(2); - fsuba(2); - fxch(1); - faddp(2); - fld_s(1); - fadd(1); - fstp_x(Address(esp, 8)); - andl(Address(esp, 8), -16777216); - fld_x(Address(esp, 8)); - fsubp(1); - jmp(B1_11); - - bind(B1_10); - fld_d(ExternalAddress(zeros)); //0x00000000UL, 0x00000000UL - fld_s(0); - - bind(B1_11); - fld_s(0); - lea(eax, Address(noreg, edx, Address::times_8)); - fld_s(3); - lea(edx, Address(eax, edx, Address::times_4)); - fld_x(Address(_4onpi_31l, RelocationHolder::none).plus_disp(edx, Address::times_1)); - fmul(6); - movl(Address(esp, 0), edx); - fadda(2); - fxch(2); - fsuba(3); - fxch(2); - faddp(3); - fxch(2); - faddp(3); - fld_x(Address(12 + _4onpi_31l, RelocationHolder::none).plus_disp(edx, Address::times_1)); - fmula(2); - fld_s(2); - fadd(2); - fld_s(0); - fxch(1); - fsubra(3); - fxch(3); - fchs(); - faddp(4); - fxch(3); - faddp(4); - fxch(2); - fadd(3); - fxch(2); - fmul(5); - fadda(2); - fld_s(4); - fld_x(Address(24 + _4onpi_31l, RelocationHolder::none).plus_disp(edx, Address::times_1)); - fmula(1); - fxch(1); - fadda(4); - fxch(4); - fstp_x(Address(esp, 8)); - movzwl(ebx, Address(esp, 16)); - andl(ebx, 32767); - cmpl(ebx, 16415); - jcc(Assembler::greaterEqual, B1_13); - - bind(B1_12); - negl(ebx); - addl(ebx, 30); - movl(ecx, ebx); - movl(eax, Address(esp, 12)); - shrl(eax); - shll(eax); - movl(Address(esp, 12), eax); - 
movl(Address(esp, 8), 0); - shrl(eax); - jmp(B1_14); - - bind(B1_13); - negl(ebx); - addl(ebx, 30); - movl(ecx, ebx); - movl(edx, Address(esp, 8)); - shrl(edx); - shll(edx); - negl(ecx); - movl(eax, Address(esp, 12)); - shll(eax); - movl(ecx, ebx); - movl(Address(esp, 8), edx); - shrl(edx); - orl(eax, edx); - - bind(B1_14); - fld_x(Address(esp, 8)); - addl(eax, Address(esp, 4)); - fsubp(3); - fmul(6); - fld_s(4); - movl(edx, eax); - andl(edx, 1); - fadd(3); - movl(ecx, Address(esp, 0)); - fsuba(3); - fxch(3); - faddp(5); - fld_s(1); - fxch(3); - fadd_d(Address(zero_none, RelocationHolder::none).plus_disp(edx, Address::times_8)); - fadda(3); - fsub(3); - faddp(2); - fxch(1); - faddp(4); - fld_s(2); - fadd(2); - fsuba(2); - fxch(3); - faddp(2); - fxch(1); - faddp(3); - fld_s(0); - fadd(2); - fsuba(2); - fxch(1); - faddp(2); - fxch(1); - faddp(2); - fld_s(2); - fld_x(Address(36 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1)); - fmula(1); - fld_s(1); - fadd(3); - fsuba(3); - fxch(2); - faddp(3); - fxch(2); - faddp(3); - fxch(1); - fmul(4); - fld_s(0); - fadd(2); - fsuba(2); - fxch(1); - faddp(2); - fxch(1); - faddp(2); - fld_s(2); - fld_x(Address(48 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1)); - fmula(1); - fld_s(1); - fadd(3); - fsuba(3); - fxch(2); - faddp(3); - fxch(2); - faddp(3); - fld_s(3); - fxch(2); - fmul(5); - fld_x(Address(60 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1)); - fmula(3); - fxch(3); - faddp(1); - fld_s(0); - fadd(2); - fsuba(2); - fxch(1); - faddp(2); - fxch(1); - faddp(3); - fld_s(3); - fxch(2); - fmul(5); - fld_x(Address(72 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1)); - fmula(3); - fxch(3); - faddp(1); - fld_s(0); - fadd(2); - fsuba(2); - fxch(1); - faddp(2); - fxch(1); - faddp(3); - fxch(1); - fmulp(4); - fld_x(Address(84 + _4onpi_31l, RelocationHolder::none).plus_disp(ecx, Address::times_1)); - fmulp(3); - fxch(2); - faddp(3); - fld_s(2); 
- fadd(2); - fld_d(ExternalAddress(TWO_32H)); //0x00000000UL, 0x41f80000UL - fmul(1); - fadda(1); - fsubp(1); - fsuba(2); - fxch(3); - faddp(2); - faddp(1); - fld_d(ExternalAddress(pi04_2d)); //0x54400000UL, 0x3fe921fbUL - fld_s(0); - fmul(2); - fxch(2); - fadd(3); - fxch(1); - fmulp(3); - fmul_d(as_Address(ExternalAddress(8 + pi04_2d))); //0x1a626331UL, 0x3dc0b461UL - faddp(1); - - bind(B1_15); - fld_d(ExternalAddress(TWO_12H)); //0x00000000UL, 0x40b80000UL - fld_s(2); - fadd(2); - fmula(1); - fstp_x(Address(esp, 8)); - fld_x(Address(esp, 8)); - fadd(1); - fsubrp(1); - fst_d(Address(esi, 0)); - fsubp(2); - faddp(1); - fstp_d(Address(esi, 8)); - addl(esp, 20); - pop(ebx); - pop(edi); - pop(esi); - movl(esp, ebp); - pop(ebp); - ret(0); -} - - -ATTRIBUTE_ALIGNED(16) static const jushort _SP[] = -{ - 0xaaab, 0xaaaa, 0xaaaa, 0xaaaa, 0xbffc, 0x0000, 0x8887, 0x8888, 0x8888, 0x8888, - 0x3ff8, 0x0000, 0xc527, 0x0d00, 0x00d0, 0xd00d, 0xbff2, 0x0000, 0x45f6, 0xb616, - 0x1d2a, 0xb8ef, 0x3fec, 0x0000, 0x825b, 0x3997, 0x2b3f, 0xd732, 0xbfe5, 0x0000, - 0xbf33, 0x8bb4, 0x2fda, 0xb092, 0x3fde, 0x0000, 0x44a6, 0xed1a, 0x29ef, 0xd73e, - 0xbfd6, 0x0000, 0x8610, 0x307f, 0x62a1, 0xc921, 0x3fce, 0x0000 -}; - -ATTRIBUTE_ALIGNED(16) static const jushort _CP[] = -{ - 0x0000, 0x0000, 0x0000, 0x8000, 0xbffe, 0x0000, 0xaaa5, 0xaaaa, 0xaaaa, 0xaaaa, - 0x3ffa, 0x0000, 0x9c2f, 0x0b60, 0x60b6, 0xb60b, 0xbff5, 0x0000, 0xf024, 0x0cac, - 0x00d0, 0xd00d, 0x3fef, 0x0000, 0x03fe, 0x3f65, 0x7dbb, 0x93f2, 0xbfe9, 0x0000, - 0xd84d, 0xadee, 0xc698, 0x8f76, 0x3fe2, 0x0000, 0xdaba, 0xfe79, 0xea36, 0xc9c9, - 0xbfda, 0x0000, 0x3ac6, 0x0ba0, 0x07ce, 0xd585, 0x3fd2, 0x0000 -}; - -void MacroAssembler::libm_sincos_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx, Register edx, Register ebx, Register esi, Register edi, Register ebp, Register esp) { - Label B1_1, B1_2, B1_3, B1_4, B1_5, B1_6, B1_7, B1_8, B1_9, B1_10, B1_11, B1_12; - Label B1_13, B1_14, B1_15, B1_16, B1_17, B1_18, B1_19, B1_20, 
B1_21, B1_22, B1_23; - Label B1_24, B1_25, B1_26, B1_27, B1_28, B1_29, B1_30, B1_31, B1_32, B1_33, B1_34; - Label B1_35, B1_36, B1_37, B1_38, B1_39, B1_40, B1_41, B1_42, B1_43, B1_46; - - assert_different_registers(ebx, eax, ecx, edx, esi, edi, ebp, esp); - - address CP = (address)_CP; - address SP = (address)_SP; - - bind(B1_1); - push(ebp); - movl(ebp, esp); - andl(esp, -64); - push(esi); - push(edi); - push(ebx); - subl(esp, 52); - movl(eax, Address(ebp, 16)); - movl(edx, Address(ebp, 20)); - movl(Address(esp, 32), eax); - movl(Address(esp, 36), edx); - - bind(B1_2); - fnstcw(Address(esp, 30)); - - bind(B1_3); - movsd(xmm1, Address(ebp, 8)); - movl(esi, Address(ebp, 12)); - movl(eax, esi); - andl(eax, 2147483647); - andps(xmm1, ExternalAddress(L_2IL0FLOATPACKET_0)); //0xffffffffUL, 0x7fffffffUL, 0x00000000UL, 0x00000000UL - shrl(esi, 31); - movl(Address(esp, 40), eax); - cmpl(eax, 1104150528); - movsd(Address(ebp, 8), xmm1); - jcc(Assembler::aboveEqual, B1_11); - - bind(B1_4); - movsd(xmm0, ExternalAddress(PI4_INV)); //0x6dc9c883UL, 0x3ff45f30UL - mulsd(xmm0, xmm1); - movzwl(edx, Address(esp, 30)); - movl(eax, edx); - andl(eax, 768); - movsd(Address(esp, 0), xmm0); - cmpl(eax, 768); - jcc(Assembler::equal, B1_42); - - bind(B1_5); - orl(edx, -64768); - movw(Address(esp, 28), edx); - - bind(B1_6); - fldcw(Address(esp, 28)); - - bind(B1_7); - movsd(xmm1, Address(ebp, 8)); - movl(ebx, 1); - - bind(B1_8); - movl(Address(esp, 12), ebx); - movl(ebx, Address(esp, 4)); - movl(eax, ebx); - movl(Address(esp, 8), esi); - movl(esi, ebx); - shrl(esi, 20); - andl(eax, 1048575); - movl(ecx, esi); - orl(eax, 1048576); - negl(ecx); - movl(edx, eax); - addl(ecx, 19); - addl(esi, 13); - movl(Address(esp, 24), ecx); - shrl(edx); - movl(ecx, esi); - shll(eax); - movl(ecx, Address(esp, 24)); - movl(esi, Address(esp, 0)); - shrl(esi); - orl(eax, esi); - cmpl(ebx, 1094713344); - movsd(Address(esp, 16), xmm1); - fld_d(Address(esp, 16)); - cmov32(Assembler::below, eax, edx); - movl(esi, 
Address(esp, 8)); - lea(edx, Address(eax, 1)); - movl(ebx, edx); - andl(ebx, -2); - movl(Address(esp, 16), ebx); - fild_s(Address(esp, 16)); - movl(ebx, Address(esp, 12)); - cmpl(Address(esp, 40), 1094713344); - jcc(Assembler::aboveEqual, B1_10); - - bind(B1_9); - fld_d(ExternalAddress(PI4X3)); //0x54443000UL, 0xbfe921fbUL - fmul(1); - faddp(2); - fld_d(ExternalAddress(PI4X3 + 8)); //0x3b39a000UL, 0x3d373dcbUL - fmul(1); - faddp(2); - fld_d(ExternalAddress(PI4X3 + 16)); //0xe0e68948UL, 0xba845c06UL - fmulp(1); - faddp(1); - jmp(B1_17); - - bind(B1_10); - fld_d(ExternalAddress(PI4X4)); //0x54400000UL, 0xbfe921fbUL - fmul(1); - faddp(2); - fld_d(ExternalAddress(PI4X4 + 8)); //0x1a600000UL, 0xbdc0b461UL - fmul(1); - faddp(2); - fld_d(ExternalAddress(PI4X4 + 16)); //0x2e000000UL, 0xbb93198aUL - fmul(1); - faddp(2); - fld_d(ExternalAddress(PI4X4 + 24)); //0x252049c1UL, 0xb96b839aUL - fmulp(1); - faddp(1); - jmp(B1_17); - - bind(B1_11); - movzwl(edx, Address(esp, 30)); - movl(eax, edx); - andl(eax, 768); - cmpl(eax, 768); - jcc(Assembler::equal, B1_43); - bind(B1_12); - orl(edx, -64768); - movw(Address(esp, 28), edx); - - bind(B1_13); - fldcw(Address(esp, 28)); - - bind(B1_14); - movsd(xmm1, Address(ebp, 8)); - movl(ebx, 1); - - bind(B1_15); - movsd(Address(esp, 16), xmm1); - fld_d(Address(esp, 16)); - addl(esp, -32); - lea(eax, Address(esp, 32)); - fstp_x(Address(esp, 0)); - movl(Address(esp, 12), 0); - movl(Address(esp, 16), eax); - call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_reduce_pi04l()))); - - bind(B1_46); - addl(esp, 32); - - bind(B1_16); - fld_d(Address(esp, 0)); - lea(edx, Address(eax, 1)); - fld_d(Address(esp, 8)); - faddp(1); - - bind(B1_17); - movl(ecx, edx); - addl(eax, 3); - shrl(ecx, 2); - andl(ecx, 1); - shrl(eax, 2); - xorl(esi, ecx); - movl(ecx, Address(esp, 36)); - andl(eax, 1); - andl(ecx, 3); - cmpl(ecx, 3); - jcc(Assembler::notEqual, B1_25); - - bind(B1_18); - fld_x(ExternalAddress(84 + SP)); //0x8610, 0x307f, 0x62 - fld_s(1); 
- fmul((2)); - testb(edx, 2); - fmula((1)); - fld_x(ExternalAddress(72 + SP)); //0x44a6, 0xed1a, 0x29 - faddp(2); - fmula(1); - fld_x(ExternalAddress(60 + SP)); //0xbf33, 0x8bb4, 0x2f - faddp(2); - fmula(1); - fld_x(ExternalAddress(48 + SP)); //0x825b, 0x3997, 0x2b - faddp(2); - fmula(1); - fld_x(ExternalAddress(36 + SP)); //0x45f6, 0xb616, 0x1d - faddp(2); - fmula(1); - fld_x(ExternalAddress(24 + SP)); //0xc527, 0x0d00, 0x00 - faddp(2); - fmula(1); - fld_x(ExternalAddress(12 + SP)); //0x8887, 0x8888, 0x88 - faddp(2); - fmula(1); - fld_x(ExternalAddress(SP)); //0xaaab, 0xaaaa, 0xaa - faddp(2); - fmula(1); - fld_x(ExternalAddress(84 + CP)); //0x3ac6, 0x0ba0, 0x07 - fmul(1); - fld_x(ExternalAddress(72 + CP)); //0xdaba, 0xfe79, 0xea - faddp(1); - fmul(1); - fld_x(ExternalAddress(62 + CP)); //0xd84d, 0xadee, 0xc6 - faddp(1); - fmul(1); - fld_x(ExternalAddress(48 + CP)); //0x03fe, 0x3f65, 0x7d - faddp(1); - fmul(1); - fld_x(ExternalAddress(36 + CP)); //0xf024, 0x0cac, 0x00 - faddp(1); - fmul(1); - fld_x(ExternalAddress(24 + CP)); //0x9c2f, 0x0b60, 0x60 - faddp(1); - fmul(1); - fld_x(ExternalAddress(12 + CP)); //0xaaa5, 0xaaaa, 0xaa - faddp(1); - fmul(1); - fld_x(ExternalAddress(CP)); //0x0000, 0x0000, 0x00 - faddp(1); - fmulp(1); - fld_d(Address(ONES, RelocationHolder::none).plus_disp(esi, Address::times_8)); - fld_d(Address(ONES, RelocationHolder::none).plus_disp(eax, Address::times_8)); - jcc(Assembler::equal, B1_22); - - bind(B1_19); - fmulp(4); - testl(ebx, ebx); - fxch(2); - fmul(3); - movl(eax, Address(esp, 2)); - faddp(3); - fxch(2); - fstp_d(Address(eax, 0)); - fmula(1); - faddp(1); - fstp_d(Address(eax, 8)); - jcc(Assembler::equal, B1_21); - - bind(B1_20); - fldcw(Address(esp, 30)); - - bind(B1_21); - addl(esp, 52); - pop(ebx); - pop(edi); - pop(esi); - movl(esp, ebp); - pop(ebp); - ret(0); - - bind(B1_22); - fxch(1); - fmulp(4); - testl(ebx, ebx); - fxch(2); - fmul(3); - movl(eax, Address(esp, 32)); - faddp(3); - fxch(2); - fstp_d(Address(eax, 8)); - fmula(1); 
- faddp(1); - fstp_d(Address(eax, 0)); - jcc(Assembler::equal, B1_24); - - bind(B1_23); - fldcw(Address(esp, 30)); - - bind(B1_24); - addl(esp, 52); - pop(ebx); - pop(edi); - pop(esi); - movl(esp, ebp); - pop(ebp); - ret(0); - - bind(B1_25); - testb(Address(esp, 36), 2); - jcc(Assembler::equal, B1_33); - - bind(B1_26); - fld_s(0); - testb(edx, 2); - fmul(1); - fld_s(0); - fmul(1); - jcc(Assembler::equal, B1_30); - - bind(B1_27); - fstp_d(2); - fld_x(ExternalAddress(84 + CP)); //0x3ac6, 0x0ba0, 0x07 - testl(ebx, ebx); - fmul(2); - fld_x(ExternalAddress(72 + CP)); //0xdaba, 0xfe79, 0xea - fmul(3); - fld_x(ExternalAddress(60 + CP)); //0xd84d, 0xadee, 0xc6 - movl(eax, Address(rsp, 32)); - faddp(2); - fxch(1); - fmul(3); - fld_x(ExternalAddress(48 + CP)); //0x03fe, 0x3f65, 0x7d - faddp(2); - fxch(1); - fmul(3); - fld_x(ExternalAddress(36 + CP)); //0xf024, 0x0cac, 0x00 - faddp(2); - fxch(1); - fmul(3); - fld_x(ExternalAddress(24 + CP)); //0x9c2f, 0x0b60, 0x60 - faddp(2); - fxch(1); - fmul(3); - fld_x(ExternalAddress(12 + CP)); //0xaaa5, 0xaaaa, 0xaa - faddp(2); - fxch(1); - fmulp(3); - fld_x(ExternalAddress(CP)); //0x0000, 0x0000, 0x00 - faddp(1); - fmulp(1); - faddp(1); - fld_d(Address(ONES, RelocationHolder::none).plus_disp(rsi, Address::times_8)); - fmula(1); - faddp(1); - fstp_d(Address(eax, 8)); - jcc(Assembler::equal, B1_29); - - bind(B1_28); - fldcw(Address(esp, 30)); - - bind(B1_29); - addl(esp, 52); - pop(ebx); - pop(edi); - pop(esi); - movl(esp, ebp); - pop(ebp); - ret(0); - - bind(B1_30); - fld_x(ExternalAddress(84 + SP)); //0x8610, 0x307f, 0x62 - testl(ebx, ebx); - fmul(1); - fld_x(ExternalAddress(72 + SP)); //0x44a6, 0xed1a, 0x29 - fmul(2); - fld_x(ExternalAddress(60 + SP)); //0xbf33, 0x8bb4, 0x2f - movl(eax, Address(rsp, 32)); - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(48 + SP)); //0x825b, 0x3997, 0x2b - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(36 + SP)); //0x45f6, 0xb616, 0x1d - faddp(2); - fxch(1); - fmul(2); - 
fld_x(ExternalAddress(24 + SP)); //0xc527, 0x0d00, 0x00 - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(12 + SP)); //0x8887, 0x8888, 0x88 - faddp(2); - fxch(1); - fmulp(2); - fld_x(ExternalAddress(SP)); //0xaaab, 0xaaaa, 0xaa - faddp(1); - fmulp(2); - faddp(1); - fld_d(Address(ONES, RelocationHolder::none).plus_disp(rsi, Address::times_8)); - fmulp(2); - fmul(1); - faddp(1); - fstp_d(Address(eax, 8)); - jcc(Assembler::equal, B1_32); - - bind(B1_31); - fldcw(Address(esp, 30)); - - bind(B1_32); - addl(esp, 52); - pop(ebx); - pop(edi); - pop(esi); - movl(esp, ebp); - pop(ebp); - ret(0); - - bind(B1_33); - testb(Address(esp, 36), 1); - jcc(Assembler::equal, B1_41); - - bind(B1_34); - fld_s(0); - testb(edx, 2); - fmul(1); - fld_s(0); - fmul(1); - jcc(Assembler::equal, B1_38); - - bind(B1_35); - fld_x(ExternalAddress(84 + SP)); //0x8610, 0x307f, 0x62 - testl(ebx, ebx); - fmul(1); - fld_x(ExternalAddress(72 + SP)); //0x44a6, 0xed1a, 0x29 - fmul(2); - fld_x(ExternalAddress(60 + SP)); //0xbf33, 0x8bb4, 0x2f - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(48 + SP)); //0x825b, 0x3997, 0x2b - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(36 + SP)); //0x45f6, 0xb616, 0x1d - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(24 + SP)); //0xc527, 0x0d00, 0x00 - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(12 + SP)); //0x8887, 0x8888, 0x88 - faddp(2); - fxch(1); - fmulp(2); - fld_x(ExternalAddress(SP)); //0xaaab, 0xaaaa, 0xaa - faddp(1); - fmulp(2); - faddp(1); - fld_d(Address(ONES, RelocationHolder::none).plus_disp(eax, Address::times_8)); - fmulp(2); - fmul(1); - movl(eax, Address(esp, 32)); - faddp(1); - fstp_d(Address(eax, 0)); - jcc(Assembler::equal, B1_37); - - bind(B1_36); - fldcw(Address(esp, 30)); - - bind(B1_37); - addl(esp, 52); - pop(ebx); - pop(edi); - pop(esi); - movl(esp, ebp); - pop(ebp); - ret(0); - - bind(B1_38); - fstp_d(2); - fld_x(ExternalAddress(84 + CP)); //0x3ac6, 0x0ba0, 0x07 - testl(ebx, ebx); - fmul(2); - 
fld_x(ExternalAddress(72 + CP)); //0xdaba, 0xfe79, 0xea - fmul(3); - fld_x(ExternalAddress(60 + CP)); //0xd84d, 0xadee, 0xc6 - faddp(2); - fxch(1); - fmul(3); - fld_x(ExternalAddress(48 + CP)); //0x03fe, 0x3f65, 0x7d - faddp(2); - fxch(1); - fmul(3); - fld_x(ExternalAddress(36 + CP)); //0xf024, 0x0cac, 0x00 - faddp(2); - fxch(1); - fmul(3); - fld_x(ExternalAddress(24 + CP)); //0x9c2f, 0x0b60, 0x60 - faddp(2); - fxch(1); - fmul(3); - fld_x(ExternalAddress(12 + CP)); //0xaaa5, 0xaaaa, 0xaa - faddp(2); - fxch(1); - fmulp(3); - fld_x(ExternalAddress(CP)); //0x0000, 0x0000, 0x00 - faddp(1); - fmulp(1); - faddp(1); - fld_d(Address(ONES, RelocationHolder::none).plus_disp(eax, Address::times_8)); - fmula(1); - movl(eax, Address(esp, 32)); - faddp(1); - fstp_d(Address(eax, 0)); - jcc(Assembler::equal, B1_40); - - bind(B1_39); - fldcw(Address(esp, 30)); - bind(B1_40); - addl(esp, 52); - pop(ebx); - pop(edi); - pop(esi); - movl(esp, ebp); - pop(ebp); - ret(0); - bind(B1_41); - fstp_d(0); - addl(esp, 52); - pop(ebx); - pop(edi); - pop(esi); - movl(esp, ebp); - pop(ebp); - ret(0); - bind(B1_42); - xorl(ebx, ebx); - jmp(B1_8); - bind(B1_43); - xorl(ebx, ebx); - jmp(B1_15); -} - -ATTRIBUTE_ALIGNED(16) static const juint _static_const_table_sin[] = -{ - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, 0xbf73b92eUL, - 0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL, - 0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, - 0xc0000000UL, 0xbc626d19UL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, - 0xbfa60beaUL, 0x2ed59f06UL, 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, - 0x00000000UL, 0x3ff00000UL, 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, - 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, 0x00000000UL, 0x3ff00000UL, - 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, 0x20000000UL, - 0x3c5e0d89UL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, 0xbfc59267UL, - 0x39ae68c8UL, 
0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL, - 0x3ff00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, - 0x20000000UL, 0x3c68076aUL, 0x00000000UL, 0x3ff00000UL, 0x99fcef32UL, - 0x3fca8279UL, 0x667f3bcdUL, 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, - 0x00000000UL, 0x3fe00000UL, 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, - 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, 0x00000000UL, 0x3fe00000UL, - 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, 0xe0000000UL, - 0x3c39f630UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, 0xbf9d4a2cUL, - 0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL, - 0x3fe00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0x3fed906bUL, - 0x20000000UL, 0x3c7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x76acf82dUL, - 0x3fa4a031UL, 0x56c62ddaUL, 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, - 0x00000000UL, 0x3fd00000UL, 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, - 0x3fef6297UL, 0x20000000UL, 0x3c756217UL, 0x00000000UL, 0x3fd00000UL, - 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, 0x40000000UL, - 0xbc887df6UL, 0x00000000UL, 0x3fc00000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0x3fefd88dUL, - 0x40000000UL, 0xbc887df6UL, 0x00000000UL, 0xbfc00000UL, 0x0e5967d5UL, - 0x3fac1d1fUL, 0xcff75cb0UL, 0x3fef6297UL, 0x20000000UL, 0x3c756217UL, - 0x00000000UL, 0xbfd00000UL, 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, - 0x3fee9f41UL, 0xe0000000UL, 0x3c8760b1UL, 0x00000000UL, 0xbfd00000UL, - 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, 0x3fed906bUL, 0x20000000UL, - 0x3c7457e6UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, 0x3f9d4a2cUL, - 0xf180bdb1UL, 0x3fec38b2UL, 0x80000000UL, 0xbc76e0b1UL, 0x00000000UL, - 0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0x3fea9b66UL, - 0xe0000000UL, 0x3c39f630UL, 0x00000000UL, 0xbfe00000UL, 0x94247758UL, - 0xbfc133ccUL, 0x6b151741UL, 0x3fe8bc80UL, 0x20000000UL, 0xbc82c5e1UL, 
- 0x00000000UL, 0xbfe00000UL, 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, - 0x3fe6a09eUL, 0x20000000UL, 0xbc8bdd34UL, 0x00000000UL, 0xbfe00000UL, - 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, 0x3fe44cf3UL, 0x20000000UL, - 0x3c68076aUL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, 0x3fc59267UL, - 0x39ae68c8UL, 0x3fe1c73bUL, 0x20000000UL, 0x3c8b25ddUL, 0x00000000UL, - 0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0x3fde2b5dUL, - 0x20000000UL, 0x3c5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, - 0x3fb37ca1UL, 0xa6aea963UL, 0x3fd87de2UL, 0xe0000000UL, 0xbc672cedUL, - 0x00000000UL, 0xbff00000UL, 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, - 0x3fd29406UL, 0xa0000000UL, 0xbc75d28dUL, 0x00000000UL, 0xbff00000UL, - 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, 0x3fc8f8b8UL, 0xc0000000UL, - 0xbc626d19UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, 0x3f73b92eUL, - 0xbc29b42cUL, 0x3fb917a6UL, 0xe0000000UL, 0xbc3e2718UL, 0x00000000UL, - 0xbff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x176d6d31UL, - 0x3f73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL, - 0x00000000UL, 0xbff00000UL, 0x011469fbUL, 0x3f93ad06UL, 0x3c69a60bUL, - 0xbfc8f8b8UL, 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0xbff00000UL, - 0x939d225aUL, 0x3fa60beaUL, 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, - 0x3c75d28dUL, 0x00000000UL, 0xbff00000UL, 0x866b95cfUL, 0x3fb37ca1UL, - 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, 0x3c672cedUL, 0x00000000UL, - 0xbff00000UL, 0x73fa1279UL, 0x3fbe3a68UL, 0x3806f63bUL, 0xbfde2b5dUL, - 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0xbff00000UL, 0x5bc57974UL, - 0x3fc59267UL, 0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL, - 0x00000000UL, 0xbff00000UL, 0x53aba2fdUL, 0x3fcd0dfeUL, 0x25091dd6UL, - 0xbfe44cf3UL, 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0xbff00000UL, - 0x99fcef32UL, 0xbfca8279UL, 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, - 0x3c8bdd34UL, 0x00000000UL, 0xbfe00000UL, 
0x94247758UL, 0xbfc133ccUL, - 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, 0x3c82c5e1UL, 0x00000000UL, - 0xbfe00000UL, 0x9ae68c87UL, 0xbfac73b3UL, 0x290ea1a3UL, 0xbfea9b66UL, - 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0xbfe00000UL, 0x7f909c4eUL, - 0x3f9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL, - 0x00000000UL, 0xbfe00000UL, 0x65455a75UL, 0x3fbe0875UL, 0xcf328d46UL, - 0xbfed906bUL, 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0xbfe00000UL, - 0x76acf82dUL, 0xbfa4a031UL, 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, - 0xbc8760b1UL, 0x00000000UL, 0xbfd00000UL, 0x0e5967d5UL, 0x3fac1d1fUL, - 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, 0xbc756217UL, 0x00000000UL, - 0xbfd00000UL, 0x0f592f50UL, 0x3f9ba165UL, 0xa3d12526UL, 0xbfefd88dUL, - 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0xbfc00000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0xbff00000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x0f592f50UL, 0xbf9ba165UL, 0xa3d12526UL, - 0xbfefd88dUL, 0x40000000UL, 0x3c887df6UL, 0x00000000UL, 0x3fc00000UL, - 0x0e5967d5UL, 0xbfac1d1fUL, 0xcff75cb0UL, 0xbfef6297UL, 0x20000000UL, - 0xbc756217UL, 0x00000000UL, 0x3fd00000UL, 0x76acf82dUL, 0x3fa4a031UL, - 0x56c62ddaUL, 0xbfee9f41UL, 0xe0000000UL, 0xbc8760b1UL, 0x00000000UL, - 0x3fd00000UL, 0x65455a75UL, 0xbfbe0875UL, 0xcf328d46UL, 0xbfed906bUL, - 0x20000000UL, 0xbc7457e6UL, 0x00000000UL, 0x3fe00000UL, 0x7f909c4eUL, - 0xbf9d4a2cUL, 0xf180bdb1UL, 0xbfec38b2UL, 0x80000000UL, 0x3c76e0b1UL, - 0x00000000UL, 0x3fe00000UL, 0x9ae68c87UL, 0x3fac73b3UL, 0x290ea1a3UL, - 0xbfea9b66UL, 0xe0000000UL, 0xbc39f630UL, 0x00000000UL, 0x3fe00000UL, - 0x94247758UL, 0x3fc133ccUL, 0x6b151741UL, 0xbfe8bc80UL, 0x20000000UL, - 0x3c82c5e1UL, 0x00000000UL, 0x3fe00000UL, 0x99fcef32UL, 0x3fca8279UL, - 0x667f3bcdUL, 0xbfe6a09eUL, 0x20000000UL, 0x3c8bdd34UL, 0x00000000UL, - 0x3fe00000UL, 0x53aba2fdUL, 0xbfcd0dfeUL, 0x25091dd6UL, 0xbfe44cf3UL, - 0x20000000UL, 0xbc68076aUL, 0x00000000UL, 0x3ff00000UL, 0x5bc57974UL, - 0xbfc59267UL, 
0x39ae68c8UL, 0xbfe1c73bUL, 0x20000000UL, 0xbc8b25ddUL, - 0x00000000UL, 0x3ff00000UL, 0x73fa1279UL, 0xbfbe3a68UL, 0x3806f63bUL, - 0xbfde2b5dUL, 0x20000000UL, 0xbc5e0d89UL, 0x00000000UL, 0x3ff00000UL, - 0x866b95cfUL, 0xbfb37ca1UL, 0xa6aea963UL, 0xbfd87de2UL, 0xe0000000UL, - 0x3c672cedUL, 0x00000000UL, 0x3ff00000UL, 0x939d225aUL, 0xbfa60beaUL, - 0x2ed59f06UL, 0xbfd29406UL, 0xa0000000UL, 0x3c75d28dUL, 0x00000000UL, - 0x3ff00000UL, 0x011469fbUL, 0xbf93ad06UL, 0x3c69a60bUL, 0xbfc8f8b8UL, - 0xc0000000UL, 0x3c626d19UL, 0x00000000UL, 0x3ff00000UL, 0x176d6d31UL, - 0xbf73b92eUL, 0xbc29b42cUL, 0xbfb917a6UL, 0xe0000000UL, 0x3c3e2718UL, - 0x00000000UL, 0x3ff00000UL, 0x55555555UL, 0xbfc55555UL, 0x00000000UL, - 0xbfe00000UL, 0x11111111UL, 0x3f811111UL, 0x55555555UL, 0x3fa55555UL, - 0x1a01a01aUL, 0xbf2a01a0UL, 0x16c16c17UL, 0xbf56c16cUL, 0xa556c734UL, - 0x3ec71de3UL, 0x1a01a01aUL, 0x3efa01a0UL, 0x1a600000UL, 0x3d90b461UL, - 0x1a600000UL, 0x3d90b461UL, 0x54400000UL, 0x3fb921fbUL, 0x00000000UL, - 0x00000000UL, 0x2e037073UL, 0x3b63198aUL, 0x00000000UL, 0x00000000UL, - 0x6dc9c883UL, 0x40245f30UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x43380000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x43600000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3c800000UL, 0x00000000UL, - 0x00000000UL, 0xffffffffUL, 0x3fefffffUL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x80000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x80000000UL, 0x00000000UL, 0x80000000UL, 0x00000000UL, 0x3fe00000UL, - 0x00000000UL, 0x3fe00000UL -}; - -void MacroAssembler::fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, - XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, - Register eax, Register ebx, Register edx) { - - Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; - Label L_2TAG_PACKET_4_0_2; - - assert_different_registers(eax, ebx, edx); - - address static_const_table_sin = (address)_static_const_table_sin; - - 
subl(rsp, 120); - movl(Address(rsp, 56), ebx); - lea(ebx, ExternalAddress(static_const_table_sin)); - movsd(xmm0, Address(rsp, 128)); - pextrw(eax, xmm0, 3); - andl(eax, 32767); - subl(eax, 12336); - cmpl(eax, 4293); - jcc(Assembler::above, L_2TAG_PACKET_0_0_2); - movsd(xmm1, Address(ebx, 2160)); - mulsd(xmm1, xmm0); - movsd(xmm5, Address(ebx, 2272)); - movdqu(xmm4, Address(ebx, 2256)); - pand(xmm4, xmm0); - por(xmm5, xmm4); - movsd(xmm3, Address(ebx, 2128)); - movdqu(xmm2, Address(ebx, 2112)); - addpd(xmm1, xmm5); - cvttsd2sil(edx, xmm1); - cvtsi2sdl(xmm1, edx); - mulsd(xmm3, xmm1); - unpcklpd(xmm1, xmm1); - addl(edx, 1865216); - movdqu(xmm4, xmm0); - andl(edx, 63); - movdqu(xmm5, Address(ebx, 2096)); - lea(eax, Address(ebx, 0)); - shll(edx, 5); - addl(eax, edx); - mulpd(xmm2, xmm1); - subsd(xmm0, xmm3); - mulsd(xmm1, Address(ebx, 2144)); - subsd(xmm4, xmm3); - movsd(xmm7, Address(eax, 8)); - unpcklpd(xmm0, xmm0); - movapd(xmm3, xmm4); - subsd(xmm4, xmm2); - mulpd(xmm5, xmm0); - subpd(xmm0, xmm2); - movdqu(xmm6, Address(ebx, 2064)); - mulsd(xmm7, xmm4); - subsd(xmm3, xmm4); - mulpd(xmm5, xmm0); - mulpd(xmm0, xmm0); - subsd(xmm3, xmm2); - movdqu(xmm2, Address(eax, 0)); - subsd(xmm1, xmm3); - movsd(xmm3, Address(eax, 24)); - addsd(xmm2, xmm3); - subsd(xmm7, xmm2); - mulsd(xmm2, xmm4); - mulpd(xmm6, xmm0); - mulsd(xmm3, xmm4); - mulpd(xmm2, xmm0); - mulpd(xmm0, xmm0); - addpd(xmm5, Address(ebx, 2080)); - mulsd(xmm4, Address(eax, 0)); - addpd(xmm6, Address(ebx, 2048)); - mulpd(xmm5, xmm0); - movapd(xmm0, xmm3); - addsd(xmm3, Address(eax, 8)); - mulpd(xmm1, xmm7); - movapd(xmm7, xmm4); - addsd(xmm4, xmm3); - addpd(xmm6, xmm5); - movsd(xmm5, Address(eax, 8)); - subsd(xmm5, xmm3); - subsd(xmm3, xmm4); - addsd(xmm1, Address(eax, 16)); - mulpd(xmm6, xmm2); - addsd(xmm5, xmm0); - addsd(xmm3, xmm7); - addsd(xmm1, xmm5); - addsd(xmm1, xmm3); - addsd(xmm1, xmm6); - unpckhpd(xmm6, xmm6); - addsd(xmm1, xmm6); - addsd(xmm4, xmm1); - movsd(Address(rsp, 0), xmm4); - 
fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_1_0_2); - - bind(L_2TAG_PACKET_0_0_2); - jcc(Assembler::greater, L_2TAG_PACKET_2_0_2); - shrl(eax, 4); - cmpl(eax, 268434685); - jcc(Assembler::notEqual, L_2TAG_PACKET_3_0_2); - movsd(Address(rsp, 0), xmm0); - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_1_0_2); - - bind(L_2TAG_PACKET_3_0_2); - movsd(xmm3, Address(ebx, 2192)); - mulsd(xmm3, xmm0); - subsd(xmm3, xmm0); - mulsd(xmm3, Address(ebx, 2208)); - movsd(Address(rsp, 0), xmm0); - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_1_0_2); - - bind(L_2TAG_PACKET_2_0_2); - movl(eax, Address(rsp, 132)); - andl(eax, 2146435072); - cmpl(eax, 2146435072); - jcc(Assembler::equal, L_2TAG_PACKET_4_0_2); - subl(rsp, 32); - movsd(Address(rsp, 0), xmm0); - lea(eax, Address(rsp, 40)); - movl(Address(rsp, 8), eax); - movl(eax, 2); - movl(Address(rsp, 12), eax); - call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_sin_cos_huge()))); - addl(rsp, 32); - fld_d(Address(rsp, 16)); - jmp(L_2TAG_PACKET_1_0_2); - bind(L_2TAG_PACKET_4_0_2); - fld_d(Address(rsp, 128)); - fmul_d(Address(ebx, 2240)); - bind(L_2TAG_PACKET_1_0_2); - movl(ebx, Address(rsp, 56)); -} diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_32_tan.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_32_tan.cpp deleted file mode 100644 index f2bc1efb483f9..0000000000000 --- a/src/hotspot/cpu/x86/macroAssembler_x86_32_tan.cpp +++ /dev/null @@ -1,1173 +0,0 @@ -/* -* Copyright (c) 2016, 2021, Intel Corporation. All rights reserved. -* Intel Math Library (LIBM) Source Code -* -* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -* -* This code is free software; you can redistribute it and/or modify it -* under the terms of the GNU General Public License version 2 only, as -* published by the Free Software Foundation. -* -* This code is distributed in the hope that it will be useful, but WITHOUT -* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -* FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License -* version 2 for more details (a copy is included in the LICENSE file that -* accompanied this code). -* -* You should have received a copy of the GNU General Public License version -* 2 along with this work; if not, write to the Free Software Foundation, -* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -* -* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -* or visit www.oracle.com if you need additional information or have any -* questions. -* -*/ - -#include "precompiled.hpp" -#include "asm/assembler.hpp" -#include "asm/assembler.inline.hpp" -#include "macroAssembler_x86.hpp" -#include "runtime/stubRoutines.hpp" -#include "utilities/globalDefinitions.hpp" - -/******************************************************************************/ -// ALGORITHM DESCRIPTION - TAN() -// --------------------- -// -// Polynomials coefficients and other constants. -// -// Note that in this algorithm, there is a different polynomial for -// each breakpoint, so there are 32 sets of polynomial coefficients -// as well as 32 instances of the other constants. -// -// The polynomial coefficients and constants are offset from the start -// of the main block as follows: -// -// 0: c8 | c0 -// 16: c9 | c1 -// 32: c10 | c2 -// 48: c11 | c3 -// 64: c12 | c4 -// 80: c13 | c5 -// 96: c14 | c6 -// 112: c15 | c7 -// 128: T_hi -// 136: T_lo -// 144: Sigma -// 152: T_hl -// 160: Tau -// 168: Mask -// 176: (end of block) -// -// The total table size is therefore 5632 bytes. -// -// Note that c0 and c1 are always zero. We could try storing -// other constants here, and just loading the low part of the -// SIMD register in these cases, after ensuring the high part -// is zero. -// -// The higher terms of the polynomial are computed in the *low* -// part of the SIMD register. This is so we can overlap the -// multiplication by r^8 and the unpacking of the other part. 
-// -// The constants are: -// T_hi + T_lo = accurate constant term in power series -// Sigma + T_hl = accurate coefficient of r in power series (Sigma=1 bit) -// Tau = multiplier for the reciprocal, always -1 or 0 -// -// The basic reconstruction formula using these constants is: -// -// High = tau * recip_hi + t_hi -// Med = (sgn * r + t_hl * r)_hi -// Low = (sgn * r + t_hl * r)_lo + -// tau * recip_lo + T_lo + (T_hl + sigma) * c + pol -// -// where pol = c0 + c1 * r + c2 * r^2 + ... + c15 * r^15 -// -// (c0 = c1 = 0, but using them keeps SIMD regularity) -// -// We then do a compensated sum High + Med, add the low parts together -// and then do the final sum. -// -// Here recip_hi + recip_lo is an accurate reciprocal of the remainder -// modulo pi/2 -// -// Special cases: -// tan(NaN) = quiet NaN, and raise invalid exception -// tan(INF) = NaN and raise invalid exception -// tan(+/-0) = +/-0 -// -/******************************************************************************/ - -// The 32 bit code is at most SSE2 compliant - -ATTRIBUTE_ALIGNED(16) static const jushort _TP[] = -{ - 0x4cd6, 0xaf6c, 0xc710, 0xc662, 0xbffd, 0x0000, 0x4b06, 0xb0ac, 0xd3b2, 0xcc2c, - 0x3ff9, 0x0000, 0x00e3, 0xc850, 0xaa28, 0x9533, 0xbff3, 0x0000, 0x2ff0, 0x466d, - 0x1a3b, 0xb266, 0x3fe5, 0x0000 -}; - -ATTRIBUTE_ALIGNED(16) static const jushort _TQ[] = -{ - 0x399c, 0x8391, 0x154c, 0x94ca, 0xbfff, 0x0000, 0xb6a3, 0xc36a, 0x44e2, 0x8a2c, - 0x3ffe, 0x0000, 0xb70f, 0xd068, 0xa6ce, 0xe9dd, 0xbff9, 0x0000, 0x820f, 0x51ce, - 0x7d76, 0x9bff, 0x3ff3, 0x0000 -}; - -ATTRIBUTE_ALIGNED(16) static const jushort _GP[] = -{ - 0xaaab, 0xaaaa, 0xaaaa, 0xaaaa, 0xbffd, 0x0000, 0xb62f, 0x0b60, 0x60b6, 0xb60b, - 0xbff9, 0x0000, 0xdfa7, 0x08aa, 0x55e0, 0x8ab3, 0xbff6, 0x0000, 0x85a0, 0xa819, - 0xbc99, 0xddeb, 0xbff2, 0x0000, 0x7065, 0x6a37, 0x795f, 0xb354, 0xbfef, 0x0000, - 0xa8f9, 0x83f1, 0x2ec8, 0x9140, 0xbfec, 0x0000, 0xf3ca, 0x8c96, 0x8e0b, 0xeb6d, - 0xbfe8, 0x0000, 0x355b, 0xd910, 0x67c9, 0xbed3, 
0xbfe5, 0x0000, 0x286b, 0xb49e, - 0xb854, 0x9a98, 0xbfe2, 0x0000, 0x0871, 0x1a2f, 0x6477, 0xfcc4, 0xbfde, 0x0000, - 0xa559, 0x1da9, 0xaed2, 0xba76, 0xbfdb, 0x0000, 0x00a3, 0x7fea, 0x9bc3, 0xf205, - 0xbfd8, 0x0000 -}; - -void MacroAssembler::libm_tancot_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx, Register edx, Register ebx, Register esi, Register edi, Register ebp, Register esp) { - Label B1_1, B1_2, B1_3, B1_4, B1_5, B1_6, B1_7, B1_8, B1_9, B1_10, B1_11, B1_12; - Label B1_13, B1_14, B1_15, B1_16, B1_17, B1_18, B1_19, B1_20, B1_21, B1_22, B1_23; - Label B1_24, B1_25, B1_26, B1_27, B1_28, B1_29, B1_30, B1_31, B1_32, B1_33, B1_34; - Label B1_35, B1_36, B1_37, B1_38, B1_39, B1_40, B1_43; - - assert_different_registers(ebx, eax, ecx, edx, esi, edi, ebp, esp); - - address TP = (address)_TP; - address TQ = (address)_TQ; - address GP = (address)_GP; - - bind(B1_1); - push(ebp); - movl(ebp, esp); - andl(esp, -64); - push(esi); - push(edi); - push(ebx); - subl(esp, 52); - movl(eax, Address(ebp, 16)); - movl(ebx, Address(ebp, 20)); - movl(Address(esp, 40), eax); - - bind(B1_2); - fnstcw(Address(esp, 38)); - - bind(B1_3); - movl(edx, Address(ebp, 12)); - movl(eax, edx); - andl(eax, 2147483647); - shrl(edx, 31); - movl(Address(esp, 44), edx); - cmpl(eax, 1104150528); - jcc(Assembler::aboveEqual, B1_11); - - bind(B1_4); - movsd(xmm1, Address(ebp, 8)); - movzwl(ecx, Address(esp, 38)); - movl(edx, ecx); - andl(edx, 768); - andps(xmm1, ExternalAddress(L_2IL0FLOATPACKET_0)); //0xffffffffUL, 0x7fffffffUL, 0x00000000UL, 0x00000000UL - cmpl(edx, 768); - movsd(xmm0, ExternalAddress(PI4_INV)); ////0x6dc9c883UL, 0x3ff45f30UL - mulsd(xmm0, xmm1); - movsd(Address(ebp, 8), xmm1); - movsd(Address(esp, 0), xmm0); - jcc(Assembler::equal, B1_39); - - bind(B1_5); - orl(ecx, -64768); - movw(Address(esp, 36), ecx); - - bind(B1_6); - fldcw(Address(esp, 36)); - - bind(B1_7); - movsd(xmm1, Address(ebp, 8)); - movl(edi, 1); - - bind(B1_8); - movl(Address(esp, 12), esi); - 
movl(esi, Address(esp, 4)); - movl(edx, esi); - movl(Address(esp, 24), edi); - movl(edi, esi); - shrl(edi, 20); - andl(edx, 1048575); - movl(ecx, edi); - orl(edx, 1048576); - negl(ecx); - addl(edi, 13); - movl(Address(esp, 8), ebx); - addl(ecx, 19); - movl(ebx, edx); - movl(Address(esp, 28), ecx); - shrl(ebx); - movl(ecx, edi); - shll(edx); - movl(ecx, Address(esp, 28)); - movl(edi, Address(esp, 0)); - shrl(edi); - orl(edx, edi); - cmpl(esi, 1094713344); - movsd(Address(esp, 16), xmm1); - fld_d(Address(esp, 16)); - cmov32(Assembler::below, edx, ebx); - movl(edi, Address(esp, 24)); - movl(esi, Address(esp, 12)); - lea(ebx, Address(edx, 1)); - andl(ebx, -2); - movl(Address(esp, 16), ebx); - cmpl(eax, 1094713344); - fild_s(Address(esp, 16)); - movl(ebx, Address(esp, 8)); - jcc(Assembler::aboveEqual, B1_10); - - bind(B1_9); - fld_d(ExternalAddress(PI4X3)); //0x54443000UL, 0xbfe921fbUL - fmul(1); - faddp(2); - fld_d(ExternalAddress(PI4X3 + 8)); //0x3b39a000UL, 0x3d373dcbUL - fmul(1); - faddp(2); - fld_d(ExternalAddress(PI4X3 + 16)); //0xe0e68948UL, 0xba845c06UL - fmulp(1); - faddp(1); - jmp(B1_17); - - bind(B1_10); - fld_d(ExternalAddress(PI4X4)); //0x54400000UL, 0xbfe921fbUL - fmul(1); - faddp(2); - fld_d(ExternalAddress(PI4X4 + 8)); //0x1a600000UL, 0xbdc0b461UL - fmul(1); - faddp(2); - fld_d(ExternalAddress(PI4X4 + 16)); //0x2e000000UL, 0xbb93198aUL - fmul(1); - faddp(2); - fld_d(ExternalAddress(PI4X4 + 24)); //0x252049c1UL, 0xb96b839aUL - fmulp(1); - faddp(1); - jmp(B1_17); - - bind(B1_11); - movzwl(edx, Address(esp, 38)); - movl(eax, edx); - andl(eax, 768); - cmpl(eax, 768); - jcc(Assembler::equal, B1_40); - - bind(B1_12); - orl(edx, -64768); - movw(Address(esp, 36), edx); - - bind(B1_13); - fldcw(Address(esp, 36)); - - bind(B1_14); - movl(edi, 1); - - bind(B1_15); - movsd(xmm0, Address(ebp, 8)); - addl(esp, -32); - andps(xmm0, ExternalAddress(L_2IL0FLOATPACKET_0)); //0xffffffffUL, 0x7fffffffUL, 0x00000000UL, 0x00000000UL - lea(eax, Address(esp, 32)); - 
movsd(Address(eax, 16), xmm0); - fld_d(Address(eax, 16)); - fstp_x(Address(esp, 0)); - movl(Address(esp, 12), 0); - movl(Address(esp, 16), eax); - call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_reduce_pi04l()))); - - bind(B1_43); - movl(edx, eax); - addl(esp, 32); - - bind(B1_16); - fld_d(Address(esp, 0)); - fld_d(Address(esp, 8)); - faddp(1); - - bind(B1_17); - movl(eax, ebx); - andl(eax, 3); - cmpl(eax, 3); - jcc(Assembler::notEqual, B1_24); - - bind(B1_18); - fld_d(ExternalAddress(ONES)); - incl(edx); - fdiv(1); - testb(edx, 2); - fstp_x(Address(esp, 24)); - fld_s(0); - fmul(1); - fld_s(0); - fmul(1); - fld_x(ExternalAddress(36 + TP)); //0x2ff0, 0x466d, 0x1a - fmul(2); - fld_x(ExternalAddress(24 + TP)); //0x00e3, 0xc850, 0xaa - faddp(1); - fmul(2); - fld_x(ExternalAddress(12 + TP)); //0x4b06, 0xb0ac, 0xd3 - faddp(1); - fmul(2); - fld_x(ExternalAddress(36 + TQ)); //0x820f, 0x51ce, 0x7d - fmul(3); - fld_x(ExternalAddress(24 + TQ)); //0xb70f, 0xd068, 0xa6 - faddp(1); - fmul(3); - fld_x(ExternalAddress(12 + TQ)); //0xb6a3, 0xc36a, 0x44 - faddp(1); - fmul(3); - fld_x(ExternalAddress(TQ)); //0x399c, 0x8391, 0x15 - faddp(1); - fld_x(ExternalAddress(TP)); //0x4cd6, 0xaf6c, 0xc7 - faddp(2); - fld_x(ExternalAddress(132 + GP)); //0x00a3, 0x7fea, 0x9b - fmul(3); - fld_x(ExternalAddress(120 + GP)); //0xa559, 0x1da9, 0xae - fmul(4); - fld_x(ExternalAddress(108 + GP)); //0x0871, 0x1a2f, 0x64 - faddp(2); - fxch(1); - fmul(4); - fld_x(ExternalAddress(96 + GP)); //0x286b, 0xb49e, 0xb8 - faddp(2); - fxch(1); - fmul(4); - fld_x(ExternalAddress(84 + GP)); //0x355b, 0xd910, 0x67 - faddp(2); - fxch(1); - fmul(4); - fld_x(ExternalAddress(72 + GP)); //0x8c96, 0x8e0b, 0xeb - faddp(2); - fxch(1); - fmul(4); - fld_x(ExternalAddress(60 + GP)); //0xa8f9, 0x83f1, 0x2e - faddp(2); - fxch(1); - fmul(4); - fld_x(ExternalAddress(48 + GP)); //0x7065, 0x6a37, 0x79 - faddp(2); - fxch(1); - fmul(4); - fld_x(ExternalAddress(36 + GP)); //0x85a0, 0xa819, 0xbc - faddp(2); - fxch(1); - 
fmul(4); - fld_x(ExternalAddress(24 + GP)); //0xdfa7, 0x08aa, 0x55 - faddp(2); - fxch(1); - fmulp(4); - fld_x(ExternalAddress(12 + GP)); //0xb62f, 0x0b60, 0x60 - faddp(1); - fmul(4); - fmul(5); - fld_x(ExternalAddress(GP)); //0xaaab, 0xaaaa, 0xaa - faddp(4); - fxch(3); - fmul(5); - faddp(3); - jcc(Assembler::equal, B1_20); - - bind(B1_19); - fld_x(Address(esp, 24)); - fxch(1); - fdivrp(2); - fxch(1); - fmulp(3); - movl(eax, Address(esp, 44)); - xorl(eax, 1); - fxch(2); - fmul(3); - fld_d(Address(ONES, RelocationHolder::none).plus_disp(eax, Address::times_8)); - fmula(2); - fmula(3); - fxch(3); - faddp(2); - fxch(1); - fstp_d(Address(esp, 16)); - fmul(1); - fxch(1); - fmulp(2); - movsd(xmm0, Address(esp, 16)); - faddp(1); - fstp_d(Address(esp, 16)); - movsd(xmm1, Address(esp, 16)); - jmp(B1_21); - - bind(B1_20); - fdivrp(1); - fmulp(2); - fxch(1); - fmul(2); - movl(eax, Address(esp, 44)); - fld_d(Address(ONES, RelocationHolder::none).plus_disp(eax, Address::times_8)); - fmula(1); - fmula(3); - fxch(3); - faddp(1); - fstp_d(Address(esp, 16)); - fmul(1); - fld_x(Address(esp, 24)); - fmulp(2); - movsd(xmm0, Address(esp, 16)); - faddp(1); - fstp_d(Address(esp, 16)); - movsd(xmm1, Address(esp, 16)); - - bind(B1_21); - testl(edi, edi); - jcc(Assembler::equal, B1_23); - - bind(B1_22); - fldcw(Address(esp, 38)); - - bind(B1_23); - movl(eax, Address(esp, 40)); - movsd(Address(eax, 0), xmm0); - movsd(Address(eax, 8), xmm1); - addl(esp, 52); - pop(ebx); - pop(edi); - pop(esi); - movl(esp, ebp); - pop(ebp); - ret(0); - - bind(B1_24); - testb(ebx, 2); - jcc(Assembler::equal, B1_31); - - bind(B1_25); - incl(edx); - fld_s(0); - fmul(1); - testb(edx, 2); - jcc(Assembler::equal, B1_27); - - bind(B1_26); - fld_d(ExternalAddress(ONES)); - fdiv(2); - fld_s(1); - fmul(2); - fld_x(ExternalAddress(132 + GP)); //0x00a3, 0x7fea, 0x9b - fmul(1); - fld_x(ExternalAddress(120 + GP)); //0xa559, 0x1da9, 0xae - fmul(2); - fld_x(ExternalAddress(108 + GP)); //0x67c9, 0xbed3, 0xbf - movl(eax, 
Address(esp, 44)); - faddp(2); - fxch(1); - fmul(2); - xorl(eax, 1); - fld_x(ExternalAddress(96 + GP)); //0x286b, 0xb49e, 0xb8 - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(84 + GP)); //0x355b, 0xd910, 0x67 - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(72 + GP)); //0xf3ca, 0x8c96, 0x8e - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(60 + GP)); //0xa8f9, 0x83f1, 0x2e - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(48 + GP)); //0x7065, 0x6a37, 0x79 - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(36 + GP)); //0x85a0, 0xa819, 0xbc - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(24 + GP)); //0xdfa7, 0x08aa, 0x55 - faddp(2); - fxch(1); - fmulp(2); - fld_x(ExternalAddress(12 + GP)); //0xb62f, 0x0b60, 0x60 - faddp(1); - fmulp(3); - fld_x(ExternalAddress(GP)); //0xaaab, 0xaaaa, 0xaa - faddp(1); - fmul(3); - fxch(2); - fmulp(3); - fxch(1); - faddp(2); - fld_d(Address(ONES, RelocationHolder::none).plus_disp(eax, Address::times_8)); - fmula(2); - fmulp(1); - faddp(1); - fstp_d(Address(esp, 16)); - movsd(xmm0, Address(esp, 16)); - jmp(B1_28); - - bind(B1_27); - fld_x(ExternalAddress(36 + TP)); //0x2ff0, 0x466d, 0x1a - fmul(1); - fld_x(ExternalAddress(24 + TP)); //0x00e3, 0xc850, 0xaa - movl(eax, Address(esp, 44)); - faddp(1); - fmul(1); - fld_x(ExternalAddress(36 + TQ)); //0x820f, 0x51ce, 0x7d - fmul(2); - fld_x(ExternalAddress(24 + TQ)); //0xb70f, 0xd068, 0xa6 - faddp(1); - fmul(2); - fld_x(ExternalAddress(12 + TQ)); //0xb6a3, 0xc36a, 0x44 - faddp(1); - fmul(2); - fld_x(ExternalAddress(TQ)); //0x399c, 0x8391, 0x15 - faddp(1); - fld_x(ExternalAddress(12 + TP)); //0x4b06, 0xb0ac, 0xd3 - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(TP)); //0x4cd6, 0xaf6c, 0xc7 - faddp(1); - fdivrp(1); - fmulp(1); - fmul(1); - fld_d(Address(ONES, RelocationHolder::none).plus_disp(eax, Address::times_8)); - fmula(1); - fmulp(2); - faddp(1); - fstp_d(Address(esp, 16)); - movsd(xmm0, Address(esp, 16)); - - bind(B1_28); - 
testl(edi, edi); - jcc(Assembler::equal, B1_30); - - bind(B1_29); - fldcw(Address(esp, 38)); - - bind(B1_30); - movl(eax, Address(esp, 40)); - movsd(Address(eax, 0), xmm0); - addl(esp, 52); - pop(ebx); - pop(edi); - pop(esi); - movl(esp, ebp); - pop(ebp); - ret(0); - - bind(B1_31); - testb(ebx, 1); - jcc(Assembler::equal, B1_38); - - bind(B1_32); - incl(edx); - fld_s(0); - fmul(1); - testb(edx, 2); - jcc(Assembler::equal, B1_34); - - bind(B1_33); - fld_x(ExternalAddress(36 + TP)); //0x2ff0, 0x466d, 0x1a - fmul(1); - fld_x(ExternalAddress(24 + TP)); //0x00e3, 0xc850, 0xaa - movl(eax, Address(esp, 44)); - faddp(1); - fmul(1); - xorl(eax, 1); - fld_x(ExternalAddress(36 + TQ)); //0x820f, 0x51ce, 0x7d - fmul(2); - fld_x(ExternalAddress(24 + TQ)); //0xb70f, 0xd068, 0xa6 - faddp(1); - fmul(2); - fld_x(ExternalAddress(12 + TQ)); //0xb6a3, 0xc36a, 0x44 - faddp(1); - fmul(2); - fld_x(ExternalAddress(TQ)); //0x399c, 0x8391, 0x15 - faddp(1); - fld_x(ExternalAddress(12 + TP)); //0x4b06, 0xb0ac, 0xd3 - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(TP)); //0x4cd6, 0xaf6c, 0xc7 - faddp(1); - fdivrp(1); - fmulp(1); - fmul(1); - fld_d(Address(ONES, RelocationHolder::none).plus_disp(eax, Address::times_8)); - fmula(1); - fmulp(2); - faddp(1); - fstp_d(Address(esp, 16)); - movsd(xmm0, Address(esp, 16)); - jmp(B1_35); - - bind(B1_34); - fld_d(ExternalAddress(ONES)); - fdiv(2); - fld_s(1); - fmul(2); - fld_x(ExternalAddress(132 + GP)); //0x00a3, 0x7fea, 0x9b - fmul(1); - fld_x(ExternalAddress(120 + GP)); //0xa559, 0x1da9, 0xae - fmul(2); - fld_x(ExternalAddress(108 + GP)); //0x67c9, 0xbed3, 0xbf - movl(eax, Address(esp, 44)); - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(96 + GP)); //0x286b, 0xb49e, 0xb8 - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(84 + GP)); //0x355b, 0xd910, 0x67 - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(72 + GP)); //0xf3ca, 0x8c96, 0x8e - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(60 + GP)); //0xa8f9, 
0x83f1, 0x2e - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(48 + GP)); //0x7065, 0x6a37, 0x79 - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(36 + GP)); //0x85a0, 0xa819, 0xbc - faddp(2); - fxch(1); - fmul(2); - fld_x(ExternalAddress(24 + GP)); //0xdfa7, 0x08aa, 0x55 - faddp(2); - fxch(1); - fmulp(2); - fld_x(ExternalAddress(12 + GP)); //0xb62f, 0x0b60, 0x60 - faddp(1); - fmulp(3); - fld_x(ExternalAddress(GP)); //0xaaab, 0xaaaa, 0xaa - faddp(1); - fmul(3); - fxch(2); - fmulp(3); - fxch(1); - faddp(2); - fld_d(Address(ONES, RelocationHolder::none).plus_disp(eax, Address::times_8)); - fmula(2); - fmulp(1); - faddp(1); - fstp_d(Address(esp, 16)); - movsd(xmm0, Address(esp, 16)); - - bind(B1_35); - testl(edi, edi); - jcc(Assembler::equal, B1_37); - - bind(B1_36); - fldcw(Address(esp, 38)); - - bind(B1_37); - movl(eax, Address(esp, 40)); - movsd(Address(eax, 8), xmm0); - addl(esp, 52); - pop(ebx); - pop(edi); - pop(esi); - mov(esp, ebp); - pop(ebp); - ret(0); - - bind(B1_38); - fstp_d(0); - addl(esp, 52); - pop(ebx); - pop(edi); - pop(esi); - mov(esp, ebp); - pop(ebp); - ret(0); - - bind(B1_39); - xorl(edi, edi); - jmp(B1_8); - - bind(B1_40); - xorl(edi, edi); - jmp(B1_15); -} - -ATTRIBUTE_ALIGNED(16) static const juint _static_const_table_tan[] = -{ - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x882c10faUL, - 0x3f9664f4UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x55e6c23dUL, 0x3f8226e3UL, 0x55555555UL, - 0x3fd55555UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x0e157de0UL, 0x3f6d6d3dUL, 0x11111111UL, 0x3fc11111UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x452b75e3UL, 0x3f57da36UL, - 0x1ba1ba1cUL, 0x3faba1baUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x4e435f9bUL, - 0x3f953f83UL, 0x00000000UL, 0x00000000UL, 0x3c6e8e46UL, 
0x3f9b74eaUL, - 0x00000000UL, 0x00000000UL, 0xda5b7511UL, 0x3f85ad63UL, 0xdc230b9bUL, - 0x3fb97558UL, 0x26cb3788UL, 0x3f881308UL, 0x76fc4985UL, 0x3fd62ac9UL, - 0x77bb08baUL, 0x3f757c85UL, 0xb6247521UL, 0x3fb1381eUL, 0x5922170cUL, - 0x3f754e95UL, 0x8746482dUL, 0x3fc27f83UL, 0x11055b30UL, 0x3f64e391UL, - 0x3e666320UL, 0x3fa3e609UL, 0x0de9dae3UL, 0x3f6301dfUL, 0x1f1dca06UL, - 0x3fafa8aeUL, 0x8c5b2da2UL, 0x3fb936bbUL, 0x4e88f7a5UL, 0x3c587d05UL, - 0x00000000UL, 0x3ff00000UL, 0xa8935dd9UL, 0x3f83dde2UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x5a279ea3UL, 0x3faa3407UL, - 0x00000000UL, 0x00000000UL, 0x432d65faUL, 0x3fa70153UL, 0x00000000UL, - 0x00000000UL, 0x891a4602UL, 0x3f9d03efUL, 0xd62ca5f8UL, 0x3fca77d9UL, - 0xb35f4628UL, 0x3f97a265UL, 0x433258faUL, 0x3fd8cf51UL, 0xb58fd909UL, - 0x3f8f88e3UL, 0x01771ceaUL, 0x3fc2b154UL, 0xf3562f8eUL, 0x3f888f57UL, - 0xc028a723UL, 0x3fc7370fUL, 0x20b7f9f0UL, 0x3f80f44cUL, 0x214368e9UL, - 0x3fb6dfaaUL, 0x28891863UL, 0x3f79b4b6UL, 0x172dbbf0UL, 0x3fb6cb8eUL, - 0xe0553158UL, 0x3fc975f5UL, 0x593fe814UL, 0x3c2ef5d3UL, 0x00000000UL, - 0x3ff00000UL, 0x03dec550UL, 0x3fa44203UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x9314533eUL, 0x3fbb8ec5UL, 0x00000000UL, - 0x00000000UL, 0x09aa36d0UL, 0x3fb6d3f4UL, 0x00000000UL, 0x00000000UL, - 0xdcb427fdUL, 0x3fb13950UL, 0xd87ab0bbUL, 0x3fd5335eUL, 0xce0ae8a5UL, - 0x3fabb382UL, 0x79143126UL, 0x3fddba41UL, 0x5f2b28d4UL, 0x3fa552f1UL, - 0x59f21a6dUL, 0x3fd015abUL, 0x22c27d95UL, 0x3fa0e984UL, 0xe19fc6aaUL, - 0x3fd0576cUL, 0x8f2c2950UL, 0x3f9a4898UL, 0xc0b3f22cUL, 0x3fc59462UL, - 0x1883a4b8UL, 0x3f94b61cUL, 0x3f838640UL, 0x3fc30eb8UL, 0x355c63dcUL, - 0x3fd36a08UL, 0x1dce993dUL, 0xbc6d704dUL, 0x00000000UL, 0x3ff00000UL, - 0x2b82ab63UL, 0x3fb78e92UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x56f37042UL, 0x3fccfc56UL, 0x00000000UL, 0x00000000UL, - 0xaa563951UL, 0x3fc90125UL, 0x00000000UL, 0x00000000UL, 0x3d0e7c5dUL, - 0x3fc50533UL, 0x9bed9b2eUL, 
0x3fdf0ed9UL, 0x5fe7c47cUL, 0x3fc1f250UL, - 0x96c125e5UL, 0x3fe2edd9UL, 0x5a02bbd8UL, 0x3fbe5c71UL, 0x86362c20UL, - 0x3fda08b7UL, 0x4b4435edUL, 0x3fb9d342UL, 0x4b494091UL, 0x3fd911bdUL, - 0xb56658beUL, 0x3fb5e4c7UL, 0x93a2fd76UL, 0x3fd3c092UL, 0xda271794UL, - 0x3fb29910UL, 0x3303df2bUL, 0x3fd189beUL, 0x99fcef32UL, 0x3fda8279UL, - 0xb68c1467UL, 0x3c708b2fUL, 0x00000000UL, 0x3ff00000UL, 0x980c4337UL, - 0x3fc5f619UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0xcc03e501UL, 0x3fdff10fUL, 0x00000000UL, 0x00000000UL, 0x44a4e845UL, - 0x3fddb63bUL, 0x00000000UL, 0x00000000UL, 0x3768ad9fUL, 0x3fdb72a4UL, - 0x3dd01ccaUL, 0x3fe5fdb9UL, 0xa61d2811UL, 0x3fd972b2UL, 0x5645ad0bUL, - 0x3fe977f9UL, 0xd013b3abUL, 0x3fd78ca3UL, 0xbf0bf914UL, 0x3fe4f192UL, - 0x4d53e730UL, 0x3fd5d060UL, 0x3f8b9000UL, 0x3fe49933UL, 0xe2b82f08UL, - 0x3fd4322aUL, 0x5936a835UL, 0x3fe27ae1UL, 0xb1c61c9bUL, 0x3fd2b3fbUL, - 0xef478605UL, 0x3fe1659eUL, 0x190834ecUL, 0x3fe11ab7UL, 0xcdb625eaUL, - 0xbc8e564bUL, 0x00000000UL, 0x3ff00000UL, 0xb07217e3UL, 0x3fd248f1UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x2b2c49d0UL, - 0x3ff2de9cUL, 0x00000000UL, 0x00000000UL, 0x2655bc98UL, 0x3ff33e58UL, - 0x00000000UL, 0x00000000UL, 0xff691fa2UL, 0x3ff3972eUL, 0xe93463bdUL, - 0x3feeed87UL, 0x070e10a0UL, 0x3ff3f5b2UL, 0xf4d790a4UL, 0x3ff20c10UL, - 0xa04e8ea3UL, 0x3ff4541aUL, 0x386accd3UL, 0x3ff1369eUL, 0x222a66ddUL, - 0x3ff4b521UL, 0x22a9777eUL, 0x3ff20817UL, 0x52a04a6eUL, 0x3ff5178fUL, - 0xddaa0031UL, 0x3ff22137UL, 0x4447d47cUL, 0x3ff57c01UL, 0x1e9c7f1dUL, - 0x3ff29311UL, 0x2ab7f990UL, 0x3fe561b8UL, 0x209c7df1UL, 0x3c87a8c5UL, - 0x00000000UL, 0x3ff00000UL, 0x4170bcc6UL, 0x3fdc92d8UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xc7ab4d5aUL, 0x40085e24UL, - 0x00000000UL, 0x00000000UL, 0xe93ea75dUL, 0x400b963dUL, 0x00000000UL, - 0x00000000UL, 0x94a7f25aUL, 0x400f37e2UL, 0x4b6261cbUL, 0x3ff5f984UL, - 0x5a9dd812UL, 0x4011aab0UL, 0x74c30018UL, 0x3ffaf5a5UL, 0x7f2ce8e3UL, - 
0x4013fe8bUL, 0xfe8e54faUL, 0x3ffd7334UL, 0x670d618dUL, 0x4016a10cUL, - 0x4db97058UL, 0x4000e012UL, 0x24df44ddUL, 0x40199c5fUL, 0x697d6eceUL, - 0x4003006eUL, 0x83298b82UL, 0x401cfc4dUL, 0x19d490d6UL, 0x40058c19UL, - 0x2ae42850UL, 0x3fea4300UL, 0x118e20e6UL, 0xbc7a6db8UL, 0x00000000UL, - 0x40000000UL, 0xe33345b8UL, 0xbfd4e526UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x65965966UL, 0x40219659UL, 0x00000000UL, - 0x00000000UL, 0x882c10faUL, 0x402664f4UL, 0x00000000UL, 0x00000000UL, - 0x83cd3723UL, 0x402c8342UL, 0x00000000UL, 0x40000000UL, 0x55e6c23dUL, - 0x403226e3UL, 0x55555555UL, 0x40055555UL, 0x34451939UL, 0x40371c96UL, - 0xaaaaaaabUL, 0x400aaaaaUL, 0x0e157de0UL, 0x403d6d3dUL, 0x11111111UL, - 0x40111111UL, 0xa738201fUL, 0x4042bbceUL, 0x05b05b06UL, 0x4015b05bUL, - 0x452b75e3UL, 0x4047da36UL, 0x1ba1ba1cUL, 0x401ba1baUL, 0x00000000UL, - 0x3ff00000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x40000000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x4f48b8d3UL, 0xbf33eaf9UL, 0x00000000UL, 0x00000000UL, - 0x0cf7586fUL, 0x3f20b8eaUL, 0x00000000UL, 0x00000000UL, 0xd0258911UL, - 0xbf0abaf3UL, 0x23e49fe9UL, 0xbfab5a8cUL, 0x2d53222eUL, 0x3ef60d15UL, - 0x21169451UL, 0x3fa172b2UL, 0xbb254dbcUL, 0xbee1d3b5UL, 0xdbf93b8eUL, - 0xbf84c7dbUL, 0x05b4630bUL, 0x3ecd3364UL, 0xee9aada7UL, 0x3f743924UL, - 0x794a8297UL, 0xbeb7b7b9UL, 0xe015f797UL, 0xbf5d41f5UL, 0xe41a4a56UL, - 0x3ea35dfbUL, 0xe4c2a251UL, 0x3f49a2abUL, 0x5af9e000UL, 0xbfce49ceUL, - 0x8c743719UL, 0x3d1eb860UL, 0x00000000UL, 0x00000000UL, 0x1b4863cfUL, - 0x3fd78294UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, - 0x535ad890UL, 0xbf2b9320UL, 0x00000000UL, 0x00000000UL, 0x018fdf1fUL, - 0x3f16d61dUL, 0x00000000UL, 0x00000000UL, 0x0359f1beUL, 0xbf0139e4UL, - 0xa4317c6dUL, 0xbfa67e17UL, 0x82672d0fUL, 0x3eebb405UL, 0x2f1b621eUL, - 0x3f9f455bUL, 0x51ccf238UL, 0xbed55317UL, 0xf437b9acUL, 0xbf804beeUL, - 0xc791a2b5UL, 0x3ec0e993UL, 0x919a1db2UL, 0x3f7080c2UL, 
0x336a5b0eUL, - 0xbeaa48a2UL, 0x0a268358UL, 0xbf55a443UL, 0xdfd978e4UL, 0x3e94b61fUL, - 0xd7767a58UL, 0x3f431806UL, 0x2aea0000UL, 0xbfc9bbe8UL, 0x7723ea61UL, - 0xbd3a2369UL, 0x00000000UL, 0x00000000UL, 0xdf7796ffUL, 0x3fd6e642UL, - 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0xb9ff07ceUL, - 0xbf231c78UL, 0x00000000UL, 0x00000000UL, 0xa5517182UL, 0x3f0ff0e0UL, - 0x00000000UL, 0x00000000UL, 0x790b4cbcUL, 0xbef66191UL, 0x848a46c6UL, - 0xbfa21ac0UL, 0xb16435faUL, 0x3ee1d3ecUL, 0x2a1aa832UL, 0x3f9c71eaUL, - 0xfdd299efUL, 0xbec9dd1aUL, 0x3f8dbaafUL, 0xbf793363UL, 0x309fc6eaUL, - 0x3eb415d6UL, 0xbee60471UL, 0x3f6b83baUL, 0x94a0a697UL, 0xbe9dae11UL, - 0x3e5c67b3UL, 0xbf4fd07bUL, 0x9a8f3e3eUL, 0x3e86bd75UL, 0xa4beb7a4UL, - 0x3f3d1eb1UL, 0x29cfc000UL, 0xbfc549ceUL, 0xbf159358UL, 0xbd397b33UL, - 0x00000000UL, 0x00000000UL, 0x871fee6cUL, 0x3fd666f0UL, 0x00000000UL, - 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0x7d98a556UL, 0xbf1a3958UL, - 0x00000000UL, 0x00000000UL, 0x9d88dc01UL, 0x3f0704c2UL, 0x00000000UL, - 0x00000000UL, 0x73742a2bUL, 0xbeed054aUL, 0x58844587UL, 0xbf9c2a13UL, - 0x55688a79UL, 0x3ed7a326UL, 0xee33f1d6UL, 0x3f9a48f4UL, 0xa8dc9888UL, - 0xbebf8939UL, 0xaad4b5b8UL, 0xbf72f746UL, 0x9102efa1UL, 0x3ea88f82UL, - 0xdabc29cfUL, 0x3f678228UL, 0x9289afb8UL, 0xbe90f456UL, 0x741fb4edUL, - 0xbf46f3a3UL, 0xa97f6663UL, 0x3e79b4bfUL, 0xca89ff3fUL, 0x3f36db70UL, - 0xa8a2a000UL, 0xbfc0ee13UL, 0x3da24be1UL, 0xbd338b9fUL, 0x00000000UL, - 0x00000000UL, 0x11cd6c69UL, 0x3fd601fdUL, 0x00000000UL, 0x3ff00000UL, - 0x00000000UL, 0xfffffff8UL, 0x1a154b97UL, 0xbf116b01UL, 0x00000000UL, - 0x00000000UL, 0x2d427630UL, 0x3f0147bfUL, 0x00000000UL, 0x00000000UL, - 0xb93820c8UL, 0xbee264d4UL, 0xbb6cbb18UL, 0xbf94ab8cUL, 0x888d4d92UL, - 0x3ed0568bUL, 0x60730f7cUL, 0x3f98b19bUL, 0xe4b1fb11UL, 0xbeb2f950UL, - 0x22cf9f74UL, 0xbf6b21cdUL, 0x4a3ff0a6UL, 0x3e9f499eUL, 0xfd2b83ceUL, - 0x3f64aad7UL, 0x637b73afUL, 0xbe83487cUL, 0xe522591aUL, 0xbf3fc092UL, - 0xa158e8bcUL, 0x3e6e3aaeUL, 
0xe5e82ffaUL, 0x3f329d2fUL, 0xd636a000UL, - 0xbfb9477fUL, 0xc2c2d2bcUL, 0xbd135ef9UL, 0x00000000UL, 0x00000000UL, - 0xf2fdb123UL, 0x3fd5b566UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, - 0xfffffff8UL, 0xc41acb64UL, 0xbf05448dUL, 0x00000000UL, 0x00000000UL, - 0xdbb03d6fUL, 0x3efb7ad2UL, 0x00000000UL, 0x00000000UL, 0x9e42962dUL, - 0xbed5aea5UL, 0x2579f8efUL, 0xbf8b2398UL, 0x288a1ed9UL, 0x3ec81441UL, - 0xb0198dc5UL, 0x3f979a3aUL, 0x2fdfe253UL, 0xbea57cd3UL, 0x5766336fUL, - 0xbf617caaUL, 0x600944c3UL, 0x3e954ed6UL, 0xa4e0aaf8UL, 0x3f62c646UL, - 0x6b8fb29cUL, 0xbe74e3a3UL, 0xdc4c0409UL, 0xbf33f952UL, 0x9bffe365UL, - 0x3e6301ecUL, 0xb8869e44UL, 0x3f2fc566UL, 0xe1e04000UL, 0xbfb0cc62UL, - 0x016b907fUL, 0xbd119cbcUL, 0x00000000UL, 0x00000000UL, 0xe6b9d8faUL, - 0x3fd57fb3UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, - 0x5daf22a6UL, 0xbef429d7UL, 0x00000000UL, 0x00000000UL, 0x06bca545UL, - 0x3ef7a27dUL, 0x00000000UL, 0x00000000UL, 0x7211c19aUL, 0xbec41c3eUL, - 0x956ed53eUL, 0xbf7ae3f4UL, 0xee750e72UL, 0x3ec3901bUL, 0x91d443f5UL, - 0x3f96f713UL, 0x36661e6cUL, 0xbe936e09UL, 0x506f9381UL, 0xbf5122e8UL, - 0xcb6dd43fUL, 0x3e9041b9UL, 0x6698b2ffUL, 0x3f61b0c7UL, 0x576bf12bUL, - 0xbe625a8aUL, 0xe5a0e9dcUL, 0xbf23499dUL, 0x110384ddUL, 0x3e5b1c2cUL, - 0x68d43db6UL, 0x3f2cb899UL, 0x6ecac000UL, 0xbfa0c414UL, 0xcd7dd58cUL, - 0x3d13500fUL, 0x00000000UL, 0x00000000UL, 0x85a2c8fbUL, 0x3fd55fe0UL, - 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x2bf70ebeUL, 0x3ef66a8fUL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0xd644267fUL, 0x3ec22805UL, 0x16c16c17UL, 0x3f96c16cUL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xc4e09162UL, - 0x3e8d6db2UL, 0xbc011567UL, 0x3f61566aUL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x1f79955cUL, 0x3e57da4eUL, 0x9334ef0bUL, - 0x3f2bbd77UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 
0x00000000UL, 0x00000000UL, 0x55555555UL, 0x3fd55555UL, 0x00000000UL, - 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0x5daf22a6UL, 0x3ef429d7UL, - 0x00000000UL, 0x00000000UL, 0x06bca545UL, 0x3ef7a27dUL, 0x00000000UL, - 0x00000000UL, 0x7211c19aUL, 0x3ec41c3eUL, 0x956ed53eUL, 0x3f7ae3f4UL, - 0xee750e72UL, 0x3ec3901bUL, 0x91d443f5UL, 0x3f96f713UL, 0x36661e6cUL, - 0x3e936e09UL, 0x506f9381UL, 0x3f5122e8UL, 0xcb6dd43fUL, 0x3e9041b9UL, - 0x6698b2ffUL, 0x3f61b0c7UL, 0x576bf12bUL, 0x3e625a8aUL, 0xe5a0e9dcUL, - 0x3f23499dUL, 0x110384ddUL, 0x3e5b1c2cUL, 0x68d43db6UL, 0x3f2cb899UL, - 0x6ecac000UL, 0x3fa0c414UL, 0xcd7dd58cUL, 0xbd13500fUL, 0x00000000UL, - 0x00000000UL, 0x85a2c8fbUL, 0x3fd55fe0UL, 0x00000000UL, 0x3ff00000UL, - 0x00000000UL, 0xfffffff8UL, 0xc41acb64UL, 0x3f05448dUL, 0x00000000UL, - 0x00000000UL, 0xdbb03d6fUL, 0x3efb7ad2UL, 0x00000000UL, 0x00000000UL, - 0x9e42962dUL, 0x3ed5aea5UL, 0x2579f8efUL, 0x3f8b2398UL, 0x288a1ed9UL, - 0x3ec81441UL, 0xb0198dc5UL, 0x3f979a3aUL, 0x2fdfe253UL, 0x3ea57cd3UL, - 0x5766336fUL, 0x3f617caaUL, 0x600944c3UL, 0x3e954ed6UL, 0xa4e0aaf8UL, - 0x3f62c646UL, 0x6b8fb29cUL, 0x3e74e3a3UL, 0xdc4c0409UL, 0x3f33f952UL, - 0x9bffe365UL, 0x3e6301ecUL, 0xb8869e44UL, 0x3f2fc566UL, 0xe1e04000UL, - 0x3fb0cc62UL, 0x016b907fUL, 0x3d119cbcUL, 0x00000000UL, 0x00000000UL, - 0xe6b9d8faUL, 0x3fd57fb3UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, - 0xfffffff8UL, 0x1a154b97UL, 0x3f116b01UL, 0x00000000UL, 0x00000000UL, - 0x2d427630UL, 0x3f0147bfUL, 0x00000000UL, 0x00000000UL, 0xb93820c8UL, - 0x3ee264d4UL, 0xbb6cbb18UL, 0x3f94ab8cUL, 0x888d4d92UL, 0x3ed0568bUL, - 0x60730f7cUL, 0x3f98b19bUL, 0xe4b1fb11UL, 0x3eb2f950UL, 0x22cf9f74UL, - 0x3f6b21cdUL, 0x4a3ff0a6UL, 0x3e9f499eUL, 0xfd2b83ceUL, 0x3f64aad7UL, - 0x637b73afUL, 0x3e83487cUL, 0xe522591aUL, 0x3f3fc092UL, 0xa158e8bcUL, - 0x3e6e3aaeUL, 0xe5e82ffaUL, 0x3f329d2fUL, 0xd636a000UL, 0x3fb9477fUL, - 0xc2c2d2bcUL, 0x3d135ef9UL, 0x00000000UL, 0x00000000UL, 0xf2fdb123UL, - 0x3fd5b566UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 
0xfffffff8UL, - 0x7d98a556UL, 0x3f1a3958UL, 0x00000000UL, 0x00000000UL, 0x9d88dc01UL, - 0x3f0704c2UL, 0x00000000UL, 0x00000000UL, 0x73742a2bUL, 0x3eed054aUL, - 0x58844587UL, 0x3f9c2a13UL, 0x55688a79UL, 0x3ed7a326UL, 0xee33f1d6UL, - 0x3f9a48f4UL, 0xa8dc9888UL, 0x3ebf8939UL, 0xaad4b5b8UL, 0x3f72f746UL, - 0x9102efa1UL, 0x3ea88f82UL, 0xdabc29cfUL, 0x3f678228UL, 0x9289afb8UL, - 0x3e90f456UL, 0x741fb4edUL, 0x3f46f3a3UL, 0xa97f6663UL, 0x3e79b4bfUL, - 0xca89ff3fUL, 0x3f36db70UL, 0xa8a2a000UL, 0x3fc0ee13UL, 0x3da24be1UL, - 0x3d338b9fUL, 0x00000000UL, 0x00000000UL, 0x11cd6c69UL, 0x3fd601fdUL, - 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0xb9ff07ceUL, - 0x3f231c78UL, 0x00000000UL, 0x00000000UL, 0xa5517182UL, 0x3f0ff0e0UL, - 0x00000000UL, 0x00000000UL, 0x790b4cbcUL, 0x3ef66191UL, 0x848a46c6UL, - 0x3fa21ac0UL, 0xb16435faUL, 0x3ee1d3ecUL, 0x2a1aa832UL, 0x3f9c71eaUL, - 0xfdd299efUL, 0x3ec9dd1aUL, 0x3f8dbaafUL, 0x3f793363UL, 0x309fc6eaUL, - 0x3eb415d6UL, 0xbee60471UL, 0x3f6b83baUL, 0x94a0a697UL, 0x3e9dae11UL, - 0x3e5c67b3UL, 0x3f4fd07bUL, 0x9a8f3e3eUL, 0x3e86bd75UL, 0xa4beb7a4UL, - 0x3f3d1eb1UL, 0x29cfc000UL, 0x3fc549ceUL, 0xbf159358UL, 0x3d397b33UL, - 0x00000000UL, 0x00000000UL, 0x871fee6cUL, 0x3fd666f0UL, 0x00000000UL, - 0x3ff00000UL, 0x00000000UL, 0xfffffff8UL, 0x535ad890UL, 0x3f2b9320UL, - 0x00000000UL, 0x00000000UL, 0x018fdf1fUL, 0x3f16d61dUL, 0x00000000UL, - 0x00000000UL, 0x0359f1beUL, 0x3f0139e4UL, 0xa4317c6dUL, 0x3fa67e17UL, - 0x82672d0fUL, 0x3eebb405UL, 0x2f1b621eUL, 0x3f9f455bUL, 0x51ccf238UL, - 0x3ed55317UL, 0xf437b9acUL, 0x3f804beeUL, 0xc791a2b5UL, 0x3ec0e993UL, - 0x919a1db2UL, 0x3f7080c2UL, 0x336a5b0eUL, 0x3eaa48a2UL, 0x0a268358UL, - 0x3f55a443UL, 0xdfd978e4UL, 0x3e94b61fUL, 0xd7767a58UL, 0x3f431806UL, - 0x2aea0000UL, 0x3fc9bbe8UL, 0x7723ea61UL, 0x3d3a2369UL, 0x00000000UL, - 0x00000000UL, 0xdf7796ffUL, 0x3fd6e642UL, 0x00000000UL, 0x3ff00000UL, - 0x00000000UL, 0xfffffff8UL, 0x4f48b8d3UL, 0x3f33eaf9UL, 0x00000000UL, - 0x00000000UL, 0x0cf7586fUL, 
0x3f20b8eaUL, 0x00000000UL, 0x00000000UL, - 0xd0258911UL, 0x3f0abaf3UL, 0x23e49fe9UL, 0x3fab5a8cUL, 0x2d53222eUL, - 0x3ef60d15UL, 0x21169451UL, 0x3fa172b2UL, 0xbb254dbcUL, 0x3ee1d3b5UL, - 0xdbf93b8eUL, 0x3f84c7dbUL, 0x05b4630bUL, 0x3ecd3364UL, 0xee9aada7UL, - 0x3f743924UL, 0x794a8297UL, 0x3eb7b7b9UL, 0xe015f797UL, 0x3f5d41f5UL, - 0xe41a4a56UL, 0x3ea35dfbUL, 0xe4c2a251UL, 0x3f49a2abUL, 0x5af9e000UL, - 0x3fce49ceUL, 0x8c743719UL, 0xbd1eb860UL, 0x00000000UL, 0x00000000UL, - 0x1b4863cfUL, 0x3fd78294UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, - 0xfffffff8UL, 0x65965966UL, 0xc0219659UL, 0x00000000UL, 0x00000000UL, - 0x882c10faUL, 0x402664f4UL, 0x00000000UL, 0x00000000UL, 0x83cd3723UL, - 0xc02c8342UL, 0x00000000UL, 0xc0000000UL, 0x55e6c23dUL, 0x403226e3UL, - 0x55555555UL, 0x40055555UL, 0x34451939UL, 0xc0371c96UL, 0xaaaaaaabUL, - 0xc00aaaaaUL, 0x0e157de0UL, 0x403d6d3dUL, 0x11111111UL, 0x40111111UL, - 0xa738201fUL, 0xc042bbceUL, 0x05b05b06UL, 0xc015b05bUL, 0x452b75e3UL, - 0x4047da36UL, 0x1ba1ba1cUL, 0x401ba1baUL, 0x00000000UL, 0xbff00000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x40000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0xc7ab4d5aUL, 0xc0085e24UL, 0x00000000UL, 0x00000000UL, 0xe93ea75dUL, - 0x400b963dUL, 0x00000000UL, 0x00000000UL, 0x94a7f25aUL, 0xc00f37e2UL, - 0x4b6261cbUL, 0xbff5f984UL, 0x5a9dd812UL, 0x4011aab0UL, 0x74c30018UL, - 0x3ffaf5a5UL, 0x7f2ce8e3UL, 0xc013fe8bUL, 0xfe8e54faUL, 0xbffd7334UL, - 0x670d618dUL, 0x4016a10cUL, 0x4db97058UL, 0x4000e012UL, 0x24df44ddUL, - 0xc0199c5fUL, 0x697d6eceUL, 0xc003006eUL, 0x83298b82UL, 0x401cfc4dUL, - 0x19d490d6UL, 0x40058c19UL, 0x2ae42850UL, 0xbfea4300UL, 0x118e20e6UL, - 0x3c7a6db8UL, 0x00000000UL, 0x40000000UL, 0xe33345b8UL, 0xbfd4e526UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x2b2c49d0UL, - 0xbff2de9cUL, 0x00000000UL, 0x00000000UL, 0x2655bc98UL, 0x3ff33e58UL, - 0x00000000UL, 0x00000000UL, 0xff691fa2UL, 0xbff3972eUL, 0xe93463bdUL, - 
0xbfeeed87UL, 0x070e10a0UL, 0x3ff3f5b2UL, 0xf4d790a4UL, 0x3ff20c10UL, - 0xa04e8ea3UL, 0xbff4541aUL, 0x386accd3UL, 0xbff1369eUL, 0x222a66ddUL, - 0x3ff4b521UL, 0x22a9777eUL, 0x3ff20817UL, 0x52a04a6eUL, 0xbff5178fUL, - 0xddaa0031UL, 0xbff22137UL, 0x4447d47cUL, 0x3ff57c01UL, 0x1e9c7f1dUL, - 0x3ff29311UL, 0x2ab7f990UL, 0xbfe561b8UL, 0x209c7df1UL, 0xbc87a8c5UL, - 0x00000000UL, 0x3ff00000UL, 0x4170bcc6UL, 0x3fdc92d8UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0xcc03e501UL, 0xbfdff10fUL, - 0x00000000UL, 0x00000000UL, 0x44a4e845UL, 0x3fddb63bUL, 0x00000000UL, - 0x00000000UL, 0x3768ad9fUL, 0xbfdb72a4UL, 0x3dd01ccaUL, 0xbfe5fdb9UL, - 0xa61d2811UL, 0x3fd972b2UL, 0x5645ad0bUL, 0x3fe977f9UL, 0xd013b3abUL, - 0xbfd78ca3UL, 0xbf0bf914UL, 0xbfe4f192UL, 0x4d53e730UL, 0x3fd5d060UL, - 0x3f8b9000UL, 0x3fe49933UL, 0xe2b82f08UL, 0xbfd4322aUL, 0x5936a835UL, - 0xbfe27ae1UL, 0xb1c61c9bUL, 0x3fd2b3fbUL, 0xef478605UL, 0x3fe1659eUL, - 0x190834ecUL, 0xbfe11ab7UL, 0xcdb625eaUL, 0x3c8e564bUL, 0x00000000UL, - 0x3ff00000UL, 0xb07217e3UL, 0x3fd248f1UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x56f37042UL, 0xbfccfc56UL, 0x00000000UL, - 0x00000000UL, 0xaa563951UL, 0x3fc90125UL, 0x00000000UL, 0x00000000UL, - 0x3d0e7c5dUL, 0xbfc50533UL, 0x9bed9b2eUL, 0xbfdf0ed9UL, 0x5fe7c47cUL, - 0x3fc1f250UL, 0x96c125e5UL, 0x3fe2edd9UL, 0x5a02bbd8UL, 0xbfbe5c71UL, - 0x86362c20UL, 0xbfda08b7UL, 0x4b4435edUL, 0x3fb9d342UL, 0x4b494091UL, - 0x3fd911bdUL, 0xb56658beUL, 0xbfb5e4c7UL, 0x93a2fd76UL, 0xbfd3c092UL, - 0xda271794UL, 0x3fb29910UL, 0x3303df2bUL, 0x3fd189beUL, 0x99fcef32UL, - 0xbfda8279UL, 0xb68c1467UL, 0xbc708b2fUL, 0x00000000UL, 0x3ff00000UL, - 0x980c4337UL, 0x3fc5f619UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x9314533eUL, 0xbfbb8ec5UL, 0x00000000UL, 0x00000000UL, - 0x09aa36d0UL, 0x3fb6d3f4UL, 0x00000000UL, 0x00000000UL, 0xdcb427fdUL, - 0xbfb13950UL, 0xd87ab0bbUL, 0xbfd5335eUL, 0xce0ae8a5UL, 0x3fabb382UL, - 0x79143126UL, 0x3fddba41UL, 0x5f2b28d4UL, 0xbfa552f1UL, 
0x59f21a6dUL, - 0xbfd015abUL, 0x22c27d95UL, 0x3fa0e984UL, 0xe19fc6aaUL, 0x3fd0576cUL, - 0x8f2c2950UL, 0xbf9a4898UL, 0xc0b3f22cUL, 0xbfc59462UL, 0x1883a4b8UL, - 0x3f94b61cUL, 0x3f838640UL, 0x3fc30eb8UL, 0x355c63dcUL, 0xbfd36a08UL, - 0x1dce993dUL, 0x3c6d704dUL, 0x00000000UL, 0x3ff00000UL, 0x2b82ab63UL, - 0x3fb78e92UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, - 0x5a279ea3UL, 0xbfaa3407UL, 0x00000000UL, 0x00000000UL, 0x432d65faUL, - 0x3fa70153UL, 0x00000000UL, 0x00000000UL, 0x891a4602UL, 0xbf9d03efUL, - 0xd62ca5f8UL, 0xbfca77d9UL, 0xb35f4628UL, 0x3f97a265UL, 0x433258faUL, - 0x3fd8cf51UL, 0xb58fd909UL, 0xbf8f88e3UL, 0x01771ceaUL, 0xbfc2b154UL, - 0xf3562f8eUL, 0x3f888f57UL, 0xc028a723UL, 0x3fc7370fUL, 0x20b7f9f0UL, - 0xbf80f44cUL, 0x214368e9UL, 0xbfb6dfaaUL, 0x28891863UL, 0x3f79b4b6UL, - 0x172dbbf0UL, 0x3fb6cb8eUL, 0xe0553158UL, 0xbfc975f5UL, 0x593fe814UL, - 0xbc2ef5d3UL, 0x00000000UL, 0x3ff00000UL, 0x03dec550UL, 0x3fa44203UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x4e435f9bUL, - 0xbf953f83UL, 0x00000000UL, 0x00000000UL, 0x3c6e8e46UL, 0x3f9b74eaUL, - 0x00000000UL, 0x00000000UL, 0xda5b7511UL, 0xbf85ad63UL, 0xdc230b9bUL, - 0xbfb97558UL, 0x26cb3788UL, 0x3f881308UL, 0x76fc4985UL, 0x3fd62ac9UL, - 0x77bb08baUL, 0xbf757c85UL, 0xb6247521UL, 0xbfb1381eUL, 0x5922170cUL, - 0x3f754e95UL, 0x8746482dUL, 0x3fc27f83UL, 0x11055b30UL, 0xbf64e391UL, - 0x3e666320UL, 0xbfa3e609UL, 0x0de9dae3UL, 0x3f6301dfUL, 0x1f1dca06UL, - 0x3fafa8aeUL, 0x8c5b2da2UL, 0xbfb936bbUL, 0x4e88f7a5UL, 0xbc587d05UL, - 0x00000000UL, 0x3ff00000UL, 0xa8935dd9UL, 0x3f83dde2UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x6dc9c883UL, 0x3fe45f30UL, - 0x6dc9c883UL, 0x40245f30UL, 0x00000000UL, 0x43780000UL, 0x00000000UL, - 0x43380000UL, 0x54444000UL, 0x3fb921fbUL, 0x54440000UL, 0x3fb921fbUL, - 0x67674000UL, 0xbd32e7b9UL, 0x4c4c0000UL, 0x3d468c23UL, 0x3707344aUL, - 0x3aa8a2e0UL, 0x03707345UL, 0x3ae98a2eUL, 0x00000000UL, 0x80000000UL, - 0x00000000UL, 0x80000000UL, 
0x676733afUL, 0x3d32e7b9UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x3ff00000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x7ff00000UL, 0x00000000UL, 0x00000000UL, 0xfffc0000UL, - 0xffffffffUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x43600000UL, - 0x00000000UL, 0x00000000UL, 0x00000000UL, 0x3c800000UL, 0x00000000UL, - 0x00000000UL, 0x00000000UL, 0x3ca00000UL, 0x00000000UL, 0x00000000UL, - 0x00000000UL, 0x3fe00000UL, 0x00000000UL, 0x3fe00000UL, 0x00000000UL, - 0x40300000UL, 0x00000000UL, 0x3ff00000UL -}; - -void MacroAssembler::fast_tan(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) { - - Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2; - Label L_2TAG_PACKET_4_0_2; - - assert_different_registers(tmp, eax, ecx, edx); - - address static_const_table_tan = (address)_static_const_table_tan; - - subl(rsp, 120); - movl(Address(rsp, 56), tmp); - lea(tmp, ExternalAddress(static_const_table_tan)); - movsd(xmm0, Address(rsp, 128)); - pextrw(eax, xmm0, 3); - andl(eax, 32767); - subl(eax, 14368); - cmpl(eax, 2216); - jcc(Assembler::above, L_2TAG_PACKET_0_0_2); - movdqu(xmm5, Address(tmp, 5840)); - movdqu(xmm6, Address(tmp, 5856)); - unpcklpd(xmm0, xmm0); - movdqu(xmm4, Address(tmp, 5712)); - andpd(xmm4, xmm0); - movdqu(xmm1, Address(tmp, 5632)); - mulpd(xmm1, xmm0); - por(xmm5, xmm4); - addpd(xmm1, xmm5); - movdqu(xmm7, xmm1); - unpckhpd(xmm7, xmm7); - cvttsd2sil(edx, xmm7); - cvttpd2dq(xmm1, xmm1); - cvtdq2pd(xmm1, xmm1); - mulpd(xmm1, xmm6); - movdqu(xmm3, Address(tmp, 5664)); - movsd(xmm5, Address(tmp, 5728)); - addl(edx, 469248); - movdqu(xmm4, Address(tmp, 5680)); - mulpd(xmm3, xmm1); - andl(edx, 31); - mulsd(xmm5, xmm1); - movl(ecx, edx); - mulpd(xmm4, xmm1); - shll(ecx, 1); - subpd(xmm0, xmm3); - mulpd(xmm1, Address(tmp, 5696)); - addl(edx, ecx); - shll(ecx, 2); - 
addl(edx, ecx); - addsd(xmm5, xmm0); - movdqu(xmm2, xmm0); - subpd(xmm0, xmm4); - movsd(xmm6, Address(tmp, 5744)); - shll(edx, 4); - lea(eax, Address(tmp, 0)); - andpd(xmm5, Address(tmp, 5776)); - movdqu(xmm3, xmm0); - addl(eax, edx); - subpd(xmm2, xmm0); - unpckhpd(xmm0, xmm0); - divsd(xmm6, xmm5); - subpd(xmm2, xmm4); - movdqu(xmm7, Address(eax, 16)); - subsd(xmm3, xmm5); - mulpd(xmm7, xmm0); - subpd(xmm2, xmm1); - movdqu(xmm1, Address(eax, 48)); - mulpd(xmm1, xmm0); - movdqu(xmm4, Address(eax, 96)); - mulpd(xmm4, xmm0); - addsd(xmm2, xmm3); - movdqu(xmm3, xmm0); - mulpd(xmm0, xmm0); - addpd(xmm7, Address(eax, 0)); - addpd(xmm1, Address(eax, 32)); - mulpd(xmm1, xmm0); - addpd(xmm4, Address(eax, 80)); - addpd(xmm7, xmm1); - movdqu(xmm1, Address(eax, 112)); - mulpd(xmm1, xmm0); - mulpd(xmm0, xmm0); - addpd(xmm4, xmm1); - movdqu(xmm1, Address(eax, 64)); - mulpd(xmm1, xmm0); - addpd(xmm7, xmm1); - movdqu(xmm1, xmm3); - mulpd(xmm3, xmm0); - mulsd(xmm0, xmm0); - mulpd(xmm1, Address(eax, 144)); - mulpd(xmm4, xmm3); - movdqu(xmm3, xmm1); - addpd(xmm7, xmm4); - movdqu(xmm4, xmm1); - mulsd(xmm0, xmm7); - unpckhpd(xmm7, xmm7); - addsd(xmm0, xmm7); - unpckhpd(xmm1, xmm1); - addsd(xmm3, xmm1); - subsd(xmm4, xmm3); - addsd(xmm1, xmm4); - movdqu(xmm4, xmm2); - movsd(xmm7, Address(eax, 144)); - unpckhpd(xmm2, xmm2); - addsd(xmm7, Address(eax, 152)); - mulsd(xmm7, xmm2); - addsd(xmm7, Address(eax, 136)); - addsd(xmm7, xmm1); - addsd(xmm0, xmm7); - movsd(xmm7, Address(tmp, 5744)); - mulsd(xmm4, xmm6); - movsd(xmm2, Address(eax, 168)); - andpd(xmm2, xmm6); - mulsd(xmm5, xmm2); - mulsd(xmm6, Address(eax, 160)); - subsd(xmm7, xmm5); - subsd(xmm2, Address(eax, 128)); - subsd(xmm7, xmm4); - mulsd(xmm7, xmm6); - movdqu(xmm4, xmm3); - subsd(xmm3, xmm2); - addsd(xmm2, xmm3); - subsd(xmm4, xmm2); - addsd(xmm0, xmm4); - subsd(xmm0, xmm7); - addsd(xmm0, xmm3); - movsd(Address(rsp, 0), xmm0); - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_1_0_2); - - bind(L_2TAG_PACKET_0_0_2); - 
jcc(Assembler::greater, L_2TAG_PACKET_2_0_2); - shrl(eax, 4); - cmpl(eax, 268434558); - jcc(Assembler::notEqual, L_2TAG_PACKET_3_0_2); - movdqu(xmm3, xmm0); - mulsd(xmm3, Address(tmp, 5808)); - - bind(L_2TAG_PACKET_3_0_2); - movsd(xmm3, Address(tmp, 5792)); - mulsd(xmm3, xmm0); - addsd(xmm3, xmm0); - mulsd(xmm3, Address(tmp, 5808)); - movsd(Address(rsp, 0), xmm3); - fld_d(Address(rsp, 0)); - jmp(L_2TAG_PACKET_1_0_2); - - bind(L_2TAG_PACKET_2_0_2); - movq(xmm7, Address(tmp, 5712)); - andpd(xmm7, xmm0); - xorpd(xmm7, xmm0); - ucomisd(xmm7, Address(tmp, 5760)); - jcc(Assembler::equal, L_2TAG_PACKET_4_0_2); - subl(rsp, 32); - movsd(Address(rsp, 0), xmm0); - lea(eax, Address(rsp, 40)); - movl(Address(rsp, 8), eax); - movl(eax, 2); - movl(Address(rsp, 12), eax); - call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlibm_tan_cot_huge()))); - addl(rsp, 32); - fld_d(Address(rsp, 8)); - jmp(L_2TAG_PACKET_1_0_2); - - bind(L_2TAG_PACKET_4_0_2); - movq(Address(rsp, 0), xmm0); - fld_d(Address(rsp, 0)); - fsub_d(Address(rsp, 0)); - - bind(L_2TAG_PACKET_1_0_2); - movl(tmp, Address(rsp, 56)); -} diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_sha.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_sha.cpp index e7d728c2e9672..343f3ef7316af 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86_sha.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86_sha.cpp @@ -236,17 +236,11 @@ void MacroAssembler::fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, // and state0 and state1 can never use xmm0 register. // ofs and limit are used for multi-block byte array. 
// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit) -#ifdef _LP64 void MacroAssembler::fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block, XMMRegister shuf_mask) { -#else -void MacroAssembler::fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0, - XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4, - Register buf, Register state, Register ofs, Register limit, Register rsp, - bool multi_block) { -#endif + Label done_hash, loop0; address K256 = StubRoutines::x86::k256_addr(); @@ -261,9 +255,7 @@ void MacroAssembler::fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegiste palignr(state0, state1, 8); pblendw(state1, msgtmp4, 0xF0); -#ifdef _LP64 movdqu(shuf_mask, ExternalAddress(pshuffle_byte_flip_mask)); -#endif lea(rax, ExternalAddress(K256)); bind(loop0); @@ -272,11 +264,7 @@ void MacroAssembler::fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegiste // Rounds 0-3 movdqu(msg, Address(buf, 0)); -#ifdef _LP64 pshufb(msg, shuf_mask); -#else - pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask)); -#endif movdqa(msgtmp0, msg); paddd(msg, Address(rax, 0)); sha256rnds2(state1, state0); @@ -285,11 +273,7 @@ void MacroAssembler::fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegiste // Rounds 4-7 movdqu(msg, Address(buf, 16)); -#ifdef _LP64 pshufb(msg, shuf_mask); -#else - pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask)); -#endif movdqa(msgtmp1, msg); paddd(msg, Address(rax, 16)); sha256rnds2(state1, state0); @@ -299,11 +283,7 @@ void MacroAssembler::fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegiste // Rounds 8-11 movdqu(msg, Address(buf, 32)); -#ifdef _LP64 pshufb(msg, shuf_mask); -#else - pshufb(msg, 
ExternalAddress(pshuffle_byte_flip_mask)); -#endif movdqa(msgtmp2, msg); paddd(msg, Address(rax, 32)); sha256rnds2(state1, state0); @@ -313,11 +293,7 @@ void MacroAssembler::fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegiste // Rounds 12-15 movdqu(msg, Address(buf, 48)); -#ifdef _LP64 pshufb(msg, shuf_mask); -#else - pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask)); -#endif movdqa(msgtmp3, msg); paddd(msg, Address(rax, 48)); sha256rnds2(state1, state0); @@ -492,7 +468,6 @@ void MacroAssembler::fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegiste } -#ifdef _LP64 /* The algorithm below is based on Intel publication: "Fast SHA-256 Implementations on Intelë Architecture Processors" by Jim Guilford, Kirk Yap and Vinodh Gopal. @@ -1697,6 +1672,3 @@ void MacroAssembler::sha512_update_ni_x1(Register arg_hash, Register arg_msg, Re bind(done_hash); } - -#endif //#ifdef _LP64 - diff --git a/src/hotspot/cpu/x86/matcher_x86.hpp b/src/hotspot/cpu/x86/matcher_x86.hpp index b311f4144b2bf..bad3a078b6607 100644 --- a/src/hotspot/cpu/x86/matcher_x86.hpp +++ b/src/hotspot/cpu/x86/matcher_x86.hpp @@ -61,51 +61,33 @@ //return value == (int) value; // Cf. storeImmL and immL32. // Probably always true, even if a temp register is required. -#ifdef _LP64 return true; -#else - return false; -#endif } -#ifdef _LP64 // No additional cost for CMOVL. static constexpr int long_cmove_cost() { return 0; } -#else - // Needs 2 CMOV's for longs. - static constexpr int long_cmove_cost() { return 1; } -#endif -#ifdef _LP64 // No CMOVF/CMOVD with SSE2 static int float_cmove_cost() { return ConditionalMoveLimit; } -#else - // No CMOVF/CMOVD with SSE/SSE2 - static int float_cmove_cost() { return (UseSSE>=1) ? 
ConditionalMoveLimit : 0; } -#endif static bool narrow_oop_use_complex_address() { - NOT_LP64(ShouldNotCallThis();) assert(UseCompressedOops, "only for compressed oops code"); return (LogMinObjAlignmentInBytes <= 3); } static bool narrow_klass_use_complex_address() { - NOT_LP64(ShouldNotCallThis();) assert(UseCompressedClassPointers, "only for compressed klass code"); return (CompressedKlassPointers::shift() <= 3); } // Prefer ConN+DecodeN over ConP. static bool const_oop_prefer_decode() { - NOT_LP64(ShouldNotCallThis();) // Prefer ConN+DecodeN over ConP. return true; } // Prefer ConP over ConNKlass+DecodeNKlass. static bool const_klass_prefer_decode() { - NOT_LP64(ShouldNotCallThis();) return false; } @@ -121,33 +103,15 @@ // Java calling convention forces doubles to be aligned. static const bool misaligned_doubles_ok = true; - // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. -#ifdef _LP64 - static const bool strict_fp_requires_explicit_rounding = false; -#else - static const bool strict_fp_requires_explicit_rounding = true; -#endif - // Are floats converted to double when stored to stack during deoptimization? // On x64 it is stored without conversion so we can use normal access. // On x32 it is stored with conversion only when FPU is used for floats. -#ifdef _LP64 static constexpr bool float_in_double() { return false; } -#else - static bool float_in_double() { - return (UseSSE == 0); - } -#endif // Do ints take an entire long register or just half? -#ifdef _LP64 static const bool int_in_long = true; -#else - static const bool int_in_long = false; -#endif - // Does the CPU supports vector variable shift instructions? 
static bool supports_vector_variable_shifts(void) { diff --git a/src/hotspot/cpu/x86/methodHandles_x86.cpp b/src/hotspot/cpu/x86/methodHandles_x86.cpp index fd738b7333e4f..513149de14ff3 100644 --- a/src/hotspot/cpu/x86/methodHandles_x86.cpp +++ b/src/hotspot/cpu/x86/methodHandles_x86.cpp @@ -83,8 +83,8 @@ void MethodHandles::verify_klass(MacroAssembler* _masm, __ verify_oop(obj); __ testptr(obj, obj); __ jcc(Assembler::zero, L_bad); -#define PUSH { __ push(temp); LP64_ONLY( __ push(rscratch1); ) } -#define POP { LP64_ONLY( __ pop(rscratch1); ) __ pop(temp); } +#define PUSH { __ push(temp); __ push(rscratch1); } +#define POP { __ pop(rscratch1); __ pop(temp); } PUSH; __ load_klass(temp, obj, rscratch1); __ cmpptr(temp, ExternalAddress((address) klass_addr), rscratch1); @@ -140,12 +140,7 @@ void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register meth // JVMTI events, such as single-stepping, are implemented partly by avoiding running // compiled code in threads for which the event is enabled. Check here for // interp_only_mode if these events CAN be enabled. -#ifdef _LP64 Register rthread = r15_thread; -#else - Register rthread = temp; - __ get_thread(rthread); -#endif // interp_only is an int, on little endian it is sufficient to test the byte only // Is a cmpl faster? 
__ cmpb(Address(rthread, JavaThread::interp_only_mode_offset()), 0); @@ -325,7 +320,6 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, assert(is_signature_polymorphic(iid), "expected invoke iid"); Register rbx_method = rbx; // eventual target of this invocation // temps used in this code are not used in *either* compiled or interpreted calling sequences -#ifdef _LP64 Register temp1 = rscratch1; Register temp2 = rscratch2; Register temp3 = rax; @@ -334,19 +328,7 @@ void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); - } -#else - Register temp1 = (for_compiler_entry ? rsi : rdx); - Register temp2 = rdi; - Register temp3 = rax; - if (for_compiler_entry) { - assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic || iid == vmIntrinsics::_linkToNative ? noreg : rcx), "only valid assignment"); - assert_different_registers(temp1, rcx, rdx); - assert_different_registers(temp2, rcx, rdx); - assert_different_registers(temp3, rcx, rdx); - } -#endif - else { + } else { assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP } assert_different_registers(temp1, temp2, temp3, receiver_reg); @@ -535,7 +517,6 @@ void trace_method_handle_stub(const char* adaptername, for (int i = 0; i < saved_regs_count; i++) { Register r = as_Register(i); // The registers are stored in reverse order on the stack (by pusha). -#ifdef AMD64 int num_regs = UseAPX ? 
32 : 16; assert(Register::available_gp_registers() == num_regs, "sanity"); if (r == rsp) { @@ -544,9 +525,6 @@ void trace_method_handle_stub(const char* adaptername, } else { ls.print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]); } -#else - ls.print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]); -#endif if ((i + 1) % 4 == 0) { ls.cr(); } else { @@ -652,17 +630,7 @@ void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adapt // save FP result, valid at some call sites (adapter_opt_return_float, ...) __ decrement(rsp, 2 * wordSize); -#ifdef _LP64 __ movdbl(Address(rsp, 0), xmm0); -#else - if (UseSSE >= 2) { - __ movdbl(Address(rsp, 0), xmm0); - } else if (UseSSE == 1) { - __ movflt(Address(rsp, 0), xmm0); - } else { - __ fst_d(Address(rsp, 0)); - } -#endif // LP64 // Incoming state: // rcx: method handle @@ -677,17 +645,7 @@ void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adapt __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub_wrapper), rsp); __ increment(rsp, sizeof(MethodHandleStubArguments)); -#ifdef _LP64 __ movdbl(xmm0, Address(rsp, 0)); -#else - if (UseSSE >= 2) { - __ movdbl(xmm0, Address(rsp, 0)); - } else if (UseSSE == 1) { - __ movflt(xmm0, Address(rsp, 0)); - } else { - __ fld_d(Address(rsp, 0)); - } -#endif // LP64 __ increment(rsp, 2 * wordSize); __ popa(); diff --git a/src/hotspot/cpu/x86/methodHandles_x86.hpp b/src/hotspot/cpu/x86/methodHandles_x86.hpp index 6574fec66017a..9ffe5e198acd8 100644 --- a/src/hotspot/cpu/x86/methodHandles_x86.hpp +++ b/src/hotspot/cpu/x86/methodHandles_x86.hpp @@ -60,5 +60,5 @@ enum /* platform_dependent_constants */ { static Register saved_last_sp_register() { // Should be in sharedRuntime, not here. 
- return LP64_ONLY(r13) NOT_LP64(rsi); + return r13; } diff --git a/src/hotspot/cpu/x86/nativeInst_x86.cpp b/src/hotspot/cpu/x86/nativeInst_x86.cpp index d5021c29ed6b0..b918fe3dd4423 100644 --- a/src/hotspot/cpu/x86/nativeInst_x86.cpp +++ b/src/hotspot/cpu/x86/nativeInst_x86.cpp @@ -68,9 +68,7 @@ void NativeCall::print() { // Inserts a native call instruction at a given pc void NativeCall::insert(address code_pos, address entry) { intptr_t disp = (intptr_t)entry - ((intptr_t)code_pos + 1 + 4); -#ifdef AMD64 guarantee(disp == (intptr_t)(jint)disp, "must be 32-bit offset"); -#endif // AMD64 *code_pos = instruction_code; *((int32_t *)(code_pos+1)) = (int32_t) disp; ICache::invalidate_range(code_pos, instruction_size); @@ -158,7 +156,6 @@ void NativeCall::set_destination_mt_safe(address dest) { void NativeMovConstReg::verify() { -#ifdef AMD64 // make sure code pattern is actually a mov reg64, imm64 instruction bool valid_rex_prefix = ubyte_at(0) == Assembler::REX_W || ubyte_at(0) == Assembler::REX_WB; bool valid_rex2_prefix = ubyte_at(0) == Assembler::REX2 && @@ -170,12 +167,6 @@ void NativeMovConstReg::verify() { print(); fatal("not a REX.W[B] mov reg64, imm64"); } -#else - // make sure code pattern is actually a mov reg, imm32 instruction - u_char test_byte = *(u_char*)instruction_address(); - u_char test_byte_2 = test_byte & ( 0xff ^ register_mask); - if (test_byte_2 != instruction_code) fatal("not a mov reg, imm32"); -#endif // AMD64 } @@ -193,12 +184,10 @@ int NativeMovRegMem::instruction_start() const { // See comment in Assembler::locate_operand() about VEX prefixes. 
if (instr_0 == instruction_VEX_prefix_2bytes) { assert((UseAVX > 0), "shouldn't have VEX prefix"); - NOT_LP64(assert((0xC0 & ubyte_at(1)) == 0xC0, "shouldn't have LDS and LES instructions")); return 2; } if (instr_0 == instruction_VEX_prefix_3bytes) { assert((UseAVX > 0), "shouldn't have VEX prefix"); - NOT_LP64(assert((0xC0 & ubyte_at(1)) == 0xC0, "shouldn't have LDS and LES instructions")); return 3; } if (instr_0 == instruction_EVEX_prefix_4bytes) { @@ -314,8 +303,7 @@ void NativeMovRegMem::print() { void NativeLoadAddress::verify() { // make sure code pattern is actually a mov [reg+offset], reg instruction u_char test_byte = *(u_char*)instruction_address(); - if ( ! ((test_byte == lea_instruction_code) - LP64_ONLY(|| (test_byte == mov64_instruction_code) ))) { + if ( ! ((test_byte == lea_instruction_code) || (test_byte == mov64_instruction_code) )) { fatal ("not a lea reg, [reg+offs] instruction"); } } @@ -341,9 +329,7 @@ void NativeJump::verify() { void NativeJump::insert(address code_pos, address entry) { intptr_t disp = (intptr_t)entry - ((intptr_t)code_pos + 1 + 4); -#ifdef AMD64 guarantee(disp == (intptr_t)(int32_t)disp, "must be 32-bit offset"); -#endif // AMD64 *code_pos = instruction_code; *((int32_t*)(code_pos + 1)) = (int32_t)disp; @@ -356,11 +342,7 @@ void NativeJump::check_verified_entry_alignment(address entry, address verified_ // in use. The patching in that instance must happen only when certain // alignment restrictions are true. These guarantees check those // conditions. 
-#ifdef AMD64 const int linesize = 64; -#else - const int linesize = 32; -#endif // AMD64 // Must be wordSize aligned guarantee(((uintptr_t) verified_entry & (wordSize -1)) == 0, @@ -387,7 +369,6 @@ void NativeJump::check_verified_entry_alignment(address entry, address verified_ // void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { // complete jump instruction (to be inserted) is in code_buffer; -#ifdef _LP64 union { jlong cb_long; unsigned char code_buffer[8]; @@ -403,43 +384,6 @@ void NativeJump::patch_verified_entry(address entry, address verified_entry, add Atomic::store((jlong *) verified_entry, u.cb_long); ICache::invalidate_range(verified_entry, 8); - -#else - unsigned char code_buffer[5]; - code_buffer[0] = instruction_code; - intptr_t disp = (intptr_t)dest - ((intptr_t)verified_entry + 1 + 4); - *(int32_t*)(code_buffer + 1) = (int32_t)disp; - - check_verified_entry_alignment(entry, verified_entry); - - // Can't call nativeJump_at() because it's asserts jump exists - NativeJump* n_jump = (NativeJump*) verified_entry; - - //First patch dummy jmp in place - - unsigned char patch[4]; - assert(sizeof(patch)==sizeof(int32_t), "sanity check"); - patch[0] = 0xEB; // jmp rel8 - patch[1] = 0xFE; // jmp to self - patch[2] = 0xEB; - patch[3] = 0xFE; - - // First patch dummy jmp in place - *(int32_t*)verified_entry = *(int32_t *)patch; - - n_jump->wrote(0); - - // Patch 5th byte (from jump instruction) - verified_entry[4] = code_buffer[4]; - - n_jump->wrote(4); - - // Patch bytes 0-3 (from jump instruction) - *(int32_t*)verified_entry = *(int32_t *)code_buffer; - // Invalidate. Opteron requires a flush after every write. 
- n_jump->wrote(0); -#endif // _LP64 - } void NativeIllegalInstruction::insert(address code_pos) { @@ -456,9 +400,7 @@ void NativeGeneralJump::verify() { void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { intptr_t disp = (intptr_t)entry - ((intptr_t)code_pos + 1 + 4); -#ifdef AMD64 guarantee(disp == (intptr_t)(int32_t)disp, "must be 32-bit offset"); -#endif // AMD64 *code_pos = unconditional_long_jump; *((int32_t *)(code_pos+1)) = (int32_t) disp; diff --git a/src/hotspot/cpu/x86/nativeInst_x86.hpp b/src/hotspot/cpu/x86/nativeInst_x86.hpp index d02387aa9ffbb..d5cd0f4295767 100644 --- a/src/hotspot/cpu/x86/nativeInst_x86.hpp +++ b/src/hotspot/cpu/x86/nativeInst_x86.hpp @@ -126,10 +126,8 @@ class NativeCall: public NativeInstruction { address return_address() const { return addr_at(return_address_offset); } address destination() const; void set_destination(address dest) { -#ifdef AMD64 intptr_t disp = dest - return_address(); guarantee(disp == (intptr_t)(jint)disp, "must be 32-bit offset"); -#endif // AMD64 set_int_at(displacement_offset, (int)(dest - return_address())); } // Returns whether the 4-byte displacement operand is 4-byte aligned. 
@@ -211,15 +209,10 @@ class NativeCallReg: public NativeInstruction { // Instruction format for implied addressing mode immediate operand move to register instruction: // [REX/REX2] [OPCODE] [IMM32] class NativeMovConstReg: public NativeInstruction { -#ifdef AMD64 static const bool has_rex = true; static const int rex_size = 1; static const int rex2_size = 2; -#else - static const bool has_rex = false; - static const int rex_size = 0; - static const int rex2_size = 0; -#endif // AMD64 + public: enum Intel_specific_constants { instruction_code = 0xB8, @@ -390,13 +383,9 @@ inline NativeMovRegMem* nativeMovRegMem_at (address address) { // leal reg, [reg + offset] class NativeLoadAddress: public NativeMovRegMem { -#ifdef AMD64 static const bool has_rex = true; static const int rex_size = 1; -#else - static const bool has_rex = false; - static const int rex_size = 0; -#endif // AMD64 + public: enum Intel_specific_constants { instruction_prefix_wide = Assembler::REX_W, @@ -447,9 +436,7 @@ class NativeJump: public NativeInstruction { if (dest == (address) -1) { val = -5; // jump to self } -#ifdef AMD64 assert((labs(val) & 0xFFFFFFFF00000000) == 0 || dest == (address)-1, "must be 32bit offset or -1"); -#endif // AMD64 set_int_at(data_offset, (jint)val); } @@ -572,19 +559,14 @@ inline bool NativeInstruction::is_jump_reg() { inline bool NativeInstruction::is_cond_jump() { return (int_at(0) & 0xF0FF) == 0x800F /* long jump */ || (ubyte_at(0) & 0xF0) == 0x70; /* short jump */ } inline bool NativeInstruction::is_safepoint_poll() { -#ifdef AMD64 const bool has_rex_prefix = ubyte_at(0) == NativeTstRegMem::instruction_rex_b_prefix; const int test_offset = has_rex2_prefix() ? 2 : (has_rex_prefix ? 
1 : 0); -#else - const int test_offset = 0; -#endif const bool is_test_opcode = ubyte_at(test_offset) == NativeTstRegMem::instruction_code_memXregl; const bool is_rax_target = (ubyte_at(test_offset + 1) & NativeTstRegMem::modrm_mask) == NativeTstRegMem::modrm_reg; return is_test_opcode && is_rax_target; } inline bool NativeInstruction::is_mov_literal64() { -#ifdef AMD64 bool valid_rex_prefix = ubyte_at(0) == Assembler::REX_W || ubyte_at(0) == Assembler::REX_WB; bool valid_rex2_prefix = ubyte_at(0) == Assembler::REX2 && (ubyte_at(1) == Assembler::REX2BIT_W || @@ -593,9 +575,6 @@ inline bool NativeInstruction::is_mov_literal64() { int opcode = has_rex2_prefix() ? ubyte_at(2) : ubyte_at(1); return ((valid_rex_prefix || valid_rex2_prefix) && (opcode & (0xff ^ NativeMovConstReg::register_mask)) == 0xB8); -#else - return false; -#endif // AMD64 } class NativePostCallNop: public NativeInstruction { diff --git a/src/hotspot/cpu/x86/register_x86.cpp b/src/hotspot/cpu/x86/register_x86.cpp index dc5aba3c17801..776041ff6b738 100644 --- a/src/hotspot/cpu/x86/register_x86.cpp +++ b/src/hotspot/cpu/x86/register_x86.cpp @@ -33,14 +33,10 @@ const KRegister::KRegisterImpl all_KRegisterImpls [KRegister::number_ const char * Register::RegisterImpl::name() const { static const char *const names[number_of_registers] = { -#ifdef _LP64 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31" -#else - "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi" -#endif // _LP64 }; return is_valid() ? 
names[encoding()] : "noreg"; } @@ -55,11 +51,9 @@ const char* FloatRegister::FloatRegisterImpl::name() const { const char* XMMRegister::XMMRegisterImpl::name() const { static const char *const names[number_of_registers] = { "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" -#ifdef _LP64 ,"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" ,"xmm16", "xmm17", "xmm18", "xmm19", "xmm20", "xmm21", "xmm22", "xmm23" ,"xmm24", "xmm25", "xmm26", "xmm27", "xmm28", "xmm29", "xmm30", "xmm31" -#endif // _LP64 }; return is_valid() ? names[encoding()] : "xnoreg"; } diff --git a/src/hotspot/cpu/x86/register_x86.hpp b/src/hotspot/cpu/x86/register_x86.hpp index 0a8ecb7be265f..f1bf760b4b8fd 100644 --- a/src/hotspot/cpu/x86/register_x86.hpp +++ b/src/hotspot/cpu/x86/register_x86.hpp @@ -45,9 +45,9 @@ class Register { inline friend constexpr Register as_Register(int encoding); enum { - number_of_registers = LP64_ONLY( 32 ) NOT_LP64( 8 ), - number_of_byte_registers = LP64_ONLY( 32 ) NOT_LP64( 4 ), - max_slots_per_register = LP64_ONLY( 2 ) NOT_LP64( 1 ) + number_of_registers = 32, + number_of_byte_registers = 32, + max_slots_per_register = 2 }; class RegisterImpl: public AbstractRegisterImpl { @@ -79,11 +79,9 @@ class Register { // Actually available GP registers for use, depending on actual CPU capabilities and flags. 
static int available_gp_registers() { -#ifdef _LP64 if (!UseAPX) { return number_of_registers / 2; } -#endif // _LP64 return number_of_registers; } }; @@ -116,7 +114,6 @@ constexpr Register rsp = as_Register(4); constexpr Register rbp = as_Register(5); constexpr Register rsi = as_Register(6); constexpr Register rdi = as_Register(7); -#ifdef _LP64 constexpr Register r8 = as_Register( 8); constexpr Register r9 = as_Register( 9); constexpr Register r10 = as_Register(10); @@ -141,10 +138,10 @@ constexpr Register r28 = as_Register(28); constexpr Register r29 = as_Register(29); constexpr Register r30 = as_Register(30); constexpr Register r31 = as_Register(31); -#endif // _LP64 // The implementation of x87 floating point registers for the ia32 architecture. +// TODO: This is not needed anymore, remove or set number_of_registers=0? class FloatRegister { private: int _encoding; @@ -218,8 +215,8 @@ class XMMRegister { inline friend constexpr XMMRegister as_XMMRegister(int encoding); enum { - number_of_registers = LP64_ONLY( 32 ) NOT_LP64( 8 ), - max_slots_per_register = LP64_ONLY( 16 ) NOT_LP64( 16 ) // 512-bit + number_of_registers = 32, + max_slots_per_register = 16 }; class XMMRegisterImpl: public AbstractRegisterImpl { @@ -250,11 +247,9 @@ class XMMRegister { // Actually available XMM registers for use, depending on actual CPU capabilities and flags. 
static int available_xmm_registers() { -#ifdef _LP64 if (UseAVX < 3) { return number_of_registers / 2; } -#endif // _LP64 return number_of_registers; } }; @@ -287,7 +282,6 @@ constexpr XMMRegister xmm4 = as_XMMRegister( 4); constexpr XMMRegister xmm5 = as_XMMRegister( 5); constexpr XMMRegister xmm6 = as_XMMRegister( 6); constexpr XMMRegister xmm7 = as_XMMRegister( 7); -#ifdef _LP64 constexpr XMMRegister xmm8 = as_XMMRegister( 8); constexpr XMMRegister xmm9 = as_XMMRegister( 9); constexpr XMMRegister xmm10 = as_XMMRegister(10); @@ -312,7 +306,6 @@ constexpr XMMRegister xmm28 = as_XMMRegister(28); constexpr XMMRegister xmm29 = as_XMMRegister(29); constexpr XMMRegister xmm30 = as_XMMRegister(30); constexpr XMMRegister xmm31 = as_XMMRegister(31); -#endif // _LP64 // The implementation of AVX-512 opmask registers. @@ -405,12 +398,7 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl { // There is no requirement that any ordering here matches any ordering c2 gives // it's optoregs. - // x86_32.ad defines additional dummy FILL0-FILL7 registers, in order to tally - // REG_COUNT (computed by ADLC based on the number of reg_defs seen in .ad files) - // with ConcreteRegisterImpl::number_of_registers additional count of 8 is being - // added for 32 bit jvm. 
number_of_registers = max_kpr + // gpr/fpr/xmm/kpr - NOT_LP64( 8 + ) // FILL0-FILL7 in x86_32.ad 1 // eflags }; }; diff --git a/src/hotspot/cpu/x86/relocInfo_x86.cpp b/src/hotspot/cpu/x86/relocInfo_x86.cpp index 2df98c4311b2c..8368b50e9b709 100644 --- a/src/hotspot/cpu/x86/relocInfo_x86.cpp +++ b/src/hotspot/cpu/x86/relocInfo_x86.cpp @@ -37,7 +37,6 @@ void Relocation::pd_set_data_value(address x, bool verify_only) { -#ifdef AMD64 typedef Assembler::WhichOperand WhichOperand; WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm, call32, narrow oop assert(which == Assembler::disp32_operand || @@ -77,13 +76,6 @@ void Relocation::pd_set_data_value(address x, bool verify_only) { *(int32_t*) disp = checked_cast(x - next_ip); } } -#else - if (verify_only) { - guarantee(*pd_address_in_code() == x, "instructions must match"); - } else { - *pd_address_in_code() = x; - } -#endif // AMD64 } @@ -151,22 +143,17 @@ address* Relocation::pd_address_in_code() { assert(is_data(), "must be a DataRelocation"); typedef Assembler::WhichOperand WhichOperand; WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm/imm32 -#ifdef AMD64 assert(which == Assembler::disp32_operand || which == Assembler::call32_operand || which == Assembler::imm_operand, "format unpacks ok"); // The "address" in the code is a displacement can't return it as // and address* since it is really a jint* guarantee(which == Assembler::imm_operand, "must be immediate operand"); -#else - assert(which == Assembler::disp32_operand || which == Assembler::imm_operand, "format unpacks ok"); -#endif // AMD64 return (address*) Assembler::locate_operand(addr(), which); } address Relocation::pd_get_address_from_code() { -#ifdef AMD64 // All embedded Intel addresses are stored in 32-bit words. // Since the addr points at the start of the instruction, // we must parse the instruction a bit to find the embedded word. 
@@ -183,7 +170,6 @@ address Relocation::pd_get_address_from_code() { address a = next_ip + *(int32_t*) disp; return a; } -#endif // AMD64 return *pd_address_in_code(); } diff --git a/src/hotspot/cpu/x86/relocInfo_x86.hpp b/src/hotspot/cpu/x86/relocInfo_x86.hpp index d3f213a6686e1..a4a7ec7548f48 100644 --- a/src/hotspot/cpu/x86/relocInfo_x86.hpp +++ b/src/hotspot/cpu/x86/relocInfo_x86.hpp @@ -31,13 +31,9 @@ // Intel instructions are byte-aligned. offset_unit = 1, - // Encodes Assembler::disp32_operand vs. Assembler::imm32_operand. -#ifndef AMD64 - format_width = 1 -#else + // Encodes Assembler::disp32_operand vs. Assembler::imm32_operand // vs Assembler::narrow_oop_operand and ZGC barrier encodings. format_width = 3 -#endif }; public: diff --git a/src/hotspot/cpu/x86/runtime_x86_32.cpp b/src/hotspot/cpu/x86/runtime_x86_32.cpp deleted file mode 100644 index 9bd4239d665f3..0000000000000 --- a/src/hotspot/cpu/x86/runtime_x86_32.cpp +++ /dev/null @@ -1,331 +0,0 @@ -/* - * Copyright (c) 1998, 2024, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
- * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "precompiled.hpp" -#ifdef COMPILER2 -#include "asm/macroAssembler.hpp" -#include "asm/macroAssembler.inline.hpp" -#include "code/vmreg.hpp" -#include "compiler/oopMap.hpp" -#include "interpreter/interpreter.hpp" -#include "memory/resourceArea.hpp" -#include "opto/runtime.hpp" -#include "runtime/sharedRuntime.hpp" -#include "runtime/stubRoutines.hpp" -#include "runtime/vframeArray.hpp" -#include "utilities/globalDefinitions.hpp" -#include "vmreg_x86.inline.hpp" -#endif - - -#define __ masm-> - -//------------------------------generate_uncommon_trap_blob-------------------- -void OptoRuntime::generate_uncommon_trap_blob() { - // allocate space for the code - ResourceMark rm; - // setup code generation tools - CodeBuffer buffer("uncommon_trap_blob", 512, 512); - MacroAssembler* masm = new MacroAssembler(&buffer); - - enum frame_layout { - arg0_off, // thread sp + 0 // Arg location for - arg1_off, // unloaded_class_index sp + 1 // calling C - arg2_off, // exec_mode sp + 2 - // The frame sender code expects that rbp will be in the "natural" place and - // will override any oopMap setting for it. We must therefore force the layout - // so that it agrees with the frame sender code. - rbp_off, // callee saved register sp + 3 - return_off, // slot for return address sp + 4 - framesize - }; - - address start = __ pc(); - - // Push self-frame. - __ subptr(rsp, return_off*wordSize); // Epilog! - - // rbp, is an implicitly saved callee saved register (i.e. the calling - // convention will save restore it in prolog/epilog) Other than that - // there are no callee save registers no that adapter frames are gone. 
- __ movptr(Address(rsp, rbp_off*wordSize), rbp); - - // Clear the floating point exception stack - __ empty_FPU_stack(); - - // set last_Java_sp - __ get_thread(rdx); - __ set_last_Java_frame(rdx, noreg, noreg, nullptr, noreg); - - // Call C code. Need thread but NOT official VM entry - // crud. We cannot block on this call, no GC can happen. Call should - // capture callee-saved registers as well as return values. - __ movptr(Address(rsp, arg0_off*wordSize), rdx); - // argument already in ECX - __ movl(Address(rsp, arg1_off*wordSize),rcx); - __ movl(Address(rsp, arg2_off*wordSize), Deoptimization::Unpack_uncommon_trap); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap))); - - // Set an oopmap for the call site - OopMapSet *oop_maps = new OopMapSet(); - OopMap* map = new OopMap( framesize, 0 ); - // No oopMap for rbp, it is known implicitly - - oop_maps->add_gc_map( __ pc()-start, map); - - __ get_thread(rcx); - - __ reset_last_Java_frame(rcx, false); - - // Load UnrollBlock into EDI - __ movptr(rdi, rax); - -#ifdef ASSERT - { Label L; - __ cmpptr(Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset()), - (int32_t)Deoptimization::Unpack_uncommon_trap); - __ jcc(Assembler::equal, L); - __ stop("OptoRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap"); - __ bind(L); - } -#endif - - // Pop all the frames we must move/replace. - // - // Frame picture (youngest to oldest) - // 1: self-frame (no frame link) - // 2: deopting frame (no frame link) - // 3: caller of deopting frame (could be compiled/interpreted). - - // Pop self-frame. We have no frame, and must rely only on EAX and ESP. - __ addptr(rsp,(framesize-1)*wordSize); // Epilog! 
- - // Pop deoptimized frame - __ movl2ptr(rcx, Address(rdi,Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset())); - __ addptr(rsp, rcx); - - // sp should be pointing at the return address to the caller (3) - - // Pick up the initial fp we should save - // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved) - __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset())); - -#ifdef ASSERT - // Compilers generate code that bang the stack by as much as the - // interpreter would need. So this stack banging should never - // trigger a fault. Verify that it does not on non product builds. - __ movl(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset())); - __ bang_stack_size(rbx, rcx); -#endif - - // Load array of frame pcs into ECX - __ movl(rcx,Address(rdi,Deoptimization::UnrollBlock::frame_pcs_offset())); - - __ pop(rsi); // trash the pc - - // Load array of frame sizes into ESI - __ movptr(rsi,Address(rdi,Deoptimization::UnrollBlock::frame_sizes_offset())); - - Address counter(rdi, Deoptimization::UnrollBlock::counter_temp_offset()); - - __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset())); - __ movl(counter, rbx); - - // Now adjust the caller's stack to make up for the extra locals - // but record the original sp so that we can save it in the skeletal interpreter - // frame and the stack walking of interpreter_sender will get the unextended sp - // value and not the "real" sp value. 
- - Address sp_temp(rdi, Deoptimization::UnrollBlock::sender_sp_temp_offset()); - __ movptr(sp_temp, rsp); - __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::caller_adjustment_offset())); - __ subptr(rsp, rbx); - - // Push interpreter frames in a loop - Label loop; - __ bind(loop); - __ movptr(rbx, Address(rsi, 0)); // Load frame size - __ subptr(rbx, 2*wordSize); // we'll push pc and rbp, by hand - __ pushptr(Address(rcx, 0)); // save return address - __ enter(); // save old & set new rbp, - __ subptr(rsp, rbx); // Prolog! - __ movptr(rbx, sp_temp); // sender's sp - // This value is corrected by layout_activation_impl - __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD ); - __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), rbx); // Make it walkable - __ movptr(sp_temp, rsp); // pass to next frame - __ addptr(rsi, wordSize); // Bump array pointer (sizes) - __ addptr(rcx, wordSize); // Bump array pointer (pcs) - __ decrementl(counter); // decrement counter - __ jcc(Assembler::notZero, loop); - __ pushptr(Address(rcx, 0)); // save final return address - - // Re-push self-frame - __ enter(); // save old & set new rbp, - __ subptr(rsp, (framesize-2) * wordSize); // Prolog! - - - // set last_Java_sp, last_Java_fp - __ get_thread(rdi); - __ set_last_Java_frame(rdi, noreg, rbp, nullptr, noreg); - - // Call C code. Need thread but NOT official VM entry - // crud. We cannot block on this call, no GC can happen. Call should - // restore return values to their stack-slots with the new SP. - __ movptr(Address(rsp,arg0_off*wordSize),rdi); - __ movl(Address(rsp,arg1_off*wordSize), Deoptimization::Unpack_uncommon_trap); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames))); - // Set an oopmap for the call site - oop_maps->add_gc_map( __ pc()-start, new OopMap( framesize, 0 ) ); - - __ get_thread(rdi); - __ reset_last_Java_frame(rdi, true); - - // Pop self-frame. 
- __ leave(); // Epilog! - - // Jump to interpreter - __ ret(0); - - // ------------- - // make sure all code is generated - masm->flush(); - - _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize); -} - -//------------------------------generate_exception_blob--------------------------- -// creates exception blob at the end -// Using exception blob, this code is jumped from a compiled method. -// -// Given an exception pc at a call we call into the runtime for the -// handler in this method. This handler might merely restore state -// (i.e. callee save registers) unwind the frame and jump to the -// exception handler for the nmethod if there is no Java level handler -// for the nmethod. -// -// This code is entered with a jmp. -// -// Arguments: -// rax: exception oop -// rdx: exception pc -// -// Results: -// rax: exception oop -// rdx: exception pc in caller or ??? -// destination: exception handler of caller -// -// Note: the exception pc MUST be at a call (precise debug information) -// Only register rax, rdx, rcx are not callee saved. -// - -void OptoRuntime::generate_exception_blob() { - - // Capture info about frame layout - enum layout { - thread_off, // last_java_sp - // The frame sender code expects that rbp will be in the "natural" place and - // will override any oopMap setting for it. We must therefore force the layout - // so that it agrees with the frame sender code. - rbp_off, - return_off, // slot for return address - framesize - }; - - // allocate space for the code - ResourceMark rm; - // setup code generation tools - CodeBuffer buffer("exception_blob", 512, 512); - MacroAssembler* masm = new MacroAssembler(&buffer); - - OopMapSet *oop_maps = new OopMapSet(); - - address start = __ pc(); - - __ push(rdx); - __ subptr(rsp, return_off * wordSize); // Prolog! - - // rbp, location is implicitly known - __ movptr(Address(rsp,rbp_off *wordSize), rbp); - - // Store exception in Thread object. 
We cannot pass any arguments to the - // handle_exception call, since we do not want to make any assumption - // about the size of the frame where the exception happened in. - __ get_thread(rcx); - __ movptr(Address(rcx, JavaThread::exception_oop_offset()), rax); - __ movptr(Address(rcx, JavaThread::exception_pc_offset()), rdx); - - // This call does all the hard work. It checks if an exception handler - // exists in the method. - // If so, it returns the handler address. - // If not, it prepares for stack-unwinding, restoring the callee-save - // registers of the frame being removed. - // - __ movptr(Address(rsp, thread_off * wordSize), rcx); // Thread is first argument - __ set_last_Java_frame(rcx, noreg, noreg, nullptr, noreg); - - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C))); - - // No registers to map, rbp is known implicitly - oop_maps->add_gc_map( __ pc() - start, new OopMap( framesize, 0 )); - __ get_thread(rcx); - __ reset_last_Java_frame(rcx, false); - - // Restore callee-saved registers - __ movptr(rbp, Address(rsp, rbp_off * wordSize)); - - __ addptr(rsp, return_off * wordSize); // Epilog! - __ pop(rdx); // Exception pc - - // rax: exception handler for given - - // We have a handler in rax, (could be deopt blob) - // rdx - throwing pc, deopt blob will need it. - - __ push(rax); - - // Get the exception - __ movptr(rax, Address(rcx, JavaThread::exception_oop_offset())); - // Get the exception pc in case we are deoptimized - __ movptr(rdx, Address(rcx, JavaThread::exception_pc_offset())); -#ifdef ASSERT - __ movptr(Address(rcx, JavaThread::exception_handler_pc_offset()), NULL_WORD); - __ movptr(Address(rcx, JavaThread::exception_pc_offset()), NULL_WORD); -#endif - // Clear the exception oop so GC no longer processes it as a root. 
- __ movptr(Address(rcx, JavaThread::exception_oop_offset()), NULL_WORD); - - __ pop(rcx); - - // rax: exception oop - // rcx: exception handler - // rdx: exception pc - __ jmp (rcx); - - // ------------- - // make sure all code is generated - masm->flush(); - - _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize); -} diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86.cpp index ebdd47f3a3f87..c35ade8744eb1 100644 --- a/src/hotspot/cpu/x86/sharedRuntime_x86.cpp +++ b/src/hotspot/cpu/x86/sharedRuntime_x86.cpp @@ -73,22 +73,14 @@ void SharedRuntime::inline_check_hashcode_from_object_header(MacroAssembler* mas __ jcc(Assembler::zero, slowCase); } - // get hash -#ifdef _LP64 // Read the header and build a mask to get its hash field. // Depend on hash_mask being at most 32 bits and avoid the use of hash_mask_in_place // because it could be larger than 32 bits in a 64-bit vm. See markWord.hpp. __ shrptr(result, markWord::hash_shift); __ andptr(result, markWord::hash_mask); -#else - __ andptr(result, markWord::hash_mask_in_place); -#endif //_LP64 // test if hashCode exists __ jcc(Assembler::zero, slowCase); -#ifndef _LP64 - __ shrptr(result, markWord::hash_shift); -#endif __ ret(0); __ bind(slowCase); } diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp deleted file mode 100644 index a6a662b3d1e0c..0000000000000 --- a/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp +++ /dev/null @@ -1,2855 +0,0 @@ -/* - * Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. 
- * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "precompiled.hpp" -#include "asm/macroAssembler.hpp" -#include "asm/macroAssembler.inline.hpp" -#include "code/compiledIC.hpp" -#include "code/debugInfoRec.hpp" -#include "code/nativeInst.hpp" -#include "code/vtableStubs.hpp" -#include "compiler/oopMap.hpp" -#include "gc/shared/gcLocker.hpp" -#include "gc/shared/barrierSet.hpp" -#include "gc/shared/barrierSetAssembler.hpp" -#include "interpreter/interpreter.hpp" -#include "logging/log.hpp" -#include "memory/resourceArea.hpp" -#include "oops/klass.inline.hpp" -#include "prims/methodHandles.hpp" -#include "runtime/jniHandles.hpp" -#include "runtime/safepointMechanism.hpp" -#include "runtime/sharedRuntime.hpp" -#include "runtime/signature.hpp" -#include "runtime/stubRoutines.hpp" -#include "runtime/timerTrace.hpp" -#include "runtime/vframeArray.hpp" -#include "runtime/vm_version.hpp" -#include "utilities/align.hpp" -#include "vmreg_x86.inline.hpp" -#ifdef COMPILER1 -#include "c1/c1_Runtime1.hpp" -#endif -#ifdef COMPILER2 -#include "opto/runtime.hpp" -#endif - -#define __ masm-> - -#ifdef PRODUCT -#define BLOCK_COMMENT(str) /* nothing */ -#else -#define BLOCK_COMMENT(str) __ block_comment(str) -#endif // PRODUCT - -const int StackAlignmentInSlots = StackAlignmentInBytes / 
VMRegImpl::stack_slot_size; - -class RegisterSaver { - // Capture info about frame layout -#define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off - enum layout { - fpu_state_off = 0, - fpu_state_end = fpu_state_off+FPUStateSizeInWords, - st0_off, st0H_off, - st1_off, st1H_off, - st2_off, st2H_off, - st3_off, st3H_off, - st4_off, st4H_off, - st5_off, st5H_off, - st6_off, st6H_off, - st7_off, st7H_off, - xmm_off, - DEF_XMM_OFFS(0), - DEF_XMM_OFFS(1), - DEF_XMM_OFFS(2), - DEF_XMM_OFFS(3), - DEF_XMM_OFFS(4), - DEF_XMM_OFFS(5), - DEF_XMM_OFFS(6), - DEF_XMM_OFFS(7), - flags_off = xmm7_off + 16/BytesPerInt + 1, // 16-byte stack alignment fill word - rdi_off, - rsi_off, - ignore_off, // extra copy of rbp, - rsp_off, - rbx_off, - rdx_off, - rcx_off, - rax_off, - // The frame sender code expects that rbp will be in the "natural" place and - // will override any oopMap setting for it. We must therefore force the layout - // so that it agrees with the frame sender code. 
- rbp_off, - return_off, // slot for return address - reg_save_size }; - enum { FPU_regs_live = flags_off - fpu_state_end }; - - public: - - static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, - int* total_frame_words, bool verify_fpu = true, bool save_vectors = false); - static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); - - static int rax_offset() { return rax_off; } - static int rbx_offset() { return rbx_off; } - - // Offsets into the register save area - // Used by deoptimization when it is managing result register - // values on its own - - static int raxOffset(void) { return rax_off; } - static int rdxOffset(void) { return rdx_off; } - static int rbxOffset(void) { return rbx_off; } - static int xmm0Offset(void) { return xmm0_off; } - // This really returns a slot in the fp save area, which one is not important - static int fpResultOffset(void) { return st0_off; } - - // During deoptimization only the result register need to be restored - // all the other values have already been extracted. 
- - static void restore_result_registers(MacroAssembler* masm); - -}; - -OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, - int* total_frame_words, bool verify_fpu, bool save_vectors) { - int num_xmm_regs = XMMRegister::number_of_registers; - int ymm_bytes = num_xmm_regs * 16; - int zmm_bytes = num_xmm_regs * 32; -#ifdef COMPILER2 - int opmask_state_bytes = KRegister::number_of_registers * 8; - if (save_vectors) { - assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX"); - assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported"); - // Save upper half of YMM registers - int vect_bytes = ymm_bytes; - if (UseAVX > 2) { - // Save upper half of ZMM registers as well - vect_bytes += zmm_bytes; - additional_frame_words += opmask_state_bytes / wordSize; - } - additional_frame_words += vect_bytes / wordSize; - } -#else - assert(!save_vectors, "vectors are generated only by C2"); -#endif - int frame_size_in_bytes = (reg_save_size + additional_frame_words) * wordSize; - int frame_words = frame_size_in_bytes / wordSize; - *total_frame_words = frame_words; - - assert(FPUStateSizeInWords == 27, "update stack layout"); - - // save registers, fpu state, and flags - // We assume caller has already has return address slot on the stack - // We push epb twice in this sequence because we want the real rbp, - // to be under the return like a normal enter and we want to use pusha - // We push by hand instead of using push. - __ enter(); - __ pusha(); - __ pushf(); - __ subptr(rsp,FPU_regs_live*wordSize); // Push FPU registers space - __ push_FPU_state(); // Save FPU state & init - - if (verify_fpu) { - // Some stubs may have non standard FPU control word settings so - // only check and reset the value when it required to be the - // standard value. The safepoint blob in particular can be used - // in methods which are using the 24 bit control word for - // optimized float math. 
- -#ifdef ASSERT - // Make sure the control word has the expected value - Label ok; - __ cmpw(Address(rsp, 0), StubRoutines::x86::fpu_cntrl_wrd_std()); - __ jccb(Assembler::equal, ok); - __ stop("corrupted control word detected"); - __ bind(ok); -#endif - - // Reset the control word to guard against exceptions being unmasked - // since fstp_d can cause FPU stack underflow exceptions. Write it - // into the on stack copy and then reload that to make sure that the - // current and future values are correct. - __ movw(Address(rsp, 0), StubRoutines::x86::fpu_cntrl_wrd_std()); - } - - __ frstor(Address(rsp, 0)); - if (!verify_fpu) { - // Set the control word so that exceptions are masked for the - // following code. - __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); - } - - int off = st0_off; - int delta = st1_off - off; - - // Save the FPU registers in de-opt-able form - for (int n = 0; n < FloatRegister::number_of_registers; n++) { - __ fstp_d(Address(rsp, off*wordSize)); - off += delta; - } - - off = xmm0_off; - delta = xmm1_off - off; - if(UseSSE == 1) { - // Save the XMM state - for (int n = 0; n < num_xmm_regs; n++) { - __ movflt(Address(rsp, off*wordSize), as_XMMRegister(n)); - off += delta; - } - } else if(UseSSE >= 2) { - // Save whole 128bit (16 bytes) XMM registers - for (int n = 0; n < num_xmm_regs; n++) { - __ movdqu(Address(rsp, off*wordSize), as_XMMRegister(n)); - off += delta; - } - } - -#ifdef COMPILER2 - if (save_vectors) { - __ subptr(rsp, ymm_bytes); - // Save upper half of YMM registers - for (int n = 0; n < num_xmm_regs; n++) { - __ vextractf128_high(Address(rsp, n*16), as_XMMRegister(n)); - } - if (UseAVX > 2) { - __ subptr(rsp, zmm_bytes); - // Save upper half of ZMM registers - for (int n = 0; n < num_xmm_regs; n++) { - __ vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n)); - } - __ subptr(rsp, opmask_state_bytes); - // Save opmask registers - for (int n = 0; n < KRegister::number_of_registers; n++) { - __ 
kmov(Address(rsp, n*8), as_KRegister(n)); - } - } - } -#else - assert(!save_vectors, "vectors are generated only by C2"); -#endif - - __ vzeroupper(); - - // Set an oopmap for the call site. This oopmap will map all - // oop-registers and debug-info registers as callee-saved. This - // will allow deoptimization at this safepoint to find all possible - // debug-info recordings, as well as let GC find all oops. - - OopMapSet *oop_maps = new OopMapSet(); - OopMap* map = new OopMap( frame_words, 0 ); - -#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words) -#define NEXTREG(x) (x)->as_VMReg()->next() - - map->set_callee_saved(STACK_OFFSET(rax_off), rax->as_VMReg()); - map->set_callee_saved(STACK_OFFSET(rcx_off), rcx->as_VMReg()); - map->set_callee_saved(STACK_OFFSET(rdx_off), rdx->as_VMReg()); - map->set_callee_saved(STACK_OFFSET(rbx_off), rbx->as_VMReg()); - // rbp, location is known implicitly, no oopMap - map->set_callee_saved(STACK_OFFSET(rsi_off), rsi->as_VMReg()); - map->set_callee_saved(STACK_OFFSET(rdi_off), rdi->as_VMReg()); - - // %%% This is really a waste but we'll keep things as they were for now for the upper component - off = st0_off; - delta = st1_off - off; - for (int n = 0; n < FloatRegister::number_of_registers; n++) { - FloatRegister freg_name = as_FloatRegister(n); - map->set_callee_saved(STACK_OFFSET(off), freg_name->as_VMReg()); - map->set_callee_saved(STACK_OFFSET(off+1), NEXTREG(freg_name)); - off += delta; - } - off = xmm0_off; - delta = xmm1_off - off; - for (int n = 0; n < num_xmm_regs; n++) { - XMMRegister xmm_name = as_XMMRegister(n); - map->set_callee_saved(STACK_OFFSET(off), xmm_name->as_VMReg()); - map->set_callee_saved(STACK_OFFSET(off+1), NEXTREG(xmm_name)); - off += delta; - } -#undef NEXTREG -#undef STACK_OFFSET - - return map; -} - -void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { - int opmask_state_bytes = 0; - int additional_frame_bytes = 0; - int num_xmm_regs = 
XMMRegister::number_of_registers; - int ymm_bytes = num_xmm_regs * 16; - int zmm_bytes = num_xmm_regs * 32; - // Recover XMM & FPU state -#ifdef COMPILER2 - if (restore_vectors) { - assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX"); - assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported"); - // Save upper half of YMM registers - additional_frame_bytes = ymm_bytes; - if (UseAVX > 2) { - // Save upper half of ZMM registers as well - additional_frame_bytes += zmm_bytes; - opmask_state_bytes = KRegister::number_of_registers * 8; - additional_frame_bytes += opmask_state_bytes; - } - } -#else - assert(!restore_vectors, "vectors are generated only by C2"); -#endif - - int off = xmm0_off; - int delta = xmm1_off - off; - - __ vzeroupper(); - - if (UseSSE == 1) { - // Restore XMM registers - assert(additional_frame_bytes == 0, ""); - for (int n = 0; n < num_xmm_regs; n++) { - __ movflt(as_XMMRegister(n), Address(rsp, off*wordSize)); - off += delta; - } - } else if (UseSSE >= 2) { - // Restore whole 128bit (16 bytes) XMM registers. Do this before restoring YMM and - // ZMM because the movdqu instruction zeros the upper part of the XMM register. - for (int n = 0; n < num_xmm_regs; n++) { - __ movdqu(as_XMMRegister(n), Address(rsp, off*wordSize+additional_frame_bytes)); - off += delta; - } - } - - if (restore_vectors) { - off = additional_frame_bytes - ymm_bytes; - // Restore upper half of YMM registers. - for (int n = 0; n < num_xmm_regs; n++) { - __ vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16+off)); - } - if (UseAVX > 2) { - // Restore upper half of ZMM registers. 
- off = opmask_state_bytes; - for (int n = 0; n < num_xmm_regs; n++) { - __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32+off)); - } - for (int n = 0; n < KRegister::number_of_registers; n++) { - __ kmov(as_KRegister(n), Address(rsp, n*8)); - } - } - __ addptr(rsp, additional_frame_bytes); - } - - __ pop_FPU_state(); - __ addptr(rsp, FPU_regs_live*wordSize); // Pop FPU registers - - __ popf(); - __ popa(); - // Get the rbp, described implicitly by the frame sender code (no oopMap) - __ pop(rbp); -} - -void RegisterSaver::restore_result_registers(MacroAssembler* masm) { - - // Just restore result register. Only used by deoptimization. By - // now any callee save register that needs to be restore to a c2 - // caller of the deoptee has been extracted into the vframeArray - // and will be stuffed into the c2i adapter we create for later - // restoration so only result registers need to be restored here. - // - - __ frstor(Address(rsp, 0)); // Restore fpu state - - // Recover XMM & FPU state - if( UseSSE == 1 ) { - __ movflt(xmm0, Address(rsp, xmm0_off*wordSize)); - } else if( UseSSE >= 2 ) { - __ movdbl(xmm0, Address(rsp, xmm0_off*wordSize)); - } - __ movptr(rax, Address(rsp, rax_off*wordSize)); - __ movptr(rdx, Address(rsp, rdx_off*wordSize)); - // Pop all of the register save are off the stack except the return address - __ addptr(rsp, return_off * wordSize); -} - -// Is vector's size (in bytes) bigger than a size saved by default? -// 16 bytes XMM registers are saved by default using SSE2 movdqu instructions. -// Note, MaxVectorSize == 0 with UseSSE < 2 and vectors are not generated. -bool SharedRuntime::is_wide_vector(int size) { - return size > 16; -} - -// The java_calling_convention describes stack locations as ideal slots on -// a frame with no abi restrictions. Since we must observe abi restrictions -// (like the placement of the register window) the slots must be biased by -// the following value. 
-static int reg2offset_in(VMReg r) { - // Account for saved rbp, and return address - // This should really be in_preserve_stack_slots - return (r->reg2stack() + 2) * VMRegImpl::stack_slot_size; -} - -static int reg2offset_out(VMReg r) { - return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; -} - -// --------------------------------------------------------------------------- -// Read the array of BasicTypes from a signature, and compute where the -// arguments should go. Values in the VMRegPair regs array refer to 4-byte -// quantities. Values less than SharedInfo::stack0 are registers, those above -// refer to 4-byte stack slots. All stack slots are based off of the stack pointer -// as framesizes are fixed. -// VMRegImpl::stack0 refers to the first slot 0(sp). -// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. -// Register up to Register::number_of_registers are the 32-bit -// integer registers. - -// Pass first two oop/int args in registers ECX and EDX. -// Pass first two float/double args in registers XMM0 and XMM1. -// Doubles have precedence, so if you pass a mix of floats and doubles -// the doubles will grab the registers before the floats will. - -// Note: the INPUTS in sig_bt are in units of Java argument words, which are -// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit -// units regardless of build. Of course for i486 there is no 64 bit build - - -// --------------------------------------------------------------------------- -// The compiled Java calling convention. -// Pass first two oop/int args in registers ECX and EDX. -// Pass first two float/double args in registers XMM0 and XMM1. -// Doubles have precedence, so if you pass a mix of floats and doubles -// the doubles will grab the registers before the floats will. 
-int SharedRuntime::java_calling_convention(const BasicType *sig_bt, - VMRegPair *regs, - int total_args_passed) { - uint stack = 0; // Starting stack position for args on stack - - - // Pass first two oop/int args in registers ECX and EDX. - uint reg_arg0 = 9999; - uint reg_arg1 = 9999; - - // Pass first two float/double args in registers XMM0 and XMM1. - // Doubles have precedence, so if you pass a mix of floats and doubles - // the doubles will grab the registers before the floats will. - // CNC - TURNED OFF FOR non-SSE. - // On Intel we have to round all doubles (and most floats) at - // call sites by storing to the stack in any case. - // UseSSE=0 ==> Don't Use ==> 9999+0 - // UseSSE=1 ==> Floats only ==> 9999+1 - // UseSSE>=2 ==> Floats or doubles ==> 9999+2 - enum { fltarg_dontuse = 9999+0, fltarg_float_only = 9999+1, fltarg_flt_dbl = 9999+2 }; - uint fargs = (UseSSE>=2) ? 2 : UseSSE; - uint freg_arg0 = 9999+fargs; - uint freg_arg1 = 9999+fargs; - - // Pass doubles & longs aligned on the stack. First count stack slots for doubles - int i; - for( i = 0; i < total_args_passed; i++) { - if( sig_bt[i] == T_DOUBLE ) { - // first 2 doubles go in registers - if( freg_arg0 == fltarg_flt_dbl ) freg_arg0 = i; - else if( freg_arg1 == fltarg_flt_dbl ) freg_arg1 = i; - else // Else double is passed low on the stack to be aligned. - stack += 2; - } else if( sig_bt[i] == T_LONG ) { - stack += 2; - } - } - int dstack = 0; // Separate counter for placing doubles - - // Now pick where all else goes. 
- for( i = 0; i < total_args_passed; i++) { - // From the type and the argument number (count) compute the location - switch( sig_bt[i] ) { - case T_SHORT: - case T_CHAR: - case T_BYTE: - case T_BOOLEAN: - case T_INT: - case T_ARRAY: - case T_OBJECT: - case T_ADDRESS: - if( reg_arg0 == 9999 ) { - reg_arg0 = i; - regs[i].set1(rcx->as_VMReg()); - } else if( reg_arg1 == 9999 ) { - reg_arg1 = i; - regs[i].set1(rdx->as_VMReg()); - } else { - regs[i].set1(VMRegImpl::stack2reg(stack++)); - } - break; - case T_FLOAT: - if( freg_arg0 == fltarg_flt_dbl || freg_arg0 == fltarg_float_only ) { - freg_arg0 = i; - regs[i].set1(xmm0->as_VMReg()); - } else if( freg_arg1 == fltarg_flt_dbl || freg_arg1 == fltarg_float_only ) { - freg_arg1 = i; - regs[i].set1(xmm1->as_VMReg()); - } else { - regs[i].set1(VMRegImpl::stack2reg(stack++)); - } - break; - case T_LONG: - assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" ); - regs[i].set2(VMRegImpl::stack2reg(dstack)); - dstack += 2; - break; - case T_DOUBLE: - assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" ); - if( freg_arg0 == (uint)i ) { - regs[i].set2(xmm0->as_VMReg()); - } else if( freg_arg1 == (uint)i ) { - regs[i].set2(xmm1->as_VMReg()); - } else { - regs[i].set2(VMRegImpl::stack2reg(dstack)); - dstack += 2; - } - break; - case T_VOID: regs[i].set_bad(); break; - break; - default: - ShouldNotReachHere(); - break; - } - } - - return stack; -} - -// Patch the callers callsite with entry to compiled code if it exists. -static void patch_callers_callsite(MacroAssembler *masm) { - Label L; - __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD); - __ jcc(Assembler::equal, L); - // Schedule the branch target address early. 
- // Call into the VM to patch the caller, then jump to compiled callee - // rax, isn't live so capture return address while we easily can - __ movptr(rax, Address(rsp, 0)); - __ pusha(); - __ pushf(); - - if (UseSSE == 1) { - __ subptr(rsp, 2*wordSize); - __ movflt(Address(rsp, 0), xmm0); - __ movflt(Address(rsp, wordSize), xmm1); - } - if (UseSSE >= 2) { - __ subptr(rsp, 4*wordSize); - __ movdbl(Address(rsp, 0), xmm0); - __ movdbl(Address(rsp, 2*wordSize), xmm1); - } -#ifdef COMPILER2 - // C2 may leave the stack dirty if not in SSE2+ mode - if (UseSSE >= 2) { - __ verify_FPU(0, "c2i transition should have clean FPU stack"); - } else { - __ empty_FPU_stack(); - } -#endif /* COMPILER2 */ - - // VM needs caller's callsite - __ push(rax); - // VM needs target method - __ push(rbx); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite))); - __ addptr(rsp, 2*wordSize); - - if (UseSSE == 1) { - __ movflt(xmm0, Address(rsp, 0)); - __ movflt(xmm1, Address(rsp, wordSize)); - __ addptr(rsp, 2*wordSize); - } - if (UseSSE >= 2) { - __ movdbl(xmm0, Address(rsp, 0)); - __ movdbl(xmm1, Address(rsp, 2*wordSize)); - __ addptr(rsp, 4*wordSize); - } - - __ popf(); - __ popa(); - __ bind(L); -} - - -static void move_c2i_double(MacroAssembler *masm, XMMRegister r, int st_off) { - int next_off = st_off - Interpreter::stackElementSize; - __ movdbl(Address(rsp, next_off), r); -} - -static void gen_c2i_adapter(MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs, - Label& skip_fixup) { - // Before we get into the guts of the C2I adapter, see if we should be here - // at all. We've come from compiled code and are attempting to jump to the - // interpreter, which means the caller made a static call to get here - // (vcalls always get a compiled target if there is one). Check for a - // compiled target. If there is one, we need to patch the caller's call. 
- patch_callers_callsite(masm); - - __ bind(skip_fixup); - -#ifdef COMPILER2 - // C2 may leave the stack dirty if not in SSE2+ mode - if (UseSSE >= 2) { - __ verify_FPU(0, "c2i transition should have clean FPU stack"); - } else { - __ empty_FPU_stack(); - } -#endif /* COMPILER2 */ - - // Since all args are passed on the stack, total_args_passed * interpreter_ - // stack_element_size is the - // space we need. - int extraspace = total_args_passed * Interpreter::stackElementSize; - - // Get return address - __ pop(rax); - - // set senderSP value - __ movptr(rsi, rsp); - - __ subptr(rsp, extraspace); - - // Now write the args into the outgoing interpreter space - for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); - continue; - } - - // st_off points to lowest address on stack. - int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; - int next_off = st_off - Interpreter::stackElementSize; - - // Say 4 args: - // i st_off - // 0 12 T_LONG - // 1 8 T_VOID - // 2 4 T_OBJECT - // 3 0 T_BOOL - VMReg r_1 = regs[i].first(); - VMReg r_2 = regs[i].second(); - if (!r_1->is_valid()) { - assert(!r_2->is_valid(), ""); - continue; - } - - if (r_1->is_stack()) { - // memory to memory use fpu stack top - int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; - - if (!r_2->is_valid()) { - __ movl(rdi, Address(rsp, ld_off)); - __ movptr(Address(rsp, st_off), rdi); - } else { - - // ld_off == LSW, ld_off+VMRegImpl::stack_slot_size == MSW - // st_off == MSW, st_off-wordSize == LSW - - __ movptr(rdi, Address(rsp, ld_off)); - __ movptr(Address(rsp, next_off), rdi); - __ movptr(rdi, Address(rsp, ld_off + wordSize)); - __ movptr(Address(rsp, st_off), rdi); - } - } else if (r_1->is_Register()) { - Register r = r_1->as_Register(); - if (!r_2->is_valid()) { - __ movl(Address(rsp, st_off), r); - } else { - // long/double in gpr - 
ShouldNotReachHere(); - } - } else { - assert(r_1->is_XMMRegister(), ""); - if (!r_2->is_valid()) { - __ movflt(Address(rsp, st_off), r_1->as_XMMRegister()); - } else { - assert(sig_bt[i] == T_DOUBLE || sig_bt[i] == T_LONG, "wrong type"); - move_c2i_double(masm, r_1->as_XMMRegister(), st_off); - } - } - } - - // Schedule the branch target address early. - __ movptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset()))); - // And repush original return address - __ push(rax); - __ jmp(rcx); -} - - -static void move_i2c_double(MacroAssembler *masm, XMMRegister r, Register saved_sp, int ld_off) { - int next_val_off = ld_off - Interpreter::stackElementSize; - __ movdbl(r, Address(saved_sp, next_val_off)); -} - -static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg, - address code_start, address code_end, - Label& L_ok) { - Label L_fail; - __ lea(temp_reg, AddressLiteral(code_start, relocInfo::none)); - __ cmpptr(pc_reg, temp_reg); - __ jcc(Assembler::belowEqual, L_fail); - __ lea(temp_reg, AddressLiteral(code_end, relocInfo::none)); - __ cmpptr(pc_reg, temp_reg); - __ jcc(Assembler::below, L_ok); - __ bind(L_fail); -} - -void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs) { - // Note: rsi contains the senderSP on entry. We must preserve it since - // we may do a i2c -> c2i transition if we lose a race where compiled - // code goes non-entrant while we get args ready. - - // Adapters can be frameless because they do not require the caller - // to perform additional cleanup work, such as correcting the stack pointer. - // An i2c adapter is frameless because the *caller* frame, which is interpreted, - // routinely repairs its own stack pointer (from interpreter_frame_last_sp), - // even if a callee has modified the stack pointer. 
- // A c2i adapter is frameless because the *callee* frame, which is interpreted, - // routinely repairs its caller's stack pointer (from sender_sp, which is set - // up via the senderSP register). - // In other words, if *either* the caller or callee is interpreted, we can - // get the stack pointer repaired after a call. - // This is why c2i and i2c adapters cannot be indefinitely composed. - // In particular, if a c2i adapter were to somehow call an i2c adapter, - // both caller and callee would be compiled methods, and neither would - // clean up the stack pointer changes performed by the two adapters. - // If this happens, control eventually transfers back to the compiled - // caller, but with an uncorrected stack, causing delayed havoc. - - // Pick up the return address - __ movptr(rax, Address(rsp, 0)); - - if (VerifyAdapterCalls && - (Interpreter::code() != nullptr || StubRoutines::final_stubs_code() != nullptr)) { - // So, let's test for cascading c2i/i2c adapters right now. - // assert(Interpreter::contains($return_addr) || - // StubRoutines::contains($return_addr), - // "i2c adapter must return to an interpreter frame"); - __ block_comment("verify_i2c { "); - Label L_ok; - if (Interpreter::code() != nullptr) { - range_check(masm, rax, rdi, - Interpreter::code()->code_start(), Interpreter::code()->code_end(), - L_ok); - } - if (StubRoutines::initial_stubs_code() != nullptr) { - range_check(masm, rax, rdi, - StubRoutines::initial_stubs_code()->code_begin(), - StubRoutines::initial_stubs_code()->code_end(), - L_ok); - } - if (StubRoutines::final_stubs_code() != nullptr) { - range_check(masm, rax, rdi, - StubRoutines::final_stubs_code()->code_begin(), - StubRoutines::final_stubs_code()->code_end(), - L_ok); - } - const char* msg = "i2c adapter must return to an interpreter frame"; - __ block_comment(msg); - __ stop(msg); - __ bind(L_ok); - __ block_comment("} verify_i2ce "); - } - - // Must preserve original SP for loading incoming arguments because - // we 
need to align the outgoing SP for compiled code. - __ movptr(rdi, rsp); - - // Cut-out for having no stack args. Since up to 2 int/oop args are passed - // in registers, we will occasionally have no stack args. - int comp_words_on_stack = 0; - if (comp_args_on_stack) { - // Sig words on the stack are greater-than VMRegImpl::stack0. Those in - // registers are below. By subtracting stack0, we either get a negative - // number (all values in registers) or the maximum stack slot accessed. - // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg); - // Convert 4-byte stack slots to words. - comp_words_on_stack = align_up(comp_args_on_stack*4, wordSize)>>LogBytesPerWord; - // Round up to miminum stack alignment, in wordSize - comp_words_on_stack = align_up(comp_words_on_stack, 2); - __ subptr(rsp, comp_words_on_stack * wordSize); - } - - // Align the outgoing SP - __ andptr(rsp, -(StackAlignmentInBytes)); - - // push the return address on the stack (note that pushing, rather - // than storing it, yields the correct frame alignment for the callee) - __ push(rax); - - // Put saved SP in another register - const Register saved_sp = rax; - __ movptr(saved_sp, rdi); - - - // Will jump to the compiled code just as if compiled code was doing it. - // Pre-load the register-jump target early, to schedule it better. - __ movptr(rdi, Address(rbx, in_bytes(Method::from_compiled_offset()))); - - // Now generate the shuffle code. Pick up all register args and move the - // rest through the floating point stack top. - for (int i = 0; i < total_args_passed; i++) { - if (sig_bt[i] == T_VOID) { - // Longs and doubles are passed in native word order, but misaligned - // in the 32-bit build. - assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); - continue; - } - - // Pick up 0, 1 or 2 words from SP+offset. 
- - assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), - "scrambled load targets?"); - // Load in argument order going down. - int ld_off = (total_args_passed - i) * Interpreter::stackElementSize; - // Point to interpreter value (vs. tag) - int next_off = ld_off - Interpreter::stackElementSize; - // - // - // - VMReg r_1 = regs[i].first(); - VMReg r_2 = regs[i].second(); - if (!r_1->is_valid()) { - assert(!r_2->is_valid(), ""); - continue; - } - if (r_1->is_stack()) { - // Convert stack slot to an SP offset (+ wordSize to account for return address ) - int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize; - - // We can use rsi as a temp here because compiled code doesn't need rsi as an input - // and if we end up going thru a c2i because of a miss a reasonable value of rsi - // we be generated. - if (!r_2->is_valid()) { - // __ fld_s(Address(saved_sp, ld_off)); - // __ fstp_s(Address(rsp, st_off)); - __ movl(rsi, Address(saved_sp, ld_off)); - __ movptr(Address(rsp, st_off), rsi); - } else { - // Interpreter local[n] == MSW, local[n+1] == LSW however locals - // are accessed as negative so LSW is at LOW address - - // ld_off is MSW so get LSW - // st_off is LSW (i.e. reg.first()) - // __ fld_d(Address(saved_sp, next_off)); - // __ fstp_d(Address(rsp, st_off)); - // - // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE - // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case - // So we must adjust where to pick up the data to match the interpreter. 
- // - // Interpreter local[n] == MSW, local[n+1] == LSW however locals - // are accessed as negative so LSW is at LOW address - - // ld_off is MSW so get LSW - __ movptr(rsi, Address(saved_sp, next_off)); - __ movptr(Address(rsp, st_off), rsi); - __ movptr(rsi, Address(saved_sp, ld_off)); - __ movptr(Address(rsp, st_off + wordSize), rsi); - } - } else if (r_1->is_Register()) { // Register argument - Register r = r_1->as_Register(); - assert(r != rax, "must be different"); - if (r_2->is_valid()) { - // - // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE - // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case - // So we must adjust where to pick up the data to match the interpreter. - - // this can be a misaligned move - __ movptr(r, Address(saved_sp, next_off)); - assert(r_2->as_Register() != rax, "need another temporary register"); - // Remember r_1 is low address (and LSB on x86) - // So r_2 gets loaded from high address regardless of the platform - __ movptr(r_2->as_Register(), Address(saved_sp, ld_off)); - } else { - __ movl(r, Address(saved_sp, ld_off)); - } - } else { - assert(r_1->is_XMMRegister(), ""); - if (!r_2->is_valid()) { - __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off)); - } else { - move_i2c_double(masm, r_1->as_XMMRegister(), saved_sp, ld_off); - } - } - } - - // 6243940 We might end up in handle_wrong_method if - // the callee is deoptimized as we race thru here. If that - // happens we don't want to take a safepoint because the - // caller frame will look interpreted and arguments are now - // "compiled" so it is much better to make this transition - // invisible to the stack walking code. Unfortunately if - // we try and find the callee by normal means a safepoint - // is possible. So we stash the desired callee in the thread - // and the vm will find there should this case occur. 
- - __ get_thread(rax); - __ movptr(Address(rax, JavaThread::callee_target_offset()), rbx); - - // move Method* to rax, in case we end up in an c2i adapter. - // the c2i adapters expect Method* in rax, (c2) because c2's - // resolve stubs return the result (the method) in rax,. - // I'd love to fix this. - __ mov(rax, rbx); - - __ jmp(rdi); -} - -// --------------------------------------------------------------- -AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, - int total_args_passed, - int comp_args_on_stack, - const BasicType *sig_bt, - const VMRegPair *regs, - AdapterFingerPrint* fingerprint) { - address i2c_entry = __ pc(); - - gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); - - // ------------------------------------------------------------------------- - // Generate a C2I adapter. On entry we know rbx, holds the Method* during calls - // to the interpreter. The args start out packed in the compiled layout. They - // need to be unpacked into the interpreter layout. This will almost always - // require some stack space. We grow the current (compiled) stack, then repack - // the args. We finally end in a jump to the generic interpreter entry point. - // On exit from the interpreter, the interpreter will restore our SP (lest the - // compiled code, which relies solely on SP and not EBP, get sick). - - address c2i_unverified_entry = __ pc(); - Label skip_fixup; - - Register data = rax; - Register receiver = rcx; - Register temp = rbx; - - { - __ ic_check(1 /* end_alignment */); - __ movptr(rbx, Address(data, CompiledICData::speculated_method_offset())); - // Method might have been compiled since the call site was patched to - // interpreted if that is the case treat it as a miss so we can get - // the call site corrected. 
- __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), NULL_WORD); - __ jcc(Assembler::equal, skip_fixup); - } - - address c2i_entry = __ pc(); - - BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); - bs->c2i_entry_barrier(masm); - - gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); - - return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); -} - -int SharedRuntime::c_calling_convention(const BasicType *sig_bt, - VMRegPair *regs, - int total_args_passed) { - -// We return the amount of VMRegImpl stack slots we need to reserve for all -// the arguments NOT counting out_preserve_stack_slots. - - uint stack = 0; // All arguments on stack - - for( int i = 0; i < total_args_passed; i++) { - // From the type and the argument number (count) compute the location - switch( sig_bt[i] ) { - case T_BOOLEAN: - case T_CHAR: - case T_FLOAT: - case T_BYTE: - case T_SHORT: - case T_INT: - case T_OBJECT: - case T_ARRAY: - case T_ADDRESS: - case T_METADATA: - regs[i].set1(VMRegImpl::stack2reg(stack++)); - break; - case T_LONG: - case T_DOUBLE: // The stack numbering is reversed from Java - // Since C arguments do not get reversed, the ordering for - // doubles on the stack must be opposite the Java convention - assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" ); - regs[i].set2(VMRegImpl::stack2reg(stack)); - stack += 2; - break; - case T_VOID: regs[i].set_bad(); break; - default: - ShouldNotReachHere(); - break; - } - } - return stack; -} - -int SharedRuntime::vector_calling_convention(VMRegPair *regs, - uint num_bits, - uint total_args_passed) { - Unimplemented(); - return 0; -} - -// A simple move of integer like type -static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { - if (src.first()->is_stack()) { - if (dst.first()->is_stack()) { - // stack to stack - // __ ld(FP, reg2offset(src.first()), L5); - // __ st(L5, SP, 
reg2offset(dst.first())); - __ movl2ptr(rax, Address(rbp, reg2offset_in(src.first()))); - __ movptr(Address(rsp, reg2offset_out(dst.first())), rax); - } else { - // stack to reg - __ movl2ptr(dst.first()->as_Register(), Address(rbp, reg2offset_in(src.first()))); - } - } else if (dst.first()->is_stack()) { - // reg to stack - // no need to sign extend on 64bit - __ movptr(Address(rsp, reg2offset_out(dst.first())), src.first()->as_Register()); - } else { - if (dst.first() != src.first()) { - __ mov(dst.first()->as_Register(), src.first()->as_Register()); - } - } -} - -// An oop arg. Must pass a handle not the oop itself -static void object_move(MacroAssembler* masm, - OopMap* map, - int oop_handle_offset, - int framesize_in_slots, - VMRegPair src, - VMRegPair dst, - bool is_receiver, - int* receiver_offset) { - - // Because of the calling conventions we know that src can be a - // register or a stack location. dst can only be a stack location. - - assert(dst.first()->is_stack(), "must be stack"); - // must pass a handle. 
First figure out the location we use as a handle - - if (src.first()->is_stack()) { - // Oop is already on the stack as an argument - Register rHandle = rax; - Label nil; - __ xorptr(rHandle, rHandle); - __ cmpptr(Address(rbp, reg2offset_in(src.first())), NULL_WORD); - __ jcc(Assembler::equal, nil); - __ lea(rHandle, Address(rbp, reg2offset_in(src.first()))); - __ bind(nil); - __ movptr(Address(rsp, reg2offset_out(dst.first())), rHandle); - - int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); - map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); - if (is_receiver) { - *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; - } - } else { - // Oop is in a register we must store it to the space we reserve - // on the stack for oop_handles - const Register rOop = src.first()->as_Register(); - const Register rHandle = rax; - int oop_slot = (rOop == rcx ? 0 : 1) * VMRegImpl::slots_per_word + oop_handle_offset; - int offset = oop_slot*VMRegImpl::stack_slot_size; - Label skip; - __ movptr(Address(rsp, offset), rOop); - map->set_oop(VMRegImpl::stack2reg(oop_slot)); - __ xorptr(rHandle, rHandle); - __ cmpptr(rOop, NULL_WORD); - __ jcc(Assembler::equal, skip); - __ lea(rHandle, Address(rsp, offset)); - __ bind(skip); - // Store the handle parameter - __ movptr(Address(rsp, reg2offset_out(dst.first())), rHandle); - if (is_receiver) { - *receiver_offset = offset; - } - } -} - -// A float arg may have to do float reg int reg conversion -static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { - assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); - - // Because of the calling convention we know that src is either a stack location - // or an xmm register. dst can only be a stack location. 
- - assert(dst.first()->is_stack() && ( src.first()->is_stack() || src.first()->is_XMMRegister()), "bad parameters"); - - if (src.first()->is_stack()) { - __ movl(rax, Address(rbp, reg2offset_in(src.first()))); - __ movptr(Address(rsp, reg2offset_out(dst.first())), rax); - } else { - // reg to stack - __ movflt(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister()); - } -} - -// A long move -static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { - - // The only legal possibility for a long_move VMRegPair is: - // 1: two stack slots (possibly unaligned) - // as neither the java or C calling convention will use registers - // for longs. - - if (src.first()->is_stack() && dst.first()->is_stack()) { - assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); - __ movptr(rax, Address(rbp, reg2offset_in(src.first()))); - __ movptr(rbx, Address(rbp, reg2offset_in(src.second()))); - __ movptr(Address(rsp, reg2offset_out(dst.first())), rax); - __ movptr(Address(rsp, reg2offset_out(dst.second())), rbx); - } else { - ShouldNotReachHere(); - } -} - -// A double move -static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { - - // The only legal possibilities for a double_move VMRegPair are: - // The painful thing here is that like long_move a VMRegPair might be - - // Because of the calling convention we know that src is either - // 1: a single physical register (xmm registers only) - // 2: two stack slots (possibly unaligned) - // dst can only be a pair of stack slots. 
- - assert(dst.first()->is_stack() && (src.first()->is_XMMRegister() || src.first()->is_stack()), "bad args"); - - if (src.first()->is_stack()) { - // source is all stack - __ movptr(rax, Address(rbp, reg2offset_in(src.first()))); - __ movptr(rbx, Address(rbp, reg2offset_in(src.second()))); - __ movptr(Address(rsp, reg2offset_out(dst.first())), rax); - __ movptr(Address(rsp, reg2offset_out(dst.second())), rbx); - } else { - // reg to stack - // No worries about stack alignment - __ movdbl(Address(rsp, reg2offset_out(dst.first())), src.first()->as_XMMRegister()); - } -} - - -void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { - // We always ignore the frame_slots arg and just use the space just below frame pointer - // which by this time is free to use - switch (ret_type) { - case T_FLOAT: - __ fstp_s(Address(rbp, -wordSize)); - break; - case T_DOUBLE: - __ fstp_d(Address(rbp, -2*wordSize)); - break; - case T_VOID: break; - case T_LONG: - __ movptr(Address(rbp, -wordSize), rax); - __ movptr(Address(rbp, -2*wordSize), rdx); - break; - default: { - __ movptr(Address(rbp, -wordSize), rax); - } - } -} - -void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { - // We always ignore the frame_slots arg and just use the space just below frame pointer - // which by this time is free to use - switch (ret_type) { - case T_FLOAT: - __ fld_s(Address(rbp, -wordSize)); - break; - case T_DOUBLE: - __ fld_d(Address(rbp, -2*wordSize)); - break; - case T_LONG: - __ movptr(rax, Address(rbp, -wordSize)); - __ movptr(rdx, Address(rbp, -2*wordSize)); - break; - case T_VOID: break; - default: { - __ movptr(rax, Address(rbp, -wordSize)); - } - } -} - -static void verify_oop_args(MacroAssembler* masm, - const methodHandle& method, - const BasicType* sig_bt, - const VMRegPair* regs) { - Register temp_reg = rbx; // not part of any compiled calling seq - if (VerifyOops) { - for (int i = 0; i < 
method->size_of_parameters(); i++) { - if (is_reference_type(sig_bt[i])) { - VMReg r = regs[i].first(); - assert(r->is_valid(), "bad oop arg"); - if (r->is_stack()) { - __ movptr(temp_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); - __ verify_oop(temp_reg); - } else { - __ verify_oop(r->as_Register()); - } - } - } - } -} - -static void gen_special_dispatch(MacroAssembler* masm, - const methodHandle& method, - const BasicType* sig_bt, - const VMRegPair* regs) { - verify_oop_args(masm, method, sig_bt, regs); - vmIntrinsics::ID iid = method->intrinsic_id(); - - // Now write the args into the outgoing interpreter space - bool has_receiver = false; - Register receiver_reg = noreg; - int member_arg_pos = -1; - Register member_reg = noreg; - int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); - if (ref_kind != 0) { - member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument - member_reg = rbx; // known to be free at this point - has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); - } else if (iid == vmIntrinsics::_invokeBasic) { - has_receiver = true; - } else { - fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid)); - } - - if (member_reg != noreg) { - // Load the member_arg into register, if necessary. - SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); - VMReg r = regs[member_arg_pos].first(); - if (r->is_stack()) { - __ movptr(member_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); - } else { - // no data motion is needed - member_reg = r->as_Register(); - } - } - - if (has_receiver) { - // Make sure the receiver is loaded into a register. 
- assert(method->size_of_parameters() > 0, "oob"); - assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); - VMReg r = regs[0].first(); - assert(r->is_valid(), "bad receiver arg"); - if (r->is_stack()) { - // Porting note: This assumes that compiled calling conventions always - // pass the receiver oop in a register. If this is not true on some - // platform, pick a temp and load the receiver from stack. - fatal("receiver always in a register"); - receiver_reg = rcx; // known to be free at this point - __ movptr(receiver_reg, Address(rsp, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); - } else { - // no data motion is needed - receiver_reg = r->as_Register(); - } - } - - // Figure out which address we are really jumping to: - MethodHandles::generate_method_handle_dispatch(masm, iid, - receiver_reg, member_reg, /*for_compiler_entry:*/ true); -} - -// --------------------------------------------------------------------------- -// Generate a native wrapper for a given method. The method takes arguments -// in the Java compiled code convention, marshals them to the native -// convention (handlizes oops, etc), transitions to native, makes the call, -// returns to java state (possibly blocking), unhandlizes any result and -// returns. -// -// Critical native functions are a shorthand for the use of -// GetPrimtiveArrayCritical and disallow the use of any other JNI -// functions. The wrapper is expected to unpack the arguments before -// passing them to the callee. Critical native functions leave the state _in_Java, -// since they cannot stop for GC. -// Some other parts of JNI setup are skipped like the tear down of the JNI handle -// block and the check for pending exceptions it's impossible for them -// to be thrown. 
-// -// -nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, - const methodHandle& method, - int compile_id, - BasicType* in_sig_bt, - VMRegPair* in_regs, - BasicType ret_type) { - if (method->is_method_handle_intrinsic()) { - vmIntrinsics::ID iid = method->intrinsic_id(); - intptr_t start = (intptr_t)__ pc(); - int vep_offset = ((intptr_t)__ pc()) - start; - gen_special_dispatch(masm, - method, - in_sig_bt, - in_regs); - int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period - __ flush(); - int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually - return nmethod::new_native_nmethod(method, - compile_id, - masm->code(), - vep_offset, - frame_complete, - stack_slots / VMRegImpl::slots_per_word, - in_ByteSize(-1), - in_ByteSize(-1), - (OopMapSet*)nullptr); - } - address native_func = method->native_function(); - assert(native_func != nullptr, "must have function"); - - // An OopMap for lock (and class if static) - OopMapSet *oop_maps = new OopMapSet(); - - // We have received a description of where all the java arg are located - // on entry to the wrapper. We need to convert these args to where - // the jni function will expect them. To figure out where they go - // we convert the java signature to a C signature by inserting - // the hidden arguments as arg[0] and possibly arg[1] (static method) - - const int total_in_args = method->size_of_parameters(); - int total_c_args = total_in_args + (method->is_static() ? 2 : 1); - - BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); - VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); - - int argc = 0; - out_sig_bt[argc++] = T_ADDRESS; - if (method->is_static()) { - out_sig_bt[argc++] = T_OBJECT; - } - - for (int i = 0; i < total_in_args ; i++ ) { - out_sig_bt[argc++] = in_sig_bt[i]; - } - - // Now figure out where the args must be stored and how much stack space - // they require. 
- int out_arg_slots; - out_arg_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args); - - // Compute framesize for the wrapper. We need to handlize all oops in - // registers a max of 2 on x86. - - // Calculate the total number of stack slots we will need. - - // First count the abi requirement plus all of the outgoing args - int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; - - // Now the space for the inbound oop handle area - int total_save_slots = 2 * VMRegImpl::slots_per_word; // 2 arguments passed in registers - - int oop_handle_offset = stack_slots; - stack_slots += total_save_slots; - - // Now any space we need for handlizing a klass if static method - - int klass_slot_offset = 0; - int klass_offset = -1; - int lock_slot_offset = 0; - bool is_static = false; - - if (method->is_static()) { - klass_slot_offset = stack_slots; - stack_slots += VMRegImpl::slots_per_word; - klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; - is_static = true; - } - - // Plus a lock if needed - - if (method->is_synchronized()) { - lock_slot_offset = stack_slots; - stack_slots += VMRegImpl::slots_per_word; - } - - // Now a place (+2) to save return values or temp during shuffling - // + 2 for return address (which we own) and saved rbp, - stack_slots += 4; - - // Ok The space we have allocated will look like: - // - // - // FP-> | | - // |---------------------| - // | 2 slots for moves | - // |---------------------| - // | lock box (if sync) | - // |---------------------| <- lock_slot_offset (-lock_slot_rbp_offset) - // | klass (if static) | - // |---------------------| <- klass_slot_offset - // | oopHandle area | - // |---------------------| <- oop_handle_offset (a max of 2 registers) - // | outbound memory | - // | based arguments | - // | | - // |---------------------| - // | | - // SP-> | out_preserved_slots | - // - // - // **************************************************************************** - // WARNING - on Windows 
Java Natives use pascal calling convention and pop the - // arguments off of the stack after the jni call. Before the call we can use - // instructions that are SP relative. After the jni call we switch to FP - // relative instructions instead of re-adjusting the stack on windows. - // **************************************************************************** - - - // Now compute actual number of stack words we need rounding to make - // stack properly aligned. - stack_slots = align_up(stack_slots, StackAlignmentInSlots); - - int stack_size = stack_slots * VMRegImpl::stack_slot_size; - - intptr_t start = (intptr_t)__ pc(); - - // First thing make an ic check to see if we should even be here - - // We are free to use all registers as temps without saving them and - // restoring them except rbp. rbp is the only callee save register - // as far as the interpreter and the compiler(s) are concerned. - - - const Register receiver = rcx; - Label exception_pending; - - __ verify_oop(receiver); - // verified entry must be aligned for code patching. - __ ic_check(8 /* end_alignment */); - - int vep_offset = ((intptr_t)__ pc()) - start; - -#ifdef COMPILER1 - // For Object.hashCode, System.identityHashCode try to pull hashCode from object header if available. - if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) { - inline_check_hashcode_from_object_header(masm, method, rcx /*obj_reg*/, rax /*result*/); - } -#endif // COMPILER1 - - // The instruction at the verified entry point must be 5 bytes or longer - // because it can be patched on the fly by make_non_entrant. The stack bang - // instruction fits that requirement. - - // Generate stack overflow check - __ bang_stack_with_offset((int)StackOverflow::stack_shadow_zone_size()); - - // Generate a new frame for the wrapper. 
- __ enter(); - // -2 because return address is already present and so is saved rbp - __ subptr(rsp, stack_size - 2*wordSize); - - - BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); - bs->nmethod_entry_barrier(masm, nullptr /* slow_path */, nullptr /* continuation */); - - // Frame is now completed as far as size and linkage. - int frame_complete = ((intptr_t)__ pc()) - start; - - // Calculate the difference between rsp and rbp,. We need to know it - // after the native call because on windows Java Natives will pop - // the arguments and it is painful to do rsp relative addressing - // in a platform independent way. So after the call we switch to - // rbp, relative addressing. - - int fp_adjustment = stack_size - 2*wordSize; - -#ifdef COMPILER2 - // C2 may leave the stack dirty if not in SSE2+ mode - if (UseSSE >= 2) { - __ verify_FPU(0, "c2i transition should have clean FPU stack"); - } else { - __ empty_FPU_stack(); - } -#endif /* COMPILER2 */ - - // Compute the rbp, offset for any slots used after the jni call - - int lock_slot_rbp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; - - // We use rdi as a thread pointer because it is callee save and - // if we load it once it is usable thru the entire wrapper - const Register thread = rdi; - - // We use rsi as the oop handle for the receiver/klass - // It is callee save so it survives the call to native - - const Register oop_handle_reg = rsi; - - __ get_thread(thread); - - // - // We immediately shuffle the arguments so that any vm call we have to - // make from here on out (sync slow path, jvmti, etc.) we will have - // captured the oops from our caller and have a valid oopMap for - // them. - - // ----------------- - // The Grand Shuffle - // - // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* - // and, if static, the class mirror instead of a receiver. 
This pretty much - // guarantees that register layout will not match (and x86 doesn't use reg - // parms though amd does). Since the native abi doesn't use register args - // and the java conventions does we don't have to worry about collisions. - // All of our moved are reg->stack or stack->stack. - // We ignore the extra arguments during the shuffle and handle them at the - // last moment. The shuffle is described by the two calling convention - // vectors we have in our possession. We simply walk the java vector to - // get the source locations and the c vector to get the destinations. - - int c_arg = method->is_static() ? 2 : 1; - - // Record rsp-based slot for receiver on stack for non-static methods - int receiver_offset = -1; - - // This is a trick. We double the stack slots so we can claim - // the oops in the caller's frame. Since we are sure to have - // more args than the caller doubling is enough to make - // sure we can capture all the incoming oop args from the - // caller. - // - OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); - - // Mark location of rbp, - // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, rbp->as_VMReg()); - - // We know that we only have args in at most two integer registers (rcx, rdx). So rax, rbx - // Are free to temporaries if we have to do stack to steck moves. - // All inbound args are referenced based on rbp, and all outbound args via rsp. 
- - for (int i = 0; i < total_in_args ; i++, c_arg++ ) { - switch (in_sig_bt[i]) { - case T_ARRAY: - case T_OBJECT: - object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], - ((i == 0) && (!is_static)), - &receiver_offset); - break; - case T_VOID: - break; - - case T_FLOAT: - float_move(masm, in_regs[i], out_regs[c_arg]); - break; - - case T_DOUBLE: - assert( i + 1 < total_in_args && - in_sig_bt[i + 1] == T_VOID && - out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); - double_move(masm, in_regs[i], out_regs[c_arg]); - break; - - case T_LONG : - long_move(masm, in_regs[i], out_regs[c_arg]); - break; - - case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); - - default: - simple_move32(masm, in_regs[i], out_regs[c_arg]); - } - } - - // Pre-load a static method's oop into rsi. Used both by locking code and - // the normal JNI call code. - if (method->is_static()) { - - // load opp into a register - __ movoop(oop_handle_reg, JNIHandles::make_local(method->method_holder()->java_mirror())); - - // Now handlize the static class mirror it's known not-null. - __ movptr(Address(rsp, klass_offset), oop_handle_reg); - map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); - - // Now get the handle - __ lea(oop_handle_reg, Address(rsp, klass_offset)); - // store the klass handle as second argument - __ movptr(Address(rsp, wordSize), oop_handle_reg); - } - - // Change state to native (we save the return address in the thread, since it might not - // be pushed on the stack when we do a stack traversal). It is enough that the pc() - // points into the right code segment. It does not have to be the correct return pc. - // We use the same pc/oopMap repeatedly when we call out - - intptr_t the_pc = (intptr_t) __ pc(); - oop_maps->add_gc_map(the_pc - start, map); - - __ set_last_Java_frame(thread, rsp, noreg, (address)the_pc, noreg); - - - // We have all of the arguments setup at this point. 
We must not touch any register - // argument registers at this point (what if we save/restore them there are no oop? - - if (DTraceMethodProbes) { - __ mov_metadata(rax, method()); - __ call_VM_leaf( - CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), - thread, rax); - } - - // RedefineClasses() tracing support for obsolete method entry - if (log_is_enabled(Trace, redefine, class, obsolete)) { - __ mov_metadata(rax, method()); - __ call_VM_leaf( - CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), - thread, rax); - } - - // These are register definitions we need for locking/unlocking - const Register swap_reg = rax; // Must use rax, for cmpxchg instruction - const Register obj_reg = rcx; // Will contain the oop - const Register lock_reg = rdx; // Address of compiler lock object (BasicLock) - - Label slow_path_lock; - Label lock_done; - - // Lock a synchronized method - if (method->is_synchronized()) { - Label count_mon; - - const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); - - // Get the handle (the 2nd argument) - __ movptr(oop_handle_reg, Address(rsp, wordSize)); - - // Get address of the box - - __ lea(lock_reg, Address(rbp, lock_slot_rbp_offset)); - - // Load the oop from the handle - __ movptr(obj_reg, Address(oop_handle_reg, 0)); - - if (LockingMode == LM_MONITOR) { - __ jmp(slow_path_lock); - } else if (LockingMode == LM_LEGACY) { - // Load immediate 1 into swap_reg %rax, - __ movptr(swap_reg, 1); - - // Load (object->mark() | 1) into swap_reg %rax, - __ orptr(swap_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - - // Save (object->mark() | 1) into BasicLock's displaced header - __ movptr(Address(lock_reg, mark_word_offset), swap_reg); - - // src -> dest iff dest == rax, else rax, <- dest - // *obj_reg = lock_reg iff *obj_reg == rax, else rax, = *(obj_reg) - __ lock(); - __ cmpxchgptr(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - __ jcc(Assembler::equal, count_mon); - - // Test 
if the oopMark is an obvious stack pointer, i.e., - // 1) (mark & 3) == 0, and - // 2) rsp <= mark < mark + os::pagesize() - // These 3 tests can be done by evaluating the following - // expression: ((mark - rsp) & (3 - os::vm_page_size())), - // assuming both stack pointer and pagesize have their - // least significant 2 bits clear. - // NOTE: the oopMark is in swap_reg %rax, as the result of cmpxchg - - __ subptr(swap_reg, rsp); - __ andptr(swap_reg, 3 - (int)os::vm_page_size()); - - // Save the test result, for recursive case, the result is zero - __ movptr(Address(lock_reg, mark_word_offset), swap_reg); - __ jcc(Assembler::notEqual, slow_path_lock); - } else { - assert(LockingMode == LM_LIGHTWEIGHT, "must be"); - // Lacking registers and thread on x86_32. Always take slow path. - __ jmp(slow_path_lock); - } - __ bind(count_mon); - __ inc_held_monitor_count(); - - // Slow path will re-enter here - __ bind(lock_done); - } - - - // Finally just about ready to make the JNI call - - // get JNIEnv* which is first argument to native - __ lea(rdx, Address(thread, in_bytes(JavaThread::jni_environment_offset()))); - __ movptr(Address(rsp, 0), rdx); - - // Now set thread in native - __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native); - - __ call(RuntimeAddress(native_func)); - - // Verify or restore cpu control state after JNI call - __ restore_cpu_control_state_after_jni(noreg); - - // WARNING - on Windows Java Natives use pascal calling convention and pop the - // arguments off of the stack. We could just re-adjust the stack pointer here - // and continue to do SP relative addressing but we instead switch to FP - // relative addressing. - - // Unpack native results. 
- switch (ret_type) { - case T_BOOLEAN: __ c2bool(rax); break; - case T_CHAR : __ andptr(rax, 0xFFFF); break; - case T_BYTE : __ sign_extend_byte (rax); break; - case T_SHORT : __ sign_extend_short(rax); break; - case T_INT : /* nothing to do */ break; - case T_DOUBLE : - case T_FLOAT : - // Result is in st0 we'll save as needed - break; - case T_ARRAY: // Really a handle - case T_OBJECT: // Really a handle - break; // can't de-handlize until after safepoint check - case T_VOID: break; - case T_LONG: break; - default : ShouldNotReachHere(); - } - - // Switch thread to "native transition" state before reading the synchronization state. - // This additional state is necessary because reading and testing the synchronization - // state is not atomic w.r.t. GC, as this scenario demonstrates: - // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. - // VM thread changes sync state to synchronizing and suspends threads for GC. - // Thread A is resumed to finish this native method, but doesn't block here since it - // didn't see any synchronization is progress, and escapes. - __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans); - - // Force this write out before the read below - if (!UseSystemMemoryBarrier) { - __ membar(Assembler::Membar_mask_bits( - Assembler::LoadLoad | Assembler::LoadStore | - Assembler::StoreLoad | Assembler::StoreStore)); - } - - if (AlwaysRestoreFPU) { - // Make sure the control word is correct. 
- __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); - } - - // check for safepoint operation in progress and/or pending suspend requests - { Label Continue, slow_path; - - __ safepoint_poll(slow_path, thread, true /* at_return */, false /* in_nmethod */); - - __ cmpl(Address(thread, JavaThread::suspend_flags_offset()), 0); - __ jcc(Assembler::equal, Continue); - __ bind(slow_path); - - // Don't use call_VM as it will see a possible pending exception and forward it - // and never return here preventing us from clearing _last_native_pc down below. - // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are - // preserved and correspond to the bcp/locals pointers. So we do a runtime call - // by hand. - // - __ vzeroupper(); - - save_native_result(masm, ret_type, stack_slots); - __ push(thread); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, - JavaThread::check_special_condition_for_native_trans))); - __ increment(rsp, wordSize); - // Restore any method result value - restore_native_result(masm, ret_type, stack_slots); - __ bind(Continue); - } - - // change thread state - __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_Java); - - Label reguard; - Label reguard_done; - __ cmpl(Address(thread, JavaThread::stack_guard_state_offset()), StackOverflow::stack_guard_yellow_reserved_disabled); - __ jcc(Assembler::equal, reguard); - - // slow path reguard re-enters here - __ bind(reguard_done); - - // Handle possible exception (will unlock if necessary) - - // native result if any is live - - // Unlock - Label slow_path_unlock; - Label unlock_done; - if (method->is_synchronized()) { - - Label fast_done; - - // Get locked oop from the handle we passed to jni - __ movptr(obj_reg, Address(oop_handle_reg, 0)); - - if (LockingMode == LM_LEGACY) { - Label not_recur; - // Simple recursive lock? 
- __ cmpptr(Address(rbp, lock_slot_rbp_offset), NULL_WORD); - __ jcc(Assembler::notEqual, not_recur); - __ dec_held_monitor_count(); - __ jmpb(fast_done); - __ bind(not_recur); - } - - // Must save rax, if it is live now because cmpxchg must use it - if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { - save_native_result(masm, ret_type, stack_slots); - } - - if (LockingMode == LM_MONITOR) { - __ jmp(slow_path_unlock); - } else if (LockingMode == LM_LEGACY) { - // get old displaced header - __ movptr(rbx, Address(rbp, lock_slot_rbp_offset)); - - // get address of the stack lock - __ lea(rax, Address(rbp, lock_slot_rbp_offset)); - - // Atomic swap old header if oop still contains the stack lock - // src -> dest iff dest == rax, else rax, <- dest - // *obj_reg = rbx, iff *obj_reg == rax, else rax, = *(obj_reg) - __ lock(); - __ cmpxchgptr(rbx, Address(obj_reg, oopDesc::mark_offset_in_bytes())); - __ jcc(Assembler::notEqual, slow_path_unlock); - __ dec_held_monitor_count(); - } else { - assert(LockingMode == LM_LIGHTWEIGHT, "must be"); - __ lightweight_unlock(obj_reg, swap_reg, thread, lock_reg, slow_path_unlock); - __ dec_held_monitor_count(); - } - - // slow path re-enters here - __ bind(unlock_done); - if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { - restore_native_result(masm, ret_type, stack_slots); - } - - __ bind(fast_done); - } - - if (DTraceMethodProbes) { - // Tell dtrace about this method exit - save_native_result(masm, ret_type, stack_slots); - __ mov_metadata(rax, method()); - __ call_VM_leaf( - CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), - thread, rax); - restore_native_result(masm, ret_type, stack_slots); - } - - // We can finally stop using that last_Java_frame we setup ages ago - - __ reset_last_Java_frame(thread, false); - - // Unbox oop result, e.g. JNIHandles::resolve value. 
- if (is_reference_type(ret_type)) { - __ resolve_jobject(rax /* value */, - thread /* thread */, - rcx /* tmp */); - } - - if (CheckJNICalls) { - // clear_pending_jni_exception_check - __ movptr(Address(thread, JavaThread::pending_jni_exception_check_fn_offset()), NULL_WORD); - } - - // reset handle block - __ movptr(rcx, Address(thread, JavaThread::active_handles_offset())); - __ movl(Address(rcx, JNIHandleBlock::top_offset()), NULL_WORD); - - // Any exception pending? - __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); - __ jcc(Assembler::notEqual, exception_pending); - - // no exception, we're almost done - - // check that only result value is on FPU stack - __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 1 : 0, "native_wrapper normal exit"); - - // Fixup floating pointer results so that result looks like a return from a compiled method - if (ret_type == T_FLOAT) { - if (UseSSE >= 1) { - // Pop st0 and store as float and reload into xmm register - __ fstp_s(Address(rbp, -4)); - __ movflt(xmm0, Address(rbp, -4)); - } - } else if (ret_type == T_DOUBLE) { - if (UseSSE >= 2) { - // Pop st0 and store as double and reload into xmm register - __ fstp_d(Address(rbp, -8)); - __ movdbl(xmm0, Address(rbp, -8)); - } - } - - // Return - - __ leave(); - __ ret(0); - - // Unexpected paths are out of line and go here - - // Slow path locking & unlocking - if (method->is_synchronized()) { - - // BEGIN Slow path lock - - __ bind(slow_path_lock); - - // has last_Java_frame setup. 
No exceptions so do vanilla call not call_VM - // args are (oop obj, BasicLock* lock, JavaThread* thread) - __ push(thread); - __ push(lock_reg); - __ push(obj_reg); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C))); - __ addptr(rsp, 3*wordSize); - -#ifdef ASSERT - { Label L; - __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); - __ jcc(Assembler::equal, L); - __ stop("no pending exception allowed on exit from monitorenter"); - __ bind(L); - } -#endif - __ jmp(lock_done); - - // END Slow path lock - - // BEGIN Slow path unlock - __ bind(slow_path_unlock); - __ vzeroupper(); - // Slow path unlock - - if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { - save_native_result(masm, ret_type, stack_slots); - } - // Save pending exception around call to VM (which contains an EXCEPTION_MARK) - - __ pushptr(Address(thread, in_bytes(Thread::pending_exception_offset()))); - __ movptr(Address(thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); - - - // should be a peal - // +wordSize because of the push above - // args are (oop obj, BasicLock* lock, JavaThread* thread) - __ push(thread); - __ lea(rax, Address(rbp, lock_slot_rbp_offset)); - __ push(rax); - - __ push(obj_reg); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C))); - __ addptr(rsp, 3*wordSize); -#ifdef ASSERT - { - Label L; - __ cmpptr(Address(thread, in_bytes(Thread::pending_exception_offset())), NULL_WORD); - __ jcc(Assembler::equal, L); - __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); - __ bind(L); - } -#endif /* ASSERT */ - - __ popptr(Address(thread, in_bytes(Thread::pending_exception_offset()))); - - if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { - restore_native_result(masm, ret_type, stack_slots); - } - __ jmp(unlock_done); - // END Slow path unlock - - } - - // SLOW PATH Reguard the stack if needed - - __ bind(reguard); - __ 
vzeroupper(); - save_native_result(masm, ret_type, stack_slots); - { - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages))); - } - restore_native_result(masm, ret_type, stack_slots); - __ jmp(reguard_done); - - - // BEGIN EXCEPTION PROCESSING - - // Forward the exception - __ bind(exception_pending); - - // remove possible return value from FPU register stack - __ empty_FPU_stack(); - - // pop our frame - __ leave(); - // and forward the exception - __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); - - __ flush(); - - nmethod *nm = nmethod::new_native_nmethod(method, - compile_id, - masm->code(), - vep_offset, - frame_complete, - stack_slots / VMRegImpl::slots_per_word, - (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), - in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), - oop_maps); - - return nm; - -} - -// this function returns the adjust size (in number of words) to a c2i adapter -// activation for use during deoptimization -int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals ) { - return (callee_locals - callee_parameters) * Interpreter::stackElementWords; -} - - -// Number of stack slots between incoming argument block and the start of -// a new frame. The PROLOG must add this many slots to the stack. The -// EPILOG must remove this many slots. 
Intel needs one slot for -// return address and one for rbp, (must save rbp) -uint SharedRuntime::in_preserve_stack_slots() { - return 2+VerifyStackAtCalls; -} - -uint SharedRuntime::out_preserve_stack_slots() { - return 0; -} - -VMReg SharedRuntime::thread_register() { - Unimplemented(); - return nullptr; -} - -//------------------------------generate_deopt_blob---------------------------- -void SharedRuntime::generate_deopt_blob() { - // allocate space for the code - ResourceMark rm; - // setup code generation tools - // note: the buffer code size must account for StackShadowPages=50 - const char* name = SharedRuntime::stub_name(SharedStubId::deopt_id); - CodeBuffer buffer(name, 1536, 1024); - MacroAssembler* masm = new MacroAssembler(&buffer); - int frame_size_in_words; - OopMap* map = nullptr; - // Account for the extra args we place on the stack - // by the time we call fetch_unroll_info - const int additional_words = 2; // deopt kind, thread - - OopMapSet *oop_maps = new OopMapSet(); - - // ------------- - // This code enters when returning to a de-optimized nmethod. A return - // address has been pushed on the stack, and return values are in - // registers. - // If we are doing a normal deopt then we were called from the patched - // nmethod from the point we returned to the nmethod. So the return - // address on the stack is wrong by NativeCall::instruction_size - // We will adjust the value to it looks like we have the original return - // address on the stack (like when we eagerly deoptimized). - // In the case of an exception pending with deoptimized then we enter - // with a return address on the stack that points after the call we patched - // into the exception handler. We have the following register state: - // rax,: exception - // rbx,: exception handler - // rdx: throwing pc - // So in this case we simply jam rdx into the useless return address and - // the stack looks just like we want. - // - // At this point we need to de-opt. 
We save the argument return - // registers. We call the first C routine, fetch_unroll_info(). This - // routine captures the return values and returns a structure which - // describes the current frame size and the sizes of all replacement frames. - // The current frame is compiled code and may contain many inlined - // functions, each with their own JVM state. We pop the current frame, then - // push all the new frames. Then we call the C routine unpack_frames() to - // populate these frames. Finally unpack_frames() returns us the new target - // address. Notice that callee-save registers are BLOWN here; they have - // already been captured in the vframeArray at the time the return PC was - // patched. - address start = __ pc(); - Label cont; - - // Prolog for non exception case! - - // Save everything in sight. - - map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false); - // Normal deoptimization - __ push(Deoptimization::Unpack_deopt); - __ jmp(cont); - - int reexecute_offset = __ pc() - start; - - // Reexecute case - // return address is the pc describes what bci to do re-execute at - - // No need to update map as each call to save_live_registers will produce identical oopmap - (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false); - - __ push(Deoptimization::Unpack_reexecute); - __ jmp(cont); - - int exception_offset = __ pc() - start; - - // Prolog for exception case - - // all registers are dead at this entry point, except for rax, and - // rdx which contain the exception oop and exception pc - // respectively. Set them in TLS and fall thru to the - // unpack_with_exception_in_tls entry point. 
- - __ get_thread(rdi); - __ movptr(Address(rdi, JavaThread::exception_pc_offset()), rdx); - __ movptr(Address(rdi, JavaThread::exception_oop_offset()), rax); - - int exception_in_tls_offset = __ pc() - start; - - // new implementation because exception oop is now passed in JavaThread - - // Prolog for exception case - // All registers must be preserved because they might be used by LinearScan - // Exceptiop oop and throwing PC are passed in JavaThread - // tos: stack at point of call to method that threw the exception (i.e. only - // args are on the stack, no return address) - - // make room on stack for the return address - // It will be patched later with the throwing pc. The correct value is not - // available now because loading it from memory would destroy registers. - __ push(0); - - // Save everything in sight. - - // No need to update map as each call to save_live_registers will produce identical oopmap - (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false); - - // Now it is safe to overwrite any register - - // store the correct deoptimization type - __ push(Deoptimization::Unpack_exception); - - // load throwing pc from JavaThread and patch it as the return address - // of the current frame. 
Then clear the field in JavaThread - __ get_thread(rdi); - __ movptr(rdx, Address(rdi, JavaThread::exception_pc_offset())); - __ movptr(Address(rbp, wordSize), rdx); - __ movptr(Address(rdi, JavaThread::exception_pc_offset()), NULL_WORD); - -#ifdef ASSERT - // verify that there is really an exception oop in JavaThread - __ movptr(rax, Address(rdi, JavaThread::exception_oop_offset())); - __ verify_oop(rax); - - // verify that there is no pending exception - Label no_pending_exception; - __ movptr(rax, Address(rdi, Thread::pending_exception_offset())); - __ testptr(rax, rax); - __ jcc(Assembler::zero, no_pending_exception); - __ stop("must not have pending exception here"); - __ bind(no_pending_exception); -#endif - - __ bind(cont); - - // Compiled code leaves the floating point stack dirty, empty it. - __ empty_FPU_stack(); - - - // Call C code. Need thread and this frame, but NOT official VM entry - // crud. We cannot block on this call, no GC can happen. - __ get_thread(rcx); - __ push(rcx); - // fetch_unroll_info needs to call last_java_frame() - __ set_last_Java_frame(rcx, noreg, noreg, nullptr, noreg); - - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info))); - - // Need to have an oopmap that tells fetch_unroll_info where to - // find any register it might need. - - oop_maps->add_gc_map( __ pc()-start, map); - - // Discard args to fetch_unroll_info - __ pop(rcx); - __ pop(rcx); - - __ get_thread(rcx); - __ reset_last_Java_frame(rcx, false); - - // Load UnrollBlock into EDI - __ mov(rdi, rax); - - // Move the unpack kind to a safe place in the UnrollBlock because - // we are very short of registers - - Address unpack_kind(rdi, Deoptimization::UnrollBlock::unpack_kind_offset()); - // retrieve the deopt kind from the UnrollBlock. - __ movl(rax, unpack_kind); - - Label noException; - __ cmpl(rax, Deoptimization::Unpack_exception); // Was exception pending? 
- __ jcc(Assembler::notEqual, noException); - __ movptr(rax, Address(rcx, JavaThread::exception_oop_offset())); - __ movptr(rdx, Address(rcx, JavaThread::exception_pc_offset())); - __ movptr(Address(rcx, JavaThread::exception_oop_offset()), NULL_WORD); - __ movptr(Address(rcx, JavaThread::exception_pc_offset()), NULL_WORD); - - __ verify_oop(rax); - - // Overwrite the result registers with the exception results. - __ movptr(Address(rsp, RegisterSaver::raxOffset()*wordSize), rax); - __ movptr(Address(rsp, RegisterSaver::rdxOffset()*wordSize), rdx); - - __ bind(noException); - - // Stack is back to only having register save data on the stack. - // Now restore the result registers. Everything else is either dead or captured - // in the vframeArray. - - RegisterSaver::restore_result_registers(masm); - - // Non standard control word may be leaked out through a safepoint blob, and we can - // deopt at a poll point with the non standard control word. However, we should make - // sure the control word is correct after restore_result_registers. - __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); - - // All of the register save area has been popped of the stack. Only the - // return address remains. - - // Pop all the frames we must move/replace. - // - // Frame picture (youngest to oldest) - // 1: self-frame (no frame link) - // 2: deopting frame (no frame link) - // 3: caller of deopting frame (could be compiled/interpreted). - // - // Note: by leaving the return address of self-frame on the stack - // and using the size of frame 2 to adjust the stack - // when we are done the return to frame 3 will still be on the stack. 
- - // Pop deoptimized frame - __ addptr(rsp, Address(rdi,Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset())); - - // sp should be pointing at the return address to the caller (3) - - // Pick up the initial fp we should save - // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved) - __ movptr(rbp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset())); - -#ifdef ASSERT - // Compilers generate code that bang the stack by as much as the - // interpreter would need. So this stack banging should never - // trigger a fault. Verify that it does not on non product builds. - __ movl(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset())); - __ bang_stack_size(rbx, rcx); -#endif - - // Load array of frame pcs into ECX - __ movptr(rcx,Address(rdi,Deoptimization::UnrollBlock::frame_pcs_offset())); - - __ pop(rsi); // trash the old pc - - // Load array of frame sizes into ESI - __ movptr(rsi,Address(rdi,Deoptimization::UnrollBlock::frame_sizes_offset())); - - Address counter(rdi, Deoptimization::UnrollBlock::counter_temp_offset()); - - __ movl(rbx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset())); - __ movl(counter, rbx); - - // Now adjust the caller's stack to make up for the extra locals - // but record the original sp so that we can save it in the skeletal interpreter - // frame and the stack walking of interpreter_sender will get the unextended sp - // value and not the "real" sp value. 
- - Address sp_temp(rdi, Deoptimization::UnrollBlock::sender_sp_temp_offset()); - __ movptr(sp_temp, rsp); - __ movl2ptr(rbx, Address(rdi, Deoptimization::UnrollBlock::caller_adjustment_offset())); - __ subptr(rsp, rbx); - - // Push interpreter frames in a loop - Label loop; - __ bind(loop); - __ movptr(rbx, Address(rsi, 0)); // Load frame size - __ subptr(rbx, 2*wordSize); // we'll push pc and rbp, by hand - __ pushptr(Address(rcx, 0)); // save return address - __ enter(); // save old & set new rbp, - __ subptr(rsp, rbx); // Prolog! - __ movptr(rbx, sp_temp); // sender's sp - // This value is corrected by layout_activation_impl - __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD); - __ movptr(Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize), rbx); // Make it walkable - __ movptr(sp_temp, rsp); // pass to next frame - __ addptr(rsi, wordSize); // Bump array pointer (sizes) - __ addptr(rcx, wordSize); // Bump array pointer (pcs) - __ decrementl(counter); // decrement counter - __ jcc(Assembler::notZero, loop); - __ pushptr(Address(rcx, 0)); // save final return address - - // Re-push self-frame - __ enter(); // save old & set new rbp, - - // Return address and rbp, are in place - // We'll push additional args later. 
Just allocate a full sized - // register save area - __ subptr(rsp, (frame_size_in_words-additional_words - 2) * wordSize); - - // Restore frame locals after moving the frame - __ movptr(Address(rsp, RegisterSaver::raxOffset()*wordSize), rax); - __ movptr(Address(rsp, RegisterSaver::rdxOffset()*wordSize), rdx); - __ fstp_d(Address(rsp, RegisterSaver::fpResultOffset()*wordSize)); // Pop float stack and store in local - if( UseSSE>=2 ) __ movdbl(Address(rsp, RegisterSaver::xmm0Offset()*wordSize), xmm0); - if( UseSSE==1 ) __ movflt(Address(rsp, RegisterSaver::xmm0Offset()*wordSize), xmm0); - - // Set up the args to unpack_frame - - __ pushl(unpack_kind); // get the unpack_kind value - __ get_thread(rcx); - __ push(rcx); - - // set last_Java_sp, last_Java_fp - __ set_last_Java_frame(rcx, noreg, rbp, nullptr, noreg); - - // Call C code. Need thread but NOT official VM entry - // crud. We cannot block on this call, no GC can happen. Call should - // restore return values to their stack-slots with the new SP. - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames))); - // Set an oopmap for the call site - oop_maps->add_gc_map( __ pc()-start, new OopMap( frame_size_in_words, 0 )); - - // rax, contains the return result type - __ push(rax); - - __ get_thread(rcx); - __ reset_last_Java_frame(rcx, false); - - // Collect return values - __ movptr(rax,Address(rsp, (RegisterSaver::raxOffset() + additional_words + 1)*wordSize)); - __ movptr(rdx,Address(rsp, (RegisterSaver::rdxOffset() + additional_words + 1)*wordSize)); - - // Clear floating point stack before returning to interpreter - __ empty_FPU_stack(); - - // Check if we should push the float or double return value. 
- Label results_done, yes_double_value; - __ cmpl(Address(rsp, 0), T_DOUBLE); - __ jcc (Assembler::zero, yes_double_value); - __ cmpl(Address(rsp, 0), T_FLOAT); - __ jcc (Assembler::notZero, results_done); - - // return float value as expected by interpreter - if( UseSSE>=1 ) __ movflt(xmm0, Address(rsp, (RegisterSaver::xmm0Offset() + additional_words + 1)*wordSize)); - else __ fld_d(Address(rsp, (RegisterSaver::fpResultOffset() + additional_words + 1)*wordSize)); - __ jmp(results_done); - - // return double value as expected by interpreter - __ bind(yes_double_value); - if( UseSSE>=2 ) __ movdbl(xmm0, Address(rsp, (RegisterSaver::xmm0Offset() + additional_words + 1)*wordSize)); - else __ fld_d(Address(rsp, (RegisterSaver::fpResultOffset() + additional_words + 1)*wordSize)); - - __ bind(results_done); - - // Pop self-frame. - __ leave(); // Epilog! - - // Jump to interpreter - __ ret(0); - - // ------------- - // make sure all code is generated - masm->flush(); - - _deopt_blob = DeoptimizationBlob::create( &buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); - _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); -} - -//------------------------------generate_handler_blob------ -// -// Generate a special Compile2Runtime blob that saves all registers, -// setup oopmap, and calls safepoint code to stop the compiled code for -// a safepoint. 
-// -SafepointBlob* SharedRuntime::generate_handler_blob(SharedStubId id, address call_ptr) { - - // Account for thread arg in our frame - const int additional_words = 1; - int frame_size_in_words; - - assert (StubRoutines::forward_exception_entry() != nullptr, "must be generated before"); - assert(is_polling_page_id(id), "expected a polling page stub id"); - - ResourceMark rm; - OopMapSet *oop_maps = new OopMapSet(); - OopMap* map; - - // allocate space for the code - // setup code generation tools - const char* name = SharedRuntime::stub_name(id); - CodeBuffer buffer(name, 2048, 1024); - MacroAssembler* masm = new MacroAssembler(&buffer); - - const Register java_thread = rdi; // callee-saved for VC++ - address start = __ pc(); - address call_pc = nullptr; - bool cause_return = (id == SharedStubId::polling_page_return_handler_id); - bool save_vectors = (id == SharedStubId::polling_page_vectors_safepoint_handler_id); - - // If cause_return is true we are at a poll_return and there is - // the return address on the stack to the caller on the nmethod - // that is safepoint. We can leave this return on the stack and - // effectively complete the return and safepoint in the caller. - // Otherwise we push space for a return address that the safepoint - // handler will install later to make the stack walking sensible. - if (!cause_return) - __ push(rbx); // Make room for return address (or push it again) - - map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, false, save_vectors); - - // The following is basically a call_VM. However, we need the precise - // address of the call in order to generate an oopmap. Hence, we do all the - // work ourselves. - - // Push thread argument and setup last_Java_sp - __ get_thread(java_thread); - __ push(java_thread); - __ set_last_Java_frame(java_thread, noreg, noreg, nullptr, noreg); - - // if this was not a poll_return then we need to correct the return address now. 
- if (!cause_return) { - // Get the return pc saved by the signal handler and stash it in its appropriate place on the stack. - // Additionally, rbx is a callee saved register and we can look at it later to determine - // if someone changed the return address for us! - __ movptr(rbx, Address(java_thread, JavaThread::saved_exception_pc_offset())); - __ movptr(Address(rbp, wordSize), rbx); - } - - // do the call - __ call(RuntimeAddress(call_ptr)); - - // Set an oopmap for the call site. This oopmap will map all - // oop-registers and debug-info registers as callee-saved. This - // will allow deoptimization at this safepoint to find all possible - // debug-info recordings, as well as let GC find all oops. - - oop_maps->add_gc_map( __ pc() - start, map); - - // Discard arg - __ pop(rcx); - - Label noException; - - // Clear last_Java_sp again - __ get_thread(java_thread); - __ reset_last_Java_frame(java_thread, false); - - __ cmpptr(Address(java_thread, Thread::pending_exception_offset()), NULL_WORD); - __ jcc(Assembler::equal, noException); - - // Exception pending - RegisterSaver::restore_live_registers(masm, save_vectors); - - __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); - - __ bind(noException); - - Label no_adjust, bail, not_special; - if (!cause_return) { - // If our stashed return pc was modified by the runtime we avoid touching it - __ cmpptr(rbx, Address(rbp, wordSize)); - __ jccb(Assembler::notEqual, no_adjust); - - // Skip over the poll instruction. 
- // See NativeInstruction::is_safepoint_poll() - // Possible encodings: - // 85 00 test %eax,(%rax) - // 85 01 test %eax,(%rcx) - // 85 02 test %eax,(%rdx) - // 85 03 test %eax,(%rbx) - // 85 06 test %eax,(%rsi) - // 85 07 test %eax,(%rdi) - // - // 85 04 24 test %eax,(%rsp) - // 85 45 00 test %eax,0x0(%rbp) - -#ifdef ASSERT - __ movptr(rax, rbx); // remember where 0x85 should be, for verification below -#endif - // rsp/rbp base encoding takes 3 bytes with the following register values: - // rsp 0x04 - // rbp 0x05 - __ movzbl(rcx, Address(rbx, 1)); - __ andptr(rcx, 0x07); // looking for 0x04 .. 0x05 - __ subptr(rcx, 4); // looking for 0x00 .. 0x01 - __ cmpptr(rcx, 1); - __ jcc(Assembler::above, not_special); - __ addptr(rbx, 1); - __ bind(not_special); -#ifdef ASSERT - // Verify the correct encoding of the poll we're about to skip. - __ cmpb(Address(rax, 0), NativeTstRegMem::instruction_code_memXregl); - __ jcc(Assembler::notEqual, bail); - // Mask out the modrm bits - __ testb(Address(rax, 1), NativeTstRegMem::modrm_mask); - // rax encodes to 0, so if the bits are nonzero it's incorrect - __ jcc(Assembler::notZero, bail); -#endif - // Adjust return pc forward to step over the safepoint poll instruction - __ addptr(rbx, 2); - __ movptr(Address(rbp, wordSize), rbx); - } - - __ bind(no_adjust); - // Normal exit, register restoring and exit - RegisterSaver::restore_live_registers(masm, save_vectors); - - __ ret(0); - -#ifdef ASSERT - __ bind(bail); - __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); -#endif - - // make sure all code is generated - masm->flush(); - - // Fill-out other meta info - return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); -} - -// -// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss -// -// Generate a stub that calls into vm to find out the proper destination -// of a java call. 
All the argument registers are live at this point -// but since this is generic code we don't know what they are and the caller -// must do any gc of the args. -// -RuntimeStub* SharedRuntime::generate_resolve_blob(SharedStubId id, address destination) { - assert (StubRoutines::forward_exception_entry() != nullptr, "must be generated before"); - assert(is_resolve_id(id), "expected a resolve stub id"); - - // allocate space for the code - ResourceMark rm; - - const char* name = SharedRuntime::stub_name(id); - CodeBuffer buffer(name, 1000, 512); - MacroAssembler* masm = new MacroAssembler(&buffer); - - int frame_size_words; - enum frame_layout { - thread_off, - extra_words }; - - OopMapSet *oop_maps = new OopMapSet(); - OopMap* map = nullptr; - - int start = __ offset(); - - map = RegisterSaver::save_live_registers(masm, extra_words, &frame_size_words); - - int frame_complete = __ offset(); - - const Register thread = rdi; - __ get_thread(rdi); - - __ push(thread); - __ set_last_Java_frame(thread, noreg, rbp, nullptr, noreg); - - __ call(RuntimeAddress(destination)); - - - // Set an oopmap for the call site. - // We need this not only for callee-saved registers, but also for volatile - // registers that the compiler might be keeping live across a safepoint. 
- - oop_maps->add_gc_map( __ offset() - start, map); - - // rax, contains the address we are going to jump to assuming no exception got installed - - __ addptr(rsp, wordSize); - - // clear last_Java_sp - __ reset_last_Java_frame(thread, true); - // check for pending exceptions - Label pending; - __ cmpptr(Address(thread, Thread::pending_exception_offset()), NULL_WORD); - __ jcc(Assembler::notEqual, pending); - - // get the returned Method* - __ get_vm_result_2(rbx, thread); - __ movptr(Address(rsp, RegisterSaver::rbx_offset() * wordSize), rbx); - - __ movptr(Address(rsp, RegisterSaver::rax_offset() * wordSize), rax); - - RegisterSaver::restore_live_registers(masm); - - // We are back to the original state on entry and ready to go. - - __ jmp(rax); - - // Pending exception after the safepoint - - __ bind(pending); - - RegisterSaver::restore_live_registers(masm); - - // exception pending => remove activation and forward to exception handler - - __ get_thread(thread); - __ movptr(Address(thread, JavaThread::vm_result_offset()), NULL_WORD); - __ movptr(rax, Address(thread, Thread::pending_exception_offset())); - __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); - - // ------------- - // make sure all code is generated - masm->flush(); - - // return the blob - // frame_size_words or bytes?? - return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); -} - - //------------------------------------------------------------------------------------------------------------------------ - // Continuation point for throwing of implicit exceptions that are not handled in - // the current activation. Fabricates an exception oop and initiates normal - // exception dispatching in this frame. - // - // Previously the compiler (c2) allowed for callee save registers on Java calls. 
- // This is no longer true after adapter frames were removed but could possibly - // be brought back in the future if the interpreter code was reworked and it - // was deemed worthwhile. The comment below was left to describe what must - // happen here if callee saves were resurrected. As it stands now this stub - // could actually be a vanilla BufferBlob and have now oopMap at all. - // Since it doesn't make much difference we've chosen to leave it the - // way it was in the callee save days and keep the comment. - - // If we need to preserve callee-saved values we need a callee-saved oop map and - // therefore have to make these stubs into RuntimeStubs rather than BufferBlobs. - // If the compiler needs all registers to be preserved between the fault - // point and the exception handler then it must assume responsibility for that in - // AbstractCompiler::continuation_for_implicit_null_exception or - // continuation_for_implicit_division_by_zero_exception. All other implicit - // exceptions (e.g., NullPointerException or AbstractMethodError on entry) are - // either at call sites or otherwise assume that stack unwinding will be initiated, - // so caller saved registers were assumed volatile in the compiler. -RuntimeStub* SharedRuntime::generate_throw_exception(SharedStubId id, address runtime_entry) { - assert(is_throw_id(id), "expected a throw stub id"); - - const char* name = SharedRuntime::stub_name(id); - - // Information about frame layout at time of blocking runtime call. - // Note that we only have to preserve callee-saved registers since - // the compilers are responsible for supplying a continuation point - // if they expect all registers to be preserved. 
- enum layout { - thread_off, // last_java_sp - arg1_off, - arg2_off, - rbp_off, // callee saved register - ret_pc, - framesize - }; - - int insts_size = 256; - int locs_size = 32; - - ResourceMark rm; - const char* timer_msg = "SharedRuntime generate_throw_exception"; - TraceTime timer(timer_msg, TRACETIME_LOG(Info, startuptime)); - - CodeBuffer code(name, insts_size, locs_size); - OopMapSet* oop_maps = new OopMapSet(); - MacroAssembler* masm = new MacroAssembler(&code); - - address start = __ pc(); - - // This is an inlined and slightly modified version of call_VM - // which has the ability to fetch the return PC out of - // thread-local storage and also sets up last_Java_sp slightly - // differently than the real call_VM - Register java_thread = rbx; - __ get_thread(java_thread); - - __ enter(); // required for proper stackwalking of RuntimeStub frame - - // pc and rbp, already pushed - __ subptr(rsp, (framesize-2) * wordSize); // prolog - - // Frame is now completed as far as size and linkage. - - int frame_complete = __ pc() - start; - - // push java thread (becomes first argument of C function) - __ movptr(Address(rsp, thread_off * wordSize), java_thread); - // Set up last_Java_sp and last_Java_fp - __ set_last_Java_frame(java_thread, rsp, rbp, nullptr, noreg); - - // Call runtime - BLOCK_COMMENT("call runtime_entry"); - __ call(RuntimeAddress(runtime_entry)); - // Generate oop map - OopMap* map = new OopMap(framesize, 0); - oop_maps->add_gc_map(__ pc() - start, map); - - // restore the thread (cannot use the pushed argument since arguments - // may be overwritten by C code generated by an optimizing compiler); - // however can use the register value directly if it is callee saved. 
- __ get_thread(java_thread); - - __ reset_last_Java_frame(java_thread, true); - - __ leave(); // required for proper stackwalking of RuntimeStub frame - - // check for pending exceptions -#ifdef ASSERT - Label L; - __ cmpptr(Address(java_thread, Thread::pending_exception_offset()), NULL_WORD); - __ jcc(Assembler::notEqual, L); - __ should_not_reach_here(); - __ bind(L); -#endif /* ASSERT */ - __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); - - - RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, &code, frame_complete, framesize, oop_maps, false); - return stub; -} - -#if INCLUDE_JFR - -static void jfr_prologue(address the_pc, MacroAssembler* masm) { - Register java_thread = rdi; - __ get_thread(java_thread); - __ set_last_Java_frame(java_thread, rsp, rbp, the_pc, noreg); - __ movptr(Address(rsp, 0), java_thread); -} - -// The handle is dereferenced through a load barrier. -static void jfr_epilogue(MacroAssembler* masm) { - Register java_thread = rdi; - __ get_thread(java_thread); - __ reset_last_Java_frame(java_thread, true); -} - -// For c2: c_rarg0 is junk, call to runtime to write a checkpoint. -// It returns a jobject handle to the event writer. -// The handle is dereferenced and the return value is the event writer oop. 
-RuntimeStub* SharedRuntime::generate_jfr_write_checkpoint() { - enum layout { - FPUState_off = 0, - rbp_off = FPUStateSizeInWords, - rdi_off, - rsi_off, - rcx_off, - rbx_off, - saved_argument_off, - saved_argument_off2, // 2nd half of double - framesize - }; - - int insts_size = 1024; - int locs_size = 64; - const char* name = SharedRuntime::stub_name(SharedStubId::jfr_write_checkpoint_id); - CodeBuffer code(name, insts_size, locs_size); - OopMapSet* oop_maps = new OopMapSet(); - MacroAssembler* masm = new MacroAssembler(&code); - - address start = __ pc(); - __ enter(); - int frame_complete = __ pc() - start; - address the_pc = __ pc(); - jfr_prologue(the_pc, masm); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::write_checkpoint), 1); - jfr_epilogue(masm); - __ resolve_global_jobject(rax, rdi, rdx); - __ leave(); - __ ret(0); - - OopMap* map = new OopMap(framesize, 1); // rbp - oop_maps->add_gc_map(the_pc - start, map); - - RuntimeStub* stub = // codeBlob framesize is in words (not VMRegImpl::slot_size) - RuntimeStub::new_runtime_stub(name, &code, frame_complete, - (framesize >> (LogBytesPerWord - LogBytesPerInt)), - oop_maps, false); - return stub; -} - -// For c2: call to return a leased buffer. 
-RuntimeStub* SharedRuntime::generate_jfr_return_lease() { - enum layout { - FPUState_off = 0, - rbp_off = FPUStateSizeInWords, - rdi_off, - rsi_off, - rcx_off, - rbx_off, - saved_argument_off, - saved_argument_off2, // 2nd half of double - framesize - }; - - int insts_size = 1024; - int locs_size = 64; - const char* name = SharedRuntime::stub_name(SharedStubId::jfr_return_lease_id); - CodeBuffer code(name, insts_size, locs_size); - OopMapSet* oop_maps = new OopMapSet(); - MacroAssembler* masm = new MacroAssembler(&code); - - address start = __ pc(); - __ enter(); - int frame_complete = __ pc() - start; - address the_pc = __ pc(); - jfr_prologue(the_pc, masm); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::return_lease), 1); - jfr_epilogue(masm); - __ leave(); - __ ret(0); - - OopMap* map = new OopMap(framesize, 1); // rbp - oop_maps->add_gc_map(the_pc - start, map); - - RuntimeStub* stub = // codeBlob framesize is in words (not VMRegImpl::slot_size) - RuntimeStub::new_runtime_stub(name, &code, frame_complete, - (framesize >> (LogBytesPerWord - LogBytesPerInt)), - oop_maps, false); - return stub; -} - -#endif // INCLUDE_JFR diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp deleted file mode 100644 index de13772dcfb0d..0000000000000 --- a/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp +++ /dev/null @@ -1,4083 +0,0 @@ -/* - * Copyright (c) 1999, 2024, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "precompiled.hpp" -#include "asm/macroAssembler.hpp" -#include "asm/macroAssembler.inline.hpp" -#include "compiler/oopMap.hpp" -#include "gc/shared/barrierSet.hpp" -#include "gc/shared/barrierSetAssembler.hpp" -#include "gc/shared/barrierSetNMethod.hpp" -#include "interpreter/interpreter.hpp" -#include "memory/universe.hpp" -#include "nativeInst_x86.hpp" -#include "oops/instanceOop.hpp" -#include "oops/method.hpp" -#include "oops/objArrayKlass.hpp" -#include "oops/oop.inline.hpp" -#include "prims/methodHandles.hpp" -#include "runtime/frame.inline.hpp" -#include "runtime/handles.inline.hpp" -#include "runtime/javaThread.hpp" -#include "runtime/sharedRuntime.hpp" -#include "runtime/stubCodeGenerator.hpp" -#include "runtime/stubRoutines.hpp" -#ifdef COMPILER2 -#include "opto/runtime.hpp" -#endif - -// Declaration and definition of StubGenerator (no .hpp file). 
-// For a more detailed description of the stub routine structure -// see the comment in stubRoutines.hpp - -#define __ _masm-> -#define a__ ((Assembler*)_masm)-> - -#ifdef PRODUCT -#define BLOCK_COMMENT(str) /* nothing */ -#else -#define BLOCK_COMMENT(str) __ block_comment(str) -#endif - -#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") - -const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions -const int FPU_CNTRL_WRD_MASK = 0xFFFF; - -ATTRIBUTE_ALIGNED(16) static const uint32_t KEY_SHUFFLE_MASK[] = { - 0x00010203UL, 0x04050607UL, 0x08090A0BUL, 0x0C0D0E0FUL, -}; - -ATTRIBUTE_ALIGNED(16) static const uint32_t COUNTER_SHUFFLE_MASK[] = { - 0x0C0D0E0FUL, 0x08090A0BUL, 0x04050607UL, 0x00010203UL, -}; - -ATTRIBUTE_ALIGNED(16) static const uint32_t GHASH_BYTE_SWAP_MASK[] = { - 0x0C0D0E0FUL, 0x08090A0BUL, 0x04050607UL, 0x00010203UL, -}; - -ATTRIBUTE_ALIGNED(16) static const uint32_t GHASH_LONG_SWAP_MASK[] = { - 0x0B0A0908UL, 0x0F0E0D0CUL, 0x03020100UL, 0x07060504UL, -}; - -// ------------------------------------------------------------------------------------------------------------------------- -// Stub Code definitions - -class StubGenerator: public StubCodeGenerator { - private: - -#ifdef PRODUCT -#define inc_counter_np(counter) ((void)0) -#else - void inc_counter_np_(uint& counter) { - __ incrementl(ExternalAddress((address)&counter)); - } -#define inc_counter_np(counter) \ - BLOCK_COMMENT("inc_counter " #counter); \ - inc_counter_np_(counter); -#endif //PRODUCT - - void inc_copy_counter_np(BasicType t) { -#ifndef PRODUCT - switch (t) { - case T_BYTE: inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); return; - case T_SHORT: inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); return; - case T_INT: inc_counter_np(SharedRuntime::_jint_array_copy_ctr); return; - case T_LONG: inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); return; - case T_OBJECT: inc_counter_np(SharedRuntime::_oop_array_copy_ctr); return; - default: ShouldNotReachHere(); 
- } -#endif //PRODUCT - } - - //------------------------------------------------------------------------------------------------------------------------ - // Call stubs are used to call Java from C - // - // [ return_from_Java ] <--- rsp - // [ argument word n ] - // ... - // -N [ argument word 1 ] - // -7 [ Possible padding for stack alignment ] - // -6 [ Possible padding for stack alignment ] - // -5 [ Possible padding for stack alignment ] - // -4 [ mxcsr save ] <--- rsp_after_call - // -3 [ saved rbx, ] - // -2 [ saved rsi ] - // -1 [ saved rdi ] - // 0 [ saved rbp, ] <--- rbp, - // 1 [ return address ] - // 2 [ ptr. to call wrapper ] - // 3 [ result ] - // 4 [ result_type ] - // 5 [ method ] - // 6 [ entry_point ] - // 7 [ parameters ] - // 8 [ parameter_size ] - // 9 [ thread ] - - - address generate_call_stub(address& return_address) { - StubCodeMark mark(this, "StubRoutines", "call_stub"); - address start = __ pc(); - - // stub code parameters / addresses - assert(frame::entry_frame_call_wrapper_offset == 2, "adjust this code"); - bool sse_save = false; - const Address rsp_after_call(rbp, -4 * wordSize); // same as in generate_catch_exception()! - const int locals_count_in_bytes (4*wordSize); - const Address mxcsr_save (rbp, -4 * wordSize); - const Address saved_rbx (rbp, -3 * wordSize); - const Address saved_rsi (rbp, -2 * wordSize); - const Address saved_rdi (rbp, -1 * wordSize); - const Address result (rbp, 3 * wordSize); - const Address result_type (rbp, 4 * wordSize); - const Address method (rbp, 5 * wordSize); - const Address entry_point (rbp, 6 * wordSize); - const Address parameters (rbp, 7 * wordSize); - const Address parameter_size(rbp, 8 * wordSize); - const Address thread (rbp, 9 * wordSize); // same as in generate_catch_exception()! 
- sse_save = UseSSE > 0; - - // stub code - __ enter(); - __ movptr(rcx, parameter_size); // parameter counter - __ shlptr(rcx, Interpreter::logStackElementSize); // convert parameter count to bytes - __ addptr(rcx, locals_count_in_bytes); // reserve space for register saves - __ subptr(rsp, rcx); - __ andptr(rsp, -(StackAlignmentInBytes)); // Align stack - - // save rdi, rsi, & rbx, according to C calling conventions - __ movptr(saved_rdi, rdi); - __ movptr(saved_rsi, rsi); - __ movptr(saved_rbx, rbx); - - // save and initialize %mxcsr - if (sse_save) { - Label skip_ldmx; - __ stmxcsr(mxcsr_save); - __ movl(rax, mxcsr_save); - __ andl(rax, MXCSR_MASK); // Only check control and mask bits - ExternalAddress mxcsr_std(StubRoutines::x86::addr_mxcsr_std()); - __ cmp32(rax, mxcsr_std); - __ jcc(Assembler::equal, skip_ldmx); - __ ldmxcsr(mxcsr_std); - __ bind(skip_ldmx); - } - - // make sure the control word is correct. - __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); - -#ifdef ASSERT - // make sure we have no pending exceptions - { Label L; - __ movptr(rcx, thread); - __ cmpptr(Address(rcx, Thread::pending_exception_offset()), NULL_WORD); - __ jcc(Assembler::equal, L); - __ stop("StubRoutines::call_stub: entered with pending exception"); - __ bind(L); - } -#endif - - // pass parameters if any - BLOCK_COMMENT("pass parameters if any"); - Label parameters_done; - __ movl(rcx, parameter_size); // parameter counter - __ testl(rcx, rcx); - __ jcc(Assembler::zero, parameters_done); - - // parameter passing loop - - Label loop; - // Copy Java parameters in reverse order (receiver last) - // Note that the argument order is inverted in the process - // source is rdx[rcx: N-1..0] - // dest is rsp[rbx: 0..N-1] - - __ movptr(rdx, parameters); // parameter pointer - __ xorptr(rbx, rbx); - - __ BIND(loop); - - // get parameter - __ movptr(rax, Address(rdx, rcx, Interpreter::stackElementScale(), -wordSize)); - __ movptr(Address(rsp, rbx, 
Interpreter::stackElementScale(), - Interpreter::expr_offset_in_bytes(0)), rax); // store parameter - __ increment(rbx); - __ decrement(rcx); - __ jcc(Assembler::notZero, loop); - - // call Java function - __ BIND(parameters_done); - __ movptr(rbx, method); // get Method* - __ movptr(rax, entry_point); // get entry_point - __ mov(rsi, rsp); // set sender sp - BLOCK_COMMENT("call Java function"); - __ call(rax); - - BLOCK_COMMENT("call_stub_return_address:"); - return_address = __ pc(); - -#ifdef COMPILER2 - { - Label L_skip; - if (UseSSE >= 2) { - __ verify_FPU(0, "call_stub_return"); - } else { - for (int i = 1; i < 8; i++) { - __ ffree(i); - } - - // UseSSE <= 1 so double result should be left on TOS - __ movl(rsi, result_type); - __ cmpl(rsi, T_DOUBLE); - __ jcc(Assembler::equal, L_skip); - if (UseSSE == 0) { - // UseSSE == 0 so float result should be left on TOS - __ cmpl(rsi, T_FLOAT); - __ jcc(Assembler::equal, L_skip); - } - __ ffree(0); - } - __ BIND(L_skip); - } -#endif // COMPILER2 - - // store result depending on type - // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) - __ movptr(rdi, result); - Label is_long, is_float, is_double, exit; - __ movl(rsi, result_type); - __ cmpl(rsi, T_LONG); - __ jcc(Assembler::equal, is_long); - __ cmpl(rsi, T_FLOAT); - __ jcc(Assembler::equal, is_float); - __ cmpl(rsi, T_DOUBLE); - __ jcc(Assembler::equal, is_double); - - // handle T_INT case - __ movl(Address(rdi, 0), rax); - __ BIND(exit); - - // check that FPU stack is empty - __ verify_FPU(0, "generate_call_stub"); - - // pop parameters - __ lea(rsp, rsp_after_call); - - // restore %mxcsr - if (sse_save) { - __ ldmxcsr(mxcsr_save); - } - - // restore rdi, rsi and rbx, - __ movptr(rbx, saved_rbx); - __ movptr(rsi, saved_rsi); - __ movptr(rdi, saved_rdi); - __ addptr(rsp, 4*wordSize); - - // return - __ pop(rbp); - __ ret(0); - - // handle return types different from T_INT - __ BIND(is_long); - __ movl(Address(rdi, 0 * wordSize), rax); - __ 
movl(Address(rdi, 1 * wordSize), rdx); - __ jmp(exit); - - __ BIND(is_float); - // interpreter uses xmm0 for return values - if (UseSSE >= 1) { - __ movflt(Address(rdi, 0), xmm0); - } else { - __ fstp_s(Address(rdi, 0)); - } - __ jmp(exit); - - __ BIND(is_double); - // interpreter uses xmm0 for return values - if (UseSSE >= 2) { - __ movdbl(Address(rdi, 0), xmm0); - } else { - __ fstp_d(Address(rdi, 0)); - } - __ jmp(exit); - - return start; - } - - - //------------------------------------------------------------------------------------------------------------------------ - // Return point for a Java call if there's an exception thrown in Java code. - // The exception is caught and transformed into a pending exception stored in - // JavaThread that can be tested from within the VM. - // - // Note: Usually the parameters are removed by the callee. In case of an exception - // crossing an activation frame boundary, that is not the case if the callee - // is compiled code => need to setup the rsp. - // - // rax,: exception oop - - address generate_catch_exception() { - StubCodeMark mark(this, "StubRoutines", "catch_exception"); - const Address rsp_after_call(rbp, -4 * wordSize); // same as in generate_call_stub()! - const Address thread (rbp, 9 * wordSize); // same as in generate_call_stub()! 
- address start = __ pc(); - - // get thread directly - __ movptr(rcx, thread); -#ifdef ASSERT - // verify that threads correspond - { Label L; - __ get_thread(rbx); - __ cmpptr(rbx, rcx); - __ jcc(Assembler::equal, L); - __ stop("StubRoutines::catch_exception: threads must correspond"); - __ bind(L); - } -#endif - // set pending exception - __ verify_oop(rax); - __ movptr(Address(rcx, Thread::pending_exception_offset()), rax); - __ lea(Address(rcx, Thread::exception_file_offset()), - ExternalAddress((address)__FILE__), noreg); - __ movl(Address(rcx, Thread::exception_line_offset()), __LINE__ ); - // complete return to VM - assert(StubRoutines::_call_stub_return_address != nullptr, "_call_stub_return_address must have been generated before"); - __ jump(RuntimeAddress(StubRoutines::_call_stub_return_address)); - - return start; - } - - - //------------------------------------------------------------------------------------------------------------------------ - // Continuation point for runtime calls returning with a pending exception. - // The pending exception check happened in the runtime or native call stub. - // The pending exception in Thread is converted into a Java-level exception. - // - // Contract with Java-level exception handlers: - // rax: exception - // rdx: throwing pc - // - // NOTE: At entry of this stub, exception-pc must be on stack !! - - address generate_forward_exception() { - StubCodeMark mark(this, "StubRoutines", "forward exception"); - address start = __ pc(); - const Register thread = rcx; - - // other registers used in this stub - const Register exception_oop = rax; - const Register handler_addr = rbx; - const Register exception_pc = rdx; - - // Upon entry, the sp points to the return address returning into Java - // (interpreted or compiled) code; i.e., the return address becomes the - // throwing pc. 
- // - // Arguments pushed before the runtime call are still on the stack but - // the exception handler will reset the stack pointer -> ignore them. - // A potential result in registers can be ignored as well. - -#ifdef ASSERT - // make sure this code is only executed if there is a pending exception - { Label L; - __ get_thread(thread); - __ cmpptr(Address(thread, Thread::pending_exception_offset()), NULL_WORD); - __ jcc(Assembler::notEqual, L); - __ stop("StubRoutines::forward exception: no pending exception (1)"); - __ bind(L); - } -#endif - - // compute exception handler into rbx, - __ get_thread(thread); - __ movptr(exception_pc, Address(rsp, 0)); - BLOCK_COMMENT("call exception_handler_for_return_address"); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, exception_pc); - __ mov(handler_addr, rax); - - // setup rax & rdx, remove return address & clear pending exception - __ get_thread(thread); - __ pop(exception_pc); - __ movptr(exception_oop, Address(thread, Thread::pending_exception_offset())); - __ movptr(Address(thread, Thread::pending_exception_offset()), NULL_WORD); - -#ifdef ASSERT - // make sure exception is set - { Label L; - __ testptr(exception_oop, exception_oop); - __ jcc(Assembler::notEqual, L); - __ stop("StubRoutines::forward exception: no pending exception (2)"); - __ bind(L); - } -#endif - - // Verify that there is really a valid exception in RAX. - __ verify_oop(exception_oop); - - // continue at exception handler (return address removed) - // rax: exception - // rbx: exception handler - // rdx: throwing pc - __ jmp(handler_addr); - - return start; - } - - //---------------------------------------------------------------------------------------------------- - // Support for void verify_mxcsr() - // - // This routine is used with -Xcheck:jni to verify that native - // JNI code does not return to Java code without restoring the - // MXCSR register to our expected state. 
- - - address generate_verify_mxcsr() { - StubCodeMark mark(this, "StubRoutines", "verify_mxcsr"); - address start = __ pc(); - - const Address mxcsr_save(rsp, 0); - - if (CheckJNICalls && UseSSE > 0 ) { - Label ok_ret; - ExternalAddress mxcsr_std(StubRoutines::x86::addr_mxcsr_std()); - __ push(rax); - __ subptr(rsp, wordSize); // allocate a temp location - __ stmxcsr(mxcsr_save); - __ movl(rax, mxcsr_save); - __ andl(rax, MXCSR_MASK); - __ cmp32(rax, mxcsr_std); - __ jcc(Assembler::equal, ok_ret); - - __ warn("MXCSR changed by native JNI code."); - - __ ldmxcsr(mxcsr_std); - - __ bind(ok_ret); - __ addptr(rsp, wordSize); - __ pop(rax); - } - - __ ret(0); - - return start; - } - - - //--------------------------------------------------------------------------- - // Support for void verify_fpu_cntrl_wrd() - // - // This routine is used with -Xcheck:jni to verify that native - // JNI code does not return to Java code without restoring the - // FP control word to our expected state. - - address generate_verify_fpu_cntrl_wrd() { - StubCodeMark mark(this, "StubRoutines", "verify_spcw"); - address start = __ pc(); - - const Address fpu_cntrl_wrd_save(rsp, 0); - - if (CheckJNICalls) { - Label ok_ret; - __ push(rax); - __ subptr(rsp, wordSize); // allocate a temp location - __ fnstcw(fpu_cntrl_wrd_save); - __ movl(rax, fpu_cntrl_wrd_save); - __ andl(rax, FPU_CNTRL_WRD_MASK); - ExternalAddress fpu_std(StubRoutines::x86::addr_fpu_cntrl_wrd_std()); - __ cmp32(rax, fpu_std); - __ jcc(Assembler::equal, ok_ret); - - __ warn("Floating point control word changed by native JNI code."); - - __ fldcw(fpu_std); - - __ bind(ok_ret); - __ addptr(rsp, wordSize); - __ pop(rax); - } - - __ ret(0); - - return start; - } - - //--------------------------------------------------------------------------- - // Wrapper for slow-case handling of double-to-integer conversion - // d2i or f2i fast case failed either because it is nan or because - // of under/overflow. 
- // Input: FPU TOS: float value - // Output: rax, (rdx): integer (long) result - - address generate_d2i_wrapper(BasicType t, address fcn) { - StubCodeMark mark(this, "StubRoutines", "d2i_wrapper"); - address start = __ pc(); - - // Capture info about frame layout - enum layout { FPUState_off = 0, - rbp_off = FPUStateSizeInWords, - rdi_off, - rsi_off, - rcx_off, - rbx_off, - saved_argument_off, - saved_argument_off2, // 2nd half of double - framesize - }; - - assert(FPUStateSizeInWords == 27, "update stack layout"); - - // Save outgoing argument to stack across push_FPU_state() - __ subptr(rsp, wordSize * 2); - __ fstp_d(Address(rsp, 0)); - - // Save CPU & FPU state - __ push(rbx); - __ push(rcx); - __ push(rsi); - __ push(rdi); - __ push(rbp); - __ push_FPU_state(); - - // push_FPU_state() resets the FP top of stack - // Load original double into FP top of stack - __ fld_d(Address(rsp, saved_argument_off * wordSize)); - // Store double into stack as outgoing argument - __ subptr(rsp, wordSize*2); - __ fst_d(Address(rsp, 0)); - - // Prepare FPU for doing math in C-land - __ empty_FPU_stack(); - // Call the C code to massage the double. 
Result in EAX - if (t == T_INT) - { BLOCK_COMMENT("SharedRuntime::d2i"); } - else if (t == T_LONG) - { BLOCK_COMMENT("SharedRuntime::d2l"); } - __ call_VM_leaf( fcn, 2 ); - - // Restore CPU & FPU state - __ pop_FPU_state(); - __ pop(rbp); - __ pop(rdi); - __ pop(rsi); - __ pop(rcx); - __ pop(rbx); - __ addptr(rsp, wordSize * 2); - - __ ret(0); - - return start; - } - //--------------------------------------------------------------------------------------------------- - - address generate_vector_mask(const char *stub_name, int32_t mask) { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", stub_name); - address start = __ pc(); - - for (int i = 0; i < 16; i++) { - __ emit_data(mask, relocInfo::none, 0); - } - - return start; - } - - address generate_count_leading_zeros_lut(const char *stub_name) { - __ align64(); - StubCodeMark mark(this, "StubRoutines", stub_name); - address start = __ pc(); - __ emit_data(0x02020304, relocInfo::none, 0); - __ emit_data(0x01010101, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x02020304, relocInfo::none, 0); - __ emit_data(0x01010101, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x02020304, relocInfo::none, 0); - __ emit_data(0x01010101, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x02020304, relocInfo::none, 0); - __ emit_data(0x01010101, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - return start; - } - - - address generate_popcount_avx_lut(const char *stub_name) { - __ align64(); - StubCodeMark mark(this, "StubRoutines", stub_name); - address start = __ pc(); - __ emit_data(0x02010100, relocInfo::none, 0); - __ emit_data(0x03020201, relocInfo::none, 0); - __ emit_data(0x03020201, 
relocInfo::none, 0); - __ emit_data(0x04030302, relocInfo::none, 0); - __ emit_data(0x02010100, relocInfo::none, 0); - __ emit_data(0x03020201, relocInfo::none, 0); - __ emit_data(0x03020201, relocInfo::none, 0); - __ emit_data(0x04030302, relocInfo::none, 0); - __ emit_data(0x02010100, relocInfo::none, 0); - __ emit_data(0x03020201, relocInfo::none, 0); - __ emit_data(0x03020201, relocInfo::none, 0); - __ emit_data(0x04030302, relocInfo::none, 0); - __ emit_data(0x02010100, relocInfo::none, 0); - __ emit_data(0x03020201, relocInfo::none, 0); - __ emit_data(0x03020201, relocInfo::none, 0); - __ emit_data(0x04030302, relocInfo::none, 0); - return start; - } - - - address generate_iota_indices(const char *stub_name) { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", stub_name); - address start = __ pc(); - // B - __ emit_data(0x03020100, relocInfo::none, 0); - __ emit_data(0x07060504, relocInfo::none, 0); - __ emit_data(0x0B0A0908, relocInfo::none, 0); - __ emit_data(0x0F0E0D0C, relocInfo::none, 0); - __ emit_data(0x13121110, relocInfo::none, 0); - __ emit_data(0x17161514, relocInfo::none, 0); - __ emit_data(0x1B1A1918, relocInfo::none, 0); - __ emit_data(0x1F1E1D1C, relocInfo::none, 0); - __ emit_data(0x23222120, relocInfo::none, 0); - __ emit_data(0x27262524, relocInfo::none, 0); - __ emit_data(0x2B2A2928, relocInfo::none, 0); - __ emit_data(0x2F2E2D2C, relocInfo::none, 0); - __ emit_data(0x33323130, relocInfo::none, 0); - __ emit_data(0x37363534, relocInfo::none, 0); - __ emit_data(0x3B3A3938, relocInfo::none, 0); - __ emit_data(0x3F3E3D3C, relocInfo::none, 0); - - // W - __ emit_data(0x00010000, relocInfo::none, 0); - __ emit_data(0x00030002, relocInfo::none, 0); - __ emit_data(0x00050004, relocInfo::none, 0); - __ emit_data(0x00070006, relocInfo::none, 0); - __ emit_data(0x00090008, relocInfo::none, 0); - __ emit_data(0x000B000A, relocInfo::none, 0); - __ emit_data(0x000D000C, relocInfo::none, 0); - __ emit_data(0x000F000E, 
relocInfo::none, 0); - __ emit_data(0x00110010, relocInfo::none, 0); - __ emit_data(0x00130012, relocInfo::none, 0); - __ emit_data(0x00150014, relocInfo::none, 0); - __ emit_data(0x00170016, relocInfo::none, 0); - __ emit_data(0x00190018, relocInfo::none, 0); - __ emit_data(0x001B001A, relocInfo::none, 0); - __ emit_data(0x001D001C, relocInfo::none, 0); - __ emit_data(0x001F001E, relocInfo::none, 0); - - // D - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000001, relocInfo::none, 0); - __ emit_data(0x00000002, relocInfo::none, 0); - __ emit_data(0x00000003, relocInfo::none, 0); - __ emit_data(0x00000004, relocInfo::none, 0); - __ emit_data(0x00000005, relocInfo::none, 0); - __ emit_data(0x00000006, relocInfo::none, 0); - __ emit_data(0x00000007, relocInfo::none, 0); - __ emit_data(0x00000008, relocInfo::none, 0); - __ emit_data(0x00000009, relocInfo::none, 0); - __ emit_data(0x0000000A, relocInfo::none, 0); - __ emit_data(0x0000000B, relocInfo::none, 0); - __ emit_data(0x0000000C, relocInfo::none, 0); - __ emit_data(0x0000000D, relocInfo::none, 0); - __ emit_data(0x0000000E, relocInfo::none, 0); - __ emit_data(0x0000000F, relocInfo::none, 0); - - // Q - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000001, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000002, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000003, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000004, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000005, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000006, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000007, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - - // D - FP - __ emit_data(0x00000000, 
relocInfo::none, 0); // 0.0f - __ emit_data(0x3F800000, relocInfo::none, 0); // 1.0f - __ emit_data(0x40000000, relocInfo::none, 0); // 2.0f - __ emit_data(0x40400000, relocInfo::none, 0); // 3.0f - __ emit_data(0x40800000, relocInfo::none, 0); // 4.0f - __ emit_data(0x40A00000, relocInfo::none, 0); // 5.0f - __ emit_data(0x40C00000, relocInfo::none, 0); // 6.0f - __ emit_data(0x40E00000, relocInfo::none, 0); // 7.0f - __ emit_data(0x41000000, relocInfo::none, 0); // 8.0f - __ emit_data(0x41100000, relocInfo::none, 0); // 9.0f - __ emit_data(0x41200000, relocInfo::none, 0); // 10.0f - __ emit_data(0x41300000, relocInfo::none, 0); // 11.0f - __ emit_data(0x41400000, relocInfo::none, 0); // 12.0f - __ emit_data(0x41500000, relocInfo::none, 0); // 13.0f - __ emit_data(0x41600000, relocInfo::none, 0); // 14.0f - __ emit_data(0x41700000, relocInfo::none, 0); // 15.0f - - // Q - FP - __ emit_data(0x00000000, relocInfo::none, 0); // 0.0d - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); // 1.0d - __ emit_data(0x3FF00000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); // 2.0d - __ emit_data(0x40000000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); // 3.0d - __ emit_data(0x40080000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); // 4.0d - __ emit_data(0x40100000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); // 5.0d - __ emit_data(0x40140000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); // 6.0d - __ emit_data(0x40180000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); // 7.0d - __ emit_data(0x401c0000, relocInfo::none, 0); - return start; - } - - address generate_vector_reverse_bit_lut(const char *stub_name) { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", stub_name); - address start = __ pc(); - __ emit_data(0x0C040800, relocInfo::none, 0); - __ emit_data(0x0E060A02, 
relocInfo::none, 0); - __ emit_data(0x0D050901, relocInfo::none, 0); - __ emit_data(0x0F070B03, relocInfo::none, 0); - __ emit_data(0x0C040800, relocInfo::none, 0); - __ emit_data(0x0E060A02, relocInfo::none, 0); - __ emit_data(0x0D050901, relocInfo::none, 0); - __ emit_data(0x0F070B03, relocInfo::none, 0); - __ emit_data(0x0C040800, relocInfo::none, 0); - __ emit_data(0x0E060A02, relocInfo::none, 0); - __ emit_data(0x0D050901, relocInfo::none, 0); - __ emit_data(0x0F070B03, relocInfo::none, 0); - __ emit_data(0x0C040800, relocInfo::none, 0); - __ emit_data(0x0E060A02, relocInfo::none, 0); - __ emit_data(0x0D050901, relocInfo::none, 0); - __ emit_data(0x0F070B03, relocInfo::none, 0); - return start; - } - - address generate_vector_reverse_byte_perm_mask_long(const char *stub_name) { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", stub_name); - address start = __ pc(); - __ emit_data(0x04050607, relocInfo::none, 0); - __ emit_data(0x00010203, relocInfo::none, 0); - __ emit_data(0x0C0D0E0F, relocInfo::none, 0); - __ emit_data(0x08090A0B, relocInfo::none, 0); - __ emit_data(0x04050607, relocInfo::none, 0); - __ emit_data(0x00010203, relocInfo::none, 0); - __ emit_data(0x0C0D0E0F, relocInfo::none, 0); - __ emit_data(0x08090A0B, relocInfo::none, 0); - __ emit_data(0x04050607, relocInfo::none, 0); - __ emit_data(0x00010203, relocInfo::none, 0); - __ emit_data(0x0C0D0E0F, relocInfo::none, 0); - __ emit_data(0x08090A0B, relocInfo::none, 0); - __ emit_data(0x04050607, relocInfo::none, 0); - __ emit_data(0x00010203, relocInfo::none, 0); - __ emit_data(0x0C0D0E0F, relocInfo::none, 0); - __ emit_data(0x08090A0B, relocInfo::none, 0); - return start; - } - - address generate_vector_reverse_byte_perm_mask_int(const char *stub_name) { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", stub_name); - address start = __ pc(); - __ emit_data(0x00010203, relocInfo::none, 0); - __ emit_data(0x04050607, relocInfo::none, 0); - __ 
emit_data(0x08090A0B, relocInfo::none, 0); - __ emit_data(0x0C0D0E0F, relocInfo::none, 0); - __ emit_data(0x00010203, relocInfo::none, 0); - __ emit_data(0x04050607, relocInfo::none, 0); - __ emit_data(0x08090A0B, relocInfo::none, 0); - __ emit_data(0x0C0D0E0F, relocInfo::none, 0); - __ emit_data(0x00010203, relocInfo::none, 0); - __ emit_data(0x04050607, relocInfo::none, 0); - __ emit_data(0x08090A0B, relocInfo::none, 0); - __ emit_data(0x0C0D0E0F, relocInfo::none, 0); - __ emit_data(0x00010203, relocInfo::none, 0); - __ emit_data(0x04050607, relocInfo::none, 0); - __ emit_data(0x08090A0B, relocInfo::none, 0); - __ emit_data(0x0C0D0E0F, relocInfo::none, 0); - return start; - } - - address generate_vector_reverse_byte_perm_mask_short(const char *stub_name) { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", stub_name); - address start = __ pc(); - __ emit_data(0x02030001, relocInfo::none, 0); - __ emit_data(0x06070405, relocInfo::none, 0); - __ emit_data(0x0A0B0809, relocInfo::none, 0); - __ emit_data(0x0E0F0C0D, relocInfo::none, 0); - __ emit_data(0x02030001, relocInfo::none, 0); - __ emit_data(0x06070405, relocInfo::none, 0); - __ emit_data(0x0A0B0809, relocInfo::none, 0); - __ emit_data(0x0E0F0C0D, relocInfo::none, 0); - __ emit_data(0x02030001, relocInfo::none, 0); - __ emit_data(0x06070405, relocInfo::none, 0); - __ emit_data(0x0A0B0809, relocInfo::none, 0); - __ emit_data(0x0E0F0C0D, relocInfo::none, 0); - __ emit_data(0x02030001, relocInfo::none, 0); - __ emit_data(0x06070405, relocInfo::none, 0); - __ emit_data(0x0A0B0809, relocInfo::none, 0); - __ emit_data(0x0E0F0C0D, relocInfo::none, 0); - return start; - } - - address generate_vector_byte_shuffle_mask(const char *stub_name) { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", stub_name); - address start = __ pc(); - __ emit_data(0x70707070, relocInfo::none, 0); - __ emit_data(0x70707070, relocInfo::none, 0); - __ emit_data(0x70707070, relocInfo::none, 0); - 
__ emit_data(0x70707070, relocInfo::none, 0); - __ emit_data(0xF0F0F0F0, relocInfo::none, 0); - __ emit_data(0xF0F0F0F0, relocInfo::none, 0); - __ emit_data(0xF0F0F0F0, relocInfo::none, 0); - __ emit_data(0xF0F0F0F0, relocInfo::none, 0); - return start; - } - - address generate_vector_mask_long_double(const char *stub_name, int32_t maskhi, int32_t masklo) { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", stub_name); - address start = __ pc(); - - for (int i = 0; i < 8; i++) { - __ emit_data(masklo, relocInfo::none, 0); - __ emit_data(maskhi, relocInfo::none, 0); - } - - return start; - } - - //---------------------------------------------------------------------------------------------------- - - address generate_vector_byte_perm_mask(const char *stub_name) { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", stub_name); - address start = __ pc(); - - __ emit_data(0x00000001, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000003, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000005, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000007, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000002, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000004, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000006, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - - return start; - } - - address generate_vector_custom_i32(const char *stub_name, Assembler::AvxVectorLen len, - int32_t val0, int32_t val1, int32_t val2, int32_t val3, - int32_t val4 = 0, int32_t val5 = 0, int32_t val6 = 0, int32_t val7 = 0, - int32_t val8 = 0, int32_t val9 = 0, int32_t val10 = 0, int32_t val11 = 0, 
- int32_t val12 = 0, int32_t val13 = 0, int32_t val14 = 0, int32_t val15 = 0) { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", stub_name); - address start = __ pc(); - - assert(len != Assembler::AVX_NoVec, "vector len must be specified"); - __ emit_data(val0, relocInfo::none, 0); - __ emit_data(val1, relocInfo::none, 0); - __ emit_data(val2, relocInfo::none, 0); - __ emit_data(val3, relocInfo::none, 0); - if (len >= Assembler::AVX_256bit) { - __ emit_data(val4, relocInfo::none, 0); - __ emit_data(val5, relocInfo::none, 0); - __ emit_data(val6, relocInfo::none, 0); - __ emit_data(val7, relocInfo::none, 0); - if (len >= Assembler::AVX_512bit) { - __ emit_data(val8, relocInfo::none, 0); - __ emit_data(val9, relocInfo::none, 0); - __ emit_data(val10, relocInfo::none, 0); - __ emit_data(val11, relocInfo::none, 0); - __ emit_data(val12, relocInfo::none, 0); - __ emit_data(val13, relocInfo::none, 0); - __ emit_data(val14, relocInfo::none, 0); - __ emit_data(val15, relocInfo::none, 0); - } - } - - return start; - } - - //---------------------------------------------------------------------------------------------------- - // Non-destructive plausibility checks for oops - - address generate_verify_oop() { - StubCodeMark mark(this, "StubRoutines", "verify_oop"); - address start = __ pc(); - - // Incoming arguments on stack after saving rax,: - // - // [tos ]: saved rdx - // [tos + 1]: saved EFLAGS - // [tos + 2]: return address - // [tos + 3]: char* error message - // [tos + 4]: oop object to verify - // [tos + 5]: saved rax, - saved by caller and bashed - - Label exit, error; - __ pushf(); - __ incrementl(ExternalAddress((address) StubRoutines::verify_oop_count_addr())); - __ push(rdx); // save rdx - // make sure object is 'reasonable' - __ movptr(rax, Address(rsp, 4 * wordSize)); // get object - __ testptr(rax, rax); - __ jcc(Assembler::zero, exit); // if obj is null it is ok - - // Check if the oop is in the right area of memory - const int 
oop_mask = Universe::verify_oop_mask(); - const int oop_bits = Universe::verify_oop_bits(); - __ mov(rdx, rax); - __ andptr(rdx, oop_mask); - __ cmpptr(rdx, oop_bits); - __ jcc(Assembler::notZero, error); - - // make sure klass is 'reasonable', which is not zero. - __ movptr(rax, Address(rax, oopDesc::klass_offset_in_bytes())); // get klass - __ testptr(rax, rax); - __ jcc(Assembler::zero, error); // if klass is null it is broken - - // return if everything seems ok - __ bind(exit); - __ movptr(rax, Address(rsp, 5 * wordSize)); // get saved rax, back - __ pop(rdx); // restore rdx - __ popf(); // restore EFLAGS - __ ret(3 * wordSize); // pop arguments - - // handle errors - __ bind(error); - __ movptr(rax, Address(rsp, 5 * wordSize)); // get saved rax, back - __ pop(rdx); // get saved rdx back - __ popf(); // get saved EFLAGS off stack -- will be ignored - __ pusha(); // push registers (eip = return address & msg are already pushed) - BLOCK_COMMENT("call MacroAssembler::debug"); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32))); - __ hlt(); - return start; - } - - - // Copy 64 bytes chunks - // - // Inputs: - // from - source array address - // to_from - destination array address - from - // qword_count - 8-bytes element count, negative - // - void xmm_copy_forward(Register from, Register to_from, Register qword_count) { - assert( UseSSE >= 2, "supported cpu only" ); - Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit; - - // Copy 64-byte chunks - __ jmpb(L_copy_64_bytes); - __ align(OptoLoopAlignment); - __ BIND(L_copy_64_bytes_loop); - - if (UseUnalignedLoadStores) { - if (UseAVX > 2) { - __ evmovdqul(xmm0, Address(from, 0), Assembler::AVX_512bit); - __ evmovdqul(Address(from, to_from, Address::times_1, 0), xmm0, Assembler::AVX_512bit); - } else if (UseAVX == 2) { - __ vmovdqu(xmm0, Address(from, 0)); - __ vmovdqu(Address(from, to_from, Address::times_1, 0), xmm0); - __ vmovdqu(xmm1, Address(from, 32)); - __ 
vmovdqu(Address(from, to_from, Address::times_1, 32), xmm1); - } else { - __ movdqu(xmm0, Address(from, 0)); - __ movdqu(Address(from, to_from, Address::times_1, 0), xmm0); - __ movdqu(xmm1, Address(from, 16)); - __ movdqu(Address(from, to_from, Address::times_1, 16), xmm1); - __ movdqu(xmm2, Address(from, 32)); - __ movdqu(Address(from, to_from, Address::times_1, 32), xmm2); - __ movdqu(xmm3, Address(from, 48)); - __ movdqu(Address(from, to_from, Address::times_1, 48), xmm3); - } - } else { - __ movq(xmm0, Address(from, 0)); - __ movq(Address(from, to_from, Address::times_1, 0), xmm0); - __ movq(xmm1, Address(from, 8)); - __ movq(Address(from, to_from, Address::times_1, 8), xmm1); - __ movq(xmm2, Address(from, 16)); - __ movq(Address(from, to_from, Address::times_1, 16), xmm2); - __ movq(xmm3, Address(from, 24)); - __ movq(Address(from, to_from, Address::times_1, 24), xmm3); - __ movq(xmm4, Address(from, 32)); - __ movq(Address(from, to_from, Address::times_1, 32), xmm4); - __ movq(xmm5, Address(from, 40)); - __ movq(Address(from, to_from, Address::times_1, 40), xmm5); - __ movq(xmm6, Address(from, 48)); - __ movq(Address(from, to_from, Address::times_1, 48), xmm6); - __ movq(xmm7, Address(from, 56)); - __ movq(Address(from, to_from, Address::times_1, 56), xmm7); - } - - __ addl(from, 64); - __ BIND(L_copy_64_bytes); - __ subl(qword_count, 8); - __ jcc(Assembler::greaterEqual, L_copy_64_bytes_loop); - - if (UseUnalignedLoadStores && (UseAVX == 2)) { - // clean upper bits of YMM registers - __ vpxor(xmm0, xmm0); - __ vpxor(xmm1, xmm1); - } - __ addl(qword_count, 8); - __ jccb(Assembler::zero, L_exit); - // - // length is too short, just copy qwords - // - __ BIND(L_copy_8_bytes); - __ movq(xmm0, Address(from, 0)); - __ movq(Address(from, to_from, Address::times_1), xmm0); - __ addl(from, 8); - __ decrement(qword_count); - __ jcc(Assembler::greater, L_copy_8_bytes); - __ BIND(L_exit); - } - - address generate_disjoint_copy(BasicType t, bool aligned, - 
Address::ScaleFactor sf, - address* entry, const char *name, - bool dest_uninitialized = false) { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", name); - address start = __ pc(); - - Label L_0_count, L_exit, L_skip_align1, L_skip_align2, L_copy_byte; - Label L_copy_2_bytes, L_copy_4_bytes, L_copy_64_bytes; - - int shift = Address::times_ptr - sf; - - const Register from = rsi; // source array address - const Register to = rdi; // destination array address - const Register count = rcx; // elements count - const Register to_from = to; // (to - from) - const Register saved_to = rdx; // saved destination array address - - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ push(rsi); - __ push(rdi); - __ movptr(from , Address(rsp, 12+ 4)); - __ movptr(to , Address(rsp, 12+ 8)); - __ movl(count, Address(rsp, 12+ 12)); - - if (entry != nullptr) { - *entry = __ pc(); // Entry point from conjoint arraycopy stub. - BLOCK_COMMENT("Entry:"); - } - - if (t == T_OBJECT) { - __ testl(count, count); - __ jcc(Assembler::zero, L_0_count); - } - - DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; - if (dest_uninitialized) { - decorators |= IS_DEST_UNINITIALIZED; - } - if (aligned) { - decorators |= ARRAYCOPY_ALIGNED; - } - - BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); - bs->arraycopy_prologue(_masm, decorators, t, from, to, count); - { - bool add_entry = (t != T_OBJECT && (!aligned || t == T_INT)); - // UnsafeMemoryAccess page error: continue after unsafe access - UnsafeMemoryAccessMark umam(this, add_entry, true); - __ subptr(to, from); // to --> to_from - __ cmpl(count, 2<arraycopy_epilogue(_masm, decorators, t, from, to, count); - - if (t == T_OBJECT) { - __ BIND(L_0_count); - } - inc_copy_counter_np(t); - __ pop(rdi); - __ pop(rsi); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ vzeroupper(); - __ xorptr(rax, rax); // return 0 - __ ret(0); - return 
start; - } - - - address generate_fill(BasicType t, bool aligned, const char *name) { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", name); - address start = __ pc(); - - BLOCK_COMMENT("Entry:"); - - const Register to = rdi; // source array address - const Register value = rdx; // value - const Register count = rsi; // elements count - - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ push(rsi); - __ push(rdi); - __ movptr(to , Address(rsp, 12+ 4)); - __ movl(value, Address(rsp, 12+ 8)); - __ movl(count, Address(rsp, 12+ 12)); - - __ generate_fill(t, aligned, to, value, count, rax, xmm0); - - __ pop(rdi); - __ pop(rsi); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - return start; - } - - address generate_conjoint_copy(BasicType t, bool aligned, - Address::ScaleFactor sf, - address nooverlap_target, - address* entry, const char *name, - bool dest_uninitialized = false) { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", name); - address start = __ pc(); - - Label L_0_count, L_exit, L_skip_align1, L_skip_align2, L_copy_byte; - Label L_copy_2_bytes, L_copy_4_bytes, L_copy_8_bytes, L_copy_8_bytes_loop; - - int shift = Address::times_ptr - sf; - - const Register src = rax; // source array address - const Register dst = rdx; // destination array address - const Register from = rsi; // source array address - const Register to = rdi; // destination array address - const Register count = rcx; // elements count - const Register end = rax; // array end address - - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ push(rsi); - __ push(rdi); - __ movptr(src , Address(rsp, 12+ 4)); // from - __ movptr(dst , Address(rsp, 12+ 8)); // to - __ movl2ptr(count, Address(rsp, 12+12)); // count - - if (entry != nullptr) { - *entry = __ pc(); // Entry point from generic arraycopy stub. 
- BLOCK_COMMENT("Entry:"); - } - - // nooverlap_target expects arguments in rsi and rdi. - __ mov(from, src); - __ mov(to , dst); - - // arrays overlap test: dispatch to disjoint stub if necessary. - RuntimeAddress nooverlap(nooverlap_target); - __ cmpptr(dst, src); - __ lea(end, Address(src, count, sf, 0)); // src + count * elem_size - __ jump_cc(Assembler::belowEqual, nooverlap); - __ cmpptr(dst, end); - __ jump_cc(Assembler::aboveEqual, nooverlap); - - if (t == T_OBJECT) { - __ testl(count, count); - __ jcc(Assembler::zero, L_0_count); - } - - DecoratorSet decorators = IN_HEAP | IS_ARRAY; - if (dest_uninitialized) { - decorators |= IS_DEST_UNINITIALIZED; - } - if (aligned) { - decorators |= ARRAYCOPY_ALIGNED; - } - - BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); - bs->arraycopy_prologue(_masm, decorators, t, from, to, count); - - { - bool add_entry = (t != T_OBJECT && (!aligned || t == T_INT)); - // UnsafeMemoryAccess page error: continue after unsafe access - UnsafeMemoryAccessMark umam(this, add_entry, true); - // copy from high to low - __ cmpl(count, 2<arraycopy_epilogue(_masm, decorators, t, from, to, count); - - if (t == T_OBJECT) { - __ BIND(L_0_count); - } - inc_copy_counter_np(t); - __ pop(rdi); - __ pop(rsi); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ xorptr(rax, rax); // return 0 - __ ret(0); - return start; - } - - - address generate_disjoint_long_copy(address* entry, const char *name) { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", name); - address start = __ pc(); - - Label L_copy_8_bytes, L_copy_8_bytes_loop; - const Register from = rax; // source array address - const Register to = rdx; // destination array address - const Register count = rcx; // elements count - const Register to_from = rdx; // (to - from) - - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ movptr(from , Address(rsp, 8+0)); // from - __ movptr(to , Address(rsp, 
8+4)); // to - __ movl2ptr(count, Address(rsp, 8+8)); // count - - *entry = __ pc(); // Entry point from conjoint arraycopy stub. - BLOCK_COMMENT("Entry:"); - - { - // UnsafeMemoryAccess page error: continue after unsafe access - UnsafeMemoryAccessMark umam(this, true, true); - __ subptr(to, from); // to --> to_from - if (UseXMMForArrayCopy) { - xmm_copy_forward(from, to_from, count); - } else { - __ jmpb(L_copy_8_bytes); - __ align(OptoLoopAlignment); - __ BIND(L_copy_8_bytes_loop); - __ fild_d(Address(from, 0)); - __ fistp_d(Address(from, to_from, Address::times_1)); - __ addptr(from, 8); - __ BIND(L_copy_8_bytes); - __ decrement(count); - __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop); - } - } - inc_copy_counter_np(T_LONG); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ vzeroupper(); - __ xorptr(rax, rax); // return 0 - __ ret(0); - return start; - } - - address generate_conjoint_long_copy(address nooverlap_target, - address* entry, const char *name) { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", name); - address start = __ pc(); - - Label L_copy_8_bytes, L_copy_8_bytes_loop; - const Register from = rax; // source array address - const Register to = rdx; // destination array address - const Register count = rcx; // elements count - const Register end_from = rax; // source array end address - - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ movptr(from , Address(rsp, 8+0)); // from - __ movptr(to , Address(rsp, 8+4)); // to - __ movl2ptr(count, Address(rsp, 8+8)); // count - - *entry = __ pc(); // Entry point from generic arraycopy stub. 
- BLOCK_COMMENT("Entry:"); - - // arrays overlap test - __ cmpptr(to, from); - RuntimeAddress nooverlap(nooverlap_target); - __ jump_cc(Assembler::belowEqual, nooverlap); - __ lea(end_from, Address(from, count, Address::times_8, 0)); - __ cmpptr(to, end_from); - __ movptr(from, Address(rsp, 8)); // from - __ jump_cc(Assembler::aboveEqual, nooverlap); - - { - // UnsafeMemoryAccess page error: continue after unsafe access - UnsafeMemoryAccessMark umam(this, true, true); - - __ jmpb(L_copy_8_bytes); - - __ align(OptoLoopAlignment); - __ BIND(L_copy_8_bytes_loop); - if (UseXMMForArrayCopy) { - __ movq(xmm0, Address(from, count, Address::times_8)); - __ movq(Address(to, count, Address::times_8), xmm0); - } else { - __ fild_d(Address(from, count, Address::times_8)); - __ fistp_d(Address(to, count, Address::times_8)); - } - __ BIND(L_copy_8_bytes); - __ decrement(count); - __ jcc(Assembler::greaterEqual, L_copy_8_bytes_loop); - - } - inc_copy_counter_np(T_LONG); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ xorptr(rax, rax); // return 0 - __ ret(0); - return start; - } - - - // Helper for generating a dynamic type check. - // The sub_klass must be one of {rbx, rdx, rsi}. - // The temp is killed. - void generate_type_check(Register sub_klass, - Address& super_check_offset_addr, - Address& super_klass_addr, - Register temp, - Label* L_success, Label* L_failure) { - BLOCK_COMMENT("type_check:"); - - Label L_fallthrough; -#define LOCAL_JCC(assembler_con, label_ptr) \ - if (label_ptr != nullptr) __ jcc(assembler_con, *(label_ptr)); \ - else __ jcc(assembler_con, L_fallthrough) /*omit semi*/ - - // The following is a strange variation of the fast path which requires - // one less register, because needed values are on the argument stack. 
- // __ check_klass_subtype_fast_path(sub_klass, *super_klass*, temp, - // L_success, L_failure, null); - assert_different_registers(sub_klass, temp); - - int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); - - // if the pointers are equal, we are done (e.g., String[] elements) - __ cmpptr(sub_klass, super_klass_addr); - LOCAL_JCC(Assembler::equal, L_success); - - // check the supertype display: - __ movl2ptr(temp, super_check_offset_addr); - Address super_check_addr(sub_klass, temp, Address::times_1, 0); - __ movptr(temp, super_check_addr); // load displayed supertype - __ cmpptr(temp, super_klass_addr); // test the super type - LOCAL_JCC(Assembler::equal, L_success); - - // if it was a primary super, we can just fail immediately - __ cmpl(super_check_offset_addr, sc_offset); - LOCAL_JCC(Assembler::notEqual, L_failure); - - // The repne_scan instruction uses fixed registers, which will get spilled. - // We happen to know this works best when super_klass is in rax. - Register super_klass = temp; - __ movptr(super_klass, super_klass_addr); - __ check_klass_subtype_slow_path(sub_klass, super_klass, noreg, noreg, - L_success, L_failure); - - __ bind(L_fallthrough); - - if (L_success == nullptr) { BLOCK_COMMENT("L_success:"); } - if (L_failure == nullptr) { BLOCK_COMMENT("L_failure:"); } - -#undef LOCAL_JCC - } - - // - // Generate checkcasting array copy stub - // - // Input: - // 4(rsp) - source array address - // 8(rsp) - destination array address - // 12(rsp) - element count, can be zero - // 16(rsp) - size_t ckoff (super_check_offset) - // 20(rsp) - oop ckval (super_klass) - // - // Output: - // rax, == 0 - success - // rax, == -1^K - failure, where K is partial transfer count - // - address generate_checkcast_copy(const char *name, address* entry, bool dest_uninitialized = false) { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", name); - address start = __ pc(); - - Label L_load_element, L_store_element, L_do_card_marks, 
L_done; - - // register use: - // rax, rdx, rcx -- loop control (end_from, end_to, count) - // rdi, rsi -- element access (oop, klass) - // rbx, -- temp - const Register from = rax; // source array address - const Register to = rdx; // destination array address - const Register length = rcx; // elements count - const Register elem = rdi; // each oop copied - const Register elem_klass = rsi; // each elem._klass (sub_klass) - const Register temp = rbx; // lone remaining temp - - __ enter(); // required for proper stackwalking of RuntimeStub frame - - __ push(rsi); - __ push(rdi); - __ push(rbx); - - Address from_arg(rsp, 16+ 4); // from - Address to_arg(rsp, 16+ 8); // to - Address length_arg(rsp, 16+12); // elements count - Address ckoff_arg(rsp, 16+16); // super_check_offset - Address ckval_arg(rsp, 16+20); // super_klass - - // Load up: - __ movptr(from, from_arg); - __ movptr(to, to_arg); - __ movl2ptr(length, length_arg); - - if (entry != nullptr) { - *entry = __ pc(); // Entry point from generic arraycopy stub. - BLOCK_COMMENT("Entry:"); - } - - //--------------------------------------------------------------- - // Assembler stub will be used for this call to arraycopy - // if the two arrays are subtypes of Object[] but the - // destination array type is not equal to or a supertype - // of the source type. Each element must be separately - // checked. - - // Loop-invariant addresses. They are exclusive end pointers. - Address end_from_addr(from, length, Address::times_ptr, 0); - Address end_to_addr(to, length, Address::times_ptr, 0); - - Register end_from = from; // re-use - Register end_to = to; // re-use - Register count = length; // re-use - - // Loop-variant addresses. They assume post-incremented count < 0. 
- Address from_element_addr(end_from, count, Address::times_ptr, 0); - Address to_element_addr(end_to, count, Address::times_ptr, 0); - Address elem_klass_addr(elem, oopDesc::klass_offset_in_bytes()); - - DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST; - if (dest_uninitialized) { - decorators |= IS_DEST_UNINITIALIZED; - } - - BasicType type = T_OBJECT; - BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); - bs->arraycopy_prologue(_masm, decorators, type, from, to, count); - - // Copy from low to high addresses, indexed from the end of each array. - __ lea(end_from, end_from_addr); - __ lea(end_to, end_to_addr); - assert(length == count, ""); // else fix next line: - __ negptr(count); // negate and test the length - __ jccb(Assembler::notZero, L_load_element); - - // Empty array: Nothing to do. - __ xorptr(rax, rax); // return 0 on (trivial) success - __ jmp(L_done); - - // ======== begin loop ======== - // (Loop is rotated; its entry is L_load_element.) - // Loop control: - // for (count = -count; count != 0; count++) - // Base pointers src, dst are biased by 8*count,to last element. - __ align(OptoLoopAlignment); - - __ BIND(L_store_element); - __ movptr(to_element_addr, elem); // store the oop - __ increment(count); // increment the count toward zero - __ jccb(Assembler::zero, L_do_card_marks); - - // ======== loop entry is here ======== - __ BIND(L_load_element); - __ movptr(elem, from_element_addr); // load the oop - __ testptr(elem, elem); - __ jccb(Assembler::zero, L_store_element); - - // (Could do a trick here: Remember last successful non-null - // element stored and make a quick oop equality check on it.) - - __ movptr(elem_klass, elem_klass_addr); // query the object klass - generate_type_check(elem_klass, ckoff_arg, ckval_arg, temp, - &L_store_element, nullptr); - // (On fall-through, we have failed the element type check.) 
- // ======== end loop ======== - - // It was a real error; we must depend on the caller to finish the job. - // Register "count" = -1 * number of *remaining* oops, length_arg = *total* oops. - // Emit GC store barriers for the oops we have copied (length_arg + count), - // and report their number to the caller. - assert_different_registers(to, count, rax); - Label L_post_barrier; - __ addl(count, length_arg); // transfers = (length - remaining) - __ movl2ptr(rax, count); // save the value - __ notptr(rax); // report (-1^K) to caller (does not affect flags) - __ jccb(Assembler::notZero, L_post_barrier); - __ jmp(L_done); // K == 0, nothing was copied, skip post barrier - - // Come here on success only. - __ BIND(L_do_card_marks); - __ xorptr(rax, rax); // return 0 on success - __ movl2ptr(count, length_arg); - - __ BIND(L_post_barrier); - __ movptr(to, to_arg); // reload - bs->arraycopy_epilogue(_masm, decorators, type, from, to, count); - - // Common exit point (success or failure). - __ BIND(L_done); - __ pop(rbx); - __ pop(rdi); - __ pop(rsi); - inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - return start; - } - - // - // Generate 'unsafe' array copy stub - // Though just as safe as the other stubs, it takes an unscaled - // size_t argument instead of an element count. - // - // Input: - // 4(rsp) - source array address - // 8(rsp) - destination array address - // 12(rsp) - byte count, can be zero - // - // Output: - // rax, == 0 - success - // rax, == -1 - need to call System.arraycopy - // - // Examines the alignment of the operands and dispatches - // to a long, int, short, or byte copy loop. 
- // - address generate_unsafe_copy(const char *name, - address byte_copy_entry, - address short_copy_entry, - address int_copy_entry, - address long_copy_entry) { - - Label L_long_aligned, L_int_aligned, L_short_aligned; - - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", name); - address start = __ pc(); - - const Register from = rax; // source array address - const Register to = rdx; // destination array address - const Register count = rcx; // elements count - - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ push(rsi); - __ push(rdi); - Address from_arg(rsp, 12+ 4); // from - Address to_arg(rsp, 12+ 8); // to - Address count_arg(rsp, 12+12); // byte count - - // Load up: - __ movptr(from , from_arg); - __ movptr(to , to_arg); - __ movl2ptr(count, count_arg); - - // bump this on entry, not on exit: - inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr); - - const Register bits = rsi; - __ mov(bits, from); - __ orptr(bits, to); - __ orptr(bits, count); - - __ testl(bits, BytesPerLong-1); - __ jccb(Assembler::zero, L_long_aligned); - - __ testl(bits, BytesPerInt-1); - __ jccb(Assembler::zero, L_int_aligned); - - __ testl(bits, BytesPerShort-1); - __ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry)); - - __ BIND(L_short_aligned); - __ shrptr(count, LogBytesPerShort); // size => short_count - __ movl(count_arg, count); // update 'count' - __ jump(RuntimeAddress(short_copy_entry)); - - __ BIND(L_int_aligned); - __ shrptr(count, LogBytesPerInt); // size => int_count - __ movl(count_arg, count); // update 'count' - __ jump(RuntimeAddress(int_copy_entry)); - - __ BIND(L_long_aligned); - __ shrptr(count, LogBytesPerLong); // size => qword_count - __ movl(count_arg, count); // update 'count' - __ pop(rdi); // Do pops here since jlong_arraycopy stub does not do it. - __ pop(rsi); - __ jump(RuntimeAddress(long_copy_entry)); - - return start; - } - - - // Perform range checks on the proposed arraycopy. 
- // Smashes src_pos and dst_pos. (Uses them up for temps.) - void arraycopy_range_checks(Register src, - Register src_pos, - Register dst, - Register dst_pos, - Address& length, - Label& L_failed) { - BLOCK_COMMENT("arraycopy_range_checks:"); - const Register src_end = src_pos; // source array end position - const Register dst_end = dst_pos; // destination array end position - __ addl(src_end, length); // src_pos + length - __ addl(dst_end, length); // dst_pos + length - - // if (src_pos + length > arrayOop(src)->length() ) FAIL; - __ cmpl(src_end, Address(src, arrayOopDesc::length_offset_in_bytes())); - __ jcc(Assembler::above, L_failed); - - // if (dst_pos + length > arrayOop(dst)->length() ) FAIL; - __ cmpl(dst_end, Address(dst, arrayOopDesc::length_offset_in_bytes())); - __ jcc(Assembler::above, L_failed); - - BLOCK_COMMENT("arraycopy_range_checks done"); - } - - - // - // Generate generic array copy stubs - // - // Input: - // 4(rsp) - src oop - // 8(rsp) - src_pos - // 12(rsp) - dst oop - // 16(rsp) - dst_pos - // 20(rsp) - element count - // - // Output: - // rax, == 0 - success - // rax, == -1^K - failure, where K is partial transfer count - // - address generate_generic_copy(const char *name, - address entry_jbyte_arraycopy, - address entry_jshort_arraycopy, - address entry_jint_arraycopy, - address entry_oop_arraycopy, - address entry_jlong_arraycopy, - address entry_checkcast_arraycopy) { - Label L_failed, L_failed_0, L_objArray; - - { int modulus = CodeEntryAlignment; - int target = modulus - 5; // 5 = sizeof jmp(L_failed) - int advance = target - (__ offset() % modulus); - if (advance < 0) advance += modulus; - if (advance > 0) __ nop(advance); - } - StubCodeMark mark(this, "StubRoutines", name); - - // Short-hop target to L_failed. Makes for denser prologue code. 
- __ BIND(L_failed_0); - __ jmp(L_failed); - assert(__ offset() % CodeEntryAlignment == 0, "no further alignment needed"); - - __ align(CodeEntryAlignment); - address start = __ pc(); - - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ push(rsi); - __ push(rdi); - - // bump this on entry, not on exit: - inc_counter_np(SharedRuntime::_generic_array_copy_ctr); - - // Input values - Address SRC (rsp, 12+ 4); - Address SRC_POS (rsp, 12+ 8); - Address DST (rsp, 12+12); - Address DST_POS (rsp, 12+16); - Address LENGTH (rsp, 12+20); - - //----------------------------------------------------------------------- - // Assembler stub will be used for this call to arraycopy - // if the following conditions are met: - // - // (1) src and dst must not be null. - // (2) src_pos must not be negative. - // (3) dst_pos must not be negative. - // (4) length must not be negative. - // (5) src klass and dst klass should be the same and not null. - // (6) src and dst should be arrays. - // (7) src_pos + length must not exceed length of src. - // (8) dst_pos + length must not exceed length of dst. 
- // - - const Register src = rax; // source array oop - const Register src_pos = rsi; - const Register dst = rdx; // destination array oop - const Register dst_pos = rdi; - const Register length = rcx; // transfer count - - // if (src == null) return -1; - __ movptr(src, SRC); // src oop - __ testptr(src, src); - __ jccb(Assembler::zero, L_failed_0); - - // if (src_pos < 0) return -1; - __ movl2ptr(src_pos, SRC_POS); // src_pos - __ testl(src_pos, src_pos); - __ jccb(Assembler::negative, L_failed_0); - - // if (dst == nullptr) return -1; - __ movptr(dst, DST); // dst oop - __ testptr(dst, dst); - __ jccb(Assembler::zero, L_failed_0); - - // if (dst_pos < 0) return -1; - __ movl2ptr(dst_pos, DST_POS); // dst_pos - __ testl(dst_pos, dst_pos); - __ jccb(Assembler::negative, L_failed_0); - - // if (length < 0) return -1; - __ movl2ptr(length, LENGTH); // length - __ testl(length, length); - __ jccb(Assembler::negative, L_failed_0); - - // if (src->klass() == nullptr) return -1; - Address src_klass_addr(src, oopDesc::klass_offset_in_bytes()); - Address dst_klass_addr(dst, oopDesc::klass_offset_in_bytes()); - const Register rcx_src_klass = rcx; // array klass - __ movptr(rcx_src_klass, Address(src, oopDesc::klass_offset_in_bytes())); - -#ifdef ASSERT - // assert(src->klass() != nullptr); - BLOCK_COMMENT("assert klasses not null"); - { Label L1, L2; - __ testptr(rcx_src_klass, rcx_src_klass); - __ jccb(Assembler::notZero, L2); // it is broken if klass is null - __ bind(L1); - __ stop("broken null klass"); - __ bind(L2); - __ cmpptr(dst_klass_addr, NULL_WORD); - __ jccb(Assembler::equal, L1); // this would be broken also - BLOCK_COMMENT("assert done"); - } -#endif //ASSERT - - // Load layout helper (32-bits) - // - // |array_tag| | header_size | element_type | |log2_element_size| - // 32 30 24 16 8 2 0 - // - // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 - // - - int lh_offset = in_bytes(Klass::layout_helper_offset()); - Address 
src_klass_lh_addr(rcx_src_klass, lh_offset); - - // Handle objArrays completely differently... - jint objArray_lh = Klass::array_layout_helper(T_OBJECT); - __ cmpl(src_klass_lh_addr, objArray_lh); - __ jcc(Assembler::equal, L_objArray); - - // if (src->klass() != dst->klass()) return -1; - __ cmpptr(rcx_src_klass, dst_klass_addr); - __ jccb(Assembler::notEqual, L_failed_0); - - const Register rcx_lh = rcx; // layout helper - assert(rcx_lh == rcx_src_klass, "known alias"); - __ movl(rcx_lh, src_klass_lh_addr); - - // if (!src->is_Array()) return -1; - __ cmpl(rcx_lh, Klass::_lh_neutral_value); - __ jcc(Assembler::greaterEqual, L_failed_0); // signed cmp - - // At this point, it is known to be a typeArray (array_tag 0x3). -#ifdef ASSERT - { Label L; - __ cmpl(rcx_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift)); - __ jcc(Assembler::greaterEqual, L); // signed cmp - __ stop("must be a primitive array"); - __ bind(L); - } -#endif - - assert_different_registers(src, src_pos, dst, dst_pos, rcx_lh); - arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed); - - // TypeArrayKlass - // - // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize); - // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize); - // - const Register rsi_offset = rsi; // array offset - const Register src_array = src; // src array offset - const Register dst_array = dst; // dst array offset - const Register rdi_elsize = rdi; // log2 element size - - __ mov(rsi_offset, rcx_lh); - __ shrptr(rsi_offset, Klass::_lh_header_size_shift); - __ andptr(rsi_offset, Klass::_lh_header_size_mask); // array_offset - __ addptr(src_array, rsi_offset); // src array offset - __ addptr(dst_array, rsi_offset); // dst array offset - __ andptr(rcx_lh, Klass::_lh_log2_element_size_mask); // log2 elsize - - // next registers should be set before the jump to corresponding stub - const Register from = src; // source array address - const Register to = dst; // 
destination array address - const Register count = rcx; // elements count - // some of them should be duplicated on stack -#define FROM Address(rsp, 12+ 4) -#define TO Address(rsp, 12+ 8) // Not used now -#define COUNT Address(rsp, 12+12) // Only for oop arraycopy - - BLOCK_COMMENT("scale indexes to element size"); - __ movl2ptr(rsi, SRC_POS); // src_pos - __ shlptr(rsi); // src_pos << rcx (log2 elsize) - assert(src_array == from, ""); - __ addptr(from, rsi); // from = src_array + SRC_POS << log2 elsize - __ movl2ptr(rdi, DST_POS); // dst_pos - __ shlptr(rdi); // dst_pos << rcx (log2 elsize) - assert(dst_array == to, ""); - __ addptr(to, rdi); // to = dst_array + DST_POS << log2 elsize - __ movptr(FROM, from); // src_addr - __ mov(rdi_elsize, rcx_lh); // log2 elsize - __ movl2ptr(count, LENGTH); // elements count - - BLOCK_COMMENT("choose copy loop based on element size"); - __ cmpl(rdi_elsize, 0); - - __ jump_cc(Assembler::equal, RuntimeAddress(entry_jbyte_arraycopy)); - __ cmpl(rdi_elsize, LogBytesPerShort); - __ jump_cc(Assembler::equal, RuntimeAddress(entry_jshort_arraycopy)); - __ cmpl(rdi_elsize, LogBytesPerInt); - __ jump_cc(Assembler::equal, RuntimeAddress(entry_jint_arraycopy)); -#ifdef ASSERT - __ cmpl(rdi_elsize, LogBytesPerLong); - __ jccb(Assembler::notEqual, L_failed); -#endif - __ pop(rdi); // Do pops here since jlong_arraycopy stub does not do it. 
- __ pop(rsi); - __ jump(RuntimeAddress(entry_jlong_arraycopy)); - - __ BIND(L_failed); - __ xorptr(rax, rax); - __ notptr(rax); // return -1 - __ pop(rdi); - __ pop(rsi); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - // ObjArrayKlass - __ BIND(L_objArray); - // live at this point: rcx_src_klass, src[_pos], dst[_pos] - - Label L_plain_copy, L_checkcast_copy; - // test array classes for subtyping - __ cmpptr(rcx_src_klass, dst_klass_addr); // usual case is exact equality - __ jccb(Assembler::notEqual, L_checkcast_copy); - - // Identically typed arrays can be copied without element-wise checks. - assert_different_registers(src, src_pos, dst, dst_pos, rcx_src_klass); - arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed); - - __ BIND(L_plain_copy); - __ movl2ptr(count, LENGTH); // elements count - __ movl2ptr(src_pos, SRC_POS); // reload src_pos - __ lea(from, Address(src, src_pos, Address::times_ptr, - arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr - __ movl2ptr(dst_pos, DST_POS); // reload dst_pos - __ lea(to, Address(dst, dst_pos, Address::times_ptr, - arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr - __ movptr(FROM, from); // src_addr - __ movptr(TO, to); // dst_addr - __ movl(COUNT, count); // count - __ jump(RuntimeAddress(entry_oop_arraycopy)); - - __ BIND(L_checkcast_copy); - // live at this point: rcx_src_klass, dst[_pos], src[_pos] - { - // Handy offsets: - int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); - int sco_offset = in_bytes(Klass::super_check_offset_offset()); - - Register rsi_dst_klass = rsi; - Register rdi_temp = rdi; - assert(rsi_dst_klass == src_pos, "expected alias w/ src_pos"); - assert(rdi_temp == dst_pos, "expected alias w/ dst_pos"); - Address dst_klass_lh_addr(rsi_dst_klass, lh_offset); - - // Before looking at dst.length, make sure dst is also an objArray. 
- __ movptr(rsi_dst_klass, dst_klass_addr); - __ cmpl(dst_klass_lh_addr, objArray_lh); - __ jccb(Assembler::notEqual, L_failed); - - // It is safe to examine both src.length and dst.length. - __ movl2ptr(src_pos, SRC_POS); // reload rsi - arraycopy_range_checks(src, src_pos, dst, dst_pos, LENGTH, L_failed); - // (Now src_pos and dst_pos are killed, but not src and dst.) - - // We'll need this temp (don't forget to pop it after the type check). - __ push(rbx); - Register rbx_src_klass = rbx; - - __ mov(rbx_src_klass, rcx_src_klass); // spill away from rcx - __ movptr(rsi_dst_klass, dst_klass_addr); - Address super_check_offset_addr(rsi_dst_klass, sco_offset); - Label L_fail_array_check; - generate_type_check(rbx_src_klass, - super_check_offset_addr, dst_klass_addr, - rdi_temp, nullptr, &L_fail_array_check); - // (On fall-through, we have passed the array type check.) - __ pop(rbx); - __ jmp(L_plain_copy); - - __ BIND(L_fail_array_check); - // Reshuffle arguments so we can call checkcast_arraycopy: - - // match initial saves for checkcast_arraycopy - // push(rsi); // already done; see above - // push(rdi); // already done; see above - // push(rbx); // already done; see above - - // Marshal outgoing arguments now, freeing registers. - Address from_arg(rsp, 16+ 4); // from - Address to_arg(rsp, 16+ 8); // to - Address length_arg(rsp, 16+12); // elements count - Address ckoff_arg(rsp, 16+16); // super_check_offset - Address ckval_arg(rsp, 16+20); // super_klass - - Address SRC_POS_arg(rsp, 16+ 8); - Address DST_POS_arg(rsp, 16+16); - Address LENGTH_arg(rsp, 16+20); - // push rbx, changed the incoming offsets (why not just use rbp,??) 
- // assert(SRC_POS_arg.disp() == SRC_POS.disp() + 4, ""); - - __ movptr(rbx, Address(rsi_dst_klass, ek_offset)); - __ movl2ptr(length, LENGTH_arg); // reload elements count - __ movl2ptr(src_pos, SRC_POS_arg); // reload src_pos - __ movl2ptr(dst_pos, DST_POS_arg); // reload dst_pos - - __ movptr(ckval_arg, rbx); // destination element type - __ movl(rbx, Address(rbx, sco_offset)); - __ movl(ckoff_arg, rbx); // corresponding class check offset - - __ movl(length_arg, length); // outgoing length argument - - __ lea(from, Address(src, src_pos, Address::times_ptr, - arrayOopDesc::base_offset_in_bytes(T_OBJECT))); - __ movptr(from_arg, from); - - __ lea(to, Address(dst, dst_pos, Address::times_ptr, - arrayOopDesc::base_offset_in_bytes(T_OBJECT))); - __ movptr(to_arg, to); - __ jump(RuntimeAddress(entry_checkcast_arraycopy)); - } - - return start; - } - - void generate_arraycopy_stubs() { - address entry; - address entry_jbyte_arraycopy; - address entry_jshort_arraycopy; - address entry_jint_arraycopy; - address entry_oop_arraycopy; - address entry_jlong_arraycopy; - address entry_checkcast_arraycopy; - - StubRoutines::_arrayof_jbyte_disjoint_arraycopy = - generate_disjoint_copy(T_BYTE, true, Address::times_1, &entry, - "arrayof_jbyte_disjoint_arraycopy"); - StubRoutines::_arrayof_jbyte_arraycopy = - generate_conjoint_copy(T_BYTE, true, Address::times_1, entry, - nullptr, "arrayof_jbyte_arraycopy"); - StubRoutines::_jbyte_disjoint_arraycopy = - generate_disjoint_copy(T_BYTE, false, Address::times_1, &entry, - "jbyte_disjoint_arraycopy"); - StubRoutines::_jbyte_arraycopy = - generate_conjoint_copy(T_BYTE, false, Address::times_1, entry, - &entry_jbyte_arraycopy, "jbyte_arraycopy"); - - StubRoutines::_arrayof_jshort_disjoint_arraycopy = - generate_disjoint_copy(T_SHORT, true, Address::times_2, &entry, - "arrayof_jshort_disjoint_arraycopy"); - StubRoutines::_arrayof_jshort_arraycopy = - generate_conjoint_copy(T_SHORT, true, Address::times_2, entry, - nullptr, 
"arrayof_jshort_arraycopy"); - StubRoutines::_jshort_disjoint_arraycopy = - generate_disjoint_copy(T_SHORT, false, Address::times_2, &entry, - "jshort_disjoint_arraycopy"); - StubRoutines::_jshort_arraycopy = - generate_conjoint_copy(T_SHORT, false, Address::times_2, entry, - &entry_jshort_arraycopy, "jshort_arraycopy"); - - // Next arrays are always aligned on 4 bytes at least. - StubRoutines::_jint_disjoint_arraycopy = - generate_disjoint_copy(T_INT, true, Address::times_4, &entry, - "jint_disjoint_arraycopy"); - StubRoutines::_jint_arraycopy = - generate_conjoint_copy(T_INT, true, Address::times_4, entry, - &entry_jint_arraycopy, "jint_arraycopy"); - - StubRoutines::_oop_disjoint_arraycopy = - generate_disjoint_copy(T_OBJECT, true, Address::times_ptr, &entry, - "oop_disjoint_arraycopy"); - StubRoutines::_oop_arraycopy = - generate_conjoint_copy(T_OBJECT, true, Address::times_ptr, entry, - &entry_oop_arraycopy, "oop_arraycopy"); - - StubRoutines::_oop_disjoint_arraycopy_uninit = - generate_disjoint_copy(T_OBJECT, true, Address::times_ptr, &entry, - "oop_disjoint_arraycopy_uninit", - /*dest_uninitialized*/true); - StubRoutines::_oop_arraycopy_uninit = - generate_conjoint_copy(T_OBJECT, true, Address::times_ptr, entry, - nullptr, "oop_arraycopy_uninit", - /*dest_uninitialized*/true); - - StubRoutines::_jlong_disjoint_arraycopy = - generate_disjoint_long_copy(&entry, "jlong_disjoint_arraycopy"); - StubRoutines::_jlong_arraycopy = - generate_conjoint_long_copy(entry, &entry_jlong_arraycopy, - "jlong_arraycopy"); - - StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); - StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); - StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); - StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); - StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); - StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, 
true, "arrayof_jint_fill"); - - StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; - StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; - StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; - StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; - - StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; - StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; - StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; - StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; - - StubRoutines::_checkcast_arraycopy = - generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy); - StubRoutines::_checkcast_arraycopy_uninit = - generate_checkcast_copy("checkcast_arraycopy_uninit", nullptr, /*dest_uninitialized*/true); - - StubRoutines::_unsafe_arraycopy = - generate_unsafe_copy("unsafe_arraycopy", - entry_jbyte_arraycopy, - entry_jshort_arraycopy, - entry_jint_arraycopy, - entry_jlong_arraycopy); - - StubRoutines::_generic_arraycopy = - generate_generic_copy("generic_arraycopy", - entry_jbyte_arraycopy, - entry_jshort_arraycopy, - entry_jint_arraycopy, - entry_oop_arraycopy, - entry_jlong_arraycopy, - entry_checkcast_arraycopy); - } - - // AES intrinsic stubs - enum {AESBlockSize = 16}; - - address key_shuffle_mask_addr() { - return (address)KEY_SHUFFLE_MASK; - } - - address counter_shuffle_mask_addr() { - return (address)COUNTER_SHUFFLE_MASK; - } - - // Utility routine for loading a 128-bit key word in little endian format - // can optionally specify that the shuffle mask is already in an xmmregister - void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask = xnoreg) { - __ movdqu(xmmdst, Address(key, offset)); - if (xmm_shuf_mask != xnoreg) { - __ pshufb(xmmdst, xmm_shuf_mask); - } else { - __ 
pshufb(xmmdst, ExternalAddress(key_shuffle_mask_addr())); - } - } - - // aesenc using specified key+offset - // can optionally specify that the shuffle mask is already in an xmmregister - void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask = xnoreg) { - load_key(xmmtmp, key, offset, xmm_shuf_mask); - __ aesenc(xmmdst, xmmtmp); - } - - // aesdec using specified key+offset - // can optionally specify that the shuffle mask is already in an xmmregister - void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask = xnoreg) { - load_key(xmmtmp, key, offset, xmm_shuf_mask); - __ aesdec(xmmdst, xmmtmp); - } - - // Utility routine for increase 128bit counter (iv in CTR mode) - // XMM_128bit, D3, D2, D1, D0 - void inc_counter(Register reg, XMMRegister xmmdst, int inc_delta, Label& next_block) { - __ pextrd(reg, xmmdst, 0x0); - __ addl(reg, inc_delta); - __ pinsrd(xmmdst, reg, 0x0); - __ jcc(Assembler::carryClear, next_block); // jump if no carry - - __ pextrd(reg, xmmdst, 0x01); // Carry-> D1 - __ addl(reg, 0x01); - __ pinsrd(xmmdst, reg, 0x01); - __ jcc(Assembler::carryClear, next_block); // jump if no carry - - __ pextrd(reg, xmmdst, 0x02); // Carry-> D2 - __ addl(reg, 0x01); - __ pinsrd(xmmdst, reg, 0x02); - __ jcc(Assembler::carryClear, next_block); // jump if no carry - - __ pextrd(reg, xmmdst, 0x03); // Carry -> D3 - __ addl(reg, 0x01); - __ pinsrd(xmmdst, reg, 0x03); - - __ BIND(next_block); // next instruction - } - - - // Arguments: - // - // Inputs: - // c_rarg0 - source byte array address - // c_rarg1 - destination byte array address - // c_rarg2 - K (key) in little endian int array - // - address generate_aescrypt_encryptBlock() { - assert(UseAES, "need AES instructions and misaligned SSE support"); - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); - Label L_doLast; - address start = __ pc(); - - const 
Register from = rdx; // source array address - const Register to = rdx; // destination array address - const Register key = rcx; // key array address - const Register keylen = rax; - const Address from_param(rbp, 8+0); - const Address to_param (rbp, 8+4); - const Address key_param (rbp, 8+8); - - const XMMRegister xmm_result = xmm0; - const XMMRegister xmm_key_shuf_mask = xmm1; - const XMMRegister xmm_temp1 = xmm2; - const XMMRegister xmm_temp2 = xmm3; - const XMMRegister xmm_temp3 = xmm4; - const XMMRegister xmm_temp4 = xmm5; - - __ enter(); // required for proper stackwalking of RuntimeStub frame - - __ movptr(from, from_param); - __ movptr(key, key_param); - - // keylen could be only {11, 13, 15} * 4 = {44, 52, 60} - __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - - __ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr())); - __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input - __ movptr(to, to_param); - - // For encryption, the java expanded key ordering is just what we need - - load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask); - __ pxor(xmm_result, xmm_temp1); - - load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); - load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); - load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); - load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); - - __ aesenc(xmm_result, xmm_temp1); - __ aesenc(xmm_result, xmm_temp2); - __ aesenc(xmm_result, xmm_temp3); - __ aesenc(xmm_result, xmm_temp4); - - load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); - load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); - load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); - load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); - - __ aesenc(xmm_result, xmm_temp1); - __ aesenc(xmm_result, xmm_temp2); - __ aesenc(xmm_result, xmm_temp3); - __ aesenc(xmm_result, xmm_temp4); - - load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); - load_key(xmm_temp2, key, 0xa0, 
xmm_key_shuf_mask); - - __ cmpl(keylen, 44); - __ jccb(Assembler::equal, L_doLast); - - __ aesenc(xmm_result, xmm_temp1); - __ aesenc(xmm_result, xmm_temp2); - - load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); - load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); - - __ cmpl(keylen, 52); - __ jccb(Assembler::equal, L_doLast); - - __ aesenc(xmm_result, xmm_temp1); - __ aesenc(xmm_result, xmm_temp2); - - load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); - load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); - - __ BIND(L_doLast); - __ aesenc(xmm_result, xmm_temp1); - __ aesenclast(xmm_result, xmm_temp2); - __ movdqu(Address(to, 0), xmm_result); // store the result - __ xorptr(rax, rax); // return 0 - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - return start; - } - - - // Arguments: - // - // Inputs: - // c_rarg0 - source byte array address - // c_rarg1 - destination byte array address - // c_rarg2 - K (key) in little endian int array - // - address generate_aescrypt_decryptBlock() { - assert(UseAES, "need AES instructions and misaligned SSE support"); - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock"); - Label L_doLast; - address start = __ pc(); - - const Register from = rdx; // source array address - const Register to = rdx; // destination array address - const Register key = rcx; // key array address - const Register keylen = rax; - const Address from_param(rbp, 8+0); - const Address to_param (rbp, 8+4); - const Address key_param (rbp, 8+8); - - const XMMRegister xmm_result = xmm0; - const XMMRegister xmm_key_shuf_mask = xmm1; - const XMMRegister xmm_temp1 = xmm2; - const XMMRegister xmm_temp2 = xmm3; - const XMMRegister xmm_temp3 = xmm4; - const XMMRegister xmm_temp4 = xmm5; - - __ enter(); // required for proper stackwalking of RuntimeStub frame - - __ movptr(from, from_param); - __ movptr(key, key_param); - - // keylen could be only {11, 13, 15} * 4 = {44, 52, 60} - __ 
movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - - __ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr())); - __ movdqu(xmm_result, Address(from, 0)); - __ movptr(to, to_param); - - // for decryption java expanded key ordering is rotated one position from what we want - // so we start from 0x10 here and hit 0x00 last - // we don't know if the key is aligned, hence not using load-execute form - load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); - load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); - load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); - load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); - - __ pxor (xmm_result, xmm_temp1); - __ aesdec(xmm_result, xmm_temp2); - __ aesdec(xmm_result, xmm_temp3); - __ aesdec(xmm_result, xmm_temp4); - - load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); - load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); - load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); - load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); - - __ aesdec(xmm_result, xmm_temp1); - __ aesdec(xmm_result, xmm_temp2); - __ aesdec(xmm_result, xmm_temp3); - __ aesdec(xmm_result, xmm_temp4); - - load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); - load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); - load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask); - - __ cmpl(keylen, 44); - __ jccb(Assembler::equal, L_doLast); - - __ aesdec(xmm_result, xmm_temp1); - __ aesdec(xmm_result, xmm_temp2); - - load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); - load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); - - __ cmpl(keylen, 52); - __ jccb(Assembler::equal, L_doLast); - - __ aesdec(xmm_result, xmm_temp1); - __ aesdec(xmm_result, xmm_temp2); - - load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); - load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); - - __ BIND(L_doLast); - __ aesdec(xmm_result, xmm_temp1); - __ aesdec(xmm_result, xmm_temp2); - - // for decryption the aesdeclast operation is always on 
key+0x00 - __ aesdeclast(xmm_result, xmm_temp3); - __ movdqu(Address(to, 0), xmm_result); // store the result - __ xorptr(rax, rax); // return 0 - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - return start; - } - - void handleSOERegisters(bool saving) { - const int saveFrameSizeInBytes = 4 * wordSize; - const Address saved_rbx (rbp, -3 * wordSize); - const Address saved_rsi (rbp, -2 * wordSize); - const Address saved_rdi (rbp, -1 * wordSize); - - if (saving) { - __ subptr(rsp, saveFrameSizeInBytes); - __ movptr(saved_rsi, rsi); - __ movptr(saved_rdi, rdi); - __ movptr(saved_rbx, rbx); - } else { - // restoring - __ movptr(rsi, saved_rsi); - __ movptr(rdi, saved_rdi); - __ movptr(rbx, saved_rbx); - } - } - - // Arguments: - // - // Inputs: - // c_rarg0 - source byte array address - // c_rarg1 - destination byte array address - // c_rarg2 - K (key) in little endian int array - // c_rarg3 - r vector byte array address - // c_rarg4 - input length - // - // Output: - // rax - input length - // - address generate_cipherBlockChaining_encryptAESCrypt() { - assert(UseAES, "need AES instructions and misaligned SSE support"); - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); - address start = __ pc(); - - Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256; - const Register from = rsi; // source array address - const Register to = rdx; // destination array address - const Register key = rcx; // key array address - const Register rvec = rdi; // r byte array initialized from initvector array address - // and left with the results of the last encryption block - const Register len_reg = rbx; // src len (must be multiple of blocksize 16) - const Register pos = rax; - - // xmm register assignments for the loops below - const XMMRegister xmm_result = xmm0; - const XMMRegister xmm_temp = xmm1; - // first 6 keys preloaded into xmm2-xmm7 - const int 
XMM_REG_NUM_KEY_FIRST = 2; - const int XMM_REG_NUM_KEY_LAST = 7; - const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST); - - __ enter(); // required for proper stackwalking of RuntimeStub frame - handleSOERegisters(true /*saving*/); - - // load registers from incoming parameters - const Address from_param(rbp, 8+0); - const Address to_param (rbp, 8+4); - const Address key_param (rbp, 8+8); - const Address rvec_param (rbp, 8+12); - const Address len_param (rbp, 8+16); - __ movptr(from , from_param); - __ movptr(to , to_param); - __ movptr(key , key_param); - __ movptr(rvec , rvec_param); - __ movptr(len_reg , len_param); - - const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front - __ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr())); - // load up xmm regs 2 thru 7 with keys 0-5 - for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { - load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask); - offset += 0x10; - } - - __ movdqu(xmm_result, Address(rvec, 0x00)); // initialize xmm_result with r vec - - // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256)) - __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - __ cmpl(rax, 44); - __ jcc(Assembler::notEqual, L_key_192_256); - - // 128 bit code follows here - __ movl(pos, 0); - __ align(OptoLoopAlignment); - __ BIND(L_loopTop_128); - __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input - __ pxor (xmm_result, xmm_temp); // xor with the current r vector - - __ pxor (xmm_result, xmm_key0); // do the aes rounds - for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { - __ aesenc(xmm_result, as_XMMRegister(rnum)); - } - for (int key_offset = 0x60; key_offset <= 0x90; key_offset += 0x10) { - aes_enc_key(xmm_result, xmm_temp, key, 
key_offset); - } - load_key(xmm_temp, key, 0xa0); - __ aesenclast(xmm_result, xmm_temp); - - __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output - // no need to store r to memory until we exit - __ addptr(pos, AESBlockSize); - __ subptr(len_reg, AESBlockSize); - __ jcc(Assembler::notEqual, L_loopTop_128); - - __ BIND(L_exit); - __ movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object - - handleSOERegisters(false /*restoring*/); - __ movptr(rax, len_param); // return length - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - __ BIND(L_key_192_256); - // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) - __ cmpl(rax, 52); - __ jcc(Assembler::notEqual, L_key_256); - - // 192-bit code follows here (could be changed to use more xmm registers) - __ movl(pos, 0); - __ align(OptoLoopAlignment); - __ BIND(L_loopTop_192); - __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input - __ pxor (xmm_result, xmm_temp); // xor with the current r vector - - __ pxor (xmm_result, xmm_key0); // do the aes rounds - for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { - __ aesenc(xmm_result, as_XMMRegister(rnum)); - } - for (int key_offset = 0x60; key_offset <= 0xb0; key_offset += 0x10) { - aes_enc_key(xmm_result, xmm_temp, key, key_offset); - } - load_key(xmm_temp, key, 0xc0); - __ aesenclast(xmm_result, xmm_temp); - - __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output - // no need to store r to memory until we exit - __ addptr(pos, AESBlockSize); - __ subptr(len_reg, AESBlockSize); - __ jcc(Assembler::notEqual, L_loopTop_192); - __ jmp(L_exit); - - __ BIND(L_key_256); - // 256-bit code follows here (could be changed to use more xmm registers) - __ movl(pos, 0); - __ align(OptoLoopAlignment); - __ BIND(L_loopTop_256); - 
__ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input - __ pxor (xmm_result, xmm_temp); // xor with the current r vector - - __ pxor (xmm_result, xmm_key0); // do the aes rounds - for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) { - __ aesenc(xmm_result, as_XMMRegister(rnum)); - } - for (int key_offset = 0x60; key_offset <= 0xd0; key_offset += 0x10) { - aes_enc_key(xmm_result, xmm_temp, key, key_offset); - } - load_key(xmm_temp, key, 0xe0); - __ aesenclast(xmm_result, xmm_temp); - - __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output - // no need to store r to memory until we exit - __ addptr(pos, AESBlockSize); - __ subptr(len_reg, AESBlockSize); - __ jcc(Assembler::notEqual, L_loopTop_256); - __ jmp(L_exit); - - return start; - } - - - // CBC AES Decryption. - // In 32-bit stub, because of lack of registers we do not try to parallelize 4 blocks at a time. - // - // Arguments: - // - // Inputs: - // c_rarg0 - source byte array address - // c_rarg1 - destination byte array address - // c_rarg2 - K (key) in little endian int array - // c_rarg3 - r vector byte array address - // c_rarg4 - input length - // - // Output: - // rax - input length - // - - address generate_cipherBlockChaining_decryptAESCrypt_Parallel() { - assert(UseAES, "need AES instructions and misaligned SSE support"); - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); - address start = __ pc(); - - const Register from = rsi; // source array address - const Register to = rdx; // destination array address - const Register key = rcx; // key array address - const Register rvec = rdi; // r byte array initialized from initvector array address - // and left with the results of the last encryption block - const Register len_reg = rbx; // src len (must be multiple of blocksize 16) - const Register pos = rax; - - const int 
PARALLEL_FACTOR = 4; - const int ROUNDS[3] = { 10, 12, 14 }; //aes rounds for key128, key192, key256 - - Label L_exit; - Label L_singleBlock_loopTop[3]; //128, 192, 256 - Label L_multiBlock_loopTop[3]; //128, 192, 256 - - const XMMRegister xmm_prev_block_cipher = xmm0; // holds cipher of previous block - const XMMRegister xmm_key_shuf_mask = xmm1; - - const XMMRegister xmm_key_tmp0 = xmm2; - const XMMRegister xmm_key_tmp1 = xmm3; - - // registers holding the six results in the parallelized loop - const XMMRegister xmm_result0 = xmm4; - const XMMRegister xmm_result1 = xmm5; - const XMMRegister xmm_result2 = xmm6; - const XMMRegister xmm_result3 = xmm7; - - __ enter(); // required for proper stackwalking of RuntimeStub frame - handleSOERegisters(true /*saving*/); - - // load registers from incoming parameters - const Address from_param(rbp, 8+0); - const Address to_param (rbp, 8+4); - const Address key_param (rbp, 8+8); - const Address rvec_param (rbp, 8+12); - const Address len_param (rbp, 8+16); - - __ movptr(from , from_param); - __ movptr(to , to_param); - __ movptr(key , key_param); - __ movptr(rvec , rvec_param); - __ movptr(len_reg , len_param); - - __ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr())); - __ movdqu(xmm_prev_block_cipher, Address(rvec, 0x00)); // initialize with initial rvec - - __ xorptr(pos, pos); - - // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256)) - // rvec is reused - __ movl(rvec, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - __ cmpl(rvec, 52); - __ jcc(Assembler::equal, L_multiBlock_loopTop[1]); - __ cmpl(rvec, 60); - __ jcc(Assembler::equal, L_multiBlock_loopTop[2]); - -#define DoFour(opc, src_reg) \ - __ opc(xmm_result0, src_reg); \ - __ opc(xmm_result1, src_reg); \ - __ opc(xmm_result2, src_reg); \ - __ opc(xmm_result3, src_reg); \ - - for (int k = 0; k < 3; ++k) { - __ align(OptoLoopAlignment); - __ 
BIND(L_multiBlock_loopTop[k]); - __ cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least 4 blocks left - __ jcc(Assembler::less, L_singleBlock_loopTop[k]); - - __ movdqu(xmm_result0, Address(from, pos, Address::times_1, 0 * AESBlockSize)); // get next 4 blocks into xmmresult registers - __ movdqu(xmm_result1, Address(from, pos, Address::times_1, 1 * AESBlockSize)); - __ movdqu(xmm_result2, Address(from, pos, Address::times_1, 2 * AESBlockSize)); - __ movdqu(xmm_result3, Address(from, pos, Address::times_1, 3 * AESBlockSize)); - - // the java expanded key ordering is rotated one position from what we want - // so we start from 0x10 here and hit 0x00 last - load_key(xmm_key_tmp0, key, 0x10, xmm_key_shuf_mask); - DoFour(pxor, xmm_key_tmp0); //xor with first key - // do the aes dec rounds - for (int rnum = 1; rnum <= ROUNDS[k];) { - //load two keys at a time - //k1->0x20, ..., k9->0xa0, k10->0x00 - load_key(xmm_key_tmp1, key, (rnum + 1) * 0x10, xmm_key_shuf_mask); - load_key(xmm_key_tmp0, key, ((rnum + 2) % (ROUNDS[k] + 1)) * 0x10, xmm_key_shuf_mask); // hit 0x00 last! 
- DoFour(aesdec, xmm_key_tmp1); - rnum++; - if (rnum != ROUNDS[k]) { - DoFour(aesdec, xmm_key_tmp0); - } - else { - DoFour(aesdeclast, xmm_key_tmp0); - } - rnum++; - } - - // for each result, xor with the r vector of previous cipher block - __ pxor(xmm_result0, xmm_prev_block_cipher); - __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 0 * AESBlockSize)); - __ pxor(xmm_result1, xmm_prev_block_cipher); - __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 1 * AESBlockSize)); - __ pxor(xmm_result2, xmm_prev_block_cipher); - __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 2 * AESBlockSize)); - __ pxor(xmm_result3, xmm_prev_block_cipher); - __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 3 * AESBlockSize)); // this will carry over to next set of blocks - - // store 4 results into the next 64 bytes of output - __ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0); - __ movdqu(Address(to, pos, Address::times_1, 1 * AESBlockSize), xmm_result1); - __ movdqu(Address(to, pos, Address::times_1, 2 * AESBlockSize), xmm_result2); - __ movdqu(Address(to, pos, Address::times_1, 3 * AESBlockSize), xmm_result3); - - __ addptr(pos, 4 * AESBlockSize); - __ subptr(len_reg, 4 * AESBlockSize); - __ jmp(L_multiBlock_loopTop[k]); - - //singleBlock starts here - __ align(OptoLoopAlignment); - __ BIND(L_singleBlock_loopTop[k]); - __ cmpptr(len_reg, 0); // any blocks left? 
- __ jcc(Assembler::equal, L_exit); - __ movdqu(xmm_result0, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input - __ movdqa(xmm_result1, xmm_result0); - - load_key(xmm_key_tmp0, key, 0x10, xmm_key_shuf_mask); - __ pxor(xmm_result0, xmm_key_tmp0); - // do the aes dec rounds - for (int rnum = 1; rnum < ROUNDS[k]; rnum++) { - // the java expanded key ordering is rotated one position from what we want - load_key(xmm_key_tmp0, key, (rnum + 1) * 0x10, xmm_key_shuf_mask); - __ aesdec(xmm_result0, xmm_key_tmp0); - } - load_key(xmm_key_tmp0, key, 0x00, xmm_key_shuf_mask); - __ aesdeclast(xmm_result0, xmm_key_tmp0); - __ pxor(xmm_result0, xmm_prev_block_cipher); // xor with the current r vector - __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result0); // store into the next 16 bytes of output - // no need to store r to memory until we exit - __ movdqa(xmm_prev_block_cipher, xmm_result1); // set up next r vector with cipher input from this block - - __ addptr(pos, AESBlockSize); - __ subptr(len_reg, AESBlockSize); - __ jmp(L_singleBlock_loopTop[k]); - }//for 128/192/256 - - __ BIND(L_exit); - __ movptr(rvec, rvec_param); // restore this since reused earlier - __ movdqu(Address(rvec, 0), xmm_prev_block_cipher); // final value of r stored in rvec of CipherBlockChaining object - handleSOERegisters(false /*restoring*/); - __ movptr(rax, len_param); // return length - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - return start; - } - - // CTR AES crypt. 
- // In 32-bit stub, parallelize 4 blocks at a time - // Arguments: - // - // Inputs: - // c_rarg0 - source byte array address - // c_rarg1 - destination byte array address - // c_rarg2 - K (key) in little endian int array - // c_rarg3 - counter vector byte array address - // c_rarg4 - input length - // - // Output: - // rax - input length - // - address generate_counterMode_AESCrypt_Parallel() { - assert(UseAES, "need AES instructions and misaligned SSE support"); - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", "counterMode_AESCrypt"); - address start = __ pc(); - const Register from = rsi; // source array address - const Register to = rdx; // destination array address - const Register key = rcx; // key array address - const Register counter = rdi; // counter byte array initialized from initvector array address - // and updated with the incremented counter in the end - const Register len_reg = rbx; - const Register pos = rax; - - __ enter(); // required for proper stackwalking of RuntimeStub frame - handleSOERegisters(true /*saving*/); // save rbx, rsi, rdi - - // load registers from incoming parameters - const Address from_param(rbp, 8+0); - const Address to_param (rbp, 8+4); - const Address key_param (rbp, 8+8); - const Address rvec_param (rbp, 8+12); - const Address len_param (rbp, 8+16); - const Address saved_counter_param(rbp, 8 + 20); - const Address used_addr_param(rbp, 8 + 24); - - __ movptr(from , from_param); - __ movptr(to , to_param); - __ movptr(len_reg , len_param); - - // Use the partially used encrpyted counter from last invocation - Label L_exit_preLoop, L_preLoop_start; - - // Use the registers 'counter' and 'key' here in this preloop - // to hold of last 2 params 'used' and 'saved_encCounter_start' - Register used = counter; - Register saved_encCounter_start = key; - Register used_addr = saved_encCounter_start; - - __ movptr(used_addr, used_addr_param); - __ movptr(used, Address(used_addr, 0)); - __ 
movptr(saved_encCounter_start, saved_counter_param); - - __ BIND(L_preLoop_start); - __ cmpptr(used, 16); - __ jcc(Assembler::aboveEqual, L_exit_preLoop); - __ cmpptr(len_reg, 0); - __ jcc(Assembler::lessEqual, L_exit_preLoop); - __ movb(rax, Address(saved_encCounter_start, used)); - __ xorb(rax, Address(from, 0)); - __ movb(Address(to, 0), rax); - __ addptr(from, 1); - __ addptr(to, 1); - __ addptr(used, 1); - __ subptr(len_reg, 1); - - __ jmp(L_preLoop_start); - - __ BIND(L_exit_preLoop); - __ movptr(used_addr, used_addr_param); - __ movptr(used_addr, used_addr_param); - __ movl(Address(used_addr, 0), used); - - // load the parameters 'key' and 'counter' - __ movptr(key, key_param); - __ movptr(counter, rvec_param); - - // xmm register assignments for the loops below - const XMMRegister xmm_curr_counter = xmm0; - const XMMRegister xmm_counter_shuf_mask = xmm1; // need to be reloaded - const XMMRegister xmm_key_shuf_mask = xmm2; // need to be reloaded - const XMMRegister xmm_key = xmm3; - const XMMRegister xmm_result0 = xmm4; - const XMMRegister xmm_result1 = xmm5; - const XMMRegister xmm_result2 = xmm6; - const XMMRegister xmm_result3 = xmm7; - const XMMRegister xmm_from0 = xmm1; //reuse XMM register - const XMMRegister xmm_from1 = xmm2; - const XMMRegister xmm_from2 = xmm3; - const XMMRegister xmm_from3 = xmm4; - - //for key_128, key_192, key_256 - const int rounds[3] = {10, 12, 14}; - Label L_singleBlockLoopTop[3]; - Label L_multiBlock_loopTop[3]; - Label L_key192_top, L_key256_top; - Label L_incCounter[3][4]; // 3: different key length, 4: 4 blocks at a time - Label L_incCounter_single[3]; //for single block, key128, key192, key256 - Label L_processTail_insr[3], L_processTail_4_insr[3], L_processTail_2_insr[3], L_processTail_1_insr[3], L_processTail_exit_insr[3]; - Label L_processTail_extr[3], L_processTail_4_extr[3], L_processTail_2_extr[3], L_processTail_1_extr[3], L_processTail_exit_extr[3]; - - Label L_exit; - const int PARALLEL_FACTOR = 4; //because of 
the limited register number - - // initialize counter with initial counter - __ movdqu(xmm_curr_counter, Address(counter, 0x00)); - __ movdqu(xmm_counter_shuf_mask, ExternalAddress(counter_shuffle_mask_addr())); - __ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled for increase - - // key length could be only {11, 13, 15} * 4 = {44, 52, 60} - __ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr())); - __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); - __ cmpl(rax, 52); - __ jcc(Assembler::equal, L_key192_top); - __ cmpl(rax, 60); - __ jcc(Assembler::equal, L_key256_top); - - //key128 begins here - __ movptr(pos, 0); // init pos before L_multiBlock_loopTop - -#define CTR_DoFour(opc, src_reg) \ - __ opc(xmm_result0, src_reg); \ - __ opc(xmm_result1, src_reg); \ - __ opc(xmm_result2, src_reg); \ - __ opc(xmm_result3, src_reg); - - // k == 0 : generate code for key_128 - // k == 1 : generate code for key_192 - // k == 2 : generate code for key_256 - for (int k = 0; k < 3; ++k) { - //multi blocks starts here - __ align(OptoLoopAlignment); - __ BIND(L_multiBlock_loopTop[k]); - __ cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least PARALLEL_FACTOR blocks left - __ jcc(Assembler::less, L_singleBlockLoopTop[k]); - - __ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr())); - __ movdqu(xmm_counter_shuf_mask, ExternalAddress(counter_shuffle_mask_addr())); - - //load, then increase counters - CTR_DoFour(movdqa, xmm_curr_counter); - __ push(rbx); - inc_counter(rbx, xmm_result1, 0x01, L_incCounter[k][0]); - inc_counter(rbx, xmm_result2, 0x02, L_incCounter[k][1]); - inc_counter(rbx, xmm_result3, 0x03, L_incCounter[k][2]); - inc_counter(rbx, xmm_curr_counter, 0x04, L_incCounter[k][3]); - __ pop (rbx); - - load_key(xmm_key, key, 0x00, xmm_key_shuf_mask); // load Round 0 key. 
interleaving for better performance - - CTR_DoFour(pshufb, xmm_counter_shuf_mask); // after increased, shuffled counters back for PXOR - CTR_DoFour(pxor, xmm_key); //PXOR with Round 0 key - - for (int i = 1; i < rounds[k]; ++i) { - load_key(xmm_key, key, (0x10 * i), xmm_key_shuf_mask); - CTR_DoFour(aesenc, xmm_key); - } - load_key(xmm_key, key, (0x10 * rounds[k]), xmm_key_shuf_mask); - CTR_DoFour(aesenclast, xmm_key); - - // get next PARALLEL_FACTOR blocks into xmm_from registers - __ movdqu(xmm_from0, Address(from, pos, Address::times_1, 0 * AESBlockSize)); - __ movdqu(xmm_from1, Address(from, pos, Address::times_1, 1 * AESBlockSize)); - __ movdqu(xmm_from2, Address(from, pos, Address::times_1, 2 * AESBlockSize)); - - // PXOR with input text - __ pxor(xmm_result0, xmm_from0); //result0 is xmm4 - __ pxor(xmm_result1, xmm_from1); - __ pxor(xmm_result2, xmm_from2); - - // store PARALLEL_FACTOR results into the next 64 bytes of output - __ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0); - __ movdqu(Address(to, pos, Address::times_1, 1 * AESBlockSize), xmm_result1); - __ movdqu(Address(to, pos, Address::times_1, 2 * AESBlockSize), xmm_result2); - - // do it here after xmm_result0 is saved, because xmm_from3 reuse the same register of xmm_result0. 
- __ movdqu(xmm_from3, Address(from, pos, Address::times_1, 3 * AESBlockSize)); - __ pxor(xmm_result3, xmm_from3); - __ movdqu(Address(to, pos, Address::times_1, 3 * AESBlockSize), xmm_result3); - - __ addptr(pos, PARALLEL_FACTOR * AESBlockSize); // increase the length of crypt text - __ subptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // decrease the remaining length - __ jmp(L_multiBlock_loopTop[k]); - - // singleBlock starts here - __ align(OptoLoopAlignment); - __ BIND(L_singleBlockLoopTop[k]); - __ cmpptr(len_reg, 0); - __ jcc(Assembler::equal, L_exit); - __ movdqu(xmm_key_shuf_mask, ExternalAddress(key_shuffle_mask_addr())); - __ movdqu(xmm_counter_shuf_mask, ExternalAddress(counter_shuffle_mask_addr())); - __ movdqa(xmm_result0, xmm_curr_counter); - load_key(xmm_key, key, 0x00, xmm_key_shuf_mask); - __ push(rbx);//rbx is used for increasing counter - inc_counter(rbx, xmm_curr_counter, 0x01, L_incCounter_single[k]); - __ pop (rbx); - __ pshufb(xmm_result0, xmm_counter_shuf_mask); - __ pxor(xmm_result0, xmm_key); - for (int i = 1; i < rounds[k]; i++) { - load_key(xmm_key, key, (0x10 * i), xmm_key_shuf_mask); - __ aesenc(xmm_result0, xmm_key); - } - load_key(xmm_key, key, (0x10 * rounds[k]), xmm_key_shuf_mask); - __ aesenclast(xmm_result0, xmm_key); - __ cmpptr(len_reg, AESBlockSize); - __ jcc(Assembler::less, L_processTail_insr[k]); - __ movdqu(xmm_from0, Address(from, pos, Address::times_1, 0 * AESBlockSize)); - __ pxor(xmm_result0, xmm_from0); - __ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0); - __ addptr(pos, AESBlockSize); - __ subptr(len_reg, AESBlockSize); - __ jmp(L_singleBlockLoopTop[k]); - - __ BIND(L_processTail_insr[k]); // Process the tail part of the input array - __ addptr(pos, len_reg); // 1. 
Insert bytes from src array into xmm_from0 register - __ testptr(len_reg, 8); - __ jcc(Assembler::zero, L_processTail_4_insr[k]); - __ subptr(pos,8); - __ pinsrd(xmm_from0, Address(from, pos), 0); - __ pinsrd(xmm_from0, Address(from, pos, Address::times_1, 4), 1); - __ BIND(L_processTail_4_insr[k]); - __ testptr(len_reg, 4); - __ jcc(Assembler::zero, L_processTail_2_insr[k]); - __ subptr(pos,4); - __ pslldq(xmm_from0, 4); - __ pinsrd(xmm_from0, Address(from, pos), 0); - __ BIND(L_processTail_2_insr[k]); - __ testptr(len_reg, 2); - __ jcc(Assembler::zero, L_processTail_1_insr[k]); - __ subptr(pos, 2); - __ pslldq(xmm_from0, 2); - __ pinsrw(xmm_from0, Address(from, pos), 0); - __ BIND(L_processTail_1_insr[k]); - __ testptr(len_reg, 1); - __ jcc(Assembler::zero, L_processTail_exit_insr[k]); - __ subptr(pos, 1); - __ pslldq(xmm_from0, 1); - __ pinsrb(xmm_from0, Address(from, pos), 0); - __ BIND(L_processTail_exit_insr[k]); - - __ movptr(saved_encCounter_start, saved_counter_param); - __ movdqu(Address(saved_encCounter_start, 0), xmm_result0); // 2. Perform pxor of the encrypted counter and plaintext Bytes. - __ pxor(xmm_result0, xmm_from0); // Also the encrypted counter is saved for next invocation. - - __ testptr(len_reg, 8); - __ jcc(Assembler::zero, L_processTail_4_extr[k]); // 3. Extract bytes from xmm_result0 into the dest. 
array - __ pextrd(Address(to, pos), xmm_result0, 0); - __ pextrd(Address(to, pos, Address::times_1, 4), xmm_result0, 1); - __ psrldq(xmm_result0, 8); - __ addptr(pos, 8); - __ BIND(L_processTail_4_extr[k]); - __ testptr(len_reg, 4); - __ jcc(Assembler::zero, L_processTail_2_extr[k]); - __ pextrd(Address(to, pos), xmm_result0, 0); - __ psrldq(xmm_result0, 4); - __ addptr(pos, 4); - __ BIND(L_processTail_2_extr[k]); - __ testptr(len_reg, 2); - __ jcc(Assembler::zero, L_processTail_1_extr[k]); - __ pextrb(Address(to, pos), xmm_result0, 0); - __ pextrb(Address(to, pos, Address::times_1, 1), xmm_result0, 1); - __ psrldq(xmm_result0, 2); - __ addptr(pos, 2); - __ BIND(L_processTail_1_extr[k]); - __ testptr(len_reg, 1); - __ jcc(Assembler::zero, L_processTail_exit_extr[k]); - __ pextrb(Address(to, pos), xmm_result0, 0); - - __ BIND(L_processTail_exit_extr[k]); - __ movptr(used_addr, used_addr_param); - __ movl(Address(used_addr, 0), len_reg); - __ jmp(L_exit); - } - - __ BIND(L_exit); - __ movdqu(xmm_counter_shuf_mask, ExternalAddress(counter_shuffle_mask_addr())); - __ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled back. - __ movdqu(Address(counter, 0), xmm_curr_counter); //save counter back - handleSOERegisters(false /*restoring*/); - __ movptr(rax, len_param); // return length - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - __ BIND (L_key192_top); - __ movptr(pos, 0); // init pos before L_multiBlock_loopTop - __ jmp(L_multiBlock_loopTop[1]); //key192 - - __ BIND (L_key256_top); - __ movptr(pos, 0); // init pos before L_multiBlock_loopTop - __ jmp(L_multiBlock_loopTop[2]); //key192 - - return start; - } - - // ofs and limit are use for multi-block byte array. 
- // int com.sun.security.provider.MD5.implCompress(byte[] b, int ofs) - address generate_md5_implCompress(bool multi_block, const char *name) { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", name); - address start = __ pc(); - - const Register buf_param = rbp; - const Address state_param(rsp, 0 * wordSize); - const Address ofs_param (rsp, 1 * wordSize); - const Address limit_param(rsp, 2 * wordSize); - - __ enter(); - __ push(rbx); - __ push(rdi); - __ push(rsi); - __ push(rbp); - __ subptr(rsp, 3 * wordSize); - - __ movptr(rsi, Address(rbp, 8 + 4)); - __ movptr(state_param, rsi); - if (multi_block) { - __ movptr(rsi, Address(rbp, 8 + 8)); - __ movptr(ofs_param, rsi); - __ movptr(rsi, Address(rbp, 8 + 12)); - __ movptr(limit_param, rsi); - } - __ movptr(buf_param, Address(rbp, 8 + 0)); // do it last because it override rbp - __ fast_md5(buf_param, state_param, ofs_param, limit_param, multi_block); - - __ addptr(rsp, 3 * wordSize); - __ pop(rbp); - __ pop(rsi); - __ pop(rdi); - __ pop(rbx); - __ leave(); - __ ret(0); - return start; - } - - address generate_upper_word_mask() { - __ align64(); - StubCodeMark mark(this, "StubRoutines", "upper_word_mask"); - address start = __ pc(); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0x00000000, relocInfo::none, 0); - __ emit_data(0xFFFFFFFF, relocInfo::none, 0); - return start; - } - - address generate_shuffle_byte_flip_mask() { - __ align64(); - StubCodeMark mark(this, "StubRoutines", "shuffle_byte_flip_mask"); - address start = __ pc(); - __ emit_data(0x0c0d0e0f, relocInfo::none, 0); - __ emit_data(0x08090a0b, relocInfo::none, 0); - __ emit_data(0x04050607, relocInfo::none, 0); - __ emit_data(0x00010203, relocInfo::none, 0); - return start; - } - - // ofs and limit are use for multi-block byte array. 
- // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit) - address generate_sha1_implCompress(bool multi_block, const char *name) { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", name); - address start = __ pc(); - - Register buf = rax; - Register state = rdx; - Register ofs = rcx; - Register limit = rdi; - - const Address buf_param(rbp, 8 + 0); - const Address state_param(rbp, 8 + 4); - const Address ofs_param(rbp, 8 + 8); - const Address limit_param(rbp, 8 + 12); - - const XMMRegister abcd = xmm0; - const XMMRegister e0 = xmm1; - const XMMRegister e1 = xmm2; - const XMMRegister msg0 = xmm3; - - const XMMRegister msg1 = xmm4; - const XMMRegister msg2 = xmm5; - const XMMRegister msg3 = xmm6; - const XMMRegister shuf_mask = xmm7; - - __ enter(); - __ subptr(rsp, 8 * wordSize); - handleSOERegisters(true /*saving*/); - - __ movptr(buf, buf_param); - __ movptr(state, state_param); - if (multi_block) { - __ movptr(ofs, ofs_param); - __ movptr(limit, limit_param); - } - - __ fast_sha1(abcd, e0, e1, msg0, msg1, msg2, msg3, shuf_mask, - buf, state, ofs, limit, rsp, multi_block); - - handleSOERegisters(false /*restoring*/); - __ addptr(rsp, 8 * wordSize); - __ leave(); - __ ret(0); - return start; - } - - address generate_pshuffle_byte_flip_mask() { - __ align64(); - StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask"); - address start = __ pc(); - __ emit_data(0x00010203, relocInfo::none, 0); - __ emit_data(0x04050607, relocInfo::none, 0); - __ emit_data(0x08090a0b, relocInfo::none, 0); - __ emit_data(0x0c0d0e0f, relocInfo::none, 0); - return start; - } - - // ofs and limit are use for multi-block byte array. 
- // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit) - address generate_sha256_implCompress(bool multi_block, const char *name) { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", name); - address start = __ pc(); - - Register buf = rbx; - Register state = rsi; - Register ofs = rdx; - Register limit = rcx; - - const Address buf_param(rbp, 8 + 0); - const Address state_param(rbp, 8 + 4); - const Address ofs_param(rbp, 8 + 8); - const Address limit_param(rbp, 8 + 12); - - const XMMRegister msg = xmm0; - const XMMRegister state0 = xmm1; - const XMMRegister state1 = xmm2; - const XMMRegister msgtmp0 = xmm3; - - const XMMRegister msgtmp1 = xmm4; - const XMMRegister msgtmp2 = xmm5; - const XMMRegister msgtmp3 = xmm6; - const XMMRegister msgtmp4 = xmm7; - - __ enter(); - __ subptr(rsp, 8 * wordSize); - handleSOERegisters(true /*saving*/); - __ movptr(buf, buf_param); - __ movptr(state, state_param); - if (multi_block) { - __ movptr(ofs, ofs_param); - __ movptr(limit, limit_param); - } - - __ fast_sha256(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4, - buf, state, ofs, limit, rsp, multi_block); - - handleSOERegisters(false); - __ addptr(rsp, 8 * wordSize); - __ leave(); - __ ret(0); - return start; - } - - // byte swap x86 long - address ghash_long_swap_mask_addr() { - return (address)GHASH_LONG_SWAP_MASK; - } - - // byte swap x86 byte array - address ghash_byte_swap_mask_addr() { - return (address)GHASH_BYTE_SWAP_MASK; - } - - /* Single and multi-block ghash operations */ - address generate_ghash_processBlocks() { - assert(UseGHASHIntrinsics, "need GHASH intrinsics and CLMUL support"); - __ align(CodeEntryAlignment); - Label L_ghash_loop, L_exit; - StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks"); - address start = __ pc(); - - const Register state = rdi; - const Register subkeyH = rsi; - const Register data = rdx; - const Register blocks = rcx; - - const Address 
state_param(rbp, 8+0); - const Address subkeyH_param(rbp, 8+4); - const Address data_param(rbp, 8+8); - const Address blocks_param(rbp, 8+12); - - const XMMRegister xmm_temp0 = xmm0; - const XMMRegister xmm_temp1 = xmm1; - const XMMRegister xmm_temp2 = xmm2; - const XMMRegister xmm_temp3 = xmm3; - const XMMRegister xmm_temp4 = xmm4; - const XMMRegister xmm_temp5 = xmm5; - const XMMRegister xmm_temp6 = xmm6; - const XMMRegister xmm_temp7 = xmm7; - - __ enter(); - handleSOERegisters(true); // Save registers - - __ movptr(state, state_param); - __ movptr(subkeyH, subkeyH_param); - __ movptr(data, data_param); - __ movptr(blocks, blocks_param); - - __ movdqu(xmm_temp0, Address(state, 0)); - __ pshufb(xmm_temp0, ExternalAddress(ghash_long_swap_mask_addr())); - - __ movdqu(xmm_temp1, Address(subkeyH, 0)); - __ pshufb(xmm_temp1, ExternalAddress(ghash_long_swap_mask_addr())); - - __ BIND(L_ghash_loop); - __ movdqu(xmm_temp2, Address(data, 0)); - __ pshufb(xmm_temp2, ExternalAddress(ghash_byte_swap_mask_addr())); - - __ pxor(xmm_temp0, xmm_temp2); - - // - // Multiply with the hash key - // - __ movdqu(xmm_temp3, xmm_temp0); - __ pclmulqdq(xmm_temp3, xmm_temp1, 0); // xmm3 holds a0*b0 - __ movdqu(xmm_temp4, xmm_temp0); - __ pclmulqdq(xmm_temp4, xmm_temp1, 16); // xmm4 holds a0*b1 - - __ movdqu(xmm_temp5, xmm_temp0); - __ pclmulqdq(xmm_temp5, xmm_temp1, 1); // xmm5 holds a1*b0 - __ movdqu(xmm_temp6, xmm_temp0); - __ pclmulqdq(xmm_temp6, xmm_temp1, 17); // xmm6 holds a1*b1 - - __ pxor(xmm_temp4, xmm_temp5); // xmm4 holds a0*b1 + a1*b0 - - __ movdqu(xmm_temp5, xmm_temp4); // move the contents of xmm4 to xmm5 - __ psrldq(xmm_temp4, 8); // shift by xmm4 64 bits to the right - __ pslldq(xmm_temp5, 8); // shift by xmm5 64 bits to the left - __ pxor(xmm_temp3, xmm_temp5); - __ pxor(xmm_temp6, xmm_temp4); // Register pair holds the result - // of the carry-less multiplication of - // xmm0 by xmm1. 
- - // We shift the result of the multiplication by one bit position - // to the left to cope for the fact that the bits are reversed. - __ movdqu(xmm_temp7, xmm_temp3); - __ movdqu(xmm_temp4, xmm_temp6); - __ pslld (xmm_temp3, 1); - __ pslld(xmm_temp6, 1); - __ psrld(xmm_temp7, 31); - __ psrld(xmm_temp4, 31); - __ movdqu(xmm_temp5, xmm_temp7); - __ pslldq(xmm_temp4, 4); - __ pslldq(xmm_temp7, 4); - __ psrldq(xmm_temp5, 12); - __ por(xmm_temp3, xmm_temp7); - __ por(xmm_temp6, xmm_temp4); - __ por(xmm_temp6, xmm_temp5); - - // - // First phase of the reduction - // - // Move xmm3 into xmm4, xmm5, xmm7 in order to perform the shifts - // independently. - __ movdqu(xmm_temp7, xmm_temp3); - __ movdqu(xmm_temp4, xmm_temp3); - __ movdqu(xmm_temp5, xmm_temp3); - __ pslld(xmm_temp7, 31); // packed right shift shifting << 31 - __ pslld(xmm_temp4, 30); // packed right shift shifting << 30 - __ pslld(xmm_temp5, 25); // packed right shift shifting << 25 - __ pxor(xmm_temp7, xmm_temp4); // xor the shifted versions - __ pxor(xmm_temp7, xmm_temp5); - __ movdqu(xmm_temp4, xmm_temp7); - __ pslldq(xmm_temp7, 12); - __ psrldq(xmm_temp4, 4); - __ pxor(xmm_temp3, xmm_temp7); // first phase of the reduction complete - - // - // Second phase of the reduction - // - // Make 3 copies of xmm3 in xmm2, xmm5, xmm7 for doing these - // shift operations. 
- __ movdqu(xmm_temp2, xmm_temp3); - __ movdqu(xmm_temp7, xmm_temp3); - __ movdqu(xmm_temp5, xmm_temp3); - __ psrld(xmm_temp2, 1); // packed left shifting >> 1 - __ psrld(xmm_temp7, 2); // packed left shifting >> 2 - __ psrld(xmm_temp5, 7); // packed left shifting >> 7 - __ pxor(xmm_temp2, xmm_temp7); // xor the shifted versions - __ pxor(xmm_temp2, xmm_temp5); - __ pxor(xmm_temp2, xmm_temp4); - __ pxor(xmm_temp3, xmm_temp2); - __ pxor(xmm_temp6, xmm_temp3); // the result is in xmm6 - - __ decrement(blocks); - __ jcc(Assembler::zero, L_exit); - __ movdqu(xmm_temp0, xmm_temp6); - __ addptr(data, 16); - __ jmp(L_ghash_loop); - - __ BIND(L_exit); - // Byte swap 16-byte result - __ pshufb(xmm_temp6, ExternalAddress(ghash_long_swap_mask_addr())); - __ movdqu(Address(state, 0), xmm_temp6); // store the result - - handleSOERegisters(false); // restore registers - __ leave(); - __ ret(0); - return start; - } - - /** - * Arguments: - * - * Inputs: - * rsp(4) - int crc - * rsp(8) - byte* buf - * rsp(12) - int length - * - * Output: - * rax - int crc result - */ - address generate_updateBytesCRC32() { - assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions"); - - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32"); - - address start = __ pc(); - - const Register crc = rdx; // crc - const Register buf = rsi; // source java byte array address - const Register len = rcx; // length - const Register table = rdi; // crc_table address (reuse register) - const Register tmp = rbx; - assert_different_registers(crc, buf, len, table, tmp, rax); - - BLOCK_COMMENT("Entry:"); - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ push(rsi); - __ push(rdi); - __ push(rbx); - - Address crc_arg(rbp, 8 + 0); - Address buf_arg(rbp, 8 + 4); - Address len_arg(rbp, 8 + 8); - - // Load up: - __ movl(crc, crc_arg); - __ movptr(buf, buf_arg); - __ movl(len, len_arg); - - __ kernel_crc32(crc, buf, len, table, tmp); - - __ movl(rax, 
crc); - __ pop(rbx); - __ pop(rdi); - __ pop(rsi); - __ vzeroupper(); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - return start; - } - - /** - * Arguments: - * - * Inputs: - * rsp(4) - int crc - * rsp(8) - byte* buf - * rsp(12) - int length - * rsp(16) - table_start - optional (present only when doing a library_calll, - * not used by x86 algorithm) - * - * Output: - * rax - int crc result - */ - address generate_updateBytesCRC32C(bool is_pclmulqdq_supported) { - assert(UseCRC32CIntrinsics, "need SSE4_2"); - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C"); - address start = __ pc(); - const Register crc = rax; // crc - const Register buf = rcx; // source java byte array address - const Register len = rdx; // length - const Register d = rbx; - const Register g = rsi; - const Register h = rdi; - const Register empty = noreg; // will never be used, in order not - // to change a signature for crc32c_IPL_Alg2_Alt2 - // between 64/32 I'm just keeping it here - assert_different_registers(crc, buf, len, d, g, h); - - BLOCK_COMMENT("Entry:"); - __ enter(); // required for proper stackwalking of RuntimeStub frame - Address crc_arg(rsp, 4 + 4 + 0); // ESP+4 + - // we need to add additional 4 because __ enter - // have just pushed ebp on a stack - Address buf_arg(rsp, 4 + 4 + 4); - Address len_arg(rsp, 4 + 4 + 8); - // Load up: - __ movl(crc, crc_arg); - __ movl(buf, buf_arg); - __ movl(len, len_arg); - __ push(d); - __ push(g); - __ push(h); - __ crc32c_ipl_alg2_alt2(crc, buf, len, - d, g, h, - empty, empty, empty, - xmm0, xmm1, xmm2, - is_pclmulqdq_supported); - __ pop(h); - __ pop(g); - __ pop(d); - __ vzeroupper(); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - return start; - } - - address generate_libmExp() { - StubCodeMark mark(this, "StubRoutines", "libmExp"); - - address start = __ pc(); - - BLOCK_COMMENT("Entry:"); - __ enter(); // required 
for proper stackwalking of RuntimeStub frame - __ fast_exp(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, - rax, rcx, rdx, rbx); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - return start; - - } - - address generate_libmLog() { - StubCodeMark mark(this, "StubRoutines", "libmLog"); - - address start = __ pc(); - - BLOCK_COMMENT("Entry:"); - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ fast_log(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, - rax, rcx, rdx, rbx); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - return start; - - } - - address generate_libmLog10() { - StubCodeMark mark(this, "StubRoutines", "libmLog10"); - - address start = __ pc(); - - BLOCK_COMMENT("Entry:"); - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ fast_log10(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, - rax, rcx, rdx, rbx); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - return start; - - } - - address generate_libmPow() { - StubCodeMark mark(this, "StubRoutines", "libmPow"); - - address start = __ pc(); - - BLOCK_COMMENT("Entry:"); - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ fast_pow(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, - rax, rcx, rdx, rbx); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - return start; - - } - - address generate_libm_reduce_pi04l() { - StubCodeMark mark(this, "StubRoutines", "libm_reduce_pi04l"); - - address start = __ pc(); - - BLOCK_COMMENT("Entry:"); - __ libm_reduce_pi04l(rax, rcx, rdx, rbx, rsi, rdi, rbp, rsp); - - return start; - - } - - address generate_libm_sin_cos_huge() { - StubCodeMark mark(this, "StubRoutines", "libm_sin_cos_huge"); - - address start = __ pc(); - - BLOCK_COMMENT("Entry:"); - __ libm_sincos_huge(xmm0, xmm1, rax, rcx, rdx, rbx, rsi, rdi, rbp, rsp); - - return start; - - } - - address 
generate_libmSin() { - StubCodeMark mark(this, "StubRoutines", "libmSin"); - - address start = __ pc(); - - BLOCK_COMMENT("Entry:"); - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ fast_sin(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, - rax, rbx, rdx); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - return start; - - } - - address generate_libmCos() { - StubCodeMark mark(this, "StubRoutines", "libmCos"); - - address start = __ pc(); - - BLOCK_COMMENT("Entry:"); - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ fast_cos(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, - rax, rcx, rdx, rbx); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - return start; - - } - - address generate_libm_tan_cot_huge() { - StubCodeMark mark(this, "StubRoutines", "libm_tan_cot_huge"); - - address start = __ pc(); - - BLOCK_COMMENT("Entry:"); - __ libm_tancot_huge(xmm0, xmm1, rax, rcx, rdx, rbx, rsi, rdi, rbp, rsp); - - return start; - - } - - address generate_libmTan() { - StubCodeMark mark(this, "StubRoutines", "libmTan"); - - address start = __ pc(); - - BLOCK_COMMENT("Entry:"); - __ enter(); // required for proper stackwalking of RuntimeStub frame - __ fast_tan(xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, - rax, rcx, rdx, rbx); - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(0); - - return start; - - } - - address generate_method_entry_barrier() { - __ align(CodeEntryAlignment); - StubCodeMark mark(this, "StubRoutines", "nmethod_entry_barrier"); - - Label deoptimize_label; - - address start = __ pc(); - - __ push(-1); // cookie, this is used for writing the new rsp when deoptimizing - - BLOCK_COMMENT("Entry:"); - __ enter(); // save rbp - - // save rbx, because we want to use that value. 
- // We could do without it but then we depend on the number of slots used by pusha - __ push(rbx); - - __ lea(rbx, Address(rsp, wordSize * 3)); // 1 for cookie, 1 for rbp, 1 for rbx - this should be the return address - - __ pusha(); - - // xmm0 and xmm1 may be used for passing float/double arguments - - if (UseSSE >= 2) { - const int xmm_size = wordSize * 4; - __ subptr(rsp, xmm_size * 2); - __ movdbl(Address(rsp, xmm_size * 1), xmm1); - __ movdbl(Address(rsp, xmm_size * 0), xmm0); - } else if (UseSSE >= 1) { - const int xmm_size = wordSize * 2; - __ subptr(rsp, xmm_size * 2); - __ movflt(Address(rsp, xmm_size * 1), xmm1); - __ movflt(Address(rsp, xmm_size * 0), xmm0); - } - - __ call_VM_leaf(CAST_FROM_FN_PTR(address, static_cast(BarrierSetNMethod::nmethod_stub_entry_barrier)), rbx); - - if (UseSSE >= 2) { - const int xmm_size = wordSize * 4; - __ movdbl(xmm0, Address(rsp, xmm_size * 0)); - __ movdbl(xmm1, Address(rsp, xmm_size * 1)); - __ addptr(rsp, xmm_size * 2); - } else if (UseSSE >= 1) { - const int xmm_size = wordSize * 2; - __ movflt(xmm0, Address(rsp, xmm_size * 0)); - __ movflt(xmm1, Address(rsp, xmm_size * 1)); - __ addptr(rsp, xmm_size * 2); - } - - __ cmpl(rax, 1); // 1 means deoptimize - __ jcc(Assembler::equal, deoptimize_label); - - __ popa(); - __ pop(rbx); - - __ leave(); - - __ addptr(rsp, 1 * wordSize); // cookie - __ ret(0); - - __ BIND(deoptimize_label); - - __ popa(); - __ pop(rbx); - - __ leave(); - - // this can be taken out, but is good for verification purposes. 
getting a SIGSEGV - // here while still having a correct stack is valuable - __ testptr(rsp, Address(rsp, 0)); - - __ movptr(rsp, Address(rsp, 0)); // new rsp was written in the barrier - __ jmp(Address(rsp, -1 * wordSize)); // jmp target should be callers verified_entry_point - - return start; - } - - private: - - void create_control_words() { - // Round to nearest, 53-bit mode, exceptions masked - StubRoutines::x86::_fpu_cntrl_wrd_std = 0x027F; - // Round to zero, 53-bit mode, exception mased - StubRoutines::x86::_fpu_cntrl_wrd_trunc = 0x0D7F; - // Round to nearest, 24-bit mode, exceptions masked - StubRoutines::x86::_fpu_cntrl_wrd_24 = 0x007F; - // Round to nearest, 64-bit mode, exceptions masked, flags specialized - StubRoutines::x86::_mxcsr_std = EnableX86ECoreOpts ? 0x1FBF : 0x1F80; - // Note: the following two constants are 80-bit values - // layout is critical for correct loading by FPU. - // Bias for strict fp multiply/divide - StubRoutines::x86::_fpu_subnormal_bias1[0]= 0x00000000; // 2^(-15360) == 0x03ff 8000 0000 0000 0000 - StubRoutines::x86::_fpu_subnormal_bias1[1]= 0x80000000; - StubRoutines::x86::_fpu_subnormal_bias1[2]= 0x03ff; - // Un-Bias for strict fp multiply/divide - StubRoutines::x86::_fpu_subnormal_bias2[0]= 0x00000000; // 2^(+15360) == 0x7bff 8000 0000 0000 0000 - StubRoutines::x86::_fpu_subnormal_bias2[1]= 0x80000000; - StubRoutines::x86::_fpu_subnormal_bias2[2]= 0x7bff; - } - - address generate_cont_thaw() { - if (!Continuations::enabled()) return nullptr; - Unimplemented(); - return nullptr; - } - - address generate_cont_returnBarrier() { - if (!Continuations::enabled()) return nullptr; - Unimplemented(); - return nullptr; - } - - address generate_cont_returnBarrier_exception() { - if (!Continuations::enabled()) return nullptr; - Unimplemented(); - return nullptr; - } - - //--------------------------------------------------------------------------- - // Initialization - - void generate_initial_stubs() { - // Generates all stubs and 
initializes the entry points - - //------------------------------------------------------------------------------------------------------------------------ - // entry points that exist in all platforms - // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller than - // the disadvantage of having a much more complicated generator structure. See also comment in stubRoutines.hpp. - StubRoutines::_forward_exception_entry = generate_forward_exception(); - - StubRoutines::_call_stub_entry = - generate_call_stub(StubRoutines::_call_stub_return_address); - // is referenced by megamorphic call - StubRoutines::_catch_exception_entry = generate_catch_exception(); - - // platform dependent - create_control_words(); - - // Initialize table for copy memory (arraycopy) check. - if (UnsafeMemoryAccess::_table == nullptr) { - UnsafeMemoryAccess::create_table(16 + 4); // 16 for copyMemory; 4 for setMemory - } - - StubRoutines::x86::_verify_mxcsr_entry = generate_verify_mxcsr(); - StubRoutines::x86::_verify_fpu_cntrl_wrd_entry = generate_verify_fpu_cntrl_wrd(); - StubRoutines::x86::_d2i_wrapper = generate_d2i_wrapper(T_INT, CAST_FROM_FN_PTR(address, SharedRuntime::d2i)); - StubRoutines::x86::_d2l_wrapper = generate_d2i_wrapper(T_LONG, CAST_FROM_FN_PTR(address, SharedRuntime::d2l)); - - if (UseCRC32Intrinsics) { - // set table address before stub generation which use it - StubRoutines::_crc_table_adr = (address)StubRoutines::x86::_crc_table; - StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); - } - - if (UseCRC32CIntrinsics) { - bool supports_clmul = VM_Version::supports_clmul(); - StubRoutines::x86::generate_CRC32C_table(supports_clmul); - StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table; - StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul); - } - if (VM_Version::supports_sse2() && UseLibmIntrinsic && InlineIntrinsics) { - if 
(vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dexp)) { - StubRoutines::_dexp = generate_libmExp(); - } - if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dlog)) { - StubRoutines::_dlog = generate_libmLog(); - } - if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dlog10)) { - StubRoutines::_dlog10 = generate_libmLog10(); - } - if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dpow)) { - StubRoutines::_dpow = generate_libmPow(); - } - if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin) || - vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos) || - vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) { - StubRoutines::_dlibm_reduce_pi04l = generate_libm_reduce_pi04l(); - } - if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin) || - vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) { - StubRoutines::_dlibm_sin_cos_huge = generate_libm_sin_cos_huge(); - } - if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin)) { - StubRoutines::_dsin = generate_libmSin(); - } - if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) { - StubRoutines::_dcos = generate_libmCos(); - } - if (vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dtan)) { - StubRoutines::_dlibm_tan_cot_huge = generate_libm_tan_cot_huge(); - StubRoutines::_dtan = generate_libmTan(); - } - } - } - - void generate_continuation_stubs() { - // Continuation stubs: - StubRoutines::_cont_thaw = generate_cont_thaw(); - StubRoutines::_cont_returnBarrier = generate_cont_returnBarrier(); - StubRoutines::_cont_returnBarrierExc = generate_cont_returnBarrier_exception(); - } - - void generate_final_stubs() { - // Generates all stubs and initializes the entry points - - // support for verify_oop (must happen after universe_init) - StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); - - // arraycopy stubs used by compilers - generate_arraycopy_stubs(); - - BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod(); 
- if (bs_nm != nullptr) { - StubRoutines::_method_entry_barrier = generate_method_entry_barrier(); - } - } - - void generate_compiler_stubs() { -#if COMPILER2_OR_JVMCI - - // entry points that are C2/JVMCI specific - - StubRoutines::x86::_vector_float_sign_mask = generate_vector_mask("vector_float_sign_mask", 0x7FFFFFFF); - StubRoutines::x86::_vector_float_sign_flip = generate_vector_mask("vector_float_sign_flip", 0x80000000); - StubRoutines::x86::_vector_double_sign_mask = generate_vector_mask_long_double("vector_double_sign_mask", 0x7FFFFFFF, 0xFFFFFFFF); - StubRoutines::x86::_vector_double_sign_flip = generate_vector_mask_long_double("vector_double_sign_flip", 0x80000000, 0x00000000); - StubRoutines::x86::_vector_short_to_byte_mask = generate_vector_mask("vector_short_to_byte_mask", 0x00ff00ff); - StubRoutines::x86::_vector_int_to_byte_mask = generate_vector_mask("vector_int_to_byte_mask", 0x000000ff); - StubRoutines::x86::_vector_int_to_short_mask = generate_vector_mask("vector_int_to_short_mask", 0x0000ffff); - StubRoutines::x86::_vector_32_bit_mask = generate_vector_custom_i32("vector_32_bit_mask", Assembler::AVX_512bit, - 0xFFFFFFFF, 0, 0, 0); - StubRoutines::x86::_vector_64_bit_mask = generate_vector_custom_i32("vector_64_bit_mask", Assembler::AVX_512bit, - 0xFFFFFFFF, 0xFFFFFFFF, 0, 0); - StubRoutines::x86::_vector_int_shuffle_mask = generate_vector_mask("vector_int_shuffle_mask", 0x03020100); - StubRoutines::x86::_vector_byte_shuffle_mask = generate_vector_byte_shuffle_mask("vector_byte_shuffle_mask"); - StubRoutines::x86::_vector_short_shuffle_mask = generate_vector_mask("vector_short_shuffle_mask", 0x01000100); - StubRoutines::x86::_vector_long_shuffle_mask = generate_vector_mask_long_double("vector_long_shuffle_mask", 0x00000001, 0x0); - StubRoutines::x86::_vector_byte_perm_mask = generate_vector_byte_perm_mask("vector_byte_perm_mask"); - StubRoutines::x86::_vector_long_sign_mask = generate_vector_mask_long_double("vector_long_sign_mask", 0x80000000, 
0x00000000); - StubRoutines::x86::_vector_all_bits_set = generate_vector_mask("vector_all_bits_set", 0xFFFFFFFF); - StubRoutines::x86::_vector_int_mask_cmp_bits = generate_vector_mask("vector_int_mask_cmp_bits", 0x00000001); - StubRoutines::x86::_vector_iota_indices = generate_iota_indices("iota_indices"); - StubRoutines::x86::_vector_count_leading_zeros_lut = generate_count_leading_zeros_lut("count_leading_zeros_lut"); - StubRoutines::x86::_vector_reverse_bit_lut = generate_vector_reverse_bit_lut("reverse_bit_lut"); - StubRoutines::x86::_vector_reverse_byte_perm_mask_long = generate_vector_reverse_byte_perm_mask_long("perm_mask_long"); - StubRoutines::x86::_vector_reverse_byte_perm_mask_int = generate_vector_reverse_byte_perm_mask_int("perm_mask_int"); - StubRoutines::x86::_vector_reverse_byte_perm_mask_short = generate_vector_reverse_byte_perm_mask_short("perm_mask_short"); - - if (VM_Version::supports_avx2() && !VM_Version::supports_avx512_vpopcntdq()) { - // lut implementation influenced by counting 1s algorithm from section 5-1 of Hackers' Delight. 
- StubRoutines::x86::_vector_popcount_lut = generate_popcount_avx_lut("popcount_lut"); - } - - // don't bother generating these AES intrinsic stubs unless global flag is set - if (UseAESIntrinsics) { - StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); - StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); - StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt(); - StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel(); - } - - if (UseAESCTRIntrinsics) { - StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel(); - } - - if (UseMD5Intrinsics) { - StubRoutines::_md5_implCompress = generate_md5_implCompress(false, "md5_implCompress"); - StubRoutines::_md5_implCompressMB = generate_md5_implCompress(true, "md5_implCompressMB"); - } - if (UseSHA1Intrinsics) { - StubRoutines::x86::_upper_word_mask_addr = generate_upper_word_mask(); - StubRoutines::x86::_shuffle_byte_flip_mask_addr = generate_shuffle_byte_flip_mask(); - StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress"); - StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB"); - } - if (UseSHA256Intrinsics) { - StubRoutines::x86::_k256_adr = (address)StubRoutines::x86::_k256; - StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask(); - StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress"); - StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB"); - } - - // Generate GHASH intrinsics code - if (UseGHASHIntrinsics) { - StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks(); - } -#endif // COMPILER2_OR_JVMCI - } - - - public: - StubGenerator(CodeBuffer* code, StubsKind kind) : StubCodeGenerator(code) { - switch(kind) { - case Initial_stubs: - 
generate_initial_stubs(); - break; - case Continuation_stubs: - generate_continuation_stubs(); - break; - case Compiler_stubs: - generate_compiler_stubs(); - break; - case Final_stubs: - generate_final_stubs(); - break; - default: - fatal("unexpected stubs kind: %d", kind); - break; - }; - } -}; // end class declaration - -void StubGenerator_generate(CodeBuffer* code, StubCodeGenerator::StubsKind kind) { - StubGenerator g(code, kind); -} diff --git a/src/hotspot/cpu/x86/stubRoutines_x86.cpp b/src/hotspot/cpu/x86/stubRoutines_x86.cpp index bc1cbdbba26b5..5516bce4f2c18 100644 --- a/src/hotspot/cpu/x86/stubRoutines_x86.cpp +++ b/src/hotspot/cpu/x86/stubRoutines_x86.cpp @@ -61,7 +61,6 @@ address StubRoutines::x86::_vector_popcount_lut = nullptr; address StubRoutines::x86::_vector_count_leading_zeros_lut = nullptr; address StubRoutines::x86::_vector_32_bit_mask = nullptr; address StubRoutines::x86::_vector_64_bit_mask = nullptr; -#ifdef _LP64 address StubRoutines::x86::_k256_W_adr = nullptr; address StubRoutines::x86::_k512_W_addr = nullptr; address StubRoutines::x86::_pshuffle_byte_flip_mask_addr_sha512 = nullptr; @@ -86,7 +85,6 @@ address StubRoutines::x86::_compress_perm_table32 = nullptr; address StubRoutines::x86::_compress_perm_table64 = nullptr; address StubRoutines::x86::_expand_perm_table32 = nullptr; address StubRoutines::x86::_expand_perm_table64 = nullptr; -#endif address StubRoutines::x86::_pshuffle_byte_flip_mask_addr = nullptr; const uint64_t StubRoutines::x86::_crc_by128_masks[] = @@ -184,7 +182,6 @@ const juint StubRoutines::x86::_crc_table[] = 0x2d02ef8dUL }; -#ifdef _LP64 const juint StubRoutines::x86::_crc_table_avx512[] = { 0xe95c1271UL, 0x00000000UL, 0xce3371cbUL, 0x00000000UL, @@ -231,7 +228,6 @@ const juint StubRoutines::x86::_shuf_table_crc32_avx512[] = 0x83828100UL, 0x87868584UL, 0x8b8a8988UL, 0x8f8e8d8cUL, 0x03020100UL, 0x07060504UL, 0x0b0a0908UL, 0x000e0d0cUL }; -#endif // _LP64 const jint StubRoutines::x86::_arrays_hashcode_powers_of_31[] = 
{ @@ -394,7 +390,6 @@ ATTRIBUTE_ALIGNED(64) const juint StubRoutines::x86::_k256[] = 0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL }; -#ifdef _LP64 // used in MacroAssembler::sha256_AVX2 // dynamically built from _k256 ATTRIBUTE_ALIGNED(64) juint StubRoutines::x86::_k256_W[2*sizeof(StubRoutines::x86::_k256)]; @@ -443,4 +438,3 @@ ATTRIBUTE_ALIGNED(64) const julong StubRoutines::x86::_k512_W[] = 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL, 0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL, }; -#endif diff --git a/src/hotspot/cpu/x86/stubRoutines_x86.hpp b/src/hotspot/cpu/x86/stubRoutines_x86.hpp index 0a6d091de8c7f..613a67684a96f 100644 --- a/src/hotspot/cpu/x86/stubRoutines_x86.hpp +++ b/src/hotspot/cpu/x86/stubRoutines_x86.hpp @@ -34,18 +34,17 @@ static bool returns_to_call_stub(address return_pc) { return return_pc == _call_ enum platform_dependent_constants { // simply increase sizes if too small (assembler will crash if too small) _initial_stubs_code_size = 20000 WINDOWS_ONLY(+1000), - _continuation_stubs_code_size = 1000 LP64_ONLY(+2000), + _continuation_stubs_code_size = 3000, // AVX512 intrinsics add more code in 64-bit VM, // Windows have more code to save/restore registers - _compiler_stubs_code_size = 20000 LP64_ONLY(+47000) WINDOWS_ONLY(+2000), - _final_stubs_code_size = 10000 LP64_ONLY(+20000) WINDOWS_ONLY(+22000) ZGC_ONLY(+20000) + _compiler_stubs_code_size = 67000 WINDOWS_ONLY(+2000), + _final_stubs_code_size = 30000 WINDOWS_ONLY(+22000) ZGC_ONLY(+20000) }; class x86 { friend class StubGenerator; friend class VMStructs; -#ifdef _LP64 private: static address _get_previous_sp_entry; @@ -101,50 +100,19 @@ class x86 { return _double_sign_flip; } -#else // !LP64 - - private: - static address _verify_fpu_cntrl_wrd_entry; - static address _d2i_wrapper; - static address _d2l_wrapper; - - static jint _fpu_cntrl_wrd_std; - static jint _fpu_cntrl_wrd_24; - static jint _fpu_cntrl_wrd_trunc; - - static jint _fpu_subnormal_bias1[3]; - static jint 
_fpu_subnormal_bias2[3]; - - public: - static address verify_fpu_cntrl_wrd_entry() { return _verify_fpu_cntrl_wrd_entry; } - static address d2i_wrapper() { return _d2i_wrapper; } - static address d2l_wrapper() { return _d2l_wrapper; } - static address addr_fpu_cntrl_wrd_std() { return (address)&_fpu_cntrl_wrd_std; } - static address addr_fpu_cntrl_wrd_24() { return (address)&_fpu_cntrl_wrd_24; } - static address addr_fpu_cntrl_wrd_trunc() { return (address)&_fpu_cntrl_wrd_trunc; } - static address addr_fpu_subnormal_bias1() { return (address)&_fpu_subnormal_bias1; } - static address addr_fpu_subnormal_bias2() { return (address)&_fpu_subnormal_bias2; } - - static jint fpu_cntrl_wrd_std() { return _fpu_cntrl_wrd_std; } -#endif // !LP64 - private: static jint _mxcsr_std; -#ifdef _LP64 static jint _mxcsr_rz; -#endif // _LP64 static address _verify_mxcsr_entry; // masks and table for CRC32 static const uint64_t _crc_by128_masks[]; static const juint _crc_table[]; -#ifdef _LP64 static const juint _crc_by128_masks_avx512[]; static const juint _crc_table_avx512[]; static const juint _crc32c_table_avx512[]; static const juint _shuf_table_crc32_avx512[]; -#endif // _LP64 // table for CRC32C static juint* _crc32c_table; // table for arrays_hashcode @@ -182,7 +150,6 @@ class x86 { static address _vector_reverse_byte_perm_mask_long; static address _vector_reverse_byte_perm_mask_int; static address _vector_reverse_byte_perm_mask_short; -#ifdef _LP64 static juint _k256_W[]; static address _k256_W_adr; static const julong _k512_W[]; @@ -206,23 +173,18 @@ class x86 { static address _join_1_2_base64; static address _join_2_3_base64; static address _decoding_table_base64; -#endif // byte flip mask for sha256 static address _pshuffle_byte_flip_mask_addr; public: static address addr_mxcsr_std() { return (address)&_mxcsr_std; } -#ifdef _LP64 static address addr_mxcsr_rz() { return (address)&_mxcsr_rz; } -#endif // _LP64 static address verify_mxcsr_entry() { return _verify_mxcsr_entry; } 
static address crc_by128_masks_addr() { return (address)_crc_by128_masks; } -#ifdef _LP64 static address crc_by128_masks_avx512_addr() { return (address)_crc_by128_masks_avx512; } static address shuf_table_crc32_avx512_addr() { return (address)_shuf_table_crc32_avx512; } static address crc_table_avx512_addr() { return (address)_crc_table_avx512; } static address crc32c_table_avx512_addr() { return (address)_crc32c_table_avx512; } -#endif // _LP64 static address upper_word_mask_addr() { return _upper_word_mask_addr; } static address shuffle_byte_flip_mask_addr() { return _shuffle_byte_flip_mask_addr; } static address k256_addr() { return _k256_adr; } @@ -322,7 +284,6 @@ class x86 { static address vector_popcount_lut() { return _vector_popcount_lut; } -#ifdef _LP64 static address k256_W_addr() { return _k256_W_adr; } static address k512_W_addr() { return _k512_W_addr; } static address pshuffle_byte_flip_mask_addr_sha512() { return _pshuffle_byte_flip_mask_addr_sha512; } @@ -346,7 +307,6 @@ class x86 { static address compress_perm_table64() { return _compress_perm_table64; } static address expand_perm_table32() { return _expand_perm_table32; } static address expand_perm_table64() { return _expand_perm_table64; } -#endif static address pshuffle_byte_flip_mask_addr() { return _pshuffle_byte_flip_mask_addr; } static address arrays_hashcode_powers_of_31() { return (address)_arrays_hashcode_powers_of_31; } static void generate_CRC32C_table(bool is_pclmulqdq_supported); diff --git a/src/hotspot/cpu/x86/stubRoutines_x86_32.cpp b/src/hotspot/cpu/x86/stubRoutines_x86_32.cpp deleted file mode 100644 index 7916a3b36305a..0000000000000 --- a/src/hotspot/cpu/x86/stubRoutines_x86_32.cpp +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
- * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - * - */ - -#include "precompiled.hpp" -#include "runtime/deoptimization.hpp" -#include "runtime/frame.inline.hpp" -#include "runtime/javaThread.hpp" -#include "runtime/stubRoutines.hpp" - -// Implementation of the platform-specific part of StubRoutines - for -// a description of how to extend it, see the stubRoutines.hpp file. 
- -address StubRoutines::x86::_verify_fpu_cntrl_wrd_entry = nullptr; - -address StubRoutines::x86::_d2i_wrapper = nullptr; -address StubRoutines::x86::_d2l_wrapper = nullptr; - -jint StubRoutines::x86::_fpu_cntrl_wrd_std = 0; -jint StubRoutines::x86::_fpu_cntrl_wrd_24 = 0; -jint StubRoutines::x86::_fpu_cntrl_wrd_trunc = 0; - -jint StubRoutines::x86::_mxcsr_std = 0; - -jint StubRoutines::x86::_fpu_subnormal_bias1[3] = { 0, 0, 0 }; -jint StubRoutines::x86::_fpu_subnormal_bias2[3] = { 0, 0, 0 }; - diff --git a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp index 823b965a09b51..99aa9af5bad4d 100644 --- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp +++ b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp @@ -64,15 +64,11 @@ // if too small. // Run with +PrintInterpreter to get the VM to print out the size. // Max size with JVMTI -#ifdef AMD64 int TemplateInterpreter::InterpreterCodeSize = JVMCI_ONLY(268) NOT_JVMCI(256) * 1024; -#else -int TemplateInterpreter::InterpreterCodeSize = 224 * 1024; -#endif // AMD64 // Global Register Names -static const Register rbcp = LP64_ONLY(r13) NOT_LP64(rsi); -static const Register rlocals = LP64_ONLY(r14) NOT_LP64(rdi); +static const Register rbcp = r13; +static const Register rlocals = r14; const int method_offset = frame::interpreter_frame_method_offset * wordSize; const int bcp_offset = frame::interpreter_frame_bcp_offset * wordSize; @@ -121,12 +117,11 @@ address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { // Setup parameters. // ??? convention: expect aberrant index in register ebx/rbx. // Pass array to create more detailed exceptions. 
- Register rarg = NOT_LP64(rax) LP64_ONLY(c_rarg1); __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime:: throw_ArrayIndexOutOfBoundsException), - rarg, rbx); + c_rarg1, rbx); return entry; } @@ -134,8 +129,7 @@ address TemplateInterpreterGenerator::generate_ClassCastException_handler() { address entry = __ pc(); // object is at TOS - Register rarg = NOT_LP64(rax) LP64_ONLY(c_rarg1); - __ pop(rarg); + __ pop(c_rarg1); // expression stack must be empty before entering the VM if an // exception happened @@ -145,7 +139,7 @@ address TemplateInterpreterGenerator::generate_ClassCastException_handler() { CAST_FROM_FN_PTR(address, InterpreterRuntime:: throw_ClassCastException), - rarg); + c_rarg1); return entry; } @@ -154,8 +148,8 @@ address TemplateInterpreterGenerator::generate_exception_handler_common( assert(!pass_oop || message == nullptr, "either oop or message but not both"); address entry = __ pc(); - Register rarg = NOT_LP64(rax) LP64_ONLY(c_rarg1); - Register rarg2 = NOT_LP64(rbx) LP64_ONLY(c_rarg2); + Register rarg = c_rarg1; + Register rarg2 = c_rarg2; if (pass_oop) { // object is at TOS @@ -185,30 +179,6 @@ address TemplateInterpreterGenerator::generate_exception_handler_common( address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { address entry = __ pc(); -#ifndef _LP64 -#ifdef COMPILER2 - // The FPU stack is clean if UseSSE >= 2 but must be cleaned in other cases - if ((state == ftos && UseSSE < 1) || (state == dtos && UseSSE < 2)) { - for (int i = 1; i < 8; i++) { - __ ffree(i); - } - } else if (UseSSE < 2) { - __ empty_FPU_stack(); - } -#endif // COMPILER2 - if ((state == ftos && UseSSE < 1) || (state == dtos && UseSSE < 2)) { - __ MacroAssembler::verify_FPU(1, "generate_return_entry_for compiled"); - } else { - __ MacroAssembler::verify_FPU(0, "generate_return_entry_for compiled"); - } - - if (state == ftos) { - __ MacroAssembler::verify_FPU(UseSSE >= 1 ? 
0 : 1, "generate_return_entry_for in interpreter"); - } else if (state == dtos) { - __ MacroAssembler::verify_FPU(UseSSE >= 2 ? 0 : 1, "generate_return_entry_for in interpreter"); - } -#endif // _LP64 - // Restore stack bottom in case i2c adjusted stack __ movptr(rcx, Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize)); __ lea(rsp, Address(rbp, rcx, Address::times_ptr)); @@ -237,14 +207,11 @@ address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, __ lea(rsp, Address(rsp, cache, Interpreter::stackElementScale())); } - const Register java_thread = NOT_LP64(rcx) LP64_ONLY(r15_thread); - if (JvmtiExport::can_pop_frame()) { - NOT_LP64(__ get_thread(java_thread)); - __ check_and_handle_popframe(java_thread); + if (JvmtiExport::can_pop_frame()) { + __ check_and_handle_popframe(r15_thread); } if (JvmtiExport::can_force_early_return()) { - NOT_LP64(__ get_thread(java_thread)); - __ check_and_handle_earlyret(java_thread); + __ check_and_handle_earlyret(r15_thread); } __ dispatch_next(state, step); @@ -256,20 +223,11 @@ address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, int step, address continuation) { address entry = __ pc(); -#ifndef _LP64 - if (state == ftos) { - __ MacroAssembler::verify_FPU(UseSSE >= 1 ? 0 : 1, "generate_deopt_entry_for in interpreter"); - } else if (state == dtos) { - __ MacroAssembler::verify_FPU(UseSSE >= 2 ? 0 : 1, "generate_deopt_entry_for in interpreter"); - } -#endif // _LP64 - // null last_sp until next java call __ movptr(Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize), NULL_WORD); __ restore_bcp(); __ restore_locals(); - const Register thread = NOT_LP64(rcx) LP64_ONLY(r15_thread); - NOT_LP64(__ get_thread(thread)); + const Register thread = r15_thread; #if INCLUDE_JVMCI // Check if we need to take lock at entry of synchronized method. 
This can // only occur on method entry so emit it only for vtos with step 0. @@ -320,46 +278,14 @@ address TemplateInterpreterGenerator::generate_result_handler_for( address entry = __ pc(); switch (type) { case T_BOOLEAN: __ c2bool(rax); break; -#ifndef _LP64 - case T_CHAR : __ andptr(rax, 0xFFFF); break; -#else case T_CHAR : __ movzwl(rax, rax); break; -#endif // _LP64 case T_BYTE : __ sign_extend_byte(rax); break; case T_SHORT : __ sign_extend_short(rax); break; case T_INT : /* nothing to do */ break; case T_LONG : /* nothing to do */ break; case T_VOID : /* nothing to do */ break; -#ifndef _LP64 - case T_DOUBLE : - case T_FLOAT : - { const Register t = InterpreterRuntime::SignatureHandlerGenerator::temp(); - __ pop(t); // remove return address first - // Must return a result for interpreter or compiler. In SSE - // mode, results are returned in xmm0 and the FPU stack must - // be empty. - if (type == T_FLOAT && UseSSE >= 1) { - // Load ST0 - __ fld_d(Address(rsp, 0)); - // Store as float and empty fpu stack - __ fstp_s(Address(rsp, 0)); - // and reload - __ movflt(xmm0, Address(rsp, 0)); - } else if (type == T_DOUBLE && UseSSE >= 2 ) { - __ movdbl(xmm0, Address(rsp, 0)); - } else { - // restore ST0 - __ fld_d(Address(rsp, 0)); - } - // and pop the temp - __ addptr(rsp, 2 * wordSize); - __ push(t); // restore return address - } - break; -#else case T_FLOAT : /* nothing to do */ break; case T_DOUBLE : /* nothing to do */ break; -#endif // _LP64 case T_OBJECT : // retrieve result from frame @@ -468,12 +394,11 @@ void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) // of the verified entry point for the method or null if the // compilation did not complete (either went background or bailed // out). 
- Register rarg = NOT_LP64(rax) LP64_ONLY(c_rarg1); - __ movl(rarg, 0); + __ movl(c_rarg1, 0); __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), - rarg); + c_rarg1); __ movptr(rbx, Address(rbp, method_offset)); // restore Method* // Preserve invariant that r13/r14 contain bcp/locals of sender frame @@ -524,13 +449,8 @@ void TemplateInterpreterGenerator::generate_stack_overflow_check(void) { // the stack before the red zone Label after_frame_check_pop; - const Register thread = NOT_LP64(rsi) LP64_ONLY(r15_thread); -#ifndef _LP64 - __ push(thread); - __ get_thread(thread); -#endif - const Address stack_limit(thread, JavaThread::stack_overflow_limit_offset()); + const Address stack_limit(r15_thread, JavaThread::stack_overflow_limit_offset()); // locals + overhead, in bytes __ mov(rax, rdx); @@ -553,7 +473,6 @@ void TemplateInterpreterGenerator::generate_stack_overflow_check(void) { __ cmpptr(rsp, rax); __ jcc(Assembler::above, after_frame_check_pop); - NOT_LP64(__ pop(rsi)); // get saved bcp // Restore sender's sp as SP. 
This is necessary if the sender's // frame is an extended compiled frame (see gen_c2i_adapter()) @@ -569,7 +488,6 @@ void TemplateInterpreterGenerator::generate_stack_overflow_check(void) { __ jump(RuntimeAddress(SharedRuntime::throw_StackOverflowError_entry())); // all done with frame size check __ bind(after_frame_check_pop); - NOT_LP64(__ pop(rsi)); // all done with frame size check __ bind(after_frame_check); @@ -632,7 +550,7 @@ void TemplateInterpreterGenerator::lock_method() { __ subptr(monitor_block_top, entry_size / wordSize); // set new monitor block top // store object __ movptr(Address(rsp, BasicObjectLock::obj_offset()), rax); - const Register lockreg = NOT_LP64(rdx) LP64_ONLY(c_rarg1); + const Register lockreg = c_rarg1; __ movptr(lockreg, rsp); // object address __ lock_object(lockreg); } @@ -731,15 +649,13 @@ address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { // Preserve the sender sp in case the load barrier // calls the runtime - NOT_LP64(__ push(rsi)); // Load the value of the referent field. 
const Address field_address(rax, referent_offset); __ load_heap_oop(rax, field_address, /*tmp1*/ rbx, /*tmp_thread*/ rdx, ON_WEAK_OOP_REF); // _areturn - const Register sender_sp = NOT_LP64(rsi) LP64_ONLY(r13); - NOT_LP64(__ pop(rsi)); // get sender sp + const Register sender_sp = r13; __ pop(rdi); // get return address __ mov(rsp, sender_sp); // set sp to sender sp __ jmp(rdi); @@ -765,11 +681,7 @@ void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { const int page_size = (int)os::vm_page_size(); const int n_shadow_pages = shadow_zone_size / page_size; - const Register thread = NOT_LP64(rsi) LP64_ONLY(r15_thread); -#ifndef _LP64 - __ push(thread); - __ get_thread(thread); -#endif + const Register thread = r15_thread; #ifdef ASSERT Label L_good_limit; @@ -801,10 +713,6 @@ void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { __ movptr(Address(thread, JavaThread::shadow_zone_growth_watermark()), rsp); __ bind(L_done); - -#ifndef _LP64 - __ pop(thread); -#endif } // Interpreter stub for calling a native method. (asm interpreter) @@ -878,9 +786,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { // _do_not_unlock_if_synchronized to true. The remove_activation will // check this flag. 
- const Register thread1 = NOT_LP64(rax) LP64_ONLY(r15_thread); - NOT_LP64(__ get_thread(thread1)); - const Address do_not_unlock_if_synchronized(thread1, + const Address do_not_unlock_if_synchronized(r15_thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); __ movbool(do_not_unlock_if_synchronized, true); @@ -896,7 +802,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { bang_stack_shadow_pages(true); // reset the _do_not_unlock_if_synchronized flag - NOT_LP64(__ get_thread(thread1)); __ movbool(do_not_unlock_if_synchronized, false); // check for synchronized methods @@ -938,26 +843,19 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { // work registers const Register method = rbx; - const Register thread = NOT_LP64(rdi) LP64_ONLY(r15_thread); - const Register t = NOT_LP64(rcx) LP64_ONLY(r11); + const Register thread = r15_thread; + const Register t = r11; // allocate space for parameters __ get_method(method); __ movptr(t, Address(method, Method::const_offset())); __ load_unsigned_short(t, Address(t, ConstMethod::size_of_parameters_offset())); -#ifndef _LP64 - __ shlptr(t, Interpreter::logStackElementSize); // Convert parameter count to bytes. 
- __ addptr(t, 2*wordSize); // allocate two more slots for JNIEnv and possible mirror - __ subptr(rsp, t); - __ andptr(rsp, -(StackAlignmentInBytes)); // gcc needs 16 byte aligned stacks to do XMM intrinsics -#else __ shll(t, Interpreter::logStackElementSize); __ subptr(rsp, t); __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows __ andptr(rsp, -16); // must be 16 byte boundary (see amd64 ABI) -#endif // _LP64 // get signature handler { @@ -979,7 +877,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { "adjust this code"); assert(InterpreterRuntime::SignatureHandlerGenerator::to() == rsp, "adjust this code"); - assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == NOT_LP64(t) LP64_ONLY(rscratch1), + assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == rscratch1, "adjust this code"); // The generated handlers do not touch RBX (the method). @@ -1008,13 +906,8 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { __ movptr(Address(rbp, frame::interpreter_frame_oop_temp_offset * wordSize), t); // pass handle to mirror -#ifndef _LP64 - __ lea(t, Address(rbp, frame::interpreter_frame_oop_temp_offset * wordSize)); - __ movptr(Address(rsp, wordSize), t); -#else __ lea(c_rarg1, Address(rbp, frame::interpreter_frame_oop_temp_offset * wordSize)); -#endif // _LP64 __ bind(L); } @@ -1035,16 +928,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { } // pass JNIEnv -#ifndef _LP64 - __ get_thread(thread); - __ lea(t, Address(thread, JavaThread::jni_environment_offset())); - __ movptr(Address(rsp, 0), t); - - // set_last_Java_frame_before_call - // It is enough that the pc() - // points into the right code segment. It does not have to be the correct return pc. 
- __ set_last_Java_frame(thread, noreg, rbp, __ pc(), noreg); -#else __ lea(c_rarg0, Address(r15_thread, JavaThread::jni_environment_offset())); // It is enough that the pc() points into the right code @@ -1053,7 +936,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { // case of preemption on Object.wait. Label native_return; __ set_last_Java_frame(rsp, rbp, native_return, rscratch1); -#endif // _LP64 // change thread state #ifdef ASSERT @@ -1089,39 +971,10 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { // pushes change or anything else is added to the stack then the code in // interpreter_frame_result must also change. -#ifndef _LP64 - // save potential result in ST(0) & rdx:rax - // (if result handler is the T_FLOAT or T_DOUBLE handler, result must be in ST0 - - // the check is necessary to avoid potential Intel FPU overflow problems by saving/restoring 'empty' FPU registers) - // It is safe to do this push because state is _thread_in_native and return address will be found - // via _last_native_pc and not via _last_jave_sp - - // NOTE: the order of these push(es) is known to frame::interpreter_frame_result. - // If the order changes or anything else is added to the stack the code in - // interpreter_frame_result will have to be changed. - - { Label L; - Label push_double; - ExternalAddress float_handler(AbstractInterpreter::result_handler(T_FLOAT)); - ExternalAddress double_handler(AbstractInterpreter::result_handler(T_DOUBLE)); - __ cmpptr(Address(rbp, (frame::interpreter_frame_result_handler_offset)*wordSize), - float_handler.addr(), noreg); - __ jcc(Assembler::equal, push_double); - __ cmpptr(Address(rbp, (frame::interpreter_frame_result_handler_offset)*wordSize), - double_handler.addr(), noreg); - __ jcc(Assembler::notEqual, L); - __ bind(push_double); - __ push_d(); // FP values are returned using the FPU, so push FPU contents (even if UseSSE > 0). 
- __ bind(L); - } -#else __ push(dtos); -#endif // _LP64 - __ push(ltos); // change thread state - NOT_LP64(__ get_thread(thread)); __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native_trans); @@ -1131,12 +984,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { Assembler::LoadLoad | Assembler::LoadStore | Assembler::StoreLoad | Assembler::StoreStore)); } -#ifndef _LP64 - if (AlwaysRestoreFPU) { - // Make sure the control word is correct. - __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); - } -#endif // _LP64 // check for safepoint operation in progress and/or pending suspend requests { @@ -1156,13 +1003,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { // preserved and correspond to the bcp/locals pointers. So we do a // runtime call by hand. // -#ifndef _LP64 - __ push(thread); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, - JavaThread::check_special_condition_for_native_trans))); - __ increment(rsp, wordSize); - __ get_thread(thread); -#else __ mov(c_rarg0, r15_thread); __ mov(r12, rsp); // remember sp (can only use r12 if not using call_VM) __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows @@ -1170,14 +1010,12 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans))); __ mov(rsp, r12); // restore sp __ reinit_heapbase(); -#endif // _LP64 __ bind(Continue); } // change thread state __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_Java); -#ifdef _LP64 if (LockingMode != LM_LEGACY) { // Check preemption for Object.wait() Label not_preempted; @@ -1193,7 +1031,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { // any pc will do so just use this one for LM_LEGACY to keep code together. 
__ bind(native_return); } -#endif // _LP64 // reset_last_Java_frame __ reset_last_Java_frame(thread, true); @@ -1235,10 +1072,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { __ jcc(Assembler::notEqual, no_reguard); __ pusha(); // XXX only save smashed registers -#ifndef _LP64 - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages))); - __ popa(); -#else __ mov(r12, rsp); // remember sp (can only use r12 if not using call_VM) __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows __ andptr(rsp, -16); // align stack as required by ABI @@ -1246,7 +1079,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { __ mov(rsp, r12); // restore sp __ popa(); // XXX only restore smashed registers __ reinit_heapbase(); -#endif // _LP64 __ bind(no_reguard); } @@ -1294,7 +1126,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { (intptr_t)(frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock))); - const Register regmon = NOT_LP64(rdx) LP64_ONLY(c_rarg1); + const Register regmon = c_rarg1; // monitor expect in c_rarg1 for slow unlock path __ lea(regmon, monitor); // address of first monitor @@ -1326,7 +1158,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { // restore potential result in ST0 & handle result __ pop(ltos); - LP64_ONLY( __ pop(dtos)); + __ pop(dtos); __ movptr(t, Address(rbp, (frame::interpreter_frame_result_handler_offset) * wordSize)); @@ -1455,8 +1287,7 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { // _do_not_unlock_if_synchronized to true. The remove_activation // will check this flag. 
- const Register thread = NOT_LP64(rax) LP64_ONLY(r15_thread); - NOT_LP64(__ get_thread(thread)); + const Register thread = r15_thread; const Address do_not_unlock_if_synchronized(thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); __ movbool(do_not_unlock_if_synchronized, true); @@ -1475,7 +1306,6 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) { bang_stack_shadow_pages(false); // reset the _do_not_unlock_if_synchronized flag - NOT_LP64(__ get_thread(thread)); __ movbool(do_not_unlock_if_synchronized, false); // check for synchronized methods @@ -1542,15 +1372,15 @@ void TemplateInterpreterGenerator::generate_throw_exception() { // rdx: return address/pc that threw exception __ restore_bcp(); // r13/rsi points to call/send __ restore_locals(); - LP64_ONLY(__ reinit_heapbase()); // restore r12 as heapbase. + __ reinit_heapbase(); // restore r12 as heapbase. // Entry point for exceptions thrown within interpreter code Interpreter::_throw_exception_entry = __ pc(); // expression stack is undefined here // rax: exception // r13/rsi: exception bcp __ verify_oop(rax); - Register rarg = NOT_LP64(rax) LP64_ONLY(c_rarg1); - LP64_ONLY(__ mov(c_rarg1, rax)); + Register rarg = c_rarg1; + __ mov(c_rarg1, rax); // expression stack must be empty before entering the VM in case of // an exception @@ -1589,8 +1419,7 @@ void TemplateInterpreterGenerator::generate_throw_exception() { // indicating that we are currently handling popframe, so that // call_VMs that may happen later do not trigger new popframe // handling cycles. 
- const Register thread = NOT_LP64(rcx) LP64_ONLY(r15_thread); - NOT_LP64(__ get_thread(thread)); + const Register thread = r15_thread; __ movl(rdx, Address(thread, JavaThread::popframe_condition_offset())); __ orl(rdx, JavaThread::popframe_processing_bit); __ movl(Address(thread, JavaThread::popframe_condition_offset()), rdx); @@ -1607,7 +1436,7 @@ void TemplateInterpreterGenerator::generate_throw_exception() { // deoptimization blob's unpack entry because of the presence of // adapter frames in C2. Label caller_not_deoptimized; - Register rarg = NOT_LP64(rdx) LP64_ONLY(c_rarg1); + Register rarg = c_rarg1; __ movptr(rarg, Address(rbp, frame::return_addr_offset * wordSize)); __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), rarg); @@ -1625,7 +1454,6 @@ void TemplateInterpreterGenerator::generate_throw_exception() { __ subptr(rlocals, rax); __ addptr(rlocals, wordSize); // Save these arguments - NOT_LP64(__ get_thread(thread)); __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization:: popframe_preserve_args), @@ -1638,7 +1466,6 @@ void TemplateInterpreterGenerator::generate_throw_exception() { // Inform deoptimization that it is responsible for restoring // these arguments - NOT_LP64(__ get_thread(thread)); __ movl(Address(thread, JavaThread::popframe_condition_offset()), JavaThread::popframe_force_deopt_reexecution_bit); @@ -1664,23 +1491,12 @@ void TemplateInterpreterGenerator::generate_throw_exception() { // maintain this kind of invariant all the time we call a small // fixup routine to move the mutated arguments onto the top of our // expression stack if necessary. 
-#ifndef _LP64 - __ mov(rax, rsp); - __ movptr(rbx, Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize)); - __ lea(rbx, Address(rbp, rbx, Address::times_ptr)); - __ get_thread(thread); - // PC must point into interpreter here - __ set_last_Java_frame(thread, noreg, rbp, __ pc(), noreg); - __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), thread, rax, rbx); - __ get_thread(thread); -#else __ mov(c_rarg1, rsp); __ movptr(c_rarg2, Address(rbp, frame::interpreter_frame_last_sp_offset * wordSize)); __ lea(c_rarg2, Address(rbp, c_rarg2, Address::times_ptr)); // PC must point into interpreter here __ set_last_Java_frame(noreg, rbp, __ pc(), rscratch1); __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), r15_thread, c_rarg1, c_rarg2); -#endif __ reset_last_Java_frame(thread, true); // Restore the last_sp and null it out @@ -1697,7 +1513,6 @@ void TemplateInterpreterGenerator::generate_throw_exception() { } // Clear the popframe condition flag - NOT_LP64(__ get_thread(thread)); __ movl(Address(thread, JavaThread::popframe_condition_offset()), JavaThread::popframe_inactive); @@ -1731,12 +1546,10 @@ void TemplateInterpreterGenerator::generate_throw_exception() { // preserve exception over this code sequence __ pop_ptr(rax); - NOT_LP64(__ get_thread(thread)); __ movptr(Address(thread, JavaThread::vm_result_offset()), rax); // remove the activation (without doing throws on illegalMonitorExceptions) __ remove_activation(vtos, rdx, false, true, false); // restore exception - NOT_LP64(__ get_thread(thread)); __ get_vm_result(rax, thread); // In between activations - previous activation type unknown yet @@ -1772,8 +1585,7 @@ address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state __ empty_expression_stack(); __ load_earlyret_value(state); // 32 bits returns value in rdx, so don't reuse - const Register thread = NOT_LP64(rcx) LP64_ONLY(r15_thread); - 
NOT_LP64(__ get_thread(thread)); + const Register thread = r15_thread; __ movptr(rcx, Address(thread, JavaThread::jvmti_thread_state_offset())); Address cond_addr(rcx, JvmtiThreadState::earlyret_state_offset()); @@ -1805,21 +1617,12 @@ void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, address& vep) { assert(t->is_valid() && t->tos_in() == vtos, "illegal template"); Label L; -#ifndef _LP64 - fep = __ pc(); // ftos entry point - __ push(ftos); - __ jmpb(L); - dep = __ pc(); // dtos entry point - __ push(dtos); - __ jmpb(L); -#else fep = __ pc(); // ftos entry point __ push_f(xmm0); __ jmpb(L); dep = __ pc(); // dtos entry point __ push_d(xmm0); __ jmpb(L); -#endif // _LP64 lep = __ pc(); // ltos entry point __ push_l(); __ jmpb(L); @@ -1838,19 +1641,6 @@ void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t, address TemplateInterpreterGenerator::generate_trace_code(TosState state) { address entry = __ pc(); -#ifndef _LP64 - // prepare expression stack - __ pop(rcx); // pop return address so expression stack is 'pure' - __ push(state); // save tosca - - // pass tosca registers as arguments & call tracer - __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), rcx, rax, rdx); - __ mov(rcx, rax); // make sure return address is not destroyed by pop(state) - __ pop(state); // restore tosca - - // return - __ jmp(rcx); -#else __ push(state); __ push(c_rarg0); __ push(c_rarg1); @@ -1869,7 +1659,6 @@ address TemplateInterpreterGenerator::generate_trace_code(TosState state) { __ pop(c_rarg0); __ pop(state); __ ret(0); // return from result handler -#endif // _LP64 return entry; } @@ -1901,15 +1690,11 @@ void TemplateInterpreterGenerator::trace_bytecode(Template* t) { assert(Interpreter::trace_code(t->tos_in()) != nullptr, "entry must have been generated"); -#ifndef _LP64 - __ call(RuntimeAddress(Interpreter::trace_code(t->tos_in()))); -#else __ mov(r12, rsp); // remember sp (can only use r12 if not using call_VM) __ 
andptr(rsp, -16); // align stack as required by ABI __ call(RuntimeAddress(Interpreter::trace_code(t->tos_in()))); __ mov(rsp, r12); // restore sp __ reinit_heapbase(); -#endif // _LP64 } diff --git a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp deleted file mode 100644 index 75611524e3b0a..0000000000000 --- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp +++ /dev/null @@ -1,510 +0,0 @@ -/* - * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- * - */ - -#include "precompiled.hpp" -#include "asm/macroAssembler.hpp" -#include "compiler/disassembler.hpp" -#include "interpreter/interp_masm.hpp" -#include "interpreter/interpreter.hpp" -#include "interpreter/interpreterRuntime.hpp" -#include "interpreter/templateInterpreterGenerator.hpp" -#include "runtime/arguments.hpp" -#include "runtime/sharedRuntime.hpp" -#include "runtime/stubRoutines.hpp" - -#define __ Disassembler::hook(__FILE__, __LINE__, _masm)-> - - -address TemplateInterpreterGenerator::generate_slow_signature_handler() { - address entry = __ pc(); - // rbx,: method - // rcx: temporary - // rdi: pointer to locals - // rsp: end of copied parameters area - __ mov(rcx, rsp); - __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::slow_signature_handler), rbx, rdi, rcx); - __ ret(0); - return entry; -} - -/** - * Method entry for static native methods: - * int java.util.zip.CRC32.update(int crc, int b) - */ -address TemplateInterpreterGenerator::generate_CRC32_update_entry() { - assert(UseCRC32Intrinsics, "this intrinsic is not supported"); - address entry = __ pc(); - - // rbx: Method* - // rsi: senderSP must preserved for slow path, set SP to it on fast path - // rdx: scratch - // rdi: scratch - - Label slow_path; - // If we need a safepoint check, generate full interpreter entry. - __ get_thread(rdi); - __ safepoint_poll(slow_path, rdi, false /* at_return */, false /* in_nmethod */); - - // We don't generate local frame and don't align stack because - // we call stub code and there is no safepoint on this path. 
- - // Load parameters - const Register crc = rax; // crc - const Register val = rdx; // source java byte value - const Register tbl = rdi; // scratch - - // Arguments are reversed on java expression stack - __ movl(val, Address(rsp, wordSize)); // byte value - __ movl(crc, Address(rsp, 2*wordSize)); // Initial CRC - - __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr())); - __ notl(crc); // ~crc - __ update_byte_crc32(crc, val, tbl); - __ notl(crc); // ~crc - // result in rax - - // _areturn - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set sp to sender sp - __ jmp(rdi); - - // generate a vanilla native entry as the slow path - __ bind(slow_path); - __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); - return entry; -} - -/** - * Method entry for static native methods: - * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) - * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) - */ -address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { - assert(UseCRC32Intrinsics, "this intrinsic is not supported"); - address entry = __ pc(); - - // rbx,: Method* - // rsi: senderSP must preserved for slow path, set SP to it on fast path - // rdx: scratch - // rdi: scratch - - Label slow_path; - // If we need a safepoint check, generate full interpreter entry. - __ get_thread(rdi); - __ safepoint_poll(slow_path, rdi, false /* at_return */, false /* in_nmethod */); - - // We don't generate local frame and don't align stack because - // we call stub code and there is no safepoint on this path. - - // Load parameters - const Register crc = rax; // crc - const Register buf = rdx; // source java byte array address - const Register len = rdi; // length - - // value x86_32 - // interp. 
arg ptr ESP + 4 - // int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) - // 3 2 1 0 - // int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) - // 4 2,3 1 0 - - // Arguments are reversed on java expression stack - __ movl(len, Address(rsp, 4 + 0)); // Length - // Calculate address of start element - if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { - __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // long buf - __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset - __ movl(crc, Address(rsp, 4 + 4 * wordSize)); // Initial CRC - } else { - __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // byte[] array - __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size - __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset - __ movl(crc, Address(rsp, 4 + 3 * wordSize)); // Initial CRC - } - - __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32()), crc, buf, len); - // result in rax - - // _areturn - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set sp to sender sp - __ jmp(rdi); - - // generate a vanilla native entry as the slow path - __ bind(slow_path); - __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); - return entry; -} - -/** -* Method entry for static native methods: -* int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) -* int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end) -*/ -address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { - assert(UseCRC32CIntrinsics, "this intrinsic is not supported"); - address entry = __ pc(); - // Load parameters - const Register crc = rax; // crc - const Register buf = rcx; // source java byte array address - const Register len = rdx; // length - const Register end = len; - - // value x86_32 - // interp. 
arg ptr ESP + 4 - // int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int end) - // 3 2 1 0 - // int java.util.zip.CRC32.updateByteBuffer(int crc, long address, int off, int end) - // 4 2,3 1 0 - - // Arguments are reversed on java expression stack - __ movl(end, Address(rsp, 4 + 0)); // end - __ subl(len, Address(rsp, 4 + 1 * wordSize)); // end - offset == length - // Calculate address of start element - if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) { - __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // long address - __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset - __ movl(crc, Address(rsp, 4 + 4 * wordSize)); // Initial CRC - } else { - __ movptr(buf, Address(rsp, 4 + 2 * wordSize)); // byte[] array - __ addptr(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // + header size - __ addptr(buf, Address(rsp, 4 + 1 * wordSize)); // + offset - __ movl(crc, Address(rsp, 4 + 3 * wordSize)); // Initial CRC - } - __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, StubRoutines::updateBytesCRC32C()), crc, buf, len); - // result in rax - // _areturn - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set sp to sender sp - __ jmp(rdi); - - return entry; -} - -/** - * Method entry for static native method: - * java.lang.Float.intBitsToFloat(int bits) - */ -address TemplateInterpreterGenerator::generate_Float_intBitsToFloat_entry() { - if (UseSSE >= 1) { - address entry = __ pc(); - - // rsi: the sender's SP - - // Skip safepoint check (compiler intrinsic versions of this method - // do not perform safepoint checks either). 
- - // Load 'bits' into xmm0 (interpreter returns results in xmm0) - __ movflt(xmm0, Address(rsp, wordSize)); - - // Return - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set rsp to the sender's SP - __ jmp(rdi); - return entry; - } - - return nullptr; -} - -/** - * Method entry for static native method: - * java.lang.Float.floatToRawIntBits(float value) - */ -address TemplateInterpreterGenerator::generate_Float_floatToRawIntBits_entry() { - if (UseSSE >= 1) { - address entry = __ pc(); - - // rsi: the sender's SP - - // Skip safepoint check (compiler intrinsic versions of this method - // do not perform safepoint checks either). - - // Load the parameter (a floating-point value) into rax. - __ movl(rax, Address(rsp, wordSize)); - - // Return - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set rsp to the sender's SP - __ jmp(rdi); - return entry; - } - - return nullptr; -} - - -/** - * Method entry for static native method: - * java.lang.Double.longBitsToDouble(long bits) - */ -address TemplateInterpreterGenerator::generate_Double_longBitsToDouble_entry() { - if (UseSSE >= 2) { - address entry = __ pc(); - - // rsi: the sender's SP - - // Skip safepoint check (compiler intrinsic versions of this method - // do not perform safepoint checks either). - - // Load 'bits' into xmm0 (interpreter returns results in xmm0) - __ movdbl(xmm0, Address(rsp, wordSize)); - - // Return - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set rsp to the sender's SP - __ jmp(rdi); - return entry; - } - - return nullptr; -} - -/** - * Method entry for static native method: - * java.lang.Double.doubleToRawLongBits(double value) - */ -address TemplateInterpreterGenerator::generate_Double_doubleToRawLongBits_entry() { - if (UseSSE >= 2) { - address entry = __ pc(); - - // rsi: the sender's SP - - // Skip safepoint check (compiler intrinsic versions of this method - // do not perform safepoint checks either). 
- - // Load the parameter (a floating-point value) into rax. - __ movl(rdx, Address(rsp, 2*wordSize)); - __ movl(rax, Address(rsp, wordSize)); - - // Return - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set rsp to the sender's SP - __ jmp(rdi); - return entry; - } - - return nullptr; -} - -/** - * Method entry for static method: - * java.lang.Float.float16ToFloat(short floatBinary16) - */ -address TemplateInterpreterGenerator::generate_Float_float16ToFloat_entry() { - assert(VM_Version::supports_float16(), "this intrinsic is not supported"); - address entry = __ pc(); - - // rsi: the sender's SP - - // Load value into xmm0 and convert - __ movswl(rax, Address(rsp, wordSize)); - __ flt16_to_flt(xmm0, rax); - - // Return - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set rsp to the sender's SP - __ jmp(rdi); - return entry; -} - -/** - * Method entry for static method: - * java.lang.Float.floatToFloat16(float value) - */ -address TemplateInterpreterGenerator::generate_Float_floatToFloat16_entry() { - assert(VM_Version::supports_float16(), "this intrinsic is not supported"); - address entry = __ pc(); - - // rsi: the sender's SP - - // Load value into xmm0, convert and put result into rax - __ movflt(xmm0, Address(rsp, wordSize)); - __ flt_to_flt16(rax, xmm0, xmm1); - - // Return - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set rsp to the sender's SP - __ jmp(rdi); - return entry; -} - -address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { - - // rbx,: Method* - // rcx: scratrch - // rsi: sender sp - - address entry_point = __ pc(); - - // These don't need a safepoint check because they aren't virtually - // callable. We won't enter these intrinsics from compiled code. - // If in the future we added an intrinsic which was virtually callable - // we'd have to worry about how to safepoint so that this code is used. 
- - // mathematical functions inlined by compiler - // (interpreter must provide identical implementation - // in order to avoid monotonicity bugs when switching - // from interpreter to compiler in the middle of some - // computation) - // - // stack: [ ret adr ] <-- rsp - // [ lo(arg) ] - // [ hi(arg) ] - // - if (kind == Interpreter::java_lang_math_tanh) { - return nullptr; - } - - if (kind == Interpreter::java_lang_math_fmaD) { - if (!UseFMA) { - return nullptr; // Generate a vanilla entry - } - __ movdbl(xmm2, Address(rsp, 5 * wordSize)); - __ movdbl(xmm1, Address(rsp, 3 * wordSize)); - __ movdbl(xmm0, Address(rsp, 1 * wordSize)); - __ fmad(xmm0, xmm1, xmm2, xmm0); - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set sp to sender sp - __ jmp(rdi); - - return entry_point; - } else if (kind == Interpreter::java_lang_math_fmaF) { - if (!UseFMA) { - return nullptr; // Generate a vanilla entry - } - __ movflt(xmm2, Address(rsp, 3 * wordSize)); - __ movflt(xmm1, Address(rsp, 2 * wordSize)); - __ movflt(xmm0, Address(rsp, 1 * wordSize)); - __ fmaf(xmm0, xmm1, xmm2, xmm0); - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set sp to sender sp - __ jmp(rdi); - - return entry_point; - } - - __ fld_d(Address(rsp, 1*wordSize)); - switch (kind) { - case Interpreter::java_lang_math_sin : - __ subptr(rsp, 2 * wordSize); - __ fstp_d(Address(rsp, 0)); - if (VM_Version::supports_sse2() && StubRoutines::dsin() != nullptr) { - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin()))); - } else { - __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dsin)); - } - __ addptr(rsp, 2 * wordSize); - break; - case Interpreter::java_lang_math_cos : - __ subptr(rsp, 2 * wordSize); - __ fstp_d(Address(rsp, 0)); - if (VM_Version::supports_sse2() && StubRoutines::dcos() != nullptr) { - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos()))); - } else { - __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dcos)); - } - __ 
addptr(rsp, 2 * wordSize); - break; - case Interpreter::java_lang_math_tan : - __ subptr(rsp, 2 * wordSize); - __ fstp_d(Address(rsp, 0)); - if (StubRoutines::dtan() != nullptr) { - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan()))); - } else { - __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dtan)); - } - __ addptr(rsp, 2 * wordSize); - break; - case Interpreter::java_lang_math_sqrt: - __ fsqrt(); - break; - case Interpreter::java_lang_math_abs: - __ fabs(); - break; - case Interpreter::java_lang_math_log: - __ subptr(rsp, 2 * wordSize); - __ fstp_d(Address(rsp, 0)); - if (StubRoutines::dlog() != nullptr) { - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog()))); - } else { - __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog)); - } - __ addptr(rsp, 2 * wordSize); - break; - case Interpreter::java_lang_math_log10: - __ subptr(rsp, 2 * wordSize); - __ fstp_d(Address(rsp, 0)); - if (StubRoutines::dlog10() != nullptr) { - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10()))); - } else { - __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10)); - } - __ addptr(rsp, 2 * wordSize); - break; - case Interpreter::java_lang_math_pow: - __ fld_d(Address(rsp, 3*wordSize)); // second argument - __ subptr(rsp, 4 * wordSize); - __ fstp_d(Address(rsp, 0)); - __ fstp_d(Address(rsp, 2 * wordSize)); - if (StubRoutines::dpow() != nullptr) { - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow()))); - } else { - __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dpow)); - } - __ addptr(rsp, 4 * wordSize); - break; - case Interpreter::java_lang_math_exp: - __ subptr(rsp, 2*wordSize); - __ fstp_d(Address(rsp, 0)); - if (StubRoutines::dexp() != nullptr) { - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp()))); - } else { - __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)); - } - __ addptr(rsp, 2*wordSize); - break; - default : - 
ShouldNotReachHere(); - } - - // return double result in xmm0 for interpreter and compilers. - if (UseSSE >= 2) { - __ subptr(rsp, 2*wordSize); - __ fstp_d(Address(rsp, 0)); - __ movdbl(xmm0, Address(rsp, 0)); - __ addptr(rsp, 2*wordSize); - } - - // done, result in FPU ST(0) or XMM0 - __ pop(rdi); // get return address - __ mov(rsp, rsi); // set sp to sender sp - __ jmp(rdi); - - return entry_point; -} - -// Not supported -address TemplateInterpreterGenerator::generate_currentThread() { return nullptr; } - diff --git a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp index 5ea2d8eba259b..46f2fd49e8099 100644 --- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp +++ b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp @@ -499,9 +499,3 @@ address TemplateInterpreterGenerator::generate_currentThread() { return entry_point; } -// Not supported -address TemplateInterpreterGenerator::generate_Float_intBitsToFloat_entry() { return nullptr; } -address TemplateInterpreterGenerator::generate_Float_floatToRawIntBits_entry() { return nullptr; } -address TemplateInterpreterGenerator::generate_Double_longBitsToDouble_entry() { return nullptr; } -address TemplateInterpreterGenerator::generate_Double_doubleToRawLongBits_entry() { return nullptr; } - diff --git a/src/hotspot/cpu/x86/templateTable_x86.cpp b/src/hotspot/cpu/x86/templateTable_x86.cpp index 441e4c8a0b877..fbe70e37d8346 100644 --- a/src/hotspot/cpu/x86/templateTable_x86.cpp +++ b/src/hotspot/cpu/x86/templateTable_x86.cpp @@ -52,8 +52,8 @@ #define __ Disassembler::hook(__FILE__, __LINE__, _masm)-> // Global Register Names -static const Register rbcp = LP64_ONLY(r13) NOT_LP64(rsi); -static const Register rlocals = LP64_ONLY(r14) NOT_LP64(rdi); +static const Register rbcp = r13; +static const Register rlocals = r14; // Address Computation: local variables static inline Address iaddress(int n) { @@ -64,12 +64,6 @@ static inline 
Address laddress(int n) { return iaddress(n + 1); } -#ifndef _LP64 -static inline Address haddress(int n) { - return iaddress(n + 0); -} -#endif - static inline Address faddress(int n) { return iaddress(n); } @@ -90,12 +84,6 @@ static inline Address laddress(Register r) { return Address(rlocals, r, Address::times_ptr, Interpreter::local_offset_in_bytes(1)); } -#ifndef _LP64 -static inline Address haddress(Register r) { - return Address(rlocals, r, Interpreter::stackElementScale(), Interpreter::local_offset_in_bytes(0)); -} -#endif - static inline Address faddress(Register r) { return iaddress(r); } @@ -157,10 +145,7 @@ static void do_oop_store(InterpreterMacroAssembler* _masm, Register val, DecoratorSet decorators = 0) { assert(val == noreg || val == rax, "parameter is just for looks"); - __ store_heap_oop(dst, val, - NOT_LP64(rdx) LP64_ONLY(rscratch2), - NOT_LP64(rbx) LP64_ONLY(r9), - NOT_LP64(rsi) LP64_ONLY(r8), decorators); + __ store_heap_oop(dst, val, rscratch2, r9, r8, decorators); } static void do_oop_load(InterpreterMacroAssembler* _masm, @@ -286,69 +271,42 @@ void TemplateTable::lconst(int value) { } else { __ movl(rax, value); } -#ifndef _LP64 - assert(value >= 0, "check this code"); - __ xorptr(rdx, rdx); -#endif } void TemplateTable::fconst(int value) { transition(vtos, ftos); - if (UseSSE >= 1) { - static float one = 1.0f, two = 2.0f; - switch (value) { - case 0: - __ xorps(xmm0, xmm0); - break; - case 1: - __ movflt(xmm0, ExternalAddress((address) &one), rscratch1); - break; - case 2: - __ movflt(xmm0, ExternalAddress((address) &two), rscratch1); - break; - default: - ShouldNotReachHere(); - break; - } - } else { -#ifdef _LP64 + static float one = 1.0f, two = 2.0f; + switch (value) { + case 0: + __ xorps(xmm0, xmm0); + break; + case 1: + __ movflt(xmm0, ExternalAddress((address) &one), rscratch1); + break; + case 2: + __ movflt(xmm0, ExternalAddress((address) &two), rscratch1); + break; + default: ShouldNotReachHere(); -#else - if (value == 0) { __ 
fldz(); - } else if (value == 1) { __ fld1(); - } else if (value == 2) { __ fld1(); __ fld1(); __ faddp(); // should do a better solution here - } else { ShouldNotReachHere(); - } -#endif // _LP64 + break; } } void TemplateTable::dconst(int value) { transition(vtos, dtos); - if (UseSSE >= 2) { - static double one = 1.0; - switch (value) { - case 0: - __ xorpd(xmm0, xmm0); - break; - case 1: - __ movdbl(xmm0, ExternalAddress((address) &one), rscratch1); - break; - default: - ShouldNotReachHere(); + static double one = 1.0; + switch (value) { + case 0: + __ xorpd(xmm0, xmm0); break; - } - } else { -#ifdef _LP64 + case 1: + __ movdbl(xmm0, ExternalAddress((address) &one), rscratch1); + break; + default: ShouldNotReachHere(); -#else - if (value == 0) { __ fldz(); - } else if (value == 1) { __ fld1(); - } else { ShouldNotReachHere(); - } -#endif + break; } } @@ -366,7 +324,7 @@ void TemplateTable::sipush() { void TemplateTable::ldc(LdcType type) { transition(vtos, vtos); - Register rarg = NOT_LP64(rcx) LP64_ONLY(c_rarg1); + Register rarg = c_rarg1; Label call_ldc, notFloat, notClass, notInt, Done; if (is_ldc_wide(type)) { @@ -434,7 +392,7 @@ void TemplateTable::fast_aldc(LdcType type) { Register result = rax; Register tmp = rdx; - Register rarg = NOT_LP64(rcx) LP64_ONLY(c_rarg1); + Register rarg = c_rarg1; int index_size = is_ldc_wide(type) ? 
sizeof(u2) : sizeof(u1); Label resolved; @@ -497,7 +455,6 @@ void TemplateTable::ldc2_w() { // ltos __ movptr(rax, Address(rcx, rbx, Address::times_ptr, base_offset + 0 * wordSize)); - NOT_LP64(__ movptr(rdx, Address(rcx, rbx, Address::times_ptr, base_offset + 1 * wordSize))); __ push(ltos); __ jmp(Done); @@ -511,17 +468,10 @@ void TemplateTable::condy_helper(Label& Done) { const Register obj = rax; const Register off = rbx; const Register flags = rcx; - const Register rarg = NOT_LP64(rcx) LP64_ONLY(c_rarg1); + const Register rarg = c_rarg1; __ movl(rarg, (int)bytecode()); call_VM(obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc), rarg); -#ifndef _LP64 - // borrow rdi from locals - __ get_thread(rdi); - __ get_vm_result_2(flags, rdi); - __ restore_locals(); -#else __ get_vm_result_2(flags, r15_thread); -#endif // VMr = obj = base address to find primitive value to push // VMr2 = flags = (tos, off) using format of CPCE::_flags __ movl(off, flags); @@ -596,7 +546,6 @@ void TemplateTable::condy_helper(Label& Done) { __ jccb(Assembler::notEqual, notLong); // ltos // Loading high word first because movptr clobbers rax - NOT_LP64(__ movptr(rdx, field.plus_disp(4))); __ movptr(rax, field); __ push(ltos); __ jmp(Done); @@ -637,8 +586,8 @@ void TemplateTable::iload_internal(RewriteControl rc) { transition(vtos, itos); if (RewriteFrequentPairs && rc == may_rewrite) { Label rewrite, done; - const Register bc = LP64_ONLY(c_rarg3) NOT_LP64(rcx); - LP64_ONLY(assert(rbx != bc, "register damaged")); + const Register bc = c_rarg3; + assert(rbx != bc, "register damaged"); // get next byte __ load_unsigned_byte(rbx, @@ -694,7 +643,6 @@ void TemplateTable::lload() { transition(vtos, ltos); locals_index(rbx); __ movptr(rax, laddress(rbx)); - NOT_LP64(__ movl(rdx, haddress(rbx))); } void TemplateTable::fload() { @@ -732,7 +680,6 @@ void TemplateTable::wide_lload() { transition(vtos, ltos); locals_index_wide(rbx); __ movptr(rax, laddress(rbx)); - NOT_LP64(__ movl(rdx, 
haddress(rbx))); } void TemplateTable::wide_fload() { @@ -773,7 +720,7 @@ void TemplateTable::index_check_without_pop(Register array, Register index) { Label skip; __ jccb(Assembler::below, skip); // Pass array to create more detailed exceptions. - __ mov(NOT_LP64(rax) LP64_ONLY(c_rarg1), array); + __ mov(c_rarg1, array); __ jump(RuntimeAddress(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry)); __ bind(skip); } @@ -794,7 +741,6 @@ void TemplateTable::laload() { // rax: index // rdx: array index_check(rdx, rax); // kills rbx - NOT_LP64(__ mov(rbx, rax)); // rbx,: index __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, noreg /* ltos */, Address(rdx, rbx, Address::times_8, @@ -895,7 +841,6 @@ void TemplateTable::iload(int n) { void TemplateTable::lload(int n) { transition(vtos, ltos); __ movptr(rax, laddress(n)); - NOT_LP64(__ movptr(rdx, haddress(n))); } void TemplateTable::fload(int n) { @@ -947,8 +892,8 @@ void TemplateTable::aload_0_internal(RewriteControl rc) { if (RewriteFrequentPairs && rc == may_rewrite) { Label rewrite, done; - const Register bc = LP64_ONLY(c_rarg3) NOT_LP64(rcx); - LP64_ONLY(assert(rbx != bc, "register damaged")); + const Register bc = c_rarg3; + assert(rbx != bc, "register damaged"); // get next byte __ load_unsigned_byte(rbx, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); @@ -1002,7 +947,6 @@ void TemplateTable::lstore() { transition(ltos, vtos); locals_index(rbx); __ movptr(laddress(rbx), rax); - NOT_LP64(__ movptr(haddress(rbx), rdx)); } void TemplateTable::fstore() { @@ -1033,33 +977,23 @@ void TemplateTable::wide_istore() { void TemplateTable::wide_lstore() { transition(vtos, vtos); - NOT_LP64(__ pop_l(rax, rdx)); - LP64_ONLY(__ pop_l()); + __ pop_l(); locals_index_wide(rbx); __ movptr(laddress(rbx), rax); - NOT_LP64(__ movl(haddress(rbx), rdx)); } void TemplateTable::wide_fstore() { -#ifdef _LP64 transition(vtos, vtos); __ pop_f(xmm0); locals_index_wide(rbx); __ movflt(faddress(rbx), xmm0); -#else - wide_istore(); -#endif } 
void TemplateTable::wide_dstore() { -#ifdef _LP64 transition(vtos, vtos); __ pop_d(xmm0); locals_index_wide(rbx); __ movdbl(daddress(rbx), xmm0); -#else - wide_lstore(); -#endif } void TemplateTable::wide_astore() { @@ -1100,7 +1034,7 @@ void TemplateTable::lastore() { void TemplateTable::fastore() { transition(ftos, vtos); __ pop_i(rbx); - // value is in UseSSE >= 1 ? xmm0 : ST(0) + // value is in xmm0 // rbx: index // rdx: array index_check(rdx, rbx); // prefer index in rbx @@ -1113,7 +1047,7 @@ void TemplateTable::fastore() { void TemplateTable::dastore() { transition(dtos, vtos); __ pop_i(rbx); - // value is in UseSSE >= 2 ? xmm0 : ST(0) + // value is in xmm0 // rbx: index // rdx: array index_check(rdx, rbx); // prefer index in rbx @@ -1225,7 +1159,6 @@ void TemplateTable::istore(int n) { void TemplateTable::lstore(int n) { transition(ltos, vtos); __ movptr(laddress(n), rax); - NOT_LP64(__ movptr(haddress(n), rdx)); } void TemplateTable::fstore(int n) { @@ -1364,7 +1297,6 @@ void TemplateTable::iop2(Operation op) { void TemplateTable::lop2(Operation op) { transition(ltos, ltos); -#ifdef _LP64 switch (op) { case add : __ pop_l(rdx); __ addptr(rax, rdx); break; case sub : __ mov(rdx, rax); __ pop_l(rax); __ subptr(rax, rdx); break; @@ -1373,18 +1305,6 @@ void TemplateTable::lop2(Operation op) { case _xor : __ pop_l(rdx); __ xorptr(rax, rdx); break; default : ShouldNotReachHere(); } -#else - __ pop_l(rbx, rcx); - switch (op) { - case add : __ addl(rax, rbx); __ adcl(rdx, rcx); break; - case sub : __ subl(rbx, rax); __ sbbl(rcx, rdx); - __ mov (rax, rbx); __ mov (rdx, rcx); break; - case _and : __ andl(rax, rbx); __ andl(rdx, rcx); break; - case _or : __ orl (rax, rbx); __ orl (rdx, rcx); break; - case _xor : __ xorl(rax, rbx); __ xorl(rdx, rcx); break; - default : ShouldNotReachHere(); - } -#endif } void TemplateTable::idiv() { @@ -1412,21 +1332,12 @@ void TemplateTable::irem() { void TemplateTable::lmul() { transition(ltos, ltos); -#ifdef _LP64 __ pop_l(rdx); __ 
imulq(rax, rdx); -#else - __ pop_l(rbx, rcx); - __ push(rcx); __ push(rbx); - __ push(rdx); __ push(rax); - __ lmul(2 * wordSize, 0); - __ addptr(rsp, 4 * wordSize); // take off temporaries -#endif } void TemplateTable::ldiv() { transition(ltos, ltos); -#ifdef _LP64 __ mov(rcx, rax); __ pop_l(rax); // generate explicit div0 check @@ -1438,22 +1349,10 @@ void TemplateTable::ldiv() { // needed), which may speed up this implementation for the common case. // (see also JVM spec., p.243 & p.271) __ corrected_idivq(rcx); // kills rbx -#else - __ pop_l(rbx, rcx); - __ push(rcx); __ push(rbx); - __ push(rdx); __ push(rax); - // check if y = 0 - __ orl(rax, rdx); - __ jump_cc(Assembler::zero, - RuntimeAddress(Interpreter::_throw_ArithmeticException_entry)); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::ldiv)); - __ addptr(rsp, 4 * wordSize); // take off temporaries -#endif } void TemplateTable::lrem() { transition(ltos, ltos); -#ifdef _LP64 __ mov(rcx, rax); __ pop_l(rax); __ testq(rcx, rcx); @@ -1465,209 +1364,99 @@ void TemplateTable::lrem() { // (see also JVM spec., p.243 & p.271) __ corrected_idivq(rcx); // kills rbx __ mov(rax, rdx); -#else - __ pop_l(rbx, rcx); - __ push(rcx); __ push(rbx); - __ push(rdx); __ push(rax); - // check if y = 0 - __ orl(rax, rdx); - __ jump_cc(Assembler::zero, - RuntimeAddress(Interpreter::_throw_ArithmeticException_entry)); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::lrem)); - __ addptr(rsp, 4 * wordSize); -#endif } void TemplateTable::lshl() { transition(itos, ltos); __ movl(rcx, rax); // get shift count - #ifdef _LP64 __ pop_l(rax); // get shift value __ shlq(rax); -#else - __ pop_l(rax, rdx); // get shift value - __ lshl(rdx, rax); -#endif } void TemplateTable::lshr() { -#ifdef _LP64 transition(itos, ltos); __ movl(rcx, rax); // get shift count __ pop_l(rax); // get shift value __ sarq(rax); -#else - transition(itos, ltos); - __ mov(rcx, rax); // get shift count - __ pop_l(rax, rdx); // get shift value - __ 
lshr(rdx, rax, true); -#endif } void TemplateTable::lushr() { transition(itos, ltos); -#ifdef _LP64 __ movl(rcx, rax); // get shift count __ pop_l(rax); // get shift value __ shrq(rax); -#else - __ mov(rcx, rax); // get shift count - __ pop_l(rax, rdx); // get shift value - __ lshr(rdx, rax); -#endif } void TemplateTable::fop2(Operation op) { transition(ftos, ftos); - if (UseSSE >= 1) { - switch (op) { - case add: - __ addss(xmm0, at_rsp()); - __ addptr(rsp, Interpreter::stackElementSize); - break; - case sub: - __ movflt(xmm1, xmm0); - __ pop_f(xmm0); - __ subss(xmm0, xmm1); - break; - case mul: - __ mulss(xmm0, at_rsp()); - __ addptr(rsp, Interpreter::stackElementSize); - break; - case div: - __ movflt(xmm1, xmm0); - __ pop_f(xmm0); - __ divss(xmm0, xmm1); - break; - case rem: - // On x86_64 platforms the SharedRuntime::frem method is called to perform the - // modulo operation. The frem method calls the function - // double fmod(double x, double y) in math.h. The documentation of fmod states: - // "If x or y is a NaN, a NaN is returned." without specifying what type of NaN - // (signalling or quiet) is returned. - // - // On x86_32 platforms the FPU is used to perform the modulo operation. The - // reason is that on 32-bit Windows the sign of modulo operations diverges from - // what is considered the standard (e.g., -0.0f % -3.14f is 0.0f (and not -0.0f). - // The fprem instruction used on x86_32 is functionally equivalent to - // SharedRuntime::frem in that it returns a NaN. 
-#ifdef _LP64 - __ movflt(xmm1, xmm0); - __ pop_f(xmm0); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); -#else // !_LP64 - __ push_f(xmm0); - __ pop_f(); - __ fld_s(at_rsp()); - __ fremr(rax); - __ f2ieee(); - __ pop(rax); // pop second operand off the stack - __ push_f(); - __ pop_f(xmm0); -#endif // _LP64 - break; - default: - ShouldNotReachHere(); - break; - } - } else { -#ifdef _LP64 + switch (op) { + case add: + __ addss(xmm0, at_rsp()); + __ addptr(rsp, Interpreter::stackElementSize); + break; + case sub: + __ movflt(xmm1, xmm0); + __ pop_f(xmm0); + __ subss(xmm0, xmm1); + break; + case mul: + __ mulss(xmm0, at_rsp()); + __ addptr(rsp, Interpreter::stackElementSize); + break; + case div: + __ movflt(xmm1, xmm0); + __ pop_f(xmm0); + __ divss(xmm0, xmm1); + break; + case rem: + // On x86_64 platforms the SharedRuntime::frem method is called to perform the + // modulo operation. The frem method calls the function + // double fmod(double x, double y) in math.h. The documentation of fmod states: + // "If x or y is a NaN, a NaN is returned." without specifying what type of NaN + // (signalling or quiet) is returned. 
+ // + __ movflt(xmm1, xmm0); + __ pop_f(xmm0); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); + break; + default: ShouldNotReachHere(); -#else // !_LP64 - switch (op) { - case add: __ fadd_s (at_rsp()); break; - case sub: __ fsubr_s(at_rsp()); break; - case mul: __ fmul_s (at_rsp()); break; - case div: __ fdivr_s(at_rsp()); break; - case rem: __ fld_s (at_rsp()); __ fremr(rax); break; - default : ShouldNotReachHere(); - } - __ f2ieee(); - __ pop(rax); // pop second operand off the stack -#endif // _LP64 + break; } } void TemplateTable::dop2(Operation op) { transition(dtos, dtos); - if (UseSSE >= 2) { - switch (op) { - case add: - __ addsd(xmm0, at_rsp()); - __ addptr(rsp, 2 * Interpreter::stackElementSize); - break; - case sub: - __ movdbl(xmm1, xmm0); - __ pop_d(xmm0); - __ subsd(xmm0, xmm1); - break; - case mul: - __ mulsd(xmm0, at_rsp()); - __ addptr(rsp, 2 * Interpreter::stackElementSize); - break; - case div: - __ movdbl(xmm1, xmm0); - __ pop_d(xmm0); - __ divsd(xmm0, xmm1); - break; - case rem: - // Similar to fop2(), the modulo operation is performed using the - // SharedRuntime::drem method (on x86_64 platforms) or using the - // FPU (on x86_32 platforms) for the same reasons as mentioned in fop2(). 
-#ifdef _LP64 - __ movdbl(xmm1, xmm0); - __ pop_d(xmm0); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); -#else // !_LP64 - __ push_d(xmm0); - __ pop_d(); - __ fld_d(at_rsp()); - __ fremr(rax); - __ d2ieee(); - __ pop(rax); - __ pop(rdx); - __ push_d(); - __ pop_d(xmm0); -#endif // _LP64 - break; - default: - ShouldNotReachHere(); - break; - } - } else { -#ifdef _LP64 + switch (op) { + case add: + __ addsd(xmm0, at_rsp()); + __ addptr(rsp, 2 * Interpreter::stackElementSize); + break; + case sub: + __ movdbl(xmm1, xmm0); + __ pop_d(xmm0); + __ subsd(xmm0, xmm1); + break; + case mul: + __ mulsd(xmm0, at_rsp()); + __ addptr(rsp, 2 * Interpreter::stackElementSize); + break; + case div: + __ movdbl(xmm1, xmm0); + __ pop_d(xmm0); + __ divsd(xmm0, xmm1); + break; + case rem: + // Similar to fop2(), the modulo operation is performed using the + // SharedRuntime::drem method (on x86_64 platforms) or using the + __ movdbl(xmm1, xmm0); + __ pop_d(xmm0); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); + break; + default: ShouldNotReachHere(); -#else // !_LP64 - switch (op) { - case add: __ fadd_d (at_rsp()); break; - case sub: __ fsubr_d(at_rsp()); break; - case mul: { - // strict semantics - __ fld_x(ExternalAddress(StubRoutines::x86::addr_fpu_subnormal_bias1())); - __ fmulp(); - __ fmul_d (at_rsp()); - __ fld_x(ExternalAddress(StubRoutines::x86::addr_fpu_subnormal_bias2())); - __ fmulp(); - break; - } - case div: { - // strict semantics - __ fld_x(ExternalAddress(StubRoutines::x86::addr_fpu_subnormal_bias1())); - __ fmul_d (at_rsp()); - __ fdivrp(); - __ fld_x(ExternalAddress(StubRoutines::x86::addr_fpu_subnormal_bias2())); - __ fmulp(); - break; - } - case rem: __ fld_d (at_rsp()); __ fremr(rax); break; - default : ShouldNotReachHere(); - } - __ d2ieee(); - // Pop double precision number from rsp. 
- __ pop(rax); - __ pop(rdx); -#endif // _LP64 + break; } } @@ -1678,8 +1467,7 @@ void TemplateTable::ineg() { void TemplateTable::lneg() { transition(ltos, ltos); - LP64_ONLY(__ negq(rax)); - NOT_LP64(__ lneg(rdx, rax)); + __ negq(rax); } // Note: 'double' and 'long long' have 32-bits alignment on x86. @@ -1699,28 +1487,15 @@ static jlong double_signflip_pool[2*2]; void TemplateTable::fneg() { transition(ftos, ftos); - if (UseSSE >= 1) { - static jlong *float_signflip = double_quadword(&float_signflip_pool[1], CONST64(0x8000000080000000), CONST64(0x8000000080000000)); - __ xorps(xmm0, ExternalAddress((address) float_signflip), rscratch1); - } else { - LP64_ONLY(ShouldNotReachHere()); - NOT_LP64(__ fchs()); - } + static jlong *float_signflip = double_quadword(&float_signflip_pool[1], CONST64(0x8000000080000000), CONST64(0x8000000080000000)); + __ xorps(xmm0, ExternalAddress((address) float_signflip), rscratch1); } void TemplateTable::dneg() { transition(dtos, dtos); - if (UseSSE >= 2) { - static jlong *double_signflip = - double_quadword(&double_signflip_pool[1], CONST64(0x8000000000000000), CONST64(0x8000000000000000)); - __ xorpd(xmm0, ExternalAddress((address) double_signflip), rscratch1); - } else { -#ifdef _LP64 - ShouldNotReachHere(); -#else - __ fchs(); -#endif - } + static jlong *double_signflip = + double_quadword(&double_signflip_pool[1], CONST64(0x8000000000000000), CONST64(0x8000000000000000)); + __ xorpd(xmm0, ExternalAddress((address) double_signflip), rscratch1); } void TemplateTable::iinc() { @@ -1742,8 +1517,6 @@ void TemplateTable::wide_iinc() { } void TemplateTable::convert() { -#ifdef _LP64 - // Checking #ifdef ASSERT { TosState tos_in = ilgl; @@ -1870,203 +1643,10 @@ void TemplateTable::convert() { default: ShouldNotReachHere(); } -#else // !_LP64 - // Checking -#ifdef ASSERT - { TosState tos_in = ilgl; - TosState tos_out = ilgl; - switch (bytecode()) { - case Bytecodes::_i2l: // fall through - case Bytecodes::_i2f: // fall through - case 
Bytecodes::_i2d: // fall through - case Bytecodes::_i2b: // fall through - case Bytecodes::_i2c: // fall through - case Bytecodes::_i2s: tos_in = itos; break; - case Bytecodes::_l2i: // fall through - case Bytecodes::_l2f: // fall through - case Bytecodes::_l2d: tos_in = ltos; break; - case Bytecodes::_f2i: // fall through - case Bytecodes::_f2l: // fall through - case Bytecodes::_f2d: tos_in = ftos; break; - case Bytecodes::_d2i: // fall through - case Bytecodes::_d2l: // fall through - case Bytecodes::_d2f: tos_in = dtos; break; - default : ShouldNotReachHere(); - } - switch (bytecode()) { - case Bytecodes::_l2i: // fall through - case Bytecodes::_f2i: // fall through - case Bytecodes::_d2i: // fall through - case Bytecodes::_i2b: // fall through - case Bytecodes::_i2c: // fall through - case Bytecodes::_i2s: tos_out = itos; break; - case Bytecodes::_i2l: // fall through - case Bytecodes::_f2l: // fall through - case Bytecodes::_d2l: tos_out = ltos; break; - case Bytecodes::_i2f: // fall through - case Bytecodes::_l2f: // fall through - case Bytecodes::_d2f: tos_out = ftos; break; - case Bytecodes::_i2d: // fall through - case Bytecodes::_l2d: // fall through - case Bytecodes::_f2d: tos_out = dtos; break; - default : ShouldNotReachHere(); - } - transition(tos_in, tos_out); - } -#endif // ASSERT - - // Conversion - // (Note: use push(rcx)/pop(rcx) for 1/2-word stack-ptr manipulation) - switch (bytecode()) { - case Bytecodes::_i2l: - __ extend_sign(rdx, rax); - break; - case Bytecodes::_i2f: - if (UseSSE >= 1) { - __ cvtsi2ssl(xmm0, rax); - } else { - __ push(rax); // store int on tos - __ fild_s(at_rsp()); // load int to ST0 - __ f2ieee(); // truncate to float size - __ pop(rcx); // adjust rsp - } - break; - case Bytecodes::_i2d: - if (UseSSE >= 2) { - __ cvtsi2sdl(xmm0, rax); - } else { - __ push(rax); // add one slot for d2ieee() - __ push(rax); // store int on tos - __ fild_s(at_rsp()); // load int to ST0 - __ d2ieee(); // truncate to double size - __ pop(rcx); 
// adjust rsp - __ pop(rcx); - } - break; - case Bytecodes::_i2b: - __ shll(rax, 24); // truncate upper 24 bits - __ sarl(rax, 24); // and sign-extend byte - LP64_ONLY(__ movsbl(rax, rax)); - break; - case Bytecodes::_i2c: - __ andl(rax, 0xFFFF); // truncate upper 16 bits - LP64_ONLY(__ movzwl(rax, rax)); - break; - case Bytecodes::_i2s: - __ shll(rax, 16); // truncate upper 16 bits - __ sarl(rax, 16); // and sign-extend short - LP64_ONLY(__ movswl(rax, rax)); - break; - case Bytecodes::_l2i: - /* nothing to do */ - break; - case Bytecodes::_l2f: - // On 64-bit platforms, the cvtsi2ssq instruction is used to convert - // 64-bit long values to floats. On 32-bit platforms it is not possible - // to use that instruction with 64-bit operands, therefore the FPU is - // used to perform the conversion. - __ push(rdx); // store long on tos - __ push(rax); - __ fild_d(at_rsp()); // load long to ST0 - __ f2ieee(); // truncate to float size - __ pop(rcx); // adjust rsp - __ pop(rcx); - if (UseSSE >= 1) { - __ push_f(); - __ pop_f(xmm0); - } - break; - case Bytecodes::_l2d: - // On 32-bit platforms the FPU is used for conversion because on - // 32-bit platforms it is not not possible to use the cvtsi2sdq - // instruction with 64-bit operands. - __ push(rdx); // store long on tos - __ push(rax); - __ fild_d(at_rsp()); // load long to ST0 - __ d2ieee(); // truncate to double size - __ pop(rcx); // adjust rsp - __ pop(rcx); - if (UseSSE >= 2) { - __ push_d(); - __ pop_d(xmm0); - } - break; - case Bytecodes::_f2i: - // SharedRuntime::f2i does not differentiate between sNaNs and qNaNs - // as it returns 0 for any NaN. - if (UseSSE >= 1) { - __ push_f(xmm0); - } else { - __ push(rcx); // reserve space for argument - __ fstp_s(at_rsp()); // pass float argument on stack - } - __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1); - break; - case Bytecodes::_f2l: - // SharedRuntime::f2l does not differentiate between sNaNs and qNaNs - // as it returns 0 for any NaN. 
- if (UseSSE >= 1) { - __ push_f(xmm0); - } else { - __ push(rcx); // reserve space for argument - __ fstp_s(at_rsp()); // pass float argument on stack - } - __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1); - break; - case Bytecodes::_f2d: - if (UseSSE < 1) { - /* nothing to do */ - } else if (UseSSE == 1) { - __ push_f(xmm0); - __ pop_f(); - } else { // UseSSE >= 2 - __ cvtss2sd(xmm0, xmm0); - } - break; - case Bytecodes::_d2i: - if (UseSSE >= 2) { - __ push_d(xmm0); - } else { - __ push(rcx); // reserve space for argument - __ push(rcx); - __ fstp_d(at_rsp()); // pass double argument on stack - } - __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 2); - break; - case Bytecodes::_d2l: - if (UseSSE >= 2) { - __ push_d(xmm0); - } else { - __ push(rcx); // reserve space for argument - __ push(rcx); - __ fstp_d(at_rsp()); // pass double argument on stack - } - __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 2); - break; - case Bytecodes::_d2f: - if (UseSSE <= 1) { - __ push(rcx); // reserve space for f2ieee() - __ f2ieee(); // truncate to float size - __ pop(rcx); // adjust rsp - if (UseSSE == 1) { - // The cvtsd2ss instruction is not available if UseSSE==1, therefore - // the conversion is performed using the FPU in this case. 
- __ push_f(); - __ pop_f(xmm0); - } - } else { // UseSSE >= 2 - __ cvtsd2ss(xmm0, xmm0); - } - break; - default : - ShouldNotReachHere(); - } -#endif // _LP64 } void TemplateTable::lcmp() { transition(ltos, itos); -#ifdef _LP64 Label done; __ pop_l(rdx); __ cmpq(rdx, rax); @@ -2075,57 +1655,34 @@ void TemplateTable::lcmp() { __ setb(Assembler::notEqual, rax); __ movzbl(rax, rax); __ bind(done); -#else - - // y = rdx:rax - __ pop_l(rbx, rcx); // get x = rcx:rbx - __ lcmp2int(rcx, rbx, rdx, rax);// rcx := cmp(x, y) - __ mov(rax, rcx); -#endif } void TemplateTable::float_cmp(bool is_float, int unordered_result) { - if ((is_float && UseSSE >= 1) || - (!is_float && UseSSE >= 2)) { - Label done; - if (is_float) { - // XXX get rid of pop here, use ... reg, mem32 - __ pop_f(xmm1); - __ ucomiss(xmm1, xmm0); - } else { - // XXX get rid of pop here, use ... reg, mem64 - __ pop_d(xmm1); - __ ucomisd(xmm1, xmm0); - } - if (unordered_result < 0) { - __ movl(rax, -1); - __ jccb(Assembler::parity, done); - __ jccb(Assembler::below, done); - __ setb(Assembler::notEqual, rdx); - __ movzbl(rax, rdx); - } else { - __ movl(rax, 1); - __ jccb(Assembler::parity, done); - __ jccb(Assembler::above, done); - __ movl(rax, 0); - __ jccb(Assembler::equal, done); - __ decrementl(rax); - } - __ bind(done); + Label done; + if (is_float) { + // XXX get rid of pop here, use ... reg, mem32 + __ pop_f(xmm1); + __ ucomiss(xmm1, xmm0); } else { -#ifdef _LP64 - ShouldNotReachHere(); -#else // !_LP64 - if (is_float) { - __ fld_s(at_rsp()); - } else { - __ fld_d(at_rsp()); - __ pop(rdx); - } - __ pop(rcx); - __ fcmp2int(rax, unordered_result < 0); -#endif // _LP64 + // XXX get rid of pop here, use ... 
reg, mem64 + __ pop_d(xmm1); + __ ucomisd(xmm1, xmm0); + } + if (unordered_result < 0) { + __ movl(rax, -1); + __ jccb(Assembler::parity, done); + __ jccb(Assembler::below, done); + __ setb(Assembler::notEqual, rdx); + __ movzbl(rax, rdx); + } else { + __ movl(rax, 1); + __ jccb(Assembler::parity, done); + __ jccb(Assembler::above, done); + __ movl(rax, 0); + __ jccb(Assembler::equal, done); + __ decrementl(rax); } + __ bind(done); } void TemplateTable::branch(bool is_jsr, bool is_wide) { @@ -2149,7 +1706,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) { if (!is_wide) { __ sarl(rdx, 16); } - LP64_ONLY(__ movl2ptr(rdx, rdx)); + __ movl2ptr(rdx, rdx); // Handle all the JSR stuff here, then exit. // It's much shorter and cleaner than intermingling with the non-JSR @@ -2268,19 +1825,16 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) { // it will be preserved in rbx. __ mov(rbx, rax); - NOT_LP64(__ get_thread(rcx)); - call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); // rax is OSR buffer, move it to expected parameter location - LP64_ONLY(__ mov(j_rarg0, rax)); - NOT_LP64(__ mov(rcx, rax)); + __ mov(j_rarg0, rax); // We use j_rarg definitions here so that registers don't conflict as parameter // registers change across platforms as we are in the midst of a calling // sequence to the OSR nmethod and we don't want collision. These are NOT parameters. 
- const Register retaddr = LP64_ONLY(j_rarg2) NOT_LP64(rdi); - const Register sender_sp = LP64_ONLY(j_rarg1) NOT_LP64(rdx); + const Register retaddr = j_rarg2; + const Register sender_sp = j_rarg1; // pop the interpreter frame __ movptr(sender_sp, Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender sp @@ -2351,8 +1905,7 @@ void TemplateTable::if_acmp(Condition cc) { void TemplateTable::ret() { transition(vtos, vtos); locals_index(rbx); - LP64_ONLY(__ movslq(rbx, iaddress(rbx))); // get return bci, compute return bcp - NOT_LP64(__ movptr(rbx, iaddress(rbx))); + __ movslq(rbx, iaddress(rbx)); // get return bci, compute return bcp __ profile_ret(rbx, rcx); __ get_method(rax); __ movptr(rbcp, Address(rax, Method::const_offset())); @@ -2396,7 +1949,7 @@ void TemplateTable::tableswitch() { // continue execution __ bind(continue_execution); __ bswapl(rdx); - LP64_ONLY(__ movl2ptr(rdx, rdx)); + __ movl2ptr(rdx, rdx); __ load_unsigned_byte(rbx, Address(rbcp, rdx, Address::times_1)); __ addptr(rbcp, rdx); __ dispatch_only(vtos, true); @@ -2486,8 +2039,6 @@ void TemplateTable::fast_binaryswitch() { const Register temp = rsi; // Find array start - NOT_LP64(__ save_bcp()); - __ lea(array, at_bcp(3 * BytesPerInt)); // btw: should be able to // get rid of this // instruction (change @@ -2544,10 +2095,7 @@ void TemplateTable::fast_binaryswitch() { __ movl(j , Address(array, i, Address::times_8, BytesPerInt)); __ profile_switch_case(i, key, array); __ bswapl(j); - LP64_ONLY(__ movslq(j, j)); - - NOT_LP64(__ restore_bcp()); - NOT_LP64(__ restore_locals()); // restore rdi + __ movslq(j, j); __ load_unsigned_byte(rbx, Address(rbcp, j, Address::times_1)); __ addptr(rbcp, j); @@ -2558,10 +2106,7 @@ void TemplateTable::fast_binaryswitch() { __ profile_switch_default(i); __ movl(j, Address(array, -2 * BytesPerInt)); __ bswapl(j); - LP64_ONLY(__ movslq(j, j)); - - NOT_LP64(__ restore_bcp()); - NOT_LP64(__ restore_locals()); + __ movslq(j, j); __ 
load_unsigned_byte(rbx, Address(rbcp, j, Address::times_1)); __ addptr(rbcp, j); @@ -2576,7 +2121,7 @@ void TemplateTable::_return(TosState state) { if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { assert(state == vtos, "only valid state"); - Register robj = LP64_ONLY(c_rarg1) NOT_LP64(rax); + Register robj = c_rarg1; __ movptr(robj, aaddress(0)); __ load_klass(rdi, robj, rscratch1); __ testb(Address(rdi, Klass::misc_flags_offset()), KlassFlags::_misc_has_finalizer); @@ -2591,13 +2136,7 @@ void TemplateTable::_return(TosState state) { if (_desc->bytecode() != Bytecodes::_return_register_finalizer) { Label no_safepoint; NOT_PRODUCT(__ block_comment("Thread-local Safepoint poll")); -#ifdef _LP64 __ testb(Address(r15_thread, JavaThread::polling_word_offset()), SafepointMechanism::poll_bit()); -#else - const Register thread = rdi; - __ get_thread(thread); - __ testb(Address(thread, JavaThread::polling_word_offset()), SafepointMechanism::poll_bit()); -#endif __ jcc(Assembler::zero, no_safepoint); __ push(state); __ push_cont_fastpath(); @@ -2695,8 +2234,7 @@ void TemplateTable::resolve_cache_and_index_for_method(int byte_no, if (VM_Version::supports_fast_class_init_checks() && bytecode() == Bytecodes::_invokestatic) { const Register method = temp; const Register klass = temp; - const Register thread = LP64_ONLY(r15_thread) NOT_LP64(noreg); - assert(thread != noreg, "x86_32 not supported"); + const Register thread = r15_thread; __ movptr(method, Address(cache, in_bytes(ResolvedMethodEntry::method_offset()))); __ load_method_holder(klass, method); @@ -2822,12 +2360,8 @@ void TemplateTable::load_invokedynamic_entry(Register method) { { const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code); ExternalAddress table(table_addr); -#ifdef _LP64 __ lea(rscratch1, table); __ movptr(index, Address(rscratch1, index, Address::times_ptr)); -#else - __ movptr(index, ArrayAddress(table, Address(noreg, index, Address::times_ptr))); -#endif 
// _LP64 } // push return address @@ -2980,13 +2514,13 @@ void TemplateTable::pop_and_check_object(Register r) { void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) { transition(vtos, vtos); - const Register obj = LP64_ONLY(c_rarg3) NOT_LP64(rcx); + const Register obj = c_rarg3; const Register cache = rcx; const Register index = rdx; const Register off = rbx; const Register tos_state = rax; const Register flags = rdx; - const Register bc = LP64_ONLY(c_rarg3) NOT_LP64(rcx); // uses same reg as obj, so don't mix them + const Register bc = c_rarg3; // uses same reg as obj, so don't mix them resolve_cache_and_index_for_field(byte_no, cache, index); jvmti_post_field_access(cache, index, is_static, false); @@ -3077,12 +2611,12 @@ void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteContr __ cmpl(tos_state, ltos); __ jcc(Assembler::notEqual, notLong); // ltos - // Generate code as if volatile (x86_32). There just aren't enough registers to - // save that information and this code is faster than the test. - __ access_load_at(T_LONG, IN_HEAP | MO_RELAXED, noreg /* ltos */, field, noreg, noreg); + __ access_load_at(T_LONG, IN_HEAP, noreg /* ltos */, field, noreg, noreg); __ push(ltos); // Rewrite bytecode to be faster - LP64_ONLY(if (!is_static && rc == may_rewrite) patch_bytecode(Bytecodes::_fast_lgetfield, bc, rbx)); + if (!is_static && rc == may_rewrite) { + patch_bytecode(Bytecodes::_fast_lgetfield, bc, rbx); + } __ jmp(Done); __ bind(notLong); @@ -3142,9 +2676,9 @@ void TemplateTable::getstatic(int byte_no) { // The function may destroy various registers, just not the cache and index registers. 
void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { // Cache is rcx and index is rdx - const Register entry = LP64_ONLY(c_rarg2) NOT_LP64(rax); // ResolvedFieldEntry - const Register obj = LP64_ONLY(c_rarg1) NOT_LP64(rbx); // Object pointer - const Register value = LP64_ONLY(c_rarg3) NOT_LP64(rcx); // JValue object + const Register entry = c_rarg2; // ResolvedFieldEntry + const Register obj = c_rarg1; // Object pointer + const Register value = c_rarg3; // JValue object if (JvmtiExport::can_post_field_modification()) { // Check to see if a field modification watch has been set before @@ -3166,11 +2700,7 @@ void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is // the object. We don't know the size of the value, though; it // could be one or two words depending on its type. As a result, // we must find the type to determine where the object is. -#ifndef _LP64 - Label two_word, valsize_known; -#endif __ load_unsigned_byte(value, Address(entry, in_bytes(ResolvedFieldEntry::type_offset()))); -#ifdef _LP64 __ movptr(obj, at_tos_p1()); // initially assume a one word jvalue __ cmpl(value, ltos); __ cmovptr(Assembler::equal, @@ -3178,22 +2708,6 @@ void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is __ cmpl(value, dtos); __ cmovptr(Assembler::equal, obj, at_tos_p2()); // dtos (two word jvalue) -#else - __ mov(obj, rsp); - __ cmpl(value, ltos); - __ jccb(Assembler::equal, two_word); - __ cmpl(value, dtos); - __ jccb(Assembler::equal, two_word); - __ addptr(obj, Interpreter::expr_offset_in_bytes(1)); // one word jvalue (not ltos, dtos) - __ jmpb(valsize_known); - - __ bind(two_word); - __ addptr(obj, Interpreter::expr_offset_in_bytes(2)); // two words jvalue - - __ bind(valsize_known); - // setup object pointer - __ movptr(obj, Address(obj, 0)); -#endif } // object (tos) @@ -3252,13 +2766,12 @@ void TemplateTable::putfield_or_static_helper(int byte_no, bool is_static, Rewri // field 
addresses const Address field(obj, off, Address::times_1, 0*wordSize); - NOT_LP64( const Address hi(obj, off, Address::times_1, 1*wordSize);) Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj; Label Done; - const Register bc = LP64_ONLY(c_rarg3) NOT_LP64(rcx); + const Register bc = c_rarg3; // Test TOS state __ testl(tos_state, tos_state); @@ -3359,13 +2872,10 @@ void TemplateTable::putfield_or_static_helper(int byte_no, bool is_static, Rewri { __ pop(ltos); if (!is_static) pop_and_check_object(obj); - // MO_RELAXED: generate atomic store for the case of volatile field (important for x86_32) - __ access_store_at(T_LONG, IN_HEAP | MO_RELAXED, field, noreg /* ltos*/, noreg, noreg, noreg); -#ifdef _LP64 + __ access_store_at(T_LONG, IN_HEAP, field, noreg /* ltos*/, noreg, noreg, noreg); if (!is_static && rc == may_rewrite) { patch_bytecode(Bytecodes::_fast_lputfield, bc, rbx, true, byte_no); } -#endif // _LP64 __ jmp(Done); } @@ -3426,7 +2936,7 @@ void TemplateTable::putstatic(int byte_no) { void TemplateTable::jvmti_post_fast_field_mod() { - const Register scratch = LP64_ONLY(c_rarg3) NOT_LP64(rcx); + const Register scratch = c_rarg3; if (JvmtiExport::can_post_field_modification()) { // Check to see if a field modification watch has been set before @@ -3457,14 +2967,12 @@ void TemplateTable::jvmti_post_fast_field_mod() { } __ mov(scratch, rsp); // points to jvalue on the stack // access constant pool cache entry - LP64_ONLY(__ load_field_entry(c_rarg2, rax)); - NOT_LP64(__ load_field_entry(rax, rdx)); + __ load_field_entry(c_rarg2, rax); __ verify_oop(rbx); // rbx: object pointer copied above // c_rarg2: cache entry pointer // c_rarg3: jvalue object on the stack - LP64_ONLY(__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification), rbx, c_rarg2, c_rarg3)); - NOT_LP64(__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification), rbx, rax, rcx)); + __ call_VM(noreg, 
CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification), rbx, c_rarg2, c_rarg3); switch (bytecode()) { // restore tos values case Bytecodes::_fast_aputfield: __ pop_ptr(rax); break; @@ -3527,11 +3035,7 @@ void TemplateTable::fast_storefield_helper(Address field, Register rax) { do_oop_store(_masm, field, rax); break; case Bytecodes::_fast_lputfield: -#ifdef _LP64 __ access_store_at(T_LONG, IN_HEAP, field, noreg /* ltos */, noreg, noreg, noreg); -#else - __ stop("should not be rewritten"); -#endif break; case Bytecodes::_fast_iputfield: __ access_store_at(T_INT, IN_HEAP, field, rax, noreg, noreg, noreg); @@ -3571,15 +3075,13 @@ void TemplateTable::fast_accessfield(TosState state) { __ testl(rcx, rcx); __ jcc(Assembler::zero, L1); // access constant pool cache entry - LP64_ONLY(__ load_field_entry(c_rarg2, rcx)); - NOT_LP64(__ load_field_entry(rcx, rdx)); + __ load_field_entry(c_rarg2, rcx); __ verify_oop(rax); __ push_ptr(rax); // save object pointer before call_VM() clobbers it - LP64_ONLY(__ mov(c_rarg1, rax)); + __ mov(c_rarg1, rax); // c_rarg1: object pointer copied above // c_rarg2: cache entry pointer - LP64_ONLY(__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), c_rarg1, c_rarg2)); - NOT_LP64(__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), rax, rcx)); + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), c_rarg1, c_rarg2); __ pop_ptr(rax); // restore object pointer __ bind(L1); } @@ -3600,11 +3102,7 @@ void TemplateTable::fast_accessfield(TosState state) { __ verify_oop(rax); break; case Bytecodes::_fast_lgetfield: -#ifdef _LP64 __ access_load_at(T_LONG, IN_HEAP, noreg /* ltos */, field, noreg, noreg); -#else - __ stop("should not be rewritten"); -#endif break; case Bytecodes::_fast_igetfield: __ access_load_at(T_INT, IN_HEAP, rax, field, noreg, noreg); @@ -3709,12 +3207,8 @@ void TemplateTable::prepare_invoke(Register cache, Register recv, 
Register flags { const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code); ExternalAddress table(table_addr); -#ifdef _LP64 __ lea(rscratch1, table); __ movptr(flags, Address(rscratch1, flags, Address::times_ptr)); -#else - __ movptr(flags, ArrayAddress(table, Address(noreg, flags, Address::times_ptr))); -#endif // _LP64 } // push return address @@ -3946,15 +3440,10 @@ void TemplateTable::invokeinterface(int byte_no) { __ restore_bcp(); // rbcp must be correct for exception handler (was destroyed) __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) // Pass arguments for generating a verbose error message. -#ifdef _LP64 recvKlass = c_rarg1; Register method = c_rarg2; if (recvKlass != rdx) { __ movq(recvKlass, rdx); } if (method != rcx) { __ movq(method, rcx); } -#else - recvKlass = rdx; - Register method = rcx; -#endif __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), recvKlass, method); // The call_VM checks for exception, so we should never return here. @@ -3966,7 +3455,9 @@ void TemplateTable::invokeinterface(int byte_no) { __ restore_bcp(); // rbcp must be correct for exception handler (was destroyed) __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) // Pass arguments for generating a verbose error message. - LP64_ONLY( if (recvKlass != rdx) { __ movq(recvKlass, rdx); } ) + if (recvKlass != rdx) { + __ movq(recvKlass, rdx); + } __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), recvKlass, rax); // the call_VM checks for exception, so we should never return here. @@ -4049,13 +3540,8 @@ void TemplateTable::_new() { // make sure klass is initialized // init_state needs acquire, but x86 is TSO, and so we are already good. 
-#ifdef _LP64 assert(VM_Version::supports_fast_class_init_checks(), "must support fast class initialization checks"); __ clinit_barrier(rcx, r15_thread, nullptr /*L_fast_path*/, &slow_case); -#else - __ cmpb(Address(rcx, InstanceKlass::init_state_offset()), InstanceKlass::fully_initialized); - __ jcc(Assembler::notEqual, slow_case); -#endif // get instance_size in InstanceKlass (scaled to a count of bytes) __ movl(rdx, Address(rcx, Klass::layout_helper_offset())); @@ -4072,10 +3558,9 @@ void TemplateTable::_new() { // // Go to slow path. - const Register thread = LP64_ONLY(r15_thread) NOT_LP64(rcx); + const Register thread = r15_thread; if (UseTLAB) { - NOT_LP64(__ get_thread(thread);) __ tlab_allocate(thread, rax, rdx, 0, rcx, rbx, slow_case); if (ZeroTLAB) { // the fields have been already cleared @@ -4114,7 +3599,6 @@ void TemplateTable::_new() { int header_size_bytes = oopDesc::header_size() * HeapWordSize; assert(is_aligned(header_size_bytes, BytesPerLong), "oop header size must be 8-byte-aligned"); __ movptr(Address(rax, rdx, Address::times_8, header_size_bytes - 1*oopSize), rcx); - NOT_LP64(__ movptr(Address(rax, rdx, Address::times_8, header_size_bytes - 2*oopSize), rcx)); __ decrement(rdx); __ jcc(Assembler::notZero, loop); } @@ -4129,10 +3613,8 @@ void TemplateTable::_new() { __ movptr(Address(rax, oopDesc::mark_offset_in_bytes()), (intptr_t)markWord::prototype().value()); // header __ pop(rcx); // get saved klass back in the register. -#ifdef _LP64 __ xorl(rsi, rsi); // use zero reg to clear memory (shorter code) __ store_klass_gap(rax, rsi); // zero klass gap for compressed oops -#endif __ store_klass(rax, rcx, rscratch1); // klass } @@ -4152,12 +3634,9 @@ void TemplateTable::_new() { __ pop(rcx); // restore stack pointer to what it was when we came in. 
__ bind(slow_case_no_pop); - Register rarg1 = LP64_ONLY(c_rarg1) NOT_LP64(rax); - Register rarg2 = LP64_ONLY(c_rarg2) NOT_LP64(rdx); - - __ get_constant_pool(rarg1); - __ get_unsigned_2_byte_index_at_bcp(rarg2, 1); - call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), rarg1, rarg2); + __ get_constant_pool(c_rarg1); + __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1); + call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), c_rarg1, c_rarg2); __ verify_oop(rax); // continue @@ -4166,22 +3645,18 @@ void TemplateTable::_new() { void TemplateTable::newarray() { transition(itos, atos); - Register rarg1 = LP64_ONLY(c_rarg1) NOT_LP64(rdx); - __ load_unsigned_byte(rarg1, at_bcp(1)); + __ load_unsigned_byte(c_rarg1, at_bcp(1)); call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), - rarg1, rax); + c_rarg1, rax); } void TemplateTable::anewarray() { transition(itos, atos); - Register rarg1 = LP64_ONLY(c_rarg1) NOT_LP64(rcx); - Register rarg2 = LP64_ONLY(c_rarg2) NOT_LP64(rdx); - - __ get_unsigned_2_byte_index_at_bcp(rarg2, 1); - __ get_constant_pool(rarg1); + __ get_unsigned_2_byte_index_at_bcp(c_rarg2, 1); + __ get_constant_pool(c_rarg1); call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), - rarg1, rarg2, rax); + c_rarg1, c_rarg2, rax); } void TemplateTable::arraylength() { @@ -4208,14 +3683,7 @@ void TemplateTable::checkcast() { call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); // vm_result_2 has metadata result -#ifndef _LP64 - // borrow rdi from locals - __ get_thread(rdi); - __ get_vm_result_2(rax, rdi); - __ restore_locals(); -#else __ get_vm_result_2(rax, r15_thread); -#endif __ pop_ptr(rdx); // restore receiver __ jmpb(resolved); @@ -4272,14 +3740,7 @@ void TemplateTable::instanceof() { call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); // vm_result_2 has metadata result -#ifndef _LP64 - // borrow rdi from locals - __ get_thread(rdi); - __ 
get_vm_result_2(rax, rdi); - __ restore_locals(); -#else __ get_vm_result_2(rax, r15_thread); -#endif __ pop_ptr(rdx); // restore receiver __ verify_oop(rdx); @@ -4327,21 +3788,19 @@ void TemplateTable::_breakpoint() { transition(vtos, vtos); - Register rarg = LP64_ONLY(c_rarg1) NOT_LP64(rcx); - // get the unpatched byte code - __ get_method(rarg); + __ get_method(c_rarg1); __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::get_original_bytecode_at), - rarg, rbcp); + c_rarg1, rbcp); __ mov(rbx, rax); // why? // post the breakpoint event - __ get_method(rarg); + __ get_method(c_rarg1); __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), - rarg, rbcp); + c_rarg1, rbcp); // complete the execution of original bytecode __ dispatch_only_normal(vtos); @@ -4387,9 +3846,9 @@ void TemplateTable::monitorenter() { Label allocated; - Register rtop = LP64_ONLY(c_rarg3) NOT_LP64(rcx); - Register rbot = LP64_ONLY(c_rarg2) NOT_LP64(rbx); - Register rmon = LP64_ONLY(c_rarg1) NOT_LP64(rdx); + Register rtop = c_rarg3; + Register rbot = c_rarg2; + Register rmon = c_rarg1; // initialize entry pointer __ xorl(rmon, rmon); // points to free slot or null @@ -4485,8 +3944,8 @@ void TemplateTable::monitorexit() { rbp, frame::interpreter_frame_initial_sp_offset * wordSize); const int entry_size = frame::interpreter_frame_monitor_size_in_bytes(); - Register rtop = LP64_ONLY(c_rarg1) NOT_LP64(rdx); - Register rbot = LP64_ONLY(c_rarg2) NOT_LP64(rbx); + Register rtop = c_rarg1; + Register rbot = c_rarg2; Label found; @@ -4540,13 +3999,12 @@ void TemplateTable::wide() { void TemplateTable::multianewarray() { transition(vtos, atos); - Register rarg = LP64_ONLY(c_rarg1) NOT_LP64(rax); __ load_unsigned_byte(rax, at_bcp(3)); // get number of dimensions // last dim is on top of stack; we want address of first one: // first_addr = last_addr + (ndims - 1) * stackElementSize - 1*wordsize // the latter wordSize to point to the beginning of the array. 
- __ lea(rarg, Address(rsp, rax, Interpreter::stackElementScale(), -wordSize)); - call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), rarg); + __ lea(c_rarg1, Address(rsp, rax, Interpreter::stackElementScale(), -wordSize)); + call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), c_rarg1); __ load_unsigned_byte(rbx, at_bcp(3)); __ lea(rsp, Address(rsp, rbx, Interpreter::stackElementScale())); // get rid of counts } diff --git a/src/hotspot/cpu/x86/upcallLinker_x86_32.cpp b/src/hotspot/cpu/x86/upcallLinker_x86_32.cpp deleted file mode 100644 index 6ccf965a771d3..0000000000000 --- a/src/hotspot/cpu/x86/upcallLinker_x86_32.cpp +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (c) 2020, 2024, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- */ - -#include "precompiled.hpp" -#include "prims/upcallLinker.hpp" - -address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature, - BasicType* out_sig_bt, int total_out_args, - BasicType ret_type, - jobject jabi, jobject jconv, - bool needs_return_buffer, int ret_buf_size) { - ShouldNotCallThis(); - return nullptr; -} diff --git a/src/hotspot/cpu/x86/vmStructs_x86.hpp b/src/hotspot/cpu/x86/vmStructs_x86.hpp index 4569bd9a21623..cb0e2d9bb9c45 100644 --- a/src/hotspot/cpu/x86/vmStructs_x86.hpp +++ b/src/hotspot/cpu/x86/vmStructs_x86.hpp @@ -35,8 +35,8 @@ #define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ #define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ - LP64_ONLY(declare_constant(frame::arg_reg_save_area_bytes)) \ - declare_constant(frame::interpreter_frame_sender_sp_offset) \ + declare_constant(frame::arg_reg_save_area_bytes) \ + declare_constant(frame::interpreter_frame_sender_sp_offset) \ declare_constant(frame::interpreter_frame_last_sp_offset) #define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) diff --git a/src/hotspot/cpu/x86/vm_version_x86.cpp b/src/hotspot/cpu/x86/vm_version_x86.cpp index 688cd4fa5a6d1..c960e042ce206 100644 --- a/src/hotspot/cpu/x86/vm_version_x86.cpp +++ b/src/hotspot/cpu/x86/vm_version_x86.cpp @@ -73,8 +73,6 @@ static get_cpu_info_stub_t get_cpu_info_stub = nullptr; static detect_virt_stub_t detect_virt_stub = nullptr; static clear_apx_test_state_t clear_apx_test_state_stub = nullptr; -#ifdef _LP64 - bool VM_Version::supports_clflush() { // clflush should always be available on x86_64 // if not we are in real trouble because we rely on it @@ -88,7 +86,6 @@ bool 
VM_Version::supports_clflush() { assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available"); return true; } -#endif #define CPUID_STANDARD_FN 0x0 #define CPUID_STANDARD_FN_1 0x1 @@ -108,7 +105,6 @@ class VM_Version_StubGenerator: public StubCodeGenerator { VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} -#if defined(_LP64) address clear_apx_test_state() { # define __ _masm-> address start = __ pc(); @@ -127,7 +123,6 @@ class VM_Version_StubGenerator: public StubCodeGenerator { __ ret(0); return start; } -#endif address generate_get_cpu_info() { // Flags to test CPU type. @@ -155,11 +150,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator { // LP64: rcx and rdx are first and second argument registers on windows __ push(rbp); -#ifdef _LP64 __ mov(rbp, c_rarg0); // cpuid_info address -#else - __ movptr(rbp, Address(rsp, 8)); // cpuid_info address -#endif __ push(rbx); __ push(rsi); __ pushf(); // preserve rbx, and flags @@ -419,7 +410,6 @@ class VM_Version_StubGenerator: public StubCodeGenerator { __ movl(Address(rsi, 8), rcx); __ movl(Address(rsi,12), rdx); -#if defined(_LP64) // // Check if OS has enabled XGETBV instruction to access XCR0 // (OSXSAVE feature flag) and CPU supports APX @@ -454,7 +444,6 @@ class VM_Version_StubGenerator: public StubCodeGenerator { __ movq(Address(rsi, 8), r31); UseAPX = save_apx; -#endif #endif __ bind(vector_save_restore); // @@ -528,10 +517,8 @@ class VM_Version_StubGenerator: public StubCodeGenerator { __ movdl(xmm0, rcx); __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit); __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit); -#ifdef _LP64 __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit); __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit); -#endif VM_Version::clean_cpuFeatures(); __ jmp(save_restore_except); } @@ -557,10 +544,8 @@ class VM_Version_StubGenerator: public StubCodeGenerator { __ pshufd(xmm0, xmm0, 0x00); __ vinsertf128_high(xmm0, xmm0); __ 
vmovdqu(xmm7, xmm0); -#ifdef _LP64 __ vmovdqu(xmm8, xmm0); __ vmovdqu(xmm15, xmm0); -#endif VM_Version::clean_cpuFeatures(); __ bind(save_restore_except); @@ -601,10 +586,8 @@ class VM_Version_StubGenerator: public StubCodeGenerator { __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset()))); __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit); __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit); -#ifdef _LP64 __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit); __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit); -#endif #ifdef _WINDOWS __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit); @@ -629,10 +612,8 @@ class VM_Version_StubGenerator: public StubCodeGenerator { __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset()))); __ vmovdqu(Address(rsi, 0), xmm0); __ vmovdqu(Address(rsi, 32), xmm7); -#ifdef _LP64 __ vmovdqu(Address(rsi, 64), xmm8); __ vmovdqu(Address(rsi, 96), xmm15); -#endif #ifdef _WINDOWS __ vmovdqu(xmm15, Address(rsp, 0)); @@ -688,13 +669,8 @@ class VM_Version_StubGenerator: public StubCodeGenerator { __ push(rbx); __ push(rsi); // for Windows -#ifdef _LP64 __ mov(rax, c_rarg0); // CPUID leaf __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx) -#else - __ movptr(rax, Address(rsp, 16)); // CPUID leaf - __ movptr(rsi, Address(rsp, 20)); // register array address -#endif __ cpuid(); @@ -738,11 +714,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator { // LP64: rcx and rdx are first and second argument registers on windows __ push(rbp); -#ifdef _LP64 __ mov(rbp, c_rarg0); // cpuid_info address -#else - __ movptr(rbp, Address(rsp, 8)); // cpuid_info address -#endif __ push(rbx); __ push(rsi); __ pushf(); // preserve rbx, and flags @@ -890,19 +862,18 @@ void VM_Version::get_processor_features() { // xchg and xadd instructions _supports_atomic_getset4 = true; _supports_atomic_getadd4 = true; - LP64_ONLY(_supports_atomic_getset8 = true); - 
LP64_ONLY(_supports_atomic_getadd8 = true); + _supports_atomic_getset8 = true; + _supports_atomic_getadd8 = true; -#ifdef _LP64 // OS should support SSE for x64 and hardware should support at least SSE2. if (!VM_Version::supports_sse2()) { vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported"); } - // in 64 bit the use of SSE2 is the minimum - if (UseSSE < 2) UseSSE = 2; -#endif + // The use of SSE2 is the minimum + if (UseSSE < 2) { + UseSSE = 2; + } -#ifdef AMD64 // flush_icache_stub have to be generated first. // That is why Icache line size is hard coded in ICache class, // see icache_x86.hpp. It is also the reason why we can't use @@ -914,9 +885,7 @@ void VM_Version::get_processor_features() { guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported"); // clflush_size is size in quadwords (8 bytes). guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported"); -#endif -#ifdef _LP64 // assigning this field effectively enables Unsafe.writebackMemory() // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero // that is only implemented on x86_64 and only if the OS plays ball @@ -925,7 +894,6 @@ void VM_Version::get_processor_features() { // let if default to zero thereby disabling writeback _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8; } -#endif // Check if processor has Intel Ecore if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 && @@ -944,12 +912,6 @@ void VM_Version::get_processor_features() { _features &= ~CPU_SSE4A; } - if (UseSSE < 2) - _features &= ~CPU_SSE2; - - if (UseSSE < 1) - _features &= ~CPU_SSE; - //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0. if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) { UseAVX = 0; @@ -959,18 +921,13 @@ void VM_Version::get_processor_features() { // the command line requires. 
I.e., you cannot set UseSSE to 2 on // older Pentiums which do not support it. int use_sse_limit = 0; - if (UseSSE > 0) { - if (UseSSE > 3 && supports_sse4_1()) { - use_sse_limit = 4; - } else if (UseSSE > 2 && supports_sse3()) { - use_sse_limit = 3; - } else if (UseSSE > 1 && supports_sse2()) { - use_sse_limit = 2; - } else if (UseSSE > 0 && supports_sse()) { - use_sse_limit = 1; - } else { - use_sse_limit = 0; - } + if (UseSSE > 3 && supports_sse4_1()) { + use_sse_limit = 4; + } else if (UseSSE > 2 && supports_sse3()) { + use_sse_limit = 3; + } else { + assert(supports_sse2(), "Checked before"); + use_sse_limit = 2; } if (FLAG_IS_DEFAULT(UseSSE)) { FLAG_SET_DEFAULT(UseSSE, use_sse_limit); @@ -1180,7 +1137,6 @@ void VM_Version::get_processor_features() { FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); } -#ifdef _LP64 if (supports_avx2()) { if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) { UseAdler32Intrinsics = true; @@ -1191,12 +1147,6 @@ void VM_Version::get_processor_features() { } FLAG_SET_DEFAULT(UseAdler32Intrinsics, false); } -#else - if (UseAdler32Intrinsics) { - warning("Adler32Intrinsics not available on this CPU."); - FLAG_SET_DEFAULT(UseAdler32Intrinsics, false); - } -#endif if (supports_sse4_2() && supports_clmul()) { if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { @@ -1220,7 +1170,6 @@ void VM_Version::get_processor_features() { FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); } -#ifdef _LP64 // ChaCha20 Intrinsics // As long as the system supports AVX as a baseline we can do a // SIMD-enabled block function. 
StubGenerator makes the determination @@ -1236,13 +1185,6 @@ void VM_Version::get_processor_features() { } FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false); } -#else - // No support currently for ChaCha20 intrinsics on 32-bit platforms - if (UseChaCha20Intrinsics) { - warning("ChaCha20 intrinsics are not available on this CPU."); - FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false); - } -#endif // _LP64 // Base64 Intrinsics (Check the condition for which the intrinsic will be active) if (UseAVX >= 2) { @@ -1255,7 +1197,7 @@ void VM_Version::get_processor_features() { FLAG_SET_DEFAULT(UseBASE64Intrinsics, false); } - if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions + if (supports_fma()) { if (FLAG_IS_DEFAULT(UseFMA)) { UseFMA = true; } @@ -1268,7 +1210,7 @@ void VM_Version::get_processor_features() { UseMD5Intrinsics = true; } - if (supports_sha() LP64_ONLY(|| (supports_avx2() && supports_bmi2()))) { + if (supports_sha() || (supports_avx2() && supports_bmi2())) { if (FLAG_IS_DEFAULT(UseSHA)) { UseSHA = true; } @@ -1295,27 +1237,21 @@ void VM_Version::get_processor_features() { FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); } -#ifdef _LP64 // These are only supported on 64-bit if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) { if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) { FLAG_SET_DEFAULT(UseSHA512Intrinsics, true); } - } else -#endif - if (UseSHA512Intrinsics) { + } else if (UseSHA512Intrinsics) { warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); } -#ifdef _LP64 if (supports_evex() && supports_avx512bw()) { if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) { UseSHA3Intrinsics = true; } - } else -#endif - if (UseSHA3Intrinsics) { + } else if (UseSHA3Intrinsics) { warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); } @@ 
-1324,23 +1260,11 @@ void VM_Version::get_processor_features() { FLAG_SET_DEFAULT(UseSHA, false); } -#ifdef COMPILER2 - if (UseFPUForSpilling) { - if (UseSSE < 2) { - // Only supported with SSE2+ - FLAG_SET_DEFAULT(UseFPUForSpilling, false); - } - } -#endif - #if COMPILER2_OR_JVMCI int max_vector_size = 0; - if (UseSSE < 2) { - // Vectors (in XMM) are only supported with SSE2+ - // SSE is always 2 on x64. - max_vector_size = 0; - } else if (UseAVX == 0 || !os_supports_avx_vectors()) { - // 16 byte vectors (in XMM) are supported with SSE2+ + if (UseAVX == 0 || !os_supports_avx_vectors()) { + // 16 byte vectors (in XMM) are supported with SSE2+. + // SSE2 is the minimum for x86_64. max_vector_size = 16; } else if (UseAVX == 1 || UseAVX == 2) { // 32 bytes vectors (in YMM) are only supported with AVX+ @@ -1350,11 +1274,7 @@ void VM_Version::get_processor_features() { max_vector_size = 64; } -#ifdef _LP64 int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit -#else - int min_vector_size = 0; -#endif if (!FLAG_IS_DEFAULT(MaxVectorSize)) { if (MaxVectorSize < min_vector_size) { @@ -1378,7 +1298,7 @@ void VM_Version::get_processor_features() { if (MaxVectorSize > 0) { if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) { tty->print_cr("State of YMM registers after signal handle:"); - int nreg = 2 LP64_ONLY(+2); + int nreg = 4; const char* ymm_name[4] = {"0", "7", "8", "15"}; for (int i = 0; i < nreg; i++) { tty->print("YMM%s:", ymm_name[i]); @@ -1391,31 +1311,24 @@ void VM_Version::get_processor_features() { } #endif // COMPILER2 && ASSERT -#ifdef _LP64 if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) { if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) { FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true); } - } else -#endif - if (UsePoly1305Intrinsics) { + } else if (UsePoly1305Intrinsics) { warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU."); 
FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false); } -#ifdef _LP64 if (supports_avx512ifma() && supports_avx512vlbw()) { if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) { FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true); } - } else -#endif - if (UseIntPolyIntrinsics) { + } else if (UseIntPolyIntrinsics) { warning("Intrinsics for Polynomial crypto functions not available on this CPU."); FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false); } -#ifdef _LP64 if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { UseMultiplyToLenIntrinsic = true; } @@ -1431,38 +1344,6 @@ void VM_Version::get_processor_features() { if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { UseMontgomerySquareIntrinsic = true; } -#else - if (UseMultiplyToLenIntrinsic) { - if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { - warning("multiplyToLen intrinsic is not available in 32-bit VM"); - } - FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false); - } - if (UseMontgomeryMultiplyIntrinsic) { - if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { - warning("montgomeryMultiply intrinsic is not available in 32-bit VM"); - } - FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false); - } - if (UseMontgomerySquareIntrinsic) { - if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { - warning("montgomerySquare intrinsic is not available in 32-bit VM"); - } - FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false); - } - if (UseSquareToLenIntrinsic) { - if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { - warning("squareToLen intrinsic is not available in 32-bit VM"); - } - FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false); - } - if (UseMulAddIntrinsic) { - if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { - warning("mulAdd intrinsic is not available in 32-bit VM"); - } - FLAG_SET_DEFAULT(UseMulAddIntrinsic, false); - } -#endif // _LP64 #endif // COMPILER2_OR_JVMCI // On new cpus instructions which update whole XMM register should be used @@ -1739,7 +1620,6 @@ void VM_Version::get_processor_features() { } #endif -#ifdef _LP64 if 
(UseSSE42Intrinsics) { if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) { UseVectorizedMismatchIntrinsic = true; @@ -1756,20 +1636,6 @@ void VM_Version::get_processor_features() { warning("vectorizedHashCode intrinsics are not available on this CPU"); FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false); } -#else - if (UseVectorizedMismatchIntrinsic) { - if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) { - warning("vectorizedMismatch intrinsic is not available in 32-bit VM"); - } - FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); - } - if (UseVectorizedHashCodeIntrinsic) { - if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) { - warning("vectorizedHashCode intrinsic is not available in 32-bit VM"); - } - FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false); - } -#endif // _LP64 // Use count leading zeros count instruction if available. if (supports_lzcnt()) { @@ -1854,7 +1720,7 @@ void VM_Version::get_processor_features() { #endif // Use XMM/YMM MOVDQU instruction for Object Initialization - if (!UseFastStosb && UseSSE >= 2 && UseUnalignedLoadStores) { + if (!UseFastStosb && UseUnalignedLoadStores) { if (FLAG_IS_DEFAULT(UseXMMForObjInit)) { UseXMMForObjInit = true; } @@ -1918,7 +1784,6 @@ void VM_Version::get_processor_features() { #endif } -#ifdef _LP64 // Prefetch settings // Prefetch interval for gc copy/scan == 9 dcache lines. 
Derived from @@ -1937,7 +1802,6 @@ void VM_Version::get_processor_features() { if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) { FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576); } -#endif if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && (cache_line_size > ContendedPaddingWidth)) @@ -1969,22 +1833,18 @@ void VM_Version::get_processor_features() { #endif log->cr(); log->print("Allocation"); - if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) { + if (AllocatePrefetchStyle <= 0) { log->print_cr(": no prefetching"); } else { log->print(" prefetching: "); - if (UseSSE == 0 && supports_3dnow_prefetch()) { + if (AllocatePrefetchInstr == 0) { + log->print("PREFETCHNTA"); + } else if (AllocatePrefetchInstr == 1) { + log->print("PREFETCHT0"); + } else if (AllocatePrefetchInstr == 2) { + log->print("PREFETCHT2"); + } else if (AllocatePrefetchInstr == 3) { log->print("PREFETCHW"); - } else if (UseSSE >= 1) { - if (AllocatePrefetchInstr == 0) { - log->print("PREFETCHNTA"); - } else if (AllocatePrefetchInstr == 1) { - log->print("PREFETCHT0"); - } else if (AllocatePrefetchInstr == 2) { - log->print("PREFETCHT2"); - } else if (AllocatePrefetchInstr == 3) { - log->print("PREFETCHW"); - } } if (AllocatePrefetchLines > 1) { log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize); @@ -2172,11 +2032,9 @@ int VM_Version::avx3_threshold() { FLAG_IS_DEFAULT(AVX3Threshold)) ? 
0 : AVX3Threshold; } -#if defined(_LP64) void VM_Version::clear_apx_test_state() { clear_apx_test_state_stub(); } -#endif static bool _vm_version_initialized = false; @@ -2194,14 +2052,11 @@ void VM_Version::initialize() { g.generate_get_cpu_info()); detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t, g.generate_detect_virt()); - -#if defined(_LP64) clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t, g.clear_apx_test_state()); -#endif get_processor_features(); - LP64_ONLY(Assembler::precompute_instructions();) + Assembler::precompute_instructions(); if (VM_Version::supports_hv()) { // Supports hypervisor check_virtualizations(); @@ -2968,12 +2823,10 @@ uint64_t VM_Version::CpuidInfo::feature_flags() const { result |= CPU_CMOV; if (std_cpuid1_edx.bits.clflush != 0) result |= CPU_FLUSH; -#ifdef _LP64 // clflush should always be available on x86_64 // if not we are in real trouble because we rely on it // to flush the code cache. assert ((result & CPU_FLUSH) != 0, "clflush should be available"); -#endif if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() && ext_cpuid1_edx.bits.fxsr != 0)) result |= CPU_FXSR; @@ -3150,7 +3003,7 @@ uint64_t VM_Version::CpuidInfo::feature_flags() const { bool VM_Version::os_supports_avx_vectors() { bool retVal = false; - int nreg = 2 LP64_ONLY(+2); + int nreg = 4; if (supports_evex()) { // Verify that OS save/restore all bits of EVEX registers // during signal processing. 
@@ -3306,11 +3159,7 @@ int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) { if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus return 192; } else if (use_watermark_prefetch) { // watermark prefetching on Core -#ifdef _LP64 return 384; -#else - return 320; -#endif } } if (supports_sse2()) { diff --git a/src/hotspot/cpu/x86/vm_version_x86.hpp b/src/hotspot/cpu/x86/vm_version_x86.hpp index 004b64ebe6eb1..cf13bf0a2a589 100644 --- a/src/hotspot/cpu/x86/vm_version_x86.hpp +++ b/src/hotspot/cpu/x86/vm_version_x86.hpp @@ -637,7 +637,7 @@ class VM_Version : public Abstract_VM_Version { static void set_cpuinfo_cont_addr_apx(address pc) { _cpuinfo_cont_addr_apx = pc; } static address cpuinfo_cont_addr_apx() { return _cpuinfo_cont_addr_apx; } - LP64_ONLY(static void clear_apx_test_state()); + static void clear_apx_test_state(); static void clean_cpuFeatures() { _features = 0; } static void set_avx_cpuFeatures() { _features |= (CPU_SSE | CPU_SSE2 | CPU_AVX | CPU_VZEROUPPER ); } @@ -822,12 +822,12 @@ class VM_Version : public Abstract_VM_Version { // x86_64 supports fast class initialization checks static bool supports_fast_class_init_checks() { - return LP64_ONLY(true) NOT_LP64(false); // not implemented on x86_32 + return true; } // x86_64 supports secondary supers table constexpr static bool supports_secondary_supers_table() { - return LP64_ONLY(true) NOT_LP64(false); // not implemented on x86_32 + return true; } constexpr static bool supports_stack_watermark_barrier() { @@ -862,11 +862,7 @@ class VM_Version : public Abstract_VM_Version { // synchronize with other memory ops. so, it needs preceding // and trailing StoreStore fences. 
-#ifdef _LP64 static bool supports_clflush(); // Can't inline due to header file conflict -#else - static bool supports_clflush() { return ((_features & CPU_FLUSH) != 0); } -#endif // _LP64 // Note: CPU_FLUSHOPT and CPU_CLWB bits should always be zero for 32-bit static bool supports_clflushopt() { return ((_features & CPU_FLUSHOPT) != 0); } diff --git a/src/hotspot/cpu/x86/vmreg_x86.cpp b/src/hotspot/cpu/x86/vmreg_x86.cpp index d40a6eaa4b2ac..143c804d3bc78 100644 --- a/src/hotspot/cpu/x86/vmreg_x86.cpp +++ b/src/hotspot/cpu/x86/vmreg_x86.cpp @@ -33,9 +33,7 @@ void VMRegImpl::set_regName() { int i; for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { regName[i++] = reg->name(); -#ifdef AMD64 regName[i++] = reg->name(); -#endif // AMD64 reg = reg->successor(); } diff --git a/src/hotspot/cpu/x86/vmreg_x86.hpp b/src/hotspot/cpu/x86/vmreg_x86.hpp index 6f7c7fafb3280..c0a92c1835aa9 100644 --- a/src/hotspot/cpu/x86/vmreg_x86.hpp +++ b/src/hotspot/cpu/x86/vmreg_x86.hpp @@ -55,11 +55,7 @@ inline Register as_Register() { assert( is_Register(), "must be"); // Yuk -#ifdef AMD64 return ::as_Register(value() >> 1); -#else - return ::as_Register(value()); -#endif // AMD64 } inline FloatRegister as_FloatRegister() { @@ -82,9 +78,6 @@ inline KRegister as_KRegister() { inline bool is_concrete() { assert(is_reg(), "must be"); -#ifndef AMD64 - if (is_Register()) return true; -#endif // AMD64 // Do not use is_XMMRegister() here as it depends on the UseAVX setting. 
if (value() >= ConcreteRegisterImpl::max_fpr && value() < ConcreteRegisterImpl::max_xmm) { int base = value() - ConcreteRegisterImpl::max_fpr; diff --git a/src/hotspot/cpu/x86/vmreg_x86.inline.hpp b/src/hotspot/cpu/x86/vmreg_x86.inline.hpp index 1aeedc094fd55..b7103a924ed9c 100644 --- a/src/hotspot/cpu/x86/vmreg_x86.inline.hpp +++ b/src/hotspot/cpu/x86/vmreg_x86.inline.hpp @@ -26,7 +26,7 @@ #define CPU_X86_VMREG_X86_INLINE_HPP inline VMReg Register::RegisterImpl::as_VMReg() const { - return VMRegImpl::as_VMReg(encoding() LP64_ONLY( << 1 )); + return VMRegImpl::as_VMReg(encoding() << 1); } inline VMReg FloatRegister::FloatRegisterImpl::as_VMReg() const { diff --git a/src/hotspot/cpu/x86/vtableStubs_x86_32.cpp b/src/hotspot/cpu/x86/vtableStubs_x86_32.cpp deleted file mode 100644 index 8d3ceca7b4ab1..0000000000000 --- a/src/hotspot/cpu/x86/vtableStubs_x86_32.cpp +++ /dev/null @@ -1,266 +0,0 @@ -/* - * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- * - */ - -#include "precompiled.hpp" -#include "asm/macroAssembler.hpp" -#include "code/compiledIC.hpp" -#include "code/vtableStubs.hpp" -#include "interp_masm_x86.hpp" -#include "memory/resourceArea.hpp" -#include "oops/instanceKlass.hpp" -#include "oops/klassVtable.hpp" -#include "runtime/sharedRuntime.hpp" -#include "vmreg_x86.inline.hpp" -#ifdef COMPILER2 -#include "opto/runtime.hpp" -#endif - -// machine-dependent part of VtableStubs: create VtableStub of correct size and -// initialize its code - -#define __ masm-> - -#ifndef PRODUCT -extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); -#endif - -// These stubs are used by the compiler only. -// Argument registers, which must be preserved: -// rcx - receiver (always first argument) -// rdx - second argument (if any) -// Other registers that might be usable: -// rax - inline cache register (is interface for itable stub) -// rbx - method (used when calling out to interpreter) -// Available now, but may become callee-save at some point: -// rsi, rdi -// Note that rax and rdx are also used for return values. - -VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { - // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. - const int stub_code_length = code_size_limit(true); - VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); - // Can be null if there is no free space in the code cache. - if (s == nullptr) { - return nullptr; - } - - // Count unused bytes in instruction sequences of variable size. - // We add them to the computed buffer size in order to avoid - // overflow in subsequently generated stubs. - address start_pc; - int slop_bytes = 0; - int slop_delta = 0; - // No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation. 
- const int index_dependent_slop = 0; - - ResourceMark rm; - CodeBuffer cb(s->entry_point(), stub_code_length); - MacroAssembler* masm = new MacroAssembler(&cb); - -#if (!defined(PRODUCT) && defined(COMPILER2)) - if (CountCompiledCalls) { - __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); - } -#endif - - // get receiver (need to skip return address on top of stack) - assert(VtableStub::receiver_location() == rcx->as_VMReg(), "receiver expected in rcx"); - - // get receiver klass - address npe_addr = __ pc(); - __ movptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes())); - -#ifndef PRODUCT - if (DebugVtables) { - Label L; - start_pc = __ pc(); - // check offset vs vtable length - __ cmpl(Address(rax, Klass::vtable_length_offset()), vtable_index*vtableEntry::size()); - slop_delta = 10 - (__ pc() - start_pc); // cmpl varies in length, depending on data - slop_bytes += slop_delta; - assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); - - __ jcc(Assembler::greater, L); - __ movl(rbx, vtable_index); - // VTABLE TODO: find upper bound for call_VM length. 
- start_pc = __ pc(); - __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), rcx, rbx); - slop_delta = 500 - (__ pc() - start_pc); - slop_bytes += slop_delta; - assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); - __ bind(L); - } -#endif // PRODUCT - - const Register method = rbx; - - // load Method* and target address - start_pc = __ pc(); - __ lookup_virtual_method(rax, vtable_index, method); - slop_delta = 6 - (int)(__ pc() - start_pc); - slop_bytes += slop_delta; - assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); - -#ifndef PRODUCT - if (DebugVtables) { - Label L; - __ cmpptr(method, NULL_WORD); - __ jcc(Assembler::equal, L); - __ cmpptr(Address(method, Method::from_compiled_offset()), NULL_WORD); - __ jcc(Assembler::notZero, L); - __ stop("Vtable entry is null"); - __ bind(L); - } -#endif // PRODUCT - - // rax: receiver klass - // method (rbx): Method* - // rcx: receiver - address ame_addr = __ pc(); - __ jmp( Address(method, Method::from_compiled_offset())); - - masm->flush(); - slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets - bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop); - - return s; -} - - -VtableStub* VtableStubs::create_itable_stub(int itable_index) { - // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. - const int stub_code_length = code_size_limit(false); - VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); - // Can be null if there is no free space in the code cache. - if (s == nullptr) { - return nullptr; - } - // Count unused bytes in instruction sequences of variable size. - // We add them to the computed buffer size in order to avoid - // overflow in subsequently generated stubs. 
- address start_pc; - int slop_bytes = 0; - int slop_delta = 0; - const int index_dependent_slop = (itable_index == 0) ? 4 : // code size change with transition from 8-bit to 32-bit constant (@index == 32). - (itable_index < 32) ? 3 : 0; // index == 0 generates even shorter code. - - ResourceMark rm; - CodeBuffer cb(s->entry_point(), stub_code_length); - MacroAssembler* masm = new MacroAssembler(&cb); - -#if (!defined(PRODUCT) && defined(COMPILER2)) - if (CountCompiledCalls) { - __ incrementl(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); - } -#endif /* PRODUCT */ - - // Entry arguments: - // rax: CompiledICData - // rcx: Receiver - - // Most registers are in use; we'll use rax, rbx, rcx, rdx, rsi, rdi - // (If we need to make rsi, rdi callee-save, do a push/pop here.) - const Register recv_klass_reg = rsi; - const Register holder_klass_reg = rax; // declaring interface klass (DEFC) - const Register resolved_klass_reg = rdi; // resolved interface klass (REFC) - const Register temp_reg = rdx; - const Register method = rbx; - const Register icdata_reg = rax; - const Register receiver = rcx; - - __ movptr(resolved_klass_reg, Address(icdata_reg, CompiledICData::itable_refc_klass_offset())); - __ movptr(holder_klass_reg, Address(icdata_reg, CompiledICData::itable_defc_klass_offset())); - - Label L_no_such_interface; - - // get receiver klass (also an implicit null-check) - assert(VtableStub::receiver_location() == rcx->as_VMReg(), "receiver expected in rcx"); - address npe_addr = __ pc(); - __ load_klass(recv_klass_reg, rcx, noreg); - - start_pc = __ pc(); - __ push(rdx); // temp_reg - - // Receiver subtype check against REFC. 
- // Get selected method from declaring class and itable index - __ lookup_interface_method_stub(recv_klass_reg, // input - holder_klass_reg, // input - resolved_klass_reg, // input - method, // output - temp_reg, - noreg, - receiver, // input (x86_32 only: to restore recv_klass value) - itable_index, - L_no_such_interface); - const ptrdiff_t lookupSize = __ pc() - start_pc; - - // We expect we need index_dependent_slop extra bytes. Reason: - // The emitted code in lookup_interface_method changes when itable_index exceeds 31. - // For windows, a narrow estimate was found to be 104. Other OSes not tested. - const ptrdiff_t estimate = 104; - const ptrdiff_t codesize = lookupSize + index_dependent_slop; - slop_delta = (int)(estimate - codesize); - slop_bytes += slop_delta; - assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize); - - // method (rbx): Method* - // rcx: receiver - -#ifdef ASSERT - if (DebugVtables) { - Label L1; - __ cmpptr(method, NULL_WORD); - __ jcc(Assembler::equal, L1); - __ cmpptr(Address(method, Method::from_compiled_offset()), NULL_WORD); - __ jcc(Assembler::notZero, L1); - __ stop("Method* is null"); - __ bind(L1); - } -#endif // ASSERT - - __ pop(rdx); - address ame_addr = __ pc(); - __ jmp(Address(method, Method::from_compiled_offset())); - - __ bind(L_no_such_interface); - // Handle IncompatibleClassChangeError in itable stubs. - // More detailed error message. - // We force resolving of the call site by jumping to the "handle - // wrong method" stub, and so let the interpreter runtime do all the - // dirty work. 
- __ pop(rdx); - __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); - - masm->flush(); - slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets - bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, index_dependent_slop); - - return s; -} - -int VtableStub::pd_code_alignment() { - // x86 cache line size is 64 bytes, but we want to limit alignment loss. - const unsigned int icache_line_size = wordSize; - return icache_line_size; -} diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index 95b761ad44ead..3c30a18544b42 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -210,8 +210,6 @@ reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13)); reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14)); reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); -#ifdef _LP64 - reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); @@ -620,13 +618,7 @@ reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); -#endif // _LP64 - -#ifdef _LP64 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); -#else -reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); -#endif // _LP64 // AVX3 Mask Registers. 
reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg()); @@ -658,17 +650,16 @@ alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, - XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p -#ifdef _LP64 - ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, + XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p, + XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, - XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p - ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, + XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, 
XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p, + XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, @@ -684,7 +675,6 @@ alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p -#endif ); alloc_class chunk2(K7, K7_H, @@ -726,9 +716,8 @@ reg_class float_reg_legacy(XMM0, XMM4, XMM5, XMM6, - XMM7 -#ifdef _LP64 - ,XMM8, + XMM7, + XMM8, XMM9, XMM10, XMM11, @@ -736,7 +725,6 @@ reg_class float_reg_legacy(XMM0, XMM13, XMM14, XMM15 -#endif ); // Class for evex float registers @@ -747,9 +735,8 @@ reg_class float_reg_evex(XMM0, XMM4, XMM5, XMM6, - XMM7 -#ifdef _LP64 - ,XMM8, + XMM7, + XMM8, XMM9, XMM10, XMM11, @@ -773,7 +760,6 @@ reg_class float_reg_evex(XMM0, XMM29, XMM30, XMM31 -#endif ); reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); @@ -787,9 +773,8 @@ reg_class double_reg_legacy(XMM0, XMM0b, XMM4, XMM4b, XMM5, XMM5b, XMM6, XMM6b, - XMM7, XMM7b -#ifdef _LP64 - ,XMM8, XMM8b, + XMM7, XMM7b, + XMM8, XMM8b, XMM9, XMM9b, XMM10, XMM10b, XMM11, XMM11b, @@ -797,7 +782,6 @@ reg_class double_reg_legacy(XMM0, XMM0b, XMM13, XMM13b, XMM14, XMM14b, XMM15, XMM15b 
-#endif ); // Class for evex double registers @@ -808,9 +792,8 @@ reg_class double_reg_evex(XMM0, XMM0b, XMM4, XMM4b, XMM5, XMM5b, XMM6, XMM6b, - XMM7, XMM7b -#ifdef _LP64 - ,XMM8, XMM8b, + XMM7, XMM7b, + XMM8, XMM8b, XMM9, XMM9b, XMM10, XMM10b, XMM11, XMM11b, @@ -834,7 +817,6 @@ reg_class double_reg_evex(XMM0, XMM0b, XMM29, XMM29b, XMM30, XMM30b, XMM31, XMM31b -#endif ); reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); @@ -848,9 +830,8 @@ reg_class vectors_reg_legacy(XMM0, XMM4, XMM5, XMM6, - XMM7 -#ifdef _LP64 - ,XMM8, + XMM7, + XMM8, XMM9, XMM10, XMM11, @@ -858,7 +839,6 @@ reg_class vectors_reg_legacy(XMM0, XMM13, XMM14, XMM15 -#endif ); // Class for evex 32bit vector registers @@ -869,9 +849,8 @@ reg_class vectors_reg_evex(XMM0, XMM4, XMM5, XMM6, - XMM7 -#ifdef _LP64 - ,XMM8, + XMM7, + XMM8, XMM9, XMM10, XMM11, @@ -895,7 +874,6 @@ reg_class vectors_reg_evex(XMM0, XMM29, XMM30, XMM31 -#endif ); reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); @@ -909,9 +887,8 @@ reg_class vectord_reg_legacy(XMM0, XMM0b, XMM4, XMM4b, XMM5, XMM5b, XMM6, XMM6b, - XMM7, XMM7b -#ifdef _LP64 - ,XMM8, XMM8b, + XMM7, XMM7b, + XMM8, XMM8b, XMM9, XMM9b, XMM10, XMM10b, XMM11, XMM11b, @@ -919,7 +896,6 @@ reg_class vectord_reg_legacy(XMM0, XMM0b, XMM13, XMM13b, XMM14, XMM14b, XMM15, XMM15b -#endif ); // Class for all 64bit vector registers @@ -930,9 +906,8 @@ reg_class vectord_reg_evex(XMM0, XMM0b, XMM4, XMM4b, XMM5, XMM5b, XMM6, XMM6b, - XMM7, XMM7b -#ifdef _LP64 - ,XMM8, XMM8b, + XMM7, XMM7b, + XMM8, XMM8b, XMM9, XMM9b, XMM10, XMM10b, XMM11, XMM11b, @@ -956,7 +931,6 @@ reg_class vectord_reg_evex(XMM0, XMM0b, XMM29, XMM29b, XMM30, XMM30b, XMM31, XMM31b -#endif ); reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); @@ -970,9 +944,8 @@ reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM4, XMM4b, XMM4c, XMM4d, XMM5, XMM5b, 
XMM5c, XMM5d, XMM6, XMM6b, XMM6c, XMM6d, - XMM7, XMM7b, XMM7c, XMM7d -#ifdef _LP64 - ,XMM8, XMM8b, XMM8c, XMM8d, + XMM7, XMM7b, XMM7c, XMM7d, + XMM8, XMM8b, XMM8c, XMM8d, XMM9, XMM9b, XMM9c, XMM9d, XMM10, XMM10b, XMM10c, XMM10d, XMM11, XMM11b, XMM11c, XMM11d, @@ -980,7 +953,6 @@ reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM13, XMM13b, XMM13c, XMM13d, XMM14, XMM14b, XMM14c, XMM14d, XMM15, XMM15b, XMM15c, XMM15d -#endif ); // Class for all 128bit vector registers @@ -991,9 +963,8 @@ reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM4, XMM4b, XMM4c, XMM4d, XMM5, XMM5b, XMM5c, XMM5d, XMM6, XMM6b, XMM6c, XMM6d, - XMM7, XMM7b, XMM7c, XMM7d -#ifdef _LP64 - ,XMM8, XMM8b, XMM8c, XMM8d, + XMM7, XMM7b, XMM7c, XMM7d, + XMM8, XMM8b, XMM8c, XMM8d, XMM9, XMM9b, XMM9c, XMM9d, XMM10, XMM10b, XMM10c, XMM10d, XMM11, XMM11b, XMM11c, XMM11d, @@ -1017,7 +988,6 @@ reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM29, XMM29b, XMM29c, XMM29d, XMM30, XMM30b, XMM30c, XMM30d, XMM31, XMM31b, XMM31c, XMM31d -#endif ); reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); @@ -1031,9 +1001,8 @@ reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, - XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h -#ifdef _LP64 - ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, + XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, + XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, @@ -1041,7 +1010,6 @@ reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM14, XMM14b, 
XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h -#endif ); // Class for all 256bit vector registers @@ -1052,9 +1020,8 @@ reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, - XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h -#ifdef _LP64 - ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, + XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, + XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, @@ -1078,7 +1045,6 @@ reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h -#endif ); reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); @@ -1092,17 +1058,16 @@ reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, - XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p -#ifdef _LP64 - ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, + XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, 
XMM7l, XMM7m, XMM7n, XMM7o, XMM7p, + XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, - XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p - ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, + XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p, + XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, @@ -1118,7 +1083,6 @@ reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, 
XMM29o, XMM29p, XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p -#endif ); // Class for restricted 512bit vector registers @@ -1129,9 +1093,8 @@ reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, - XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p -#ifdef _LP64 - ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, + XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p, + XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, @@ -1139,7 +1102,6 @@ reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, 
XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p -#endif ); reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); @@ -1199,21 +1161,10 @@ class HandlerImpl { return NativeJump::instruction_size; } -#ifdef _LP64 static uint size_deopt_handler() { // three 5 byte instructions plus one move for unreachable address. return 15+3; } -#else - static uint size_deopt_handler() { - // NativeCall instruction size is the same as NativeJump. - // exception handler starts out as jump and can be patched to - // a call be deoptimization. (4932387) - // Note that this value is also credited (in output.cpp) to - // the size of the code section. - return 5 + NativeJump::instruction_size; // pushl(); jmp; - } -#endif }; inline Assembler::AvxVectorLen vector_length_encoding(int bytes) { @@ -1334,7 +1285,6 @@ int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) { } int offset = __ offset(); -#ifdef _LP64 address the_pc = (address) __ pc(); Label next; // push a "the_pc" on the stack without destroying any registers @@ -1345,10 +1295,6 @@ int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) { __ bind(next); // adjust it so it matches "the_pc" __ subptr(Address(rsp, 0), __ offset() - offset); -#else - InternalAddress here(__ pc()); - __ pushptr(here.addr(), noreg); -#endif __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); @@ -1372,17 +1318,10 @@ static Assembler::Width widthForType(BasicType bt) { //============================================================================= // Float masks come from different places depending on platform. 
-#ifdef _LP64 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } -#else - static address float_signmask() { return (address)float_signmask_pool; } - static address float_signflip() { return (address)float_signflip_pool; } - static address double_signmask() { return (address)double_signmask_pool; } - static address double_signflip() { return (address)double_signflip_pool; } -#endif static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); } static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } @@ -1404,7 +1343,6 @@ bool Matcher::match_rule_supported(int opcode) { if (!has_match_rule(opcode)) { return false; // no match rule present } - const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); switch (opcode) { case Op_AbsVL: case Op_StoreVectorScatter: @@ -1446,9 +1384,6 @@ bool Matcher::match_rule_supported(int opcode) { } break; case Op_AddReductionVL: - if (UseSSE < 2) { // requires at least SSE2 - return false; - } break; case Op_AbsVB: case Op_AbsVS: @@ -1492,7 +1427,7 @@ bool Matcher::match_rule_supported(int opcode) { } break; case Op_PopulateIndex: - if (!is_LP64 || (UseAVX < 2)) { + if (UseAVX < 2) { return false; } break; @@ -1507,9 +1442,7 @@ bool Matcher::match_rule_supported(int opcode) { } break; case Op_CompareAndSwapL: -#ifdef _LP64 case Op_CompareAndSwapP: -#endif break; case Op_StrIndexOf: if (!UseSSE42Intrinsics) { @@ -1538,7 +1471,6 @@ bool Matcher::match_rule_supported(int opcode) { return false; } break; -#ifdef _LP64 case Op_MaxD: case Op_MaxF: case Op_MinD: @@ -1547,7 +1479,6 @@ bool 
Matcher::match_rule_supported(int opcode) { return false; } break; -#endif case Op_CacheWB: case Op_CacheWBPreSync: case Op_CacheWBPostSync: @@ -1590,7 +1521,7 @@ bool Matcher::match_rule_supported(int opcode) { case Op_VectorCmpMasked: case Op_VectorMaskGen: - if (!is_LP64 || UseAVX < 3 || !VM_Version::supports_bmi2()) { + if (UseAVX < 3 || !VM_Version::supports_bmi2()) { return false; } break; @@ -1598,62 +1529,34 @@ bool Matcher::match_rule_supported(int opcode) { case Op_VectorMaskLastTrue: case Op_VectorMaskTrueCount: case Op_VectorMaskToLong: - if (!is_LP64 || UseAVX < 1) { + if (UseAVX < 1) { return false; } break; case Op_RoundF: case Op_RoundD: - if (!is_LP64) { - return false; - } break; case Op_CopySignD: case Op_CopySignF: - if (UseAVX < 3 || !is_LP64) { + if (UseAVX < 3) { return false; } if (!VM_Version::supports_avx512vl()) { return false; } break; -#ifndef _LP64 - case Op_AddReductionVF: - case Op_AddReductionVD: - case Op_MulReductionVF: - case Op_MulReductionVD: - if (UseSSE < 1) { // requires at least SSE - return false; - } - break; - case Op_MulAddVS2VI: - case Op_RShiftVL: - case Op_AbsVD: - case Op_NegVD: - if (UseSSE < 2) { - return false; - } - break; -#endif // !LP64 case Op_CompressBits: - if (!VM_Version::supports_bmi2() || (!is_LP64 && UseSSE < 2)) { + if (!VM_Version::supports_bmi2()) { return false; } break; case Op_ExpandBits: - if (!VM_Version::supports_bmi2() || (!is_LP64 && (UseSSE < 2 || !VM_Version::supports_bmi1()))) { + if (!VM_Version::supports_bmi2()) { return false; } break; case Op_SignumF: - if (UseSSE < 1) { - return false; - } - break; case Op_SignumD: - if (UseSSE < 2) { - return false; - } break; case Op_CompressM: if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) { @@ -1661,19 +1564,7 @@ bool Matcher::match_rule_supported(int opcode) { } break; case Op_SqrtF: - if (UseSSE < 1) { - return false; - } - break; case Op_SqrtD: -#ifdef _LP64 - if (UseSSE < 2) { - return false; - } -#else - // x86_32.ad 
has a special match rule for SqrtD. - // Together with common x86 rules, this handles all UseSSE cases. -#endif break; case Op_ConvF2HF: case Op_ConvHF2F: @@ -1705,7 +1596,6 @@ bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, Basi // Identify extra cases that we might want to provide match rules for vector nodes and // other intrinsics guarded with vector length (vlen) and element type (bt). bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); if (!match_rule_supported(opcode)) { return false; } @@ -1752,7 +1642,7 @@ bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { case Op_ClearArray: case Op_VectorMaskGen: case Op_VectorCmpMasked: - if (!is_LP64 || !VM_Version::supports_avx512bw()) { + if (!VM_Version::supports_avx512bw()) { return false; } if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) { @@ -1802,19 +1692,7 @@ bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { if (is_subword_type(bt) && (UseSSE < 4)) { return false; } -#ifndef _LP64 - if (bt == T_BYTE || bt == T_LONG) { - return false; - } -#endif - break; -#ifndef _LP64 - case Op_VectorInsert: - if (bt == T_LONG || bt == T_DOUBLE) { - return false; - } break; -#endif case Op_MinReductionV: case Op_MaxReductionV: if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) { @@ -1829,11 +1707,6 @@ bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { return false; } -#ifndef _LP64 - if (bt == T_BYTE || bt == T_LONG) { - return false; - } -#endif break; case Op_VectorTest: if (UseSSE < 4) { @@ -1918,9 +1791,8 @@ bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { return false; } if (is_subword_type(bt) && - (!is_LP64 || - (size_in_bits > 256 && !VM_Version::supports_avx512bw()) || - (size_in_bits < 64) || 
+ ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) || + (size_in_bits < 64) || (bt == T_SHORT && !VM_Version::supports_bmi2()))) { return false; } @@ -1990,14 +1862,11 @@ bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) { return false; } - if (!is_LP64 && !VM_Version::supports_avx512vl() && size_in_bits < 512) { - return false; - } if (size_in_bits < 128 ) { return false; } case Op_VectorLongToMask: - if (UseAVX < 1 || !is_LP64) { + if (UseAVX < 1) { return false; } if (UseAVX < 3 && !VM_Version::supports_bmi2()) { @@ -2045,7 +1914,6 @@ bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType return false; } - const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) { return false; @@ -2381,7 +2249,6 @@ static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, address_visited.set(shift->_idx); // Flag as address_visited mstack.push(shift->in(2), Matcher::Visit); Node *conv = shift->in(1); -#ifdef _LP64 // Allow Matcher to match the rule which bypass // ConvI2L operation for an array index on LP64 // if the index value is positive. 
@@ -2391,9 +2258,9 @@ static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, !matcher->is_visited(conv)) { address_visited.set(conv->_idx); // Flag as address_visited mstack.push(conv->in(1), Matcher::Pre_Visit); - } else -#endif + } else { mstack.push(conv, Matcher::Pre_Visit); + } return true; } return false; @@ -2531,7 +2398,7 @@ bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, if (adr->is_AddP() && !adr->in(AddPNode::Base)->is_top() && !adr->in(AddPNode::Offset)->is_Con() && - LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32 + off->get_long() == (int) (off->get_long()) && // immL32 // Are there other uses besides address expressions? !is_visited(adr)) { address_visited.set(adr->_idx); // Flag as address_visited @@ -2605,26 +2472,18 @@ static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo, case Op_VecS: // copy whole register case Op_VecD: case Op_VecX: -#ifndef _LP64 - __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); -#else if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); } else { __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); } -#endif break; case Op_VecY: -#ifndef _LP64 - __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); -#else if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); } else { __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); } -#endif break; case Op_VecZ: __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); @@ -2663,28 +2522,20 @@ void 
vec_spill_helper(C2_MacroAssembler *masm, bool is_load, __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); break; case Op_VecX: -#ifndef _LP64 - __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); -#else if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); } else { __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); } -#endif break; case Op_VecY: -#ifndef _LP64 - __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); -#else if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); } else { __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); } -#endif break; case Op_VecZ: __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); @@ -2701,28 +2552,20 @@ void vec_spill_helper(C2_MacroAssembler *masm, bool is_load, __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); break; case Op_VecX: -#ifndef _LP64 - __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); -#else if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); } else { __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); } -#endif break; case Op_VecY: -#ifndef _LP64 - __ vmovdqu(Address(rsp, stack_offset), 
as_XMMRegister(Matcher::_regEncode[reg])); -#else if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); } else { __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); } -#endif break; case Op_VecZ: __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); @@ -3953,7 +3796,6 @@ instruct reinterpret_shrink(vec dst, legVec src) %{ // ---------------------------------------------------------------------------------------------------- -#ifdef _LP64 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ match(Set dst (RoundDoubleMode src rmode)); format %{ "roundsd $dst,$src" %} @@ -4024,7 +3866,6 @@ instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ %} ins_pipe( pipe_slow ); %} -#endif // _LP64 instruct onspinwait() %{ match(OnSpinWait); @@ -4242,7 +4083,6 @@ instruct vgather_subwordGT8B_off(vec dst, memory mem, rRegP idx_base, rRegI offs %} -#ifdef _LP64 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); @@ -4405,7 +4245,6 @@ instruct vgather_masked_subwordGT8B_off_avx2(vec dst, memory mem, rRegP idx_base %} ins_pipe( pipe_slow ); %} -#endif // ====================Scatter======================================= @@ -4604,7 +4443,7 @@ instruct ReplI_zero(vec dst, immI_0 zero) %{ %} instruct ReplI_M1(vec dst, immI_M1 con) %{ - predicate(UseSSE >= 2 && Matcher::is_non_long_integral_vector(n)); + predicate(Matcher::is_non_long_integral_vector(n)); match(Set dst (Replicate con)); format %{ "vallones $dst" %} ins_encode %{ @@ -4616,7 +4455,6 @@ instruct ReplI_M1(vec 
dst, immI_M1 con) %{ // ====================ReplicateL======================================= -#ifdef _LP64 // Replicate long (8 byte) scalar to be vector instruct ReplL_reg(vec dst, rRegL src) %{ predicate(Matcher::vector_element_basic_type(n) == T_LONG); @@ -4637,61 +4475,6 @@ instruct ReplL_reg(vec dst, rRegL src) %{ %} ins_pipe( pipe_slow ); %} -#else // _LP64 -// Replicate long (8 byte) scalar to be vector -instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{ - predicate(Matcher::vector_length(n) <= 4 && Matcher::vector_element_basic_type(n) == T_LONG); - match(Set dst (Replicate src)); - effect(TEMP dst, USE src, TEMP tmp); - format %{ "replicateL $dst,$src" %} - ins_encode %{ - uint vlen = Matcher::vector_length(this); - if (vlen == 2) { - __ movdl($dst$$XMMRegister, $src$$Register); - __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); - __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - } else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands - int vlen_enc = Assembler::AVX_256bit; - __ movdl($dst$$XMMRegister, $src$$Register); - __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); - __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); - __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); - } else { - __ movdl($dst$$XMMRegister, $src$$Register); - __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); - __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); - } - %} - ins_pipe( pipe_slow ); -%} - -instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{ - predicate(Matcher::vector_length(n) == 8 && Matcher::vector_element_basic_type(n) == T_LONG); - match(Set dst (Replicate src)); - effect(TEMP dst, USE src, TEMP tmp); - format %{ "replicateL $dst,$src" %} - ins_encode %{ - if (VM_Version::supports_avx512vl()) { - __ 
movdl($dst$$XMMRegister, $src$$Register); - __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); - __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); - __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); - __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); - } else { - int vlen_enc = Assembler::AVX_512bit; - __ movdl($dst$$XMMRegister, $src$$Register); - __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); - __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); - __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); - } - %} - ins_pipe( pipe_slow ); -%} -#endif // _LP64 instruct ReplL_mem(vec dst, memory mem) %{ predicate(Matcher::vector_element_basic_type(n) == T_LONG); @@ -4740,7 +4523,7 @@ instruct ReplL_zero(vec dst, immL0 zero) %{ %} instruct ReplL_M1(vec dst, immL_M1 con) %{ - predicate(UseSSE >= 2 && Matcher::vector_element_basic_type(n) == T_LONG); + predicate(Matcher::vector_element_basic_type(n) == T_LONG); match(Set dst (Replicate con)); format %{ "vallones $dst" %} ins_encode %{ @@ -4965,7 +4748,6 @@ instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{ ins_pipe( pipe_slow ); %} -#ifdef _LP64 instruct insert2L(vec dst, rRegL val, immU8 idx) %{ predicate(Matcher::vector_length(n) == 2); match(Set dst (VectorInsert (Binary dst val) idx)); @@ -5016,7 +4798,6 @@ instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{ %} ins_pipe( pipe_slow ); %} -#endif instruct insertF(vec dst, regF val, immU8 idx) %{ predicate(Matcher::vector_length(n) < 8); @@ -5062,7 +4843,6 @@ instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{ ins_pipe( pipe_slow ); %} -#ifdef _LP64 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{ predicate(Matcher::vector_length(n) == 2); match(Set dst (VectorInsert (Binary dst val) idx)); @@ -5117,7 +4897,6 @@ instruct insert8D(vec dst, vec src, regD val, immI idx, 
rRegL tmp, legVec vtmp) %} ins_pipe( pipe_slow ); %} -#endif // ====================REDUCTION ARITHMETIC======================================= @@ -5144,7 +4923,6 @@ instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtm // =======================Long Reduction========================================== -#ifdef _LP64 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq()); match(Set dst (AddReductionVL src1 src2)); @@ -5182,7 +4960,6 @@ instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtm %} ins_pipe( pipe_slow ); %} -#endif // _LP64 // =======================Float Reduction========================================== @@ -5394,7 +5171,6 @@ instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, l // =======================Byte Reduction========================================== -#ifdef _LP64 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw()); match(Set dst (AddReductionVI src1 src2)); @@ -5430,7 +5206,6 @@ instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtm %} ins_pipe( pipe_slow ); %} -#endif // =======================Short Reduction========================================== @@ -6731,7 +6506,6 @@ instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{ // Result going from high bit to low bit is 0x11100100 = 0xe4 // --------------------------------------- -#ifdef _LP64 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{ match(Set dst (CopySignF dst src)); effect(TEMP tmp1, TEMP tmp2); @@ -6757,8 +6531,6 @@ instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{ ins_pipe( pipe_slow ); %} -#endif // _LP64 - //----------------------------- 
CompressBits/ExpandBits ------------------------ instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ @@ -7123,7 +6895,6 @@ instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{ ins_encode %{ uint vlen = Matcher::vector_length(this); if (vlen == 2) { - assert(UseSSE >= 2, "required"); __ movdqu($dst$$XMMRegister, $src$$XMMRegister); __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); @@ -7931,7 +7702,6 @@ instruct vucast(vec dst, vec src) %{ ins_pipe( pipe_slow ); %} -#ifdef _LP64 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{ predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) < 64 && @@ -7981,8 +7751,6 @@ instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ins_pipe( pipe_slow ); %} -#endif // _LP64 - // --------------------------------- VectorMaskCmp -------------------------------------- instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ @@ -8192,9 +7960,7 @@ instruct extractI(rRegI dst, legVec src, immU8 idx) %{ predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src match(Set dst (ExtractI src idx)); match(Set dst (ExtractS src idx)); -#ifdef _LP64 match(Set dst (ExtractB src idx)); -#endif format %{ "extractI $dst,$src,$idx\t!" %} ins_encode %{ assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); @@ -8210,9 +7976,7 @@ instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{ Matcher::vector_length_in_bytes(n->in(1)) == 64); // src match(Set dst (ExtractI src idx)); match(Set dst (ExtractS src idx)); -#ifdef _LP64 match(Set dst (ExtractB src idx)); -#endif effect(TEMP vtmp); format %{ "vextractI $dst,$src,$idx\t! 
using $vtmp as TEMP" %} ins_encode %{ @@ -8225,7 +7989,6 @@ instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{ ins_pipe( pipe_slow ); %} -#ifdef _LP64 instruct extractL(rRegL dst, legVec src, immU8 idx) %{ predicate(Matcher::vector_length(n->in(1)) <= 2); // src match(Set dst (ExtractL src idx)); @@ -8253,7 +8016,6 @@ instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{ %} ins_pipe( pipe_slow ); %} -#endif instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ predicate(Matcher::vector_length(n->in(1)) <= 4); @@ -8506,7 +8268,6 @@ instruct vabsnegD(vec dst, vec src) %{ int opcode = this->ideal_Opcode(); uint vlen = Matcher::vector_length(this); if (vlen == 2) { - assert(UseSSE >= 2, "required"); __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister); } else { int vlen_enc = vector_length_encoding(this); @@ -8518,7 +8279,6 @@ instruct vabsnegD(vec dst, vec src) %{ //------------------------------------- VectorTest -------------------------------------------- -#ifdef _LP64 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{ predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16); match(Set cr (VectorTest src1 src2)); @@ -8586,7 +8346,6 @@ instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{ %} ins_pipe( pipe_slow ); %} -#endif //------------------------------------- LoadMask -------------------------------------------- @@ -8837,7 +8596,6 @@ instruct loadIotaIndices(vec dst, immI_0 src) %{ ins_pipe( pipe_slow ); %} -#ifdef _LP64 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{ match(Set dst (PopulateIndex src1 src2)); effect(TEMP dst, TEMP vtmp); @@ -8869,7 +8627,7 @@ instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{ %} ins_pipe( pipe_slow ); %} -#endif + //-------------------------------- Rearrange ---------------------------------- // LoadShuffle/Rearrange for Byte @@ -9450,7 +9208,6 @@ instruct vmasked_store_evex(memory mem, vec 
src, kReg mask) %{ ins_pipe( pipe_slow ); %} -#ifdef _LP64 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{ match(Set addr (VerifyVectorAlignment addr mask)); effect(KILL cr); @@ -9664,7 +9421,6 @@ instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, %} // --------------------------------- Compress/Expand Operations --------------------------- -#ifdef _LP64 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{ predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); match(Set dst (CompressV src mask)); @@ -9680,7 +9436,6 @@ instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratc %} ins_pipe( pipe_slow ); %} -#endif instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{ predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); @@ -9708,8 +9463,6 @@ instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, ins_pipe( pipe_slow ); %} -#endif // _LP64 - // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------ instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ @@ -10430,7 +10183,6 @@ instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{ ins_pipe( pipe_slow ); %} -#ifdef _LP64 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{ predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq()); match(Set dst (XorVMask src (MaskAll cnt))); @@ -10495,7 +10247,6 @@ instruct long_to_mask_evex(kReg dst, rRegL src) %{ %} ins_pipe( pipe_slow ); %} -#endif instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{ match(Set dst (AndVMask src1 src2)); diff --git a/src/hotspot/cpu/x86/x86_32.ad b/src/hotspot/cpu/x86/x86_32.ad deleted file mode 100644 index 02c0f9362085e..0000000000000 --- a/src/hotspot/cpu/x86/x86_32.ad 
+++ /dev/null @@ -1,13846 +0,0 @@ -// -// Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. -// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -// -// This code is free software; you can redistribute it and/or modify it -// under the terms of the GNU General Public License version 2 only, as -// published by the Free Software Foundation. -// -// This code is distributed in the hope that it will be useful, but WITHOUT -// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -// version 2 for more details (a copy is included in the LICENSE file that -// accompanied this code). -// -// You should have received a copy of the GNU General Public License version -// 2 along with this work; if not, write to the Free Software Foundation, -// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -// -// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -// or visit www.oracle.com if you need additional information or have any -// questions. -// -// - -// X86 Architecture Description File - -//----------REGISTER DEFINITION BLOCK------------------------------------------ -// This information is used by the matcher and the register allocator to -// describe individual registers and classes of registers within the target -// architecture. - -register %{ -//----------Architecture Description Register Definitions---------------------- -// General Registers -// "reg_def" name ( register save type, C convention save type, -// ideal register type, encoding ); -// Register Save Types: -// -// NS = No-Save: The register allocator assumes that these registers -// can be used without saving upon entry to the method, & -// that they do not need to be saved at call sites. 
-// -// SOC = Save-On-Call: The register allocator assumes that these registers -// can be used without saving upon entry to the method, -// but that they must be saved at call sites. -// -// SOE = Save-On-Entry: The register allocator assumes that these registers -// must be saved before using them upon entry to the -// method, but they do not need to be saved at call -// sites. -// -// AS = Always-Save: The register allocator assumes that these registers -// must be saved before using them upon entry to the -// method, & that they must be saved at call sites. -// -// Ideal Register Type is used to determine how to save & restore a -// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get -// spilled with LoadP/StoreP. If the register supports both, use Op_RegI. -// -// The encoding number is the actual bit-pattern placed into the opcodes. - -// General Registers -// Previously set EBX, ESI, and EDI as save-on-entry for java code -// Turn off SOE in java-code due to frequent use of uncommon-traps. -// Now that allocator is better, turn on ESI and EDI as SOE registers. - -reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg()); -reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg()); -reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg()); -reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg()); -// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code -reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg()); -reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg()); -reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg()); -reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg()); - -// Float registers. We treat TOS/FPR0 special. It is invisible to the -// allocator, and only shows up in the encodings. -reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad()); -reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad()); -// Ok so here's the trick FPR1 is really st(0) except in the midst -// of emission of assembly for a machnode. 
During the emission the fpu stack -// is pushed making FPR1 == st(1) temporarily. However at any safepoint -// the stack will not have this element so FPR1 == st(0) from the -// oopMap viewpoint. This same weirdness with numbering causes -// instruction encoding to have to play games with the register -// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation -// where it does flt->flt moves to see an example -// -reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()); -reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next()); -reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()); -reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next()); -reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()); -reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next()); -reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()); -reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next()); -reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()); -reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next()); -reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()); -reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next()); -reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()); -reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next()); -// -// Empty fill registers, which are never used, but supply alignment to xmm regs -// -reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad()); -reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad()); -reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad()); -reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad()); -reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad()); -reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad()); -reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad()); 
-reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad()); - -// Specify priority of register selection within phases of register -// allocation. Highest priority is first. A useful heuristic is to -// give registers a low priority when they are required by machine -// instructions, like EAX and EDX. Registers which are used as -// pairs must fall on an even boundary (witness the FPR#L's in this list). -// For the Intel integer registers, the equivalent Long pairs are -// EDX:EAX, EBX:ECX, and EDI:EBP. -alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP, - FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H, - FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H, - FPR6L, FPR6H, FPR7L, FPR7H, - FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7); - - -//----------Architecture Description Register Classes-------------------------- -// Several register classes are automatically defined based upon information in -// this architecture description. -// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) -// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) -// -// Class for no registers (empty set). -reg_class no_reg(); - -// Class for all registers -reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP); -// Class for all registers (excluding EBP) -reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP); -// Dynamic register class that selects at runtime between register classes -// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer). -// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg; -reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %}); - -// Class for general registers -reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX); -// Class for general registers (excluding EBP). -// It is also safe for use by tailjumps (we don't want to allocate in ebp). -// Used also if the PreserveFramePointer flag is true. 
-reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX); -// Dynamic register class that selects between int_reg and int_reg_no_ebp. -reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %}); - -// Class of "X" registers -reg_class int_x_reg(EBX, ECX, EDX, EAX); - -// Class of registers that can appear in an address with no offset. -// EBP and ESP require an extra instruction byte for zero offset. -// Used in fast-unlock -reg_class p_reg(EDX, EDI, ESI, EBX); - -// Class for general registers excluding ECX -reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX); -// Class for general registers excluding ECX (and EBP) -reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX); -// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp. -reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %}); - -// Class for general registers excluding EAX -reg_class nax_reg(EDX, EDI, ESI, ECX, EBX); - -// Class for general registers excluding EAX and EBX. -reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP); -// Class for general registers excluding EAX and EBX (and EBP) -reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX); -// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp. 
-reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %}); - -// Class of EAX (for multiply and divide operations) -reg_class eax_reg(EAX); - -// Class of EBX (for atomic add) -reg_class ebx_reg(EBX); - -// Class of ECX (for shift and JCXZ operations and cmpLTMask) -reg_class ecx_reg(ECX); - -// Class of EDX (for multiply and divide operations) -reg_class edx_reg(EDX); - -// Class of EDI (for synchronization) -reg_class edi_reg(EDI); - -// Class of ESI (for synchronization) -reg_class esi_reg(ESI); - -// Singleton class for stack pointer -reg_class sp_reg(ESP); - -// Singleton class for instruction pointer -// reg_class ip_reg(EIP); - -// Class of integer register pairs -reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI ); -// Class of integer register pairs (excluding EBP and EDI); -reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX ); -// Dynamic register class that selects between long_reg and long_reg_no_ebp. -reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %}); - -// Class of integer register pairs that aligns with calling convention -reg_class eadx_reg( EAX,EDX ); -reg_class ebcx_reg( ECX,EBX ); -reg_class ebpd_reg( EBP,EDI ); - -// Not AX or DX, used in divides -reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP); -// Not AX or DX (and neither EBP), used in divides -reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI); -// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp. -reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %}); - -// Floating point registers. Notice FPR0 is not a choice. -// FPR0 is not ever allocated; we use clever encodings to fake -// a 2-address instructions out of Intels FP stack. 
-reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L ); - -reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H, - FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H, - FPR7L,FPR7H ); - -reg_class fp_flt_reg0( FPR1L ); -reg_class fp_dbl_reg0( FPR1L,FPR1H ); -reg_class fp_dbl_reg1( FPR2L,FPR2H ); -reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H, - FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H ); - -%} - - -//----------SOURCE BLOCK------------------------------------------------------- -// This is a block of C++ code which provides values, functions, and -// definitions necessary in the rest of the architecture description -source_hpp %{ -// Must be visible to the DFA in dfa_x86_32.cpp -extern bool is_operand_hi32_zero(Node* n); -%} - -source %{ -#define RELOC_IMM32 Assembler::imm_operand -#define RELOC_DISP32 Assembler::disp32_operand - -#define __ masm-> - -// How to find the high register of a Long pair, given the low register -#define HIGH_FROM_LOW(x) (as_Register((x)->encoding()+2)) -#define HIGH_FROM_LOW_ENC(x) ((x)+2) - -// These masks are used to provide 128-bit aligned bitmasks to the XMM -// instructions, to allow sign-masking or sign-bit flipping. They allow -// fast versions of NegF/NegD and AbsF/AbsD. - -void reg_mask_init() {} - -// Note: 'double' and 'long long' have 32-bits alignment on x86. -static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) { - // Use the expression (adr)&(~0xF) to provide 128-bits aligned address - // of 128-bits operands for SSE instructions. - jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF))); - // Store the value to a 128-bits operand. - operand[0] = lo; - operand[1] = hi; - return operand; -} - -// Buffer for 128-bits masks used by SSE instructions. -static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment) - -// Static initialization during VM startup. 
-static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF)); -static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF)); -static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000)); -static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000)); - -// Offset hacking within calls. -static int pre_call_resets_size() { - int size = 0; - Compile* C = Compile::current(); - if (C->in_24_bit_fp_mode()) { - size += 6; // fldcw - } - if (VM_Version::supports_vzeroupper()) { - size += 3; // vzeroupper - } - return size; -} - -// !!!!! Special hack to get all type of calls to specify the byte offset -// from the start of the call to the point where the return address -// will point. -int MachCallStaticJavaNode::ret_addr_offset() { - return 5 + pre_call_resets_size(); // 5 bytes from start of call to where return address points -} - -int MachCallDynamicJavaNode::ret_addr_offset() { - return 10 + pre_call_resets_size(); // 10 bytes from start of call to where return address points -} - -static int sizeof_FFree_Float_Stack_All = -1; - -int MachCallRuntimeNode::ret_addr_offset() { - assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already"); - return 5 + pre_call_resets_size() + (_leaf_no_fp ? 0 : sizeof_FFree_Float_Stack_All); -} - -// -// Compute padding required for nodes which need alignment -// - -// The address of the call instruction needs to be 4-byte aligned to -// ensure that it does not span a cache line so that it can be patched. 
-int CallStaticJavaDirectNode::compute_padding(int current_offset) const { - current_offset += pre_call_resets_size(); // skip fldcw, if any - current_offset += 1; // skip call opcode byte - return align_up(current_offset, alignment_required()) - current_offset; -} - -// The address of the call instruction needs to be 4-byte aligned to -// ensure that it does not span a cache line so that it can be patched. -int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { - current_offset += pre_call_resets_size(); // skip fldcw, if any - current_offset += 5; // skip MOV instruction - current_offset += 1; // skip call opcode byte - return align_up(current_offset, alignment_required()) - current_offset; -} - -// EMIT_RM() -void emit_rm(C2_MacroAssembler *masm, int f1, int f2, int f3) { - unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3); - __ emit_int8(c); -} - -// EMIT_CC() -void emit_cc(C2_MacroAssembler *masm, int f1, int f2) { - unsigned char c = (unsigned char)( f1 | f2 ); - __ emit_int8(c); -} - -// EMIT_OPCODE() -void emit_opcode(C2_MacroAssembler *masm, int code) { - __ emit_int8((unsigned char) code); -} - -// EMIT_OPCODE() w/ relocation information -void emit_opcode(C2_MacroAssembler *masm, int code, relocInfo::relocType reloc, int offset = 0) { - __ relocate(__ inst_mark() + offset, reloc); - emit_opcode(masm, code); -} - -// EMIT_D8() -void emit_d8(C2_MacroAssembler *masm, int d8) { - __ emit_int8((unsigned char) d8); -} - -// EMIT_D16() -void emit_d16(C2_MacroAssembler *masm, int d16) { - __ emit_int16(d16); -} - -// EMIT_D32() -void emit_d32(C2_MacroAssembler *masm, int d32) { - __ emit_int32(d32); -} - -// emit 32 bit value and construct relocation entry from relocInfo::relocType -void emit_d32_reloc(C2_MacroAssembler *masm, int d32, relocInfo::relocType reloc, - int format) { - __ relocate(__ inst_mark(), reloc, format); - __ emit_int32(d32); -} - -// emit 32 bit value and construct relocation entry from RelocationHolder -void 
emit_d32_reloc(C2_MacroAssembler *masm, int d32, RelocationHolder const& rspec, - int format) { -#ifdef ASSERT - if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) { - assert(oopDesc::is_oop(cast_to_oop(d32)), "cannot embed broken oops in code"); - } -#endif - __ relocate(__ inst_mark(), rspec, format); - __ emit_int32(d32); -} - -// Access stack slot for load or store -void store_to_stackslot(C2_MacroAssembler *masm, int opcode, int rm_field, int disp) { - emit_opcode( masm, opcode ); // (e.g., FILD [ESP+src]) - if( -128 <= disp && disp <= 127 ) { - emit_rm( masm, 0x01, rm_field, ESP_enc ); // R/M byte - emit_rm( masm, 0x00, ESP_enc, ESP_enc); // SIB byte - emit_d8 (masm, disp); // Displacement // R/M byte - } else { - emit_rm( masm, 0x02, rm_field, ESP_enc ); // R/M byte - emit_rm( masm, 0x00, ESP_enc, ESP_enc); // SIB byte - emit_d32(masm, disp); // Displacement // R/M byte - } -} - - // rRegI ereg, memory mem) %{ // emit_reg_mem -void encode_RegMem( C2_MacroAssembler *masm, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) { - // There is no index & no scale, use form without SIB byte - if ((index == 0x4) && - (scale == 0) && (base != ESP_enc)) { - // If no displacement, mode is 0x0; unless base is [EBP] - if ( (displace == 0) && (base != EBP_enc) ) { - emit_rm(masm, 0x0, reg_encoding, base); - } - else { // If 8-bit displacement, mode 0x1 - if ((displace >= -128) && (displace <= 127) - && (disp_reloc == relocInfo::none) ) { - emit_rm(masm, 0x1, reg_encoding, base); - emit_d8(masm, displace); - } - else { // If 32-bit displacement - if (base == -1) { // Special flag for absolute address - emit_rm(masm, 0x0, reg_encoding, 0x5); - // (manual lies; no SIB needed here) - if ( disp_reloc != relocInfo::none ) { - emit_d32_reloc(masm, displace, disp_reloc, 1); - } else { - emit_d32 (masm, displace); - } - } - else { // Normal base + offset - emit_rm(masm, 0x2, 
reg_encoding, base); - if ( disp_reloc != relocInfo::none ) { - emit_d32_reloc(masm, displace, disp_reloc, 1); - } else { - emit_d32 (masm, displace); - } - } - } - } - } - else { // Else, encode with the SIB byte - // If no displacement, mode is 0x0; unless base is [EBP] - if (displace == 0 && (base != EBP_enc)) { // If no displacement - emit_rm(masm, 0x0, reg_encoding, 0x4); - emit_rm(masm, scale, index, base); - } - else { // If 8-bit displacement, mode 0x1 - if ((displace >= -128) && (displace <= 127) - && (disp_reloc == relocInfo::none) ) { - emit_rm(masm, 0x1, reg_encoding, 0x4); - emit_rm(masm, scale, index, base); - emit_d8(masm, displace); - } - else { // If 32-bit displacement - if (base == 0x04 ) { - emit_rm(masm, 0x2, reg_encoding, 0x4); - emit_rm(masm, scale, index, 0x04); - } else { - emit_rm(masm, 0x2, reg_encoding, 0x4); - emit_rm(masm, scale, index, base); - } - if ( disp_reloc != relocInfo::none ) { - emit_d32_reloc(masm, displace, disp_reloc, 1); - } else { - emit_d32 (masm, displace); - } - } - } - } -} - - -void encode_Copy( C2_MacroAssembler *masm, int dst_encoding, int src_encoding ) { - if( dst_encoding == src_encoding ) { - // reg-reg copy, use an empty encoding - } else { - emit_opcode( masm, 0x8B ); - emit_rm(masm, 0x3, dst_encoding, src_encoding ); - } -} - -void emit_cmpfp_fixup(MacroAssembler* masm) { - Label exit; - __ jccb(Assembler::noParity, exit); - __ pushf(); - // - // comiss/ucomiss instructions set ZF,PF,CF flags and - // zero OF,AF,SF for NaN values. - // Fixup flags by zeroing ZF,PF so that compare of NaN - // values returns 'less than' result (CF is set). - // Leave the rest of flags unchanged. 
- // - // 7 6 5 4 3 2 1 0 - // |S|Z|r|A|r|P|r|C| (r - reserved bit) - // 0 0 1 0 1 0 1 1 (0x2B) - // - __ andl(Address(rsp, 0), 0xffffff2b); - __ popf(); - __ bind(exit); -} - -static void emit_cmpfp3(MacroAssembler* masm, Register dst) { - Label done; - __ movl(dst, -1); - __ jcc(Assembler::parity, done); - __ jcc(Assembler::below, done); - __ setb(Assembler::notEqual, dst); - __ movzbl(dst, dst); - __ bind(done); -} - - -//============================================================================= -const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; - -int ConstantTable::calculate_table_base_offset() const { - return 0; // absolute addressing, no offset -} - -bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } -void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { - ShouldNotReachHere(); -} - -void MachConstantBaseNode::emit(C2_MacroAssembler* masm, PhaseRegAlloc* ra_) const { - // Empty encoding -} - -uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { - return 0; -} - -#ifndef PRODUCT -void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { - st->print("# MachConstantBaseNode (empty encoding)"); -} -#endif - - -//============================================================================= -#ifndef PRODUCT -void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const { - Compile* C = ra_->C; - - int framesize = C->output()->frame_size_in_bytes(); - int bangsize = C->output()->bang_size_in_bytes(); - assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); - // Remove wordSize for return addr which is already pushed. 
- framesize -= wordSize; - - if (C->output()->need_stack_bang(bangsize)) { - framesize -= wordSize; - st->print("# stack bang (%d bytes)", bangsize); - st->print("\n\t"); - st->print("PUSH EBP\t# Save EBP"); - if (PreserveFramePointer) { - st->print("\n\t"); - st->print("MOV EBP, ESP\t# Save the caller's SP into EBP"); - } - if (framesize) { - st->print("\n\t"); - st->print("SUB ESP, #%d\t# Create frame",framesize); - } - } else { - st->print("SUB ESP, #%d\t# Create frame",framesize); - st->print("\n\t"); - framesize -= wordSize; - st->print("MOV [ESP + #%d], EBP\t# Save EBP",framesize); - if (PreserveFramePointer) { - st->print("\n\t"); - st->print("MOV EBP, ESP\t# Save the caller's SP into EBP"); - if (framesize > 0) { - st->print("\n\t"); - st->print("ADD EBP, #%d", framesize); - } - } - } - - if (VerifyStackAtCalls) { - st->print("\n\t"); - framesize -= wordSize; - st->print("MOV [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize); - } - - if( C->in_24_bit_fp_mode() ) { - st->print("\n\t"); - st->print("FLDCW \t# load 24 bit fpu control word"); - } - if (UseSSE >= 2 && VerifyFPU) { - st->print("\n\t"); - st->print("# verify FPU stack (must be clean on entry)"); - } - -#ifdef ASSERT - if (VerifyStackAtCalls) { - st->print("\n\t"); - st->print("# stack alignment check"); - } -#endif - st->cr(); -} -#endif - - -void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { - Compile* C = ra_->C; - - int framesize = C->output()->frame_size_in_bytes(); - int bangsize = C->output()->bang_size_in_bytes(); - - __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode(), C->stub_function() != nullptr); - - C->output()->set_frame_complete(__ offset()); - - if (C->has_mach_constant_base_node()) { - // NOTE: We set the table base offset here because users might be - // emitted before MachConstantBaseNode. 
- ConstantTable& constant_table = C->output()->constant_table(); - constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); - } -} - -uint MachPrologNode::size(PhaseRegAlloc *ra_) const { - return MachNode::size(ra_); // too many variables; just compute it the hard way -} - -int MachPrologNode::reloc() const { - return 0; // a large enough number -} - -//============================================================================= -#ifndef PRODUCT -void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { - Compile *C = ra_->C; - int framesize = C->output()->frame_size_in_bytes(); - assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); - // Remove two words for return addr and rbp, - framesize -= 2*wordSize; - - if (C->max_vector_size() > 16) { - st->print("VZEROUPPER"); - st->cr(); st->print("\t"); - } - if (C->in_24_bit_fp_mode()) { - st->print("FLDCW standard control word"); - st->cr(); st->print("\t"); - } - if (framesize) { - st->print("ADD ESP,%d\t# Destroy frame",framesize); - st->cr(); st->print("\t"); - } - st->print_cr("POPL EBP"); st->print("\t"); - if (do_polling() && C->is_method_compilation()) { - st->print("CMPL rsp, poll_offset[thread] \n\t" - "JA #safepoint_stub\t" - "# Safepoint: poll for GC"); - } -} -#endif - -void MachEpilogNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { - Compile *C = ra_->C; - - if (C->max_vector_size() > 16) { - // Clear upper bits of YMM registers when current compiled code uses - // wide vectors to avoid AVX <-> SSE transition penalty during call. 
- __ vzeroupper(); - } - // If method set FPU control word, restore to standard control word - if (C->in_24_bit_fp_mode()) { - __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); - } - - int framesize = C->output()->frame_size_in_bytes(); - assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); - // Remove two words for return addr and rbp, - framesize -= 2*wordSize; - - // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here - - if (framesize >= 128) { - emit_opcode(masm, 0x81); // add SP, #framesize - emit_rm(masm, 0x3, 0x00, ESP_enc); - emit_d32(masm, framesize); - } else if (framesize) { - emit_opcode(masm, 0x83); // add SP, #framesize - emit_rm(masm, 0x3, 0x00, ESP_enc); - emit_d8(masm, framesize); - } - - emit_opcode(masm, 0x58 | EBP_enc); - - if (StackReservedPages > 0 && C->has_reserved_stack_access()) { - __ reserved_stack_check(); - } - - if (do_polling() && C->is_method_compilation()) { - Register thread = as_Register(EBX_enc); - __ get_thread(thread); - Label dummy_label; - Label* code_stub = &dummy_label; - if (!C->output()->in_scratch_emit_size()) { - C2SafepointPollStub* stub = new (C->comp_arena()) C2SafepointPollStub(__ offset()); - C->output()->add_stub(stub); - code_stub = &stub->entry(); - } - __ set_inst_mark(); - __ relocate(relocInfo::poll_return_type); - __ clear_inst_mark(); - __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */); - } -} - -uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { - return MachNode::size(ra_); // too many variables; just compute it - // the hard way -} - -int MachEpilogNode::reloc() const { - return 0; // a large enough number -} - -const Pipeline * MachEpilogNode::pipeline() const { - return MachNode::pipeline_class(); -} - -//============================================================================= - -enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack }; -static enum RC rc_class( 
OptoReg::Name reg ) { - - if( !OptoReg::is_valid(reg) ) return rc_bad; - if (OptoReg::is_stack(reg)) return rc_stack; - - VMReg r = OptoReg::as_VMReg(reg); - if (r->is_Register()) return rc_int; - if (r->is_FloatRegister()) { - assert(UseSSE < 2, "shouldn't be used in SSE2+ mode"); - return rc_float; - } - if (r->is_KRegister()) return rc_kreg; - assert(r->is_XMMRegister(), "must be"); - return rc_xmm; -} - -static int impl_helper( C2_MacroAssembler *masm, bool do_size, bool is_load, int offset, int reg, - int opcode, const char *op_str, int size, outputStream* st ) { - if( masm ) { - masm->set_inst_mark(); - emit_opcode (masm, opcode ); - encode_RegMem(masm, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none); - masm->clear_inst_mark(); -#ifndef PRODUCT - } else if( !do_size ) { - if( size != 0 ) st->print("\n\t"); - if( opcode == 0x8B || opcode == 0x89 ) { // MOV - if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset); - else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]); - } else { // FLD, FST, PUSH, POP - st->print("%s [ESP + #%d]",op_str,offset); - } -#endif - } - int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); - return size+3+offset_size; -} - -// Helper for XMM registers. Extra opcode bits, limited syntax. -static int impl_x_helper( C2_MacroAssembler *masm, bool do_size, bool is_load, - int offset, int reg_lo, int reg_hi, int size, outputStream* st ) { - int in_size_in_bits = Assembler::EVEX_32bit; - int evex_encoding = 0; - if (reg_lo+1 == reg_hi) { - in_size_in_bits = Assembler::EVEX_64bit; - evex_encoding = Assembler::VEX_W; - } - if (masm) { - // EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations, - // it maps more cases to single byte displacement - __ set_managed(); - if (reg_lo+1 == reg_hi) { // double move? 
- if (is_load) { - __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset)); - } else { - __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo])); - } - } else { - if (is_load) { - __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset)); - } else { - __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo])); - } - } -#ifndef PRODUCT - } else if (!do_size) { - if (size != 0) st->print("\n\t"); - if (reg_lo+1 == reg_hi) { // double move? - if (is_load) st->print("%s %s,[ESP + #%d]", - UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD", - Matcher::regName[reg_lo], offset); - else st->print("MOVSD [ESP + #%d],%s", - offset, Matcher::regName[reg_lo]); - } else { - if (is_load) st->print("MOVSS %s,[ESP + #%d]", - Matcher::regName[reg_lo], offset); - else st->print("MOVSS [ESP + #%d],%s", - offset, Matcher::regName[reg_lo]); - } -#endif - } - bool is_single_byte = false; - if ((UseAVX > 2) && (offset != 0)) { - is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding); - } - int offset_size = 0; - if (UseAVX > 2 ) { - offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); - } else { - offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); - } - size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX - // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. - return size+5+offset_size; -} - - -static int impl_movx_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo, - int src_hi, int dst_hi, int size, outputStream* st ) { - if (masm) { - // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way. - __ set_managed(); - if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move? 
- __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]), - as_XMMRegister(Matcher::_regEncode[src_lo])); - } else { - __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]), - as_XMMRegister(Matcher::_regEncode[src_lo])); - } -#ifndef PRODUCT - } else if (!do_size) { - if (size != 0) st->print("\n\t"); - if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers - if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move? - st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); - } else { - st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); - } - } else { - if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move? - st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); - } else { - st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]); - } - } -#endif - } - // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. - // Only MOVAPS SSE prefix uses 1 byte. EVEX uses an additional 2 bytes. - int sz = (UseAVX > 2) ? 6 : 4; - if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) && - UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3; - return size + sz; -} - -static int impl_movgpr2x_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo, - int src_hi, int dst_hi, int size, outputStream* st ) { - // 32-bit - if (masm) { - // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way. - __ set_managed(); - __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]), - as_Register(Matcher::_regEncode[src_lo])); -#ifndef PRODUCT - } else if (!do_size) { - st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); -#endif - } - return (UseAVX> 2) ? 
6 : 4; -} - - -static int impl_movx2gpr_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int dst_lo, - int src_hi, int dst_hi, int size, outputStream* st ) { - // 32-bit - if (masm) { - // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way. - __ set_managed(); - __ movdl(as_Register(Matcher::_regEncode[dst_lo]), - as_XMMRegister(Matcher::_regEncode[src_lo])); -#ifndef PRODUCT - } else if (!do_size) { - st->print("movdl %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]); -#endif - } - return (UseAVX> 2) ? 6 : 4; -} - -static int impl_mov_helper( C2_MacroAssembler *masm, bool do_size, int src, int dst, int size, outputStream* st ) { - if( masm ) { - emit_opcode(masm, 0x8B ); - emit_rm (masm, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] ); -#ifndef PRODUCT - } else if( !do_size ) { - if( size != 0 ) st->print("\n\t"); - st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]); -#endif - } - return size+2; -} - -static int impl_fp_store_helper( C2_MacroAssembler *masm, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi, - int offset, int size, outputStream* st ) { - if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there - if( masm ) { - emit_opcode( masm, 0xD9 ); // FLD (i.e., push it) - emit_d8( masm, 0xC0-1+Matcher::_regEncode[src_lo] ); -#ifndef PRODUCT - } else if( !do_size ) { - if( size != 0 ) st->print("\n\t"); - st->print("FLD %s",Matcher::regName[src_lo]); -#endif - } - size += 2; - } - - int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/; - const char *op_str; - int op; - if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store? - op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D "; - op = 0xDD; - } else { // 32-bit store - op_str = (src_lo != FPR1L_num) ? 
"FSTP_S" : "FST_S "; - op = 0xD9; - assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" ); - } - - return impl_helper(masm,do_size,false,offset,st_op,op,op_str,size, st); -} - -// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad. -static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo, - int src_hi, int dst_hi, uint ireg, outputStream* st); - -void vec_spill_helper(C2_MacroAssembler *masm, bool is_load, - int stack_offset, int reg, uint ireg, outputStream* st); - -static void vec_stack_to_stack_helper(C2_MacroAssembler *masm, int src_offset, - int dst_offset, uint ireg, outputStream* st) { - if (masm) { - switch (ireg) { - case Op_VecS: - __ pushl(Address(rsp, src_offset)); - __ popl (Address(rsp, dst_offset)); - break; - case Op_VecD: - __ pushl(Address(rsp, src_offset)); - __ popl (Address(rsp, dst_offset)); - __ pushl(Address(rsp, src_offset+4)); - __ popl (Address(rsp, dst_offset+4)); - break; - case Op_VecX: - __ movdqu(Address(rsp, -16), xmm0); - __ movdqu(xmm0, Address(rsp, src_offset)); - __ movdqu(Address(rsp, dst_offset), xmm0); - __ movdqu(xmm0, Address(rsp, -16)); - break; - case Op_VecY: - __ vmovdqu(Address(rsp, -32), xmm0); - __ vmovdqu(xmm0, Address(rsp, src_offset)); - __ vmovdqu(Address(rsp, dst_offset), xmm0); - __ vmovdqu(xmm0, Address(rsp, -32)); - break; - case Op_VecZ: - __ evmovdquq(Address(rsp, -64), xmm0, 2); - __ evmovdquq(xmm0, Address(rsp, src_offset), 2); - __ evmovdquq(Address(rsp, dst_offset), xmm0, 2); - __ evmovdquq(xmm0, Address(rsp, -64), 2); - break; - default: - ShouldNotReachHere(); - } -#ifndef PRODUCT - } else { - switch (ireg) { - case Op_VecS: - st->print("pushl [rsp + #%d]\t# 32-bit mem-mem spill\n\t" - "popl [rsp + #%d]", - src_offset, dst_offset); - break; - case Op_VecD: - st->print("pushl [rsp + #%d]\t# 64-bit mem-mem spill\n\t" - "popq [rsp + #%d]\n\t" - "pushl [rsp + #%d]\n\t" - "popq [rsp + #%d]", - src_offset, dst_offset, 
src_offset+4, dst_offset+4); - break; - case Op_VecX: - st->print("movdqu [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t" - "movdqu xmm0, [rsp + #%d]\n\t" - "movdqu [rsp + #%d], xmm0\n\t" - "movdqu xmm0, [rsp - #16]", - src_offset, dst_offset); - break; - case Op_VecY: - st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" - "vmovdqu xmm0, [rsp + #%d]\n\t" - "vmovdqu [rsp + #%d], xmm0\n\t" - "vmovdqu xmm0, [rsp - #32]", - src_offset, dst_offset); - break; - case Op_VecZ: - st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" - "vmovdqu xmm0, [rsp + #%d]\n\t" - "vmovdqu [rsp + #%d], xmm0\n\t" - "vmovdqu xmm0, [rsp - #64]", - src_offset, dst_offset); - break; - default: - ShouldNotReachHere(); - } -#endif - } -} - -uint MachSpillCopyNode::implementation( C2_MacroAssembler *masm, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { - // Get registers to move - OptoReg::Name src_second = ra_->get_reg_second(in(1)); - OptoReg::Name src_first = ra_->get_reg_first(in(1)); - OptoReg::Name dst_second = ra_->get_reg_second(this ); - OptoReg::Name dst_first = ra_->get_reg_first(this ); - - enum RC src_second_rc = rc_class(src_second); - enum RC src_first_rc = rc_class(src_first); - enum RC dst_second_rc = rc_class(dst_second); - enum RC dst_first_rc = rc_class(dst_first); - - assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); - - // Generate spill code! 
- int size = 0; - - if( src_first == dst_first && src_second == dst_second ) - return size; // Self copy, no move - - if (bottom_type()->isa_vect() != nullptr && bottom_type()->isa_vectmask() == nullptr) { - uint ireg = ideal_reg(); - assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); - assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity"); - assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); - if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { - // mem -> mem - int src_offset = ra_->reg2offset(src_first); - int dst_offset = ra_->reg2offset(dst_first); - vec_stack_to_stack_helper(masm, src_offset, dst_offset, ireg, st); - } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { - vec_mov_helper(masm, src_first, dst_first, src_second, dst_second, ireg, st); - } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { - int stack_offset = ra_->reg2offset(dst_first); - vec_spill_helper(masm, false, stack_offset, src_first, ireg, st); - } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { - int stack_offset = ra_->reg2offset(src_first); - vec_spill_helper(masm, true, stack_offset, dst_first, ireg, st); - } else { - ShouldNotReachHere(); - } - return 0; - } - - // -------------------------------------- - // Check for mem-mem move. push/pop to move. 
- if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { - if( src_second == dst_first ) { // overlapping stack copy ranges - assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" ); - size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); - size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); - src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits - } - // move low bits - size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st); - size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st); - if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits - size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st); - size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st); - } - return size; - } - - // -------------------------------------- - // Check for integer reg-reg copy - if( src_first_rc == rc_int && dst_first_rc == rc_int ) - size = impl_mov_helper(masm,do_size,src_first,dst_first,size, st); - - // Check for integer store - if( src_first_rc == rc_int && dst_first_rc == rc_stack ) - size = impl_helper(masm,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st); - - // Check for integer load - if( src_first_rc == rc_stack && dst_first_rc == rc_int ) - size = impl_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st); - - // Check for integer reg-xmm reg copy - if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) { - assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), - "no 64 bit integer-float reg moves" ); - return impl_movgpr2x_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st); - } - // -------------------------------------- - // Check for 
float reg-reg copy - if( src_first_rc == rc_float && dst_first_rc == rc_float ) { - assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || - (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" ); - if( masm ) { - - // Note the mucking with the register encode to compensate for the 0/1 - // indexing issue mentioned in a comment in the reg_def sections - // for FPR registers many lines above here. - - if( src_first != FPR1L_num ) { - emit_opcode (masm, 0xD9 ); // FLD ST(i) - emit_d8 (masm, 0xC0+Matcher::_regEncode[src_first]-1 ); - emit_opcode (masm, 0xDD ); // FSTP ST(i) - emit_d8 (masm, 0xD8+Matcher::_regEncode[dst_first] ); - } else { - emit_opcode (masm, 0xDD ); // FST ST(i) - emit_d8 (masm, 0xD0+Matcher::_regEncode[dst_first]-1 ); - } -#ifndef PRODUCT - } else if( !do_size ) { - if( size != 0 ) st->print("\n\t"); - if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]); - else st->print( "FST %s", Matcher::regName[dst_first]); -#endif - } - return size + ((src_first != FPR1L_num) ? 2+2 : 2); - } - - // Check for float store - if( src_first_rc == rc_float && dst_first_rc == rc_stack ) { - return impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st); - } - - // Check for float load - if( dst_first_rc == rc_float && src_first_rc == rc_stack ) { - int offset = ra_->reg2offset(src_first); - const char *op_str; - int op; - if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load? 
- op_str = "FLD_D"; - op = 0xDD; - } else { // 32-bit load - op_str = "FLD_S"; - op = 0xD9; - assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" ); - } - if( masm ) { - masm->set_inst_mark(); - emit_opcode (masm, op ); - encode_RegMem(masm, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none); - emit_opcode (masm, 0xDD ); // FSTP ST(i) - emit_d8 (masm, 0xD8+Matcher::_regEncode[dst_first] ); - masm->clear_inst_mark(); -#ifndef PRODUCT - } else if( !do_size ) { - if( size != 0 ) st->print("\n\t"); - st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]); -#endif - } - int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4); - return size + 3+offset_size+2; - } - - // Check for xmm reg-reg copy - if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) { - assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) || - (src_first+1 == src_second && dst_first+1 == dst_second), - "no non-adjacent float-moves" ); - return impl_movx_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st); - } - - // Check for xmm reg-integer reg copy - if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) { - assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad), - "no 64 bit float-integer reg moves" ); - return impl_movx2gpr_helper(masm,do_size,src_first,dst_first,src_second, dst_second, size, st); - } - - // Check for xmm store - if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) { - return impl_x_helper(masm,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st); - } - - // Check for float xmm load - if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) { - return impl_x_helper(masm,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st); - } - - // Copy from float reg to xmm reg - if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) { - // copy to the top of stack from floating point reg - // and use LEA to preserve flags - if( masm ) { - 
emit_opcode(masm,0x8D); // LEA ESP,[ESP-8] - emit_rm(masm, 0x1, ESP_enc, 0x04); - emit_rm(masm, 0x0, 0x04, ESP_enc); - emit_d8(masm,0xF8); -#ifndef PRODUCT - } else if( !do_size ) { - if( size != 0 ) st->print("\n\t"); - st->print("LEA ESP,[ESP-8]"); -#endif - } - size += 4; - - size = impl_fp_store_helper(masm,do_size,src_first,src_second,dst_first,dst_second,0,size, st); - - // Copy from the temp memory to the xmm reg. - size = impl_x_helper(masm,do_size,true ,0,dst_first, dst_second, size, st); - - if( masm ) { - emit_opcode(masm,0x8D); // LEA ESP,[ESP+8] - emit_rm(masm, 0x1, ESP_enc, 0x04); - emit_rm(masm, 0x0, 0x04, ESP_enc); - emit_d8(masm,0x08); -#ifndef PRODUCT - } else if( !do_size ) { - if( size != 0 ) st->print("\n\t"); - st->print("LEA ESP,[ESP+8]"); -#endif - } - size += 4; - return size; - } - - // AVX-512 opmask specific spilling. - if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) { - assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); - assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); - int offset = ra_->reg2offset(src_first); - if (masm != nullptr) { - __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset)); -#ifndef PRODUCT - } else { - st->print("KMOV %s, [ESP + %d]", Matcher::regName[dst_first], offset); -#endif - } - return 0; - } - - if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) { - assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); - assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); - int offset = ra_->reg2offset(dst_first); - if (masm != nullptr) { - __ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first])); -#ifndef PRODUCT - } else { - st->print("KMOV [ESP + %d], %s", offset, Matcher::regName[src_first]); -#endif - } - return 0; - } - - if (src_first_rc == rc_kreg && dst_first_rc == rc_int) { - Unimplemented(); - return 0; - } - - if 
(src_first_rc == rc_int && dst_first_rc == rc_kreg) { - Unimplemented(); - return 0; - } - - if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) { - assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair"); - assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair"); - if (masm != nullptr) { - __ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first])); -#ifndef PRODUCT - } else { - st->print("KMOV %s, %s", Matcher::regName[dst_first], Matcher::regName[src_first]); -#endif - } - return 0; - } - - assert( size > 0, "missed a case" ); - - // -------------------------------------------------------------------- - // Check for second bits still needing moving. - if( src_second == dst_second ) - return size; // Self copy; no move - assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" ); - - // Check for second word int-int move - if( src_second_rc == rc_int && dst_second_rc == rc_int ) - return impl_mov_helper(masm,do_size,src_second,dst_second,size, st); - - // Check for second word integer store - if( src_second_rc == rc_int && dst_second_rc == rc_stack ) - return impl_helper(masm,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st); - - // Check for second word integer load - if( dst_second_rc == rc_int && src_second_rc == rc_stack ) - return impl_helper(masm,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st); - - Unimplemented(); - return 0; // Mute compiler -} - -#ifndef PRODUCT -void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { - implementation( nullptr, ra_, false, st ); -} -#endif - -void MachSpillCopyNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { - implementation( masm, ra_, false, nullptr ); -} - -uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { - return MachNode::size(ra_); -} - - 
-//============================================================================= -#ifndef PRODUCT -void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { - int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); - int reg = ra_->get_reg_first(this); - st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset); -} -#endif - -void BoxLockNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { - int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); - int reg = ra_->get_encode(this); - if( offset >= 128 ) { - emit_opcode(masm, 0x8D); // LEA reg,[SP+offset] - emit_rm(masm, 0x2, reg, 0x04); - emit_rm(masm, 0x0, 0x04, ESP_enc); - emit_d32(masm, offset); - } - else { - emit_opcode(masm, 0x8D); // LEA reg,[SP+offset] - emit_rm(masm, 0x1, reg, 0x04); - emit_rm(masm, 0x0, 0x04, ESP_enc); - emit_d8(masm, offset); - } -} - -uint BoxLockNode::size(PhaseRegAlloc *ra_) const { - int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); - if( offset >= 128 ) { - return 7; - } - else { - return 4; - } -} - -//============================================================================= -#ifndef PRODUCT -void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { - st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check"); - st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub"); - st->print_cr("\tNOP"); - st->print_cr("\tNOP"); - if( !OptoBreakpoint ) - st->print_cr("\tNOP"); -} -#endif - -void MachUEPNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { - __ ic_check(CodeEntryAlignment); -} - -uint MachUEPNode::size(PhaseRegAlloc *ra_) const { - return MachNode::size(ra_); // too many variables; just compute it - // the hard way -} - - -//============================================================================= - -// Vector calling convention not supported. 
-bool Matcher::supports_vector_calling_convention() { - return false; -} - -OptoRegPair Matcher::vector_return_value(uint ideal_reg) { - Unimplemented(); - return OptoRegPair(0, 0); -} - -// Is this branch offset short enough that a short branch can be used? -// -// NOTE: If the platform does not provide any short branch variants, then -// this method should return false for offset 0. -bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { - // The passed offset is relative to address of the branch. - // On 86 a branch displacement is calculated relative to address - // of a next instruction. - offset -= br_size; - - // the short version of jmpConUCF2 contains multiple branches, - // making the reach slightly less - if (rule == jmpConUCF2_rule) - return (-126 <= offset && offset <= 125); - return (-128 <= offset && offset <= 127); -} - -// Return whether or not this register is ever used as an argument. This -// function is used on startup to build the trampoline stubs in generateOptoStub. -// Registers not mentioned will be killed by the VM call in the trampoline, and -// arguments in those registers not be available to the callee. -bool Matcher::can_be_java_arg( int reg ) { - if( reg == ECX_num || reg == EDX_num ) return true; - if( (reg == XMM0_num || reg == XMM1_num ) && UseSSE>=1 ) return true; - if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true; - return false; -} - -bool Matcher::is_spillable_arg( int reg ) { - return can_be_java_arg(reg); -} - -uint Matcher::int_pressure_limit() -{ - return (INTPRESSURE == -1) ? 6 : INTPRESSURE; -} - -uint Matcher::float_pressure_limit() -{ - return (FLOATPRESSURE == -1) ? 6 : FLOATPRESSURE; -} - -bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { - // Use hardware integer DIV instruction when - // it is faster than a code which use multiply. - // Only when constant divisor fits into 32 bit - // (min_jint is excluded to get only correct - // positive 32 bit values from negative). 
- return VM_Version::has_fast_idiv() && - (divisor == (int)divisor && divisor != min_jint); -} - -// Register for DIVI projection of divmodI -RegMask Matcher::divI_proj_mask() { - return EAX_REG_mask(); -} - -// Register for MODI projection of divmodI -RegMask Matcher::modI_proj_mask() { - return EDX_REG_mask(); -} - -// Register for DIVL projection of divmodL -RegMask Matcher::divL_proj_mask() { - ShouldNotReachHere(); - return RegMask(); -} - -// Register for MODL projection of divmodL -RegMask Matcher::modL_proj_mask() { - ShouldNotReachHere(); - return RegMask(); -} - -const RegMask Matcher::method_handle_invoke_SP_save_mask() { - return NO_REG_mask(); -} - -// Returns true if the high 32 bits of the value is known to be zero. -bool is_operand_hi32_zero(Node* n) { - int opc = n->Opcode(); - if (opc == Op_AndL) { - Node* o2 = n->in(2); - if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { - return true; - } - } - if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) { - return true; - } - return false; -} - -%} - -//----------ENCODING BLOCK----------------------------------------------------- -// This block specifies the encoding classes used by the compiler to output -// byte streams. Encoding classes generate functions which are called by -// Machine Instruction Nodes in order to generate the bit encoding of the -// instruction. Operands specify their base encoding interface with the -// interface keyword. There are currently supported four interfaces, -// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an -// operand to generate a function which returns its register number when -// queried. CONST_INTER causes an operand to generate a function which -// returns the value of the constant when queried. MEMORY_INTER causes an -// operand to generate four functions which return the Base Register, the -// Index Register, the Scale Value, and the Offset Value of the operand when -// queried. 
COND_INTER causes an operand to generate six functions which -// return the encoding code (ie - encoding bits for the instruction) -// associated with each basic boolean condition for a conditional instruction. -// Instructions specify two basic values for encoding. They use the -// ins_encode keyword to specify their encoding class (which must be one of -// the class names specified in the encoding block), and they use the -// opcode keyword to specify, in order, their primary, secondary, and -// tertiary opcode. Only the opcode sections which a particular instruction -// needs for encoding need to be specified. -encode %{ - // Build emit functions for each basic byte or larger field in the intel - // encoding scheme (opcode, rm, sib, immediate), and call them from C++ - // code in the enc_class source block. Emit functions will live in the - // main source block for now. In future, we can generalize this by - // adding a syntax that specifies the sizes of fields in an order, - // so that the adlc can build the emit functions automagically - - // Set instruction mark in MacroAssembler. This is used only in - // instructions that emit bytes directly to the CodeBuffer wraped - // in the MacroAssembler. Should go away once all "instruct" are - // patched to emit bytes only using methods in MacroAssembler. 
- enc_class SetInstMark %{ - __ set_inst_mark(); - %} - - enc_class ClearInstMark %{ - __ clear_inst_mark(); - %} - - // Emit primary opcode - enc_class OpcP %{ - emit_opcode(masm, $primary); - %} - - // Emit secondary opcode - enc_class OpcS %{ - emit_opcode(masm, $secondary); - %} - - // Emit opcode directly - enc_class Opcode(immI d8) %{ - emit_opcode(masm, $d8$$constant); - %} - - enc_class SizePrefix %{ - emit_opcode(masm,0x66); - %} - - enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) - emit_rm(masm, 0x3, $dst$$reg, $src$$reg); - %} - - enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{ // OpcRegReg(Many) - emit_opcode(masm,$opcode$$constant); - emit_rm(masm, 0x3, $dst$$reg, $src$$reg); - %} - - enc_class mov_r32_imm0( rRegI dst ) %{ - emit_opcode( masm, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32 - emit_d32 ( masm, 0x0 ); // imm32==0x0 - %} - - enc_class cdq_enc %{ - // Full implementation of Java idiv and irem; checks for - // special case as described in JVM spec., p.243 & p.271. 
- // - // normal case special case - // - // input : rax,: dividend min_int - // reg: divisor -1 - // - // output: rax,: quotient (= rax, idiv reg) min_int - // rdx: remainder (= rax, irem reg) 0 - // - // Code sequnce: - // - // 81 F8 00 00 00 80 cmp rax,80000000h - // 0F 85 0B 00 00 00 jne normal_case - // 33 D2 xor rdx,edx - // 83 F9 FF cmp rcx,0FFh - // 0F 84 03 00 00 00 je done - // normal_case: - // 99 cdq - // F7 F9 idiv rax,ecx - // done: - // - emit_opcode(masm,0x81); emit_d8(masm,0xF8); - emit_opcode(masm,0x00); emit_d8(masm,0x00); - emit_opcode(masm,0x00); emit_d8(masm,0x80); // cmp rax,80000000h - emit_opcode(masm,0x0F); emit_d8(masm,0x85); - emit_opcode(masm,0x0B); emit_d8(masm,0x00); - emit_opcode(masm,0x00); emit_d8(masm,0x00); // jne normal_case - emit_opcode(masm,0x33); emit_d8(masm,0xD2); // xor rdx,edx - emit_opcode(masm,0x83); emit_d8(masm,0xF9); emit_d8(masm,0xFF); // cmp rcx,0FFh - emit_opcode(masm,0x0F); emit_d8(masm,0x84); - emit_opcode(masm,0x03); emit_d8(masm,0x00); - emit_opcode(masm,0x00); emit_d8(masm,0x00); // je done - // normal_case: - emit_opcode(masm,0x99); // cdq - // idiv (note: must be emitted by the user of this rule) - // normal: - %} - - // Dense encoding for older common ops - enc_class Opc_plus(immI opcode, rRegI reg) %{ - emit_opcode(masm, $opcode$$constant + $reg$$reg); - %} - - - // Opcde enc_class for 8/32 bit immediate instructions with sign-extension - enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit - // Check for 8-bit immediate, and set sign extend bit in opcode - if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { - emit_opcode(masm, $primary | 0x02); - } - else { // If 32-bit immediate - emit_opcode(masm, $primary); - } - %} - - enc_class OpcSErm (rRegI dst, immI imm) %{ // OpcSEr/m - // Emit primary opcode and set sign-extend bit - // Check for 8-bit immediate, and set sign extend bit in opcode - if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { - emit_opcode(masm, 
$primary | 0x02); } - else { // If 32-bit immediate - emit_opcode(masm, $primary); - } - // Emit r/m byte with secondary opcode, after primary opcode. - emit_rm(masm, 0x3, $secondary, $dst$$reg); - %} - - enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits - // Check for 8-bit immediate, and set sign extend bit in opcode - if (($imm$$constant >= -128) && ($imm$$constant <= 127)) { - $$$emit8$imm$$constant; - } - else { // If 32-bit immediate - // Output immediate - $$$emit32$imm$$constant; - } - %} - - enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{ - // Emit primary opcode and set sign-extend bit - // Check for 8-bit immediate, and set sign extend bit in opcode - int con = (int)$imm$$constant; // Throw away top bits - emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); - // Emit r/m byte with secondary opcode, after primary opcode. - emit_rm(masm, 0x3, $secondary, $dst$$reg); - if ((con >= -128) && (con <= 127)) emit_d8 (masm,con); - else emit_d32(masm,con); - %} - - enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{ - // Emit primary opcode and set sign-extend bit - // Check for 8-bit immediate, and set sign extend bit in opcode - int con = (int)($imm$$constant >> 32); // Throw away bottom bits - emit_opcode(masm, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary); - // Emit r/m byte with tertiary opcode, after primary opcode. 
- emit_rm(masm, 0x3, $tertiary, HIGH_FROM_LOW_ENC($dst$$reg)); - if ((con >= -128) && (con <= 127)) emit_d8 (masm,con); - else emit_d32(masm,con); - %} - - enc_class OpcSReg (rRegI dst) %{ // BSWAP - emit_cc(masm, $secondary, $dst$$reg ); - %} - - enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP - int destlo = $dst$$reg; - int desthi = HIGH_FROM_LOW_ENC(destlo); - // bswap lo - emit_opcode(masm, 0x0F); - emit_cc(masm, 0xC8, destlo); - // bswap hi - emit_opcode(masm, 0x0F); - emit_cc(masm, 0xC8, desthi); - // xchg lo and hi - emit_opcode(masm, 0x87); - emit_rm(masm, 0x3, destlo, desthi); - %} - - enc_class RegOpc (rRegI div) %{ // IDIV, IMOD, JMP indirect, ... - emit_rm(masm, 0x3, $secondary, $div$$reg ); - %} - - enc_class enc_cmov(cmpOp cop ) %{ // CMOV - $$$emit8$primary; - emit_cc(masm, $secondary, $cop$$cmpcode); - %} - - enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV - int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1); - emit_d8(masm, op >> 8 ); - emit_d8(masm, op & 255); - %} - - // emulate a CMOV with a conditional branch around a MOV - enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV - // Invert sense of branch from sense of CMOV - emit_cc( masm, 0x70, ($cop$$cmpcode^1) ); - emit_d8( masm, $brOffs$$constant ); - %} - - enc_class enc_PartialSubtypeCheck( ) %{ - Register Redi = as_Register(EDI_enc); // result register - Register Reax = as_Register(EAX_enc); // super class - Register Recx = as_Register(ECX_enc); // killed - Register Resi = as_Register(ESI_enc); // sub class - Label miss; - - // NB: Callers may assume that, when $result is a valid register, - // check_klass_subtype_slow_path sets it to a nonzero value. 
- __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi, - nullptr, &miss, - /*set_cond_codes:*/ true); - if ($primary) { - __ xorptr(Redi, Redi); - } - __ bind(miss); - %} - - enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All - int start = __ offset(); - if (UseSSE >= 2) { - if (VerifyFPU) { - __ verify_FPU(0, "must be empty in SSE2+ mode"); - } - } else { - // External c_calling_convention expects the FPU stack to be 'clean'. - // Compiled code leaves it dirty. Do cleanup now. - __ empty_FPU_stack(); - } - if (sizeof_FFree_Float_Stack_All == -1) { - sizeof_FFree_Float_Stack_All = __ offset() - start; - } else { - assert(__ offset() - start == sizeof_FFree_Float_Stack_All, "wrong size"); - } - %} - - enc_class Verify_FPU_For_Leaf %{ - if( VerifyFPU ) { - __ verify_FPU( -3, "Returning from Runtime Leaf call"); - } - %} - - enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf - // This is the instruction starting address for relocation info. - __ set_inst_mark(); - $$$emit8$primary; - // CALL directly to the runtime - emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4), - runtime_call_Relocation::spec(), RELOC_IMM32 ); - __ clear_inst_mark(); - __ post_call_nop(); - - if (UseSSE >= 2) { - BasicType rt = tf()->return_type(); - - if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) { - // A C runtime call where the return value is unused. In SSE2+ - // mode the result needs to be removed from the FPU stack. It's - // likely that this function call could be removed by the - // optimizer if the C function is a pure function. 
- __ ffree(0); - } else if (rt == T_FLOAT) { - __ lea(rsp, Address(rsp, -4)); - __ fstp_s(Address(rsp, 0)); - __ movflt(xmm0, Address(rsp, 0)); - __ lea(rsp, Address(rsp, 4)); - } else if (rt == T_DOUBLE) { - __ lea(rsp, Address(rsp, -8)); - __ fstp_d(Address(rsp, 0)); - __ movdbl(xmm0, Address(rsp, 0)); - __ lea(rsp, Address(rsp, 8)); - } - } - %} - - enc_class pre_call_resets %{ - // If method sets FPU control word restore it here - debug_only(int off0 = __ offset()); - if (ra_->C->in_24_bit_fp_mode()) { - __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); - } - // Clear upper bits of YMM registers when current compiled code uses - // wide vectors to avoid AVX <-> SSE transition penalty during call. - __ vzeroupper(); - debug_only(int off1 = __ offset()); - assert(off1 - off0 == pre_call_resets_size(), "correct size prediction"); - %} - - enc_class post_call_FPU %{ - // If method sets FPU control word do it here also - if (Compile::current()->in_24_bit_fp_mode()) { - __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); - } - %} - - enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL - // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine - // who we intended to call. - __ set_inst_mark(); - $$$emit8$primary; - - if (!_method) { - emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4), - runtime_call_Relocation::spec(), - RELOC_IMM32); - __ clear_inst_mark(); - __ post_call_nop(); - } else { - int method_index = resolved_method_index(masm); - RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index) - : static_call_Relocation::spec(method_index); - emit_d32_reloc(masm, ($meth$$method - (int)(__ pc()) - 4), - rspec, RELOC_DISP32); - __ post_call_nop(); - address mark = __ inst_mark(); - if (CodeBuffer::supports_shared_stubs() && _method->can_be_statically_bound()) { - // Calls of the same statically bound method can share - // a stub to the interpreter. 
- __ code()->shared_stub_to_interp_for(_method, __ code()->insts()->mark_off()); - __ clear_inst_mark(); - } else { - // Emit stubs for static call. - address stub = CompiledDirectCall::emit_to_interp_stub(masm, mark); - __ clear_inst_mark(); - if (stub == nullptr) { - ciEnv::current()->record_failure("CodeCache is full"); - return; - } - } - } - %} - - enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL - __ ic_call((address)$meth$$method, resolved_method_index(masm)); - __ post_call_nop(); - %} - - enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL - int disp = in_bytes(Method::from_compiled_offset()); - assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small"); - - // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())] - __ set_inst_mark(); - $$$emit8$primary; - emit_rm(masm, 0x01, $secondary, EAX_enc ); // R/M byte - emit_d8(masm, disp); // Displacement - __ clear_inst_mark(); - __ post_call_nop(); - %} - - enc_class RegOpcImm (rRegI dst, immI8 shift) %{ // SHL, SAR, SHR - $$$emit8$primary; - emit_rm(masm, 0x3, $secondary, $dst$$reg); - $$$emit8$shift$$constant; - %} - - enc_class LdImmI (rRegI dst, immI src) %{ // Load Immediate - // Load immediate does not have a zero or sign extended version - // for 8-bit immediates - emit_opcode(masm, 0xB8 + $dst$$reg); - $$$emit32$src$$constant; - %} - - enc_class LdImmP (rRegI dst, immI src) %{ // Load Immediate - // Load immediate does not have a zero or sign extended version - // for 8-bit immediates - emit_opcode(masm, $primary + $dst$$reg); - $$$emit32$src$$constant; - %} - - enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate - // Load immediate does not have a zero or sign extended version - // for 8-bit immediates - int dst_enc = $dst$$reg; - int src_con = $src$$constant & 0x0FFFFFFFFL; - if (src_con == 0) { - // xor dst, dst - emit_opcode(masm, 0x33); - emit_rm(masm, 0x3, dst_enc, dst_enc); - } else { - emit_opcode(masm, $primary + 
dst_enc); - emit_d32(masm, src_con); - } - %} - - enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate - // Load immediate does not have a zero or sign extended version - // for 8-bit immediates - int dst_enc = $dst$$reg + 2; - int src_con = ((julong)($src$$constant)) >> 32; - if (src_con == 0) { - // xor dst, dst - emit_opcode(masm, 0x33); - emit_rm(masm, 0x3, dst_enc, dst_enc); - } else { - emit_opcode(masm, $primary + dst_enc); - emit_d32(masm, src_con); - } - %} - - - // Encode a reg-reg copy. If it is useless, then empty encoding. - enc_class enc_Copy( rRegI dst, rRegI src ) %{ - encode_Copy( masm, $dst$$reg, $src$$reg ); - %} - - enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{ - encode_Copy( masm, $dst$$reg, $src$$reg ); - %} - - enc_class RegReg (rRegI dst, rRegI src) %{ // RegReg(Many) - emit_rm(masm, 0x3, $dst$$reg, $src$$reg); - %} - - enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many) - $$$emit8$primary; - emit_rm(masm, 0x3, $dst$$reg, $src$$reg); - %} - - enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many) - $$$emit8$secondary; - emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg)); - %} - - enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many) - emit_rm(masm, 0x3, $dst$$reg, $src$$reg); - %} - - enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many) - emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg)); - %} - - enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{ - emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($src$$reg)); - %} - - enc_class Con32 (immI src) %{ // Con32(storeImmI) - // Output immediate - $$$emit32$src$$constant; - %} - - enc_class Con32FPR_as_bits(immFPR src) %{ // storeF_imm - // Output Float immediate bits - jfloat jf = $src$$constant; - int jf_as_bits = jint_cast( jf ); - emit_d32(masm, jf_as_bits); - %} - - enc_class Con32F_as_bits(immF src) %{ // storeX_imm - // Output Float immediate bits - jfloat jf = $src$$constant; - int 
jf_as_bits = jint_cast( jf ); - emit_d32(masm, jf_as_bits); - %} - - enc_class Con16 (immI src) %{ // Con16(storeImmI) - // Output immediate - $$$emit16$src$$constant; - %} - - enc_class Con_d32(immI src) %{ - emit_d32(masm,$src$$constant); - %} - - enc_class conmemref (eRegP t1) %{ // Con32(storeImmI) - // Output immediate memory reference - emit_rm(masm, 0x00, $t1$$reg, 0x05 ); - emit_d32(masm, 0x00); - %} - - enc_class lock_prefix( ) %{ - emit_opcode(masm,0xF0); // [Lock] - %} - - // Cmp-xchg long value. - // Note: we need to swap rbx, and rcx before and after the - // cmpxchg8 instruction because the instruction uses - // rcx as the high order word of the new value to store but - // our register encoding uses rbx,. - enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{ - - // XCHG rbx,ecx - emit_opcode(masm,0x87); - emit_opcode(masm,0xD9); - // [Lock] - emit_opcode(masm,0xF0); - // CMPXCHG8 [Eptr] - emit_opcode(masm,0x0F); - emit_opcode(masm,0xC7); - emit_rm( masm, 0x0, 1, $mem_ptr$$reg ); - // XCHG rbx,ecx - emit_opcode(masm,0x87); - emit_opcode(masm,0xD9); - %} - - enc_class enc_cmpxchg(eSIRegP mem_ptr) %{ - // [Lock] - emit_opcode(masm,0xF0); - - // CMPXCHG [Eptr] - emit_opcode(masm,0x0F); - emit_opcode(masm,0xB1); - emit_rm( masm, 0x0, 1, $mem_ptr$$reg ); - %} - - enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{ - // [Lock] - emit_opcode(masm,0xF0); - - // CMPXCHGB [Eptr] - emit_opcode(masm,0x0F); - emit_opcode(masm,0xB0); - emit_rm( masm, 0x0, 1, $mem_ptr$$reg ); - %} - - enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{ - // [Lock] - emit_opcode(masm,0xF0); - - // 16-bit mode - emit_opcode(masm, 0x66); - - // CMPXCHGW [Eptr] - emit_opcode(masm,0x0F); - emit_opcode(masm,0xB1); - emit_rm( masm, 0x0, 1, $mem_ptr$$reg ); - %} - - enc_class enc_flags_ne_to_boolean( iRegI res ) %{ - int res_encoding = $res$$reg; - - // MOV res,0 - emit_opcode( masm, 0xB8 + res_encoding); - emit_d32( masm, 0 ); - // JNE,s fail - emit_opcode(masm,0x75); - emit_d8(masm, 5 ); - // MOV res,1 - 
emit_opcode( masm, 0xB8 + res_encoding); - emit_d32( masm, 1 ); - // fail: - %} - - enc_class RegMem (rRegI ereg, memory mem) %{ // emit_reg_mem - int reg_encoding = $ereg$$reg; - int base = $mem$$base; - int index = $mem$$index; - int scale = $mem$$scale; - int displace = $mem$$disp; - relocInfo::relocType disp_reloc = $mem->disp_reloc(); - encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc); - %} - - enc_class RegMem_Hi(eRegL ereg, memory mem) %{ // emit_reg_mem - int reg_encoding = HIGH_FROM_LOW_ENC($ereg$$reg); // Hi register of pair, computed from lo - int base = $mem$$base; - int index = $mem$$index; - int scale = $mem$$scale; - int displace = $mem$$disp + 4; // Offset is 4 further in memory - assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" ); - encode_RegMem(masm, reg_encoding, base, index, scale, displace, relocInfo::none); - %} - - enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{ - int r1, r2; - if( $tertiary == 0xA4 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW_ENC($dst$$reg); } - else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW_ENC($dst$$reg); } - emit_opcode(masm,0x0F); - emit_opcode(masm,$tertiary); - emit_rm(masm, 0x3, r1, r2); - emit_d8(masm,$cnt$$constant); - emit_d8(masm,$primary); - emit_rm(masm, 0x3, $secondary, r1); - emit_d8(masm,$cnt$$constant); - %} - - enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{ - emit_opcode( masm, 0x8B ); // Move - emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg)); - if( $cnt$$constant > 32 ) { // Shift, if not by zero - emit_d8(masm,$primary); - emit_rm(masm, 0x3, $secondary, $dst$$reg); - emit_d8(masm,$cnt$$constant-32); - } - emit_d8(masm,$primary); - emit_rm(masm, 0x3, $secondary, HIGH_FROM_LOW_ENC($dst$$reg)); - emit_d8(masm,31); - %} - - enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{ - int r1, r2; - if( $secondary == 0x5 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW_ENC($dst$$reg); } - else { r2 = $dst$$reg; r1 = 
HIGH_FROM_LOW_ENC($dst$$reg); } - - emit_opcode( masm, 0x8B ); // Move r1,r2 - emit_rm(masm, 0x3, r1, r2); - if( $cnt$$constant > 32 ) { // Shift, if not by zero - emit_opcode(masm,$primary); - emit_rm(masm, 0x3, $secondary, r1); - emit_d8(masm,$cnt$$constant-32); - } - emit_opcode(masm,0x33); // XOR r2,r2 - emit_rm(masm, 0x3, r2, r2); - %} - - // Clone of RegMem but accepts an extra parameter to access each - // half of a double in memory; it never needs relocation info. - enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{ - emit_opcode(masm,$opcode$$constant); - int reg_encoding = $rm_reg$$reg; - int base = $mem$$base; - int index = $mem$$index; - int scale = $mem$$scale; - int displace = $mem$$disp + $disp_for_half$$constant; - relocInfo::relocType disp_reloc = relocInfo::none; - encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc); - %} - - // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!! - // - // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant - // and it never needs relocation information. - // Frequently used to move data between FPU's Stack Top and memory. 
- enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{ - int rm_byte_opcode = $rm_opcode$$constant; - int base = $mem$$base; - int index = $mem$$index; - int scale = $mem$$scale; - int displace = $mem$$disp; - assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" ); - encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, relocInfo::none); - %} - - enc_class RMopc_Mem (immI rm_opcode, memory mem) %{ - int rm_byte_opcode = $rm_opcode$$constant; - int base = $mem$$base; - int index = $mem$$index; - int scale = $mem$$scale; - int displace = $mem$$disp; - relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals - encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc); - %} - - enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{ // emit_reg_lea - int reg_encoding = $dst$$reg; - int base = $src0$$reg; // 0xFFFFFFFF indicates no base - int index = 0x04; // 0x04 indicates no index - int scale = 0x00; // 0x00 indicates no scale - int displace = $src1$$constant; // 0x00 indicates no displacement - relocInfo::relocType disp_reloc = relocInfo::none; - encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc); - %} - - enc_class min_enc (rRegI dst, rRegI src) %{ // MIN - // Compare dst,src - emit_opcode(masm,0x3B); - emit_rm(masm, 0x3, $dst$$reg, $src$$reg); - // jmp dst < src around move - emit_opcode(masm,0x7C); - emit_d8(masm,2); - // move dst,src - emit_opcode(masm,0x8B); - emit_rm(masm, 0x3, $dst$$reg, $src$$reg); - %} - - enc_class max_enc (rRegI dst, rRegI src) %{ // MAX - // Compare dst,src - emit_opcode(masm,0x3B); - emit_rm(masm, 0x3, $dst$$reg, $src$$reg); - // jmp dst > src around move - emit_opcode(masm,0x7F); - emit_d8(masm,2); - // move dst,src - emit_opcode(masm,0x8B); - emit_rm(masm, 0x3, $dst$$reg, $src$$reg); - %} - - enc_class enc_FPR_store(memory mem, regDPR src) %{ - // If src is FPR1, we can just FST to store it. 
- // Else we need to FLD it to FPR1, then FSTP to store/pop it. - int reg_encoding = 0x2; // Just store - int base = $mem$$base; - int index = $mem$$index; - int scale = $mem$$scale; - int displace = $mem$$disp; - relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals - if( $src$$reg != FPR1L_enc ) { - reg_encoding = 0x3; // Store & pop - emit_opcode( masm, 0xD9 ); // FLD (i.e., push it) - emit_d8( masm, 0xC0-1+$src$$reg ); - } - __ set_inst_mark(); // Mark start of opcode for reloc info in mem operand - emit_opcode(masm,$primary); - encode_RegMem(masm, reg_encoding, base, index, scale, displace, disp_reloc); - __ clear_inst_mark(); - %} - - enc_class neg_reg(rRegI dst) %{ - // NEG $dst - emit_opcode(masm,0xF7); - emit_rm(masm, 0x3, 0x03, $dst$$reg ); - %} - - enc_class setLT_reg(eCXRegI dst) %{ - // SETLT $dst - emit_opcode(masm,0x0F); - emit_opcode(masm,0x9C); - emit_rm( masm, 0x3, 0x4, $dst$$reg ); - %} - - enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{ // cadd_cmpLT - int tmpReg = $tmp$$reg; - - // SUB $p,$q - emit_opcode(masm,0x2B); - emit_rm(masm, 0x3, $p$$reg, $q$$reg); - // SBB $tmp,$tmp - emit_opcode(masm,0x1B); - emit_rm(masm, 0x3, tmpReg, tmpReg); - // AND $tmp,$y - emit_opcode(masm,0x23); - emit_rm(masm, 0x3, tmpReg, $y$$reg); - // ADD $p,$tmp - emit_opcode(masm,0x03); - emit_rm(masm, 0x3, $p$$reg, tmpReg); - %} - - enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{ - // TEST shift,32 - emit_opcode(masm,0xF7); - emit_rm(masm, 0x3, 0, ECX_enc); - emit_d32(masm,0x20); - // JEQ,s small - emit_opcode(masm, 0x74); - emit_d8(masm, 0x04); - // MOV $dst.hi,$dst.lo - emit_opcode( masm, 0x8B ); - emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg ); - // CLR $dst.lo - emit_opcode(masm, 0x33); - emit_rm(masm, 0x3, $dst$$reg, $dst$$reg); -// small: - // SHLD $dst.hi,$dst.lo,$shift - emit_opcode(masm,0x0F); - emit_opcode(masm,0xA5); - emit_rm(masm, 0x3, $dst$$reg, 
HIGH_FROM_LOW_ENC($dst$$reg)); - // SHL $dst.lo,$shift" - emit_opcode(masm,0xD3); - emit_rm(masm, 0x3, 0x4, $dst$$reg ); - %} - - enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{ - // TEST shift,32 - emit_opcode(masm,0xF7); - emit_rm(masm, 0x3, 0, ECX_enc); - emit_d32(masm,0x20); - // JEQ,s small - emit_opcode(masm, 0x74); - emit_d8(masm, 0x04); - // MOV $dst.lo,$dst.hi - emit_opcode( masm, 0x8B ); - emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); - // CLR $dst.hi - emit_opcode(masm, 0x33); - emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($dst$$reg)); -// small: - // SHRD $dst.lo,$dst.hi,$shift - emit_opcode(masm,0x0F); - emit_opcode(masm,0xAD); - emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg); - // SHR $dst.hi,$shift" - emit_opcode(masm,0xD3); - emit_rm(masm, 0x3, 0x5, HIGH_FROM_LOW_ENC($dst$$reg) ); - %} - - enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{ - // TEST shift,32 - emit_opcode(masm,0xF7); - emit_rm(masm, 0x3, 0, ECX_enc); - emit_d32(masm,0x20); - // JEQ,s small - emit_opcode(masm, 0x74); - emit_d8(masm, 0x05); - // MOV $dst.lo,$dst.hi - emit_opcode( masm, 0x8B ); - emit_rm(masm, 0x3, $dst$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); - // SAR $dst.hi,31 - emit_opcode(masm, 0xC1); - emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC($dst$$reg) ); - emit_d8(masm, 0x1F ); -// small: - // SHRD $dst.lo,$dst.hi,$shift - emit_opcode(masm,0x0F); - emit_opcode(masm,0xAD); - emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg); - // SAR $dst.hi,$shift" - emit_opcode(masm,0xD3); - emit_rm(masm, 0x3, 0x7, HIGH_FROM_LOW_ENC($dst$$reg) ); - %} - - - // ----------------- Encodings for floating point unit ----------------- - // May leave result in FPU-TOS or FPU reg depending on opcodes - enc_class OpcReg_FPR(regFPR src) %{ // FMUL, FDIV - $$$emit8$primary; - emit_rm(masm, 0x3, $secondary, $src$$reg ); - %} - - // Pop argument in FPR0 with FSTP ST(0) - enc_class PopFPU() %{ - emit_opcode( masm, 0xDD ); - 
emit_d8( masm, 0xD8 ); - %} - - // !!!!! equivalent to Pop_Reg_F - enc_class Pop_Reg_DPR( regDPR dst ) %{ - emit_opcode( masm, 0xDD ); // FSTP ST(i) - emit_d8( masm, 0xD8+$dst$$reg ); - %} - - enc_class Push_Reg_DPR( regDPR dst ) %{ - emit_opcode( masm, 0xD9 ); - emit_d8( masm, 0xC0-1+$dst$$reg ); // FLD ST(i-1) - %} - - enc_class strictfp_bias1( regDPR dst ) %{ - emit_opcode( masm, 0xDB ); // FLD m80real - emit_opcode( masm, 0x2D ); - emit_d32( masm, (int)StubRoutines::x86::addr_fpu_subnormal_bias1() ); - emit_opcode( masm, 0xDE ); // FMULP ST(dst), ST0 - emit_opcode( masm, 0xC8+$dst$$reg ); - %} - - enc_class strictfp_bias2( regDPR dst ) %{ - emit_opcode( masm, 0xDB ); // FLD m80real - emit_opcode( masm, 0x2D ); - emit_d32( masm, (int)StubRoutines::x86::addr_fpu_subnormal_bias2() ); - emit_opcode( masm, 0xDE ); // FMULP ST(dst), ST0 - emit_opcode( masm, 0xC8+$dst$$reg ); - %} - - // Special case for moving an integer register to a stack slot. - enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS - store_to_stackslot( masm, $primary, $src$$reg, $dst$$disp ); - %} - - // Special case for moving a register to a stack slot. 
- enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS - // Opcode already emitted - emit_rm( masm, 0x02, $src$$reg, ESP_enc ); // R/M byte - emit_rm( masm, 0x00, ESP_enc, ESP_enc); // SIB byte - emit_d32(masm, $dst$$disp); // Displacement - %} - - // Push the integer in stackSlot 'src' onto FP-stack - enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src] - store_to_stackslot( masm, $primary, $secondary, $src$$disp ); - %} - - // Push FPU's TOS float to a stack-slot, and pop FPU-stack - enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst] - store_to_stackslot( masm, 0xD9, 0x03, $dst$$disp ); - %} - - // Same as Pop_Mem_F except for opcode - // Push FPU's TOS double to a stack-slot, and pop FPU-stack - enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst] - store_to_stackslot( masm, 0xDD, 0x03, $dst$$disp ); - %} - - enc_class Pop_Reg_FPR( regFPR dst ) %{ - emit_opcode( masm, 0xDD ); // FSTP ST(i) - emit_d8( masm, 0xD8+$dst$$reg ); - %} - - enc_class Push_Reg_FPR( regFPR dst ) %{ - emit_opcode( masm, 0xD9 ); // FLD ST(i-1) - emit_d8( masm, 0xC0-1+$dst$$reg ); - %} - - // Push FPU's float to a stack-slot, and pop FPU-stack - enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{ - int pop = 0x02; - if ($src$$reg != FPR1L_enc) { - emit_opcode( masm, 0xD9 ); // FLD ST(i-1) - emit_d8( masm, 0xC0-1+$src$$reg ); - pop = 0x03; - } - store_to_stackslot( masm, 0xD9, pop, $dst$$disp ); // FST

_S [ESP+dst] - %} - - // Push FPU's double to a stack-slot, and pop FPU-stack - enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{ - int pop = 0x02; - if ($src$$reg != FPR1L_enc) { - emit_opcode( masm, 0xD9 ); // FLD ST(i-1) - emit_d8( masm, 0xC0-1+$src$$reg ); - pop = 0x03; - } - store_to_stackslot( masm, 0xDD, pop, $dst$$disp ); // FST

_D [ESP+dst] - %} - - // Push FPU's double to a FPU-stack-slot, and pop FPU-stack - enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{ - int pop = 0xD0 - 1; // -1 since we skip FLD - if ($src$$reg != FPR1L_enc) { - emit_opcode( masm, 0xD9 ); // FLD ST(src-1) - emit_d8( masm, 0xC0-1+$src$$reg ); - pop = 0xD8; - } - emit_opcode( masm, 0xDD ); - emit_d8( masm, pop+$dst$$reg ); // FST

ST(i) - %} - - - enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{ - // load dst in FPR0 - emit_opcode( masm, 0xD9 ); - emit_d8( masm, 0xC0-1+$dst$$reg ); - if ($src$$reg != FPR1L_enc) { - // fincstp - emit_opcode (masm, 0xD9); - emit_opcode (masm, 0xF7); - // swap src with FPR1: - // FXCH FPR1 with src - emit_opcode(masm, 0xD9); - emit_d8(masm, 0xC8-1+$src$$reg ); - // fdecstp - emit_opcode (masm, 0xD9); - emit_opcode (masm, 0xF6); - } - %} - - enc_class Push_ModD_encoding(regD src0, regD src1) %{ - __ subptr(rsp, 8); - __ movdbl(Address(rsp, 0), $src1$$XMMRegister); - __ fld_d(Address(rsp, 0)); - __ movdbl(Address(rsp, 0), $src0$$XMMRegister); - __ fld_d(Address(rsp, 0)); - %} - - enc_class Push_ModF_encoding(regF src0, regF src1) %{ - __ subptr(rsp, 4); - __ movflt(Address(rsp, 0), $src1$$XMMRegister); - __ fld_s(Address(rsp, 0)); - __ movflt(Address(rsp, 0), $src0$$XMMRegister); - __ fld_s(Address(rsp, 0)); - %} - - enc_class Push_ResultD(regD dst) %{ - __ fstp_d(Address(rsp, 0)); - __ movdbl($dst$$XMMRegister, Address(rsp, 0)); - __ addptr(rsp, 8); - %} - - enc_class Push_ResultF(regF dst, immI d8) %{ - __ fstp_s(Address(rsp, 0)); - __ movflt($dst$$XMMRegister, Address(rsp, 0)); - __ addptr(rsp, $d8$$constant); - %} - - enc_class Push_SrcD(regD src) %{ - __ subptr(rsp, 8); - __ movdbl(Address(rsp, 0), $src$$XMMRegister); - __ fld_d(Address(rsp, 0)); - %} - - enc_class push_stack_temp_qword() %{ - __ subptr(rsp, 8); - %} - - enc_class pop_stack_temp_qword() %{ - __ addptr(rsp, 8); - %} - - enc_class push_xmm_to_fpr1(regD src) %{ - __ movdbl(Address(rsp, 0), $src$$XMMRegister); - __ fld_d(Address(rsp, 0)); - %} - - enc_class Push_Result_Mod_DPR( regDPR src) %{ - if ($src$$reg != FPR1L_enc) { - // fincstp - emit_opcode (masm, 0xD9); - emit_opcode (masm, 0xF7); - // FXCH FPR1 with src - emit_opcode(masm, 0xD9); - emit_d8(masm, 0xC8-1+$src$$reg ); - // fdecstp - emit_opcode (masm, 0xD9); - emit_opcode (masm, 0xF6); - } - %} - - enc_class 
fnstsw_sahf_skip_parity() %{ - // fnstsw ax - emit_opcode( masm, 0xDF ); - emit_opcode( masm, 0xE0 ); - // sahf - emit_opcode( masm, 0x9E ); - // jnp ::skip - emit_opcode( masm, 0x7B ); - emit_opcode( masm, 0x05 ); - %} - - enc_class emitModDPR() %{ - // fprem must be iterative - // :: loop - // fprem - emit_opcode( masm, 0xD9 ); - emit_opcode( masm, 0xF8 ); - // wait - emit_opcode( masm, 0x9b ); - // fnstsw ax - emit_opcode( masm, 0xDF ); - emit_opcode( masm, 0xE0 ); - // sahf - emit_opcode( masm, 0x9E ); - // jp ::loop - emit_opcode( masm, 0x0F ); - emit_opcode( masm, 0x8A ); - emit_opcode( masm, 0xF4 ); - emit_opcode( masm, 0xFF ); - emit_opcode( masm, 0xFF ); - emit_opcode( masm, 0xFF ); - %} - - enc_class fpu_flags() %{ - // fnstsw_ax - emit_opcode( masm, 0xDF); - emit_opcode( masm, 0xE0); - // test ax,0x0400 - emit_opcode( masm, 0x66 ); // operand-size prefix for 16-bit immediate - emit_opcode( masm, 0xA9 ); - emit_d16 ( masm, 0x0400 ); - // // // This sequence works, but stalls for 12-16 cycles on PPro - // // test rax,0x0400 - // emit_opcode( masm, 0xA9 ); - // emit_d32 ( masm, 0x00000400 ); - // - // jz exit (no unordered comparison) - emit_opcode( masm, 0x74 ); - emit_d8 ( masm, 0x02 ); - // mov ah,1 - treat as LT case (set carry flag) - emit_opcode( masm, 0xB4 ); - emit_d8 ( masm, 0x01 ); - // sahf - emit_opcode( masm, 0x9E); - %} - - enc_class cmpF_P6_fixup() %{ - // Fixup the integer flags in case comparison involved a NaN - // - // JNP exit (no unordered comparison, P-flag is set by NaN) - emit_opcode( masm, 0x7B ); - emit_d8 ( masm, 0x03 ); - // MOV AH,1 - treat as LT case (set carry flag) - emit_opcode( masm, 0xB4 ); - emit_d8 ( masm, 0x01 ); - // SAHF - emit_opcode( masm, 0x9E); - // NOP // target for branch to avoid branch to branch - emit_opcode( masm, 0x90); - %} - -// fnstsw_ax(); -// sahf(); -// movl(dst, nan_result); -// jcc(Assembler::parity, exit); -// movl(dst, less_result); -// jcc(Assembler::below, exit); -// movl(dst, equal_result); -// 
jcc(Assembler::equal, exit); -// movl(dst, greater_result); - -// less_result = 1; -// greater_result = -1; -// equal_result = 0; -// nan_result = -1; - - enc_class CmpF_Result(rRegI dst) %{ - // fnstsw_ax(); - emit_opcode( masm, 0xDF); - emit_opcode( masm, 0xE0); - // sahf - emit_opcode( masm, 0x9E); - // movl(dst, nan_result); - emit_opcode( masm, 0xB8 + $dst$$reg); - emit_d32( masm, -1 ); - // jcc(Assembler::parity, exit); - emit_opcode( masm, 0x7A ); - emit_d8 ( masm, 0x13 ); - // movl(dst, less_result); - emit_opcode( masm, 0xB8 + $dst$$reg); - emit_d32( masm, -1 ); - // jcc(Assembler::below, exit); - emit_opcode( masm, 0x72 ); - emit_d8 ( masm, 0x0C ); - // movl(dst, equal_result); - emit_opcode( masm, 0xB8 + $dst$$reg); - emit_d32( masm, 0 ); - // jcc(Assembler::equal, exit); - emit_opcode( masm, 0x74 ); - emit_d8 ( masm, 0x05 ); - // movl(dst, greater_result); - emit_opcode( masm, 0xB8 + $dst$$reg); - emit_d32( masm, 1 ); - %} - - - // Compare the longs and set flags - // BROKEN! 
Do Not use as-is - enc_class cmpl_test( eRegL src1, eRegL src2 ) %{ - // CMP $src1.hi,$src2.hi - emit_opcode( masm, 0x3B ); - emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) ); - // JNE,s done - emit_opcode(masm,0x75); - emit_d8(masm, 2 ); - // CMP $src1.lo,$src2.lo - emit_opcode( masm, 0x3B ); - emit_rm(masm, 0x3, $src1$$reg, $src2$$reg ); -// done: - %} - - enc_class convert_int_long( regL dst, rRegI src ) %{ - // mov $dst.lo,$src - int dst_encoding = $dst$$reg; - int src_encoding = $src$$reg; - encode_Copy( masm, dst_encoding , src_encoding ); - // mov $dst.hi,$src - encode_Copy( masm, HIGH_FROM_LOW_ENC(dst_encoding), src_encoding ); - // sar $dst.hi,31 - emit_opcode( masm, 0xC1 ); - emit_rm(masm, 0x3, 7, HIGH_FROM_LOW_ENC(dst_encoding) ); - emit_d8(masm, 0x1F ); - %} - - enc_class convert_long_double( eRegL src ) %{ - // push $src.hi - emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg)); - // push $src.lo - emit_opcode(masm, 0x50+$src$$reg ); - // fild 64-bits at [SP] - emit_opcode(masm,0xdf); - emit_d8(masm, 0x6C); - emit_d8(masm, 0x24); - emit_d8(masm, 0x00); - // pop stack - emit_opcode(masm, 0x83); // add SP, #8 - emit_rm(masm, 0x3, 0x00, ESP_enc); - emit_d8(masm, 0x8); - %} - - enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{ - // IMUL EDX:EAX,$src1 - emit_opcode( masm, 0xF7 ); - emit_rm( masm, 0x3, 0x5, $src1$$reg ); - // SAR EDX,$cnt-32 - int shift_count = ((int)$cnt$$constant) - 32; - if (shift_count > 0) { - emit_opcode(masm, 0xC1); - emit_rm(masm, 0x3, 7, $dst$$reg ); - emit_d8(masm, shift_count); - } - %} - - // this version doesn't have add sp, 8 - enc_class convert_long_double2( eRegL src ) %{ - // push $src.hi - emit_opcode(masm, 0x50+HIGH_FROM_LOW_ENC($src$$reg)); - // push $src.lo - emit_opcode(masm, 0x50+$src$$reg ); - // fild 64-bits at [SP] - emit_opcode(masm,0xdf); - emit_d8(masm, 0x6C); - emit_d8(masm, 0x24); - emit_d8(masm, 0x00); - 
%} - - enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{ - // Basic idea: long = (long)int * (long)int - // IMUL EDX:EAX, src - emit_opcode( masm, 0xF7 ); - emit_rm( masm, 0x3, 0x5, $src$$reg); - %} - - enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{ - // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) - // MUL EDX:EAX, src - emit_opcode( masm, 0xF7 ); - emit_rm( masm, 0x3, 0x4, $src$$reg); - %} - - enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{ - // Basic idea: lo(result) = lo(x_lo * y_lo) - // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) - // MOV $tmp,$src.lo - encode_Copy( masm, $tmp$$reg, $src$$reg ); - // IMUL $tmp,EDX - emit_opcode( masm, 0x0F ); - emit_opcode( masm, 0xAF ); - emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); - // MOV EDX,$src.hi - encode_Copy( masm, HIGH_FROM_LOW_ENC($dst$$reg), HIGH_FROM_LOW_ENC($src$$reg) ); - // IMUL EDX,EAX - emit_opcode( masm, 0x0F ); - emit_opcode( masm, 0xAF ); - emit_rm( masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $dst$$reg ); - // ADD $tmp,EDX - emit_opcode( masm, 0x03 ); - emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); - // MUL EDX:EAX,$src.lo - emit_opcode( masm, 0xF7 ); - emit_rm( masm, 0x3, 0x4, $src$$reg ); - // ADD EDX,ESI - emit_opcode( masm, 0x03 ); - emit_rm( masm, 0x3, HIGH_FROM_LOW_ENC($dst$$reg), $tmp$$reg ); - %} - - enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{ - // Basic idea: lo(result) = lo(src * y_lo) - // hi(result) = hi(src * y_lo) + lo(src * y_hi) - // IMUL $tmp,EDX,$src - emit_opcode( masm, 0x6B ); - emit_rm( masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($dst$$reg) ); - emit_d8( masm, (int)$src$$constant ); - // MOV EDX,$src - emit_opcode(masm, 0xB8 + EDX_enc); - emit_d32( masm, (int)$src$$constant ); - // MUL EDX:EAX,EDX - emit_opcode( masm, 0xF7 ); - emit_rm( masm, 0x3, 0x4, EDX_enc ); - // ADD EDX,ESI - emit_opcode( masm, 0x03 ); - emit_rm( masm, 0x3, EDX_enc, $tmp$$reg ); 
- %} - - enc_class long_div( eRegL src1, eRegL src2 ) %{ - // PUSH src1.hi - emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) ); - // PUSH src1.lo - emit_opcode(masm, 0x50+$src1$$reg ); - // PUSH src2.hi - emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) ); - // PUSH src2.lo - emit_opcode(masm, 0x50+$src2$$reg ); - // CALL directly to the runtime - __ set_inst_mark(); - emit_opcode(masm,0xE8); // Call into runtime - emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); - __ clear_inst_mark(); - __ post_call_nop(); - // Restore stack - emit_opcode(masm, 0x83); // add SP, #framesize - emit_rm(masm, 0x3, 0x00, ESP_enc); - emit_d8(masm, 4*4); - %} - - enc_class long_mod( eRegL src1, eRegL src2 ) %{ - // PUSH src1.hi - emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src1$$reg) ); - // PUSH src1.lo - emit_opcode(masm, 0x50+$src1$$reg ); - // PUSH src2.hi - emit_opcode(masm, HIGH_FROM_LOW_ENC(0x50+$src2$$reg) ); - // PUSH src2.lo - emit_opcode(masm, 0x50+$src2$$reg ); - // CALL directly to the runtime - __ set_inst_mark(); - emit_opcode(masm,0xE8); // Call into runtime - emit_d32_reloc(masm, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); - __ clear_inst_mark(); - __ post_call_nop(); - // Restore stack - emit_opcode(masm, 0x83); // add SP, #framesize - emit_rm(masm, 0x3, 0x00, ESP_enc); - emit_d8(masm, 4*4); - %} - - enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{ - // MOV $tmp,$src.lo - emit_opcode(masm, 0x8B); - emit_rm(masm, 0x3, $tmp$$reg, $src$$reg); - // OR $tmp,$src.hi - emit_opcode(masm, 0x0B); - emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg)); - %} - - enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{ - // CMP $src1.lo,$src2.lo - emit_opcode( masm, 0x3B ); - emit_rm(masm, 0x3, $src1$$reg, $src2$$reg ); - // JNE,s skip - emit_cc(masm, 0x70, 0x5); - emit_d8(masm,2); - // CMP $src1.hi,$src2.hi - 
emit_opcode( masm, 0x3B ); - emit_rm(masm, 0x3, HIGH_FROM_LOW_ENC($src1$$reg), HIGH_FROM_LOW_ENC($src2$$reg) ); - %} - - enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{ - // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits - emit_opcode( masm, 0x3B ); - emit_rm(masm, 0x3, $src1$$reg, $src2$$reg ); - // MOV $tmp,$src1.hi - emit_opcode( masm, 0x8B ); - emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src1$$reg) ); - // SBB $tmp,$src2.hi\t! Compute flags for long compare - emit_opcode( masm, 0x1B ); - emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src2$$reg) ); - %} - - enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{ - // XOR $tmp,$tmp - emit_opcode(masm,0x33); // XOR - emit_rm(masm,0x3, $tmp$$reg, $tmp$$reg); - // CMP $tmp,$src.lo - emit_opcode( masm, 0x3B ); - emit_rm(masm, 0x3, $tmp$$reg, $src$$reg ); - // SBB $tmp,$src.hi - emit_opcode( masm, 0x1B ); - emit_rm(masm, 0x3, $tmp$$reg, HIGH_FROM_LOW_ENC($src$$reg) ); - %} - - // Sniff, sniff... smells like Gnu Superoptimizer - enc_class neg_long( eRegL dst ) %{ - emit_opcode(masm,0xF7); // NEG hi - emit_rm (masm,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg)); - emit_opcode(masm,0xF7); // NEG lo - emit_rm (masm,0x3, 0x3, $dst$$reg ); - emit_opcode(masm,0x83); // SBB hi,0 - emit_rm (masm,0x3, 0x3, HIGH_FROM_LOW_ENC($dst$$reg)); - emit_d8 (masm,0 ); - %} - - enc_class enc_pop_rdx() %{ - emit_opcode(masm,0x5A); - %} - - enc_class enc_rethrow() %{ - __ set_inst_mark(); - emit_opcode(masm, 0xE9); // jmp entry - emit_d32_reloc(masm, (int)OptoRuntime::rethrow_stub() - ((int)__ pc())-4, - runtime_call_Relocation::spec(), RELOC_IMM32 ); - __ clear_inst_mark(); - __ post_call_nop(); - %} - - - // Convert a double to an int. Java semantics require we do complex - // manglelations in the corner cases. So we set the rounding mode to - // 'zero', store the darned double down as an int, and reset the - // rounding mode to 'nearest'. 
The hardware throws an exception which - // patches up the correct value directly to the stack. - enc_class DPR2I_encoding( regDPR src ) %{ - // Flip to round-to-zero mode. We attempted to allow invalid-op - // exceptions here, so that a NAN or other corner-case value will - // thrown an exception (but normal values get converted at full speed). - // However, I2C adapters and other float-stack manglers leave pending - // invalid-op exceptions hanging. We would have to clear them before - // enabling them and that is more expensive than just testing for the - // invalid value Intel stores down in the corner cases. - emit_opcode(masm,0xD9); // FLDCW trunc - emit_opcode(masm,0x2D); - emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()); - // Allocate a word - emit_opcode(masm,0x83); // SUB ESP,4 - emit_opcode(masm,0xEC); - emit_d8(masm,0x04); - // Encoding assumes a double has been pushed into FPR0. - // Store down the double as an int, popping the FPU stack - emit_opcode(masm,0xDB); // FISTP [ESP] - emit_opcode(masm,0x1C); - emit_d8(masm,0x24); - // Restore the rounding mode; mask the exception - emit_opcode(masm,0xD9); // FLDCW std/24-bit mode - emit_opcode(masm,0x2D); - emit_d32( masm, Compile::current()->in_24_bit_fp_mode() - ? 
(int)StubRoutines::x86::addr_fpu_cntrl_wrd_24() - : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std()); - - // Load the converted int; adjust CPU stack - emit_opcode(masm,0x58); // POP EAX - emit_opcode(masm,0x3D); // CMP EAX,imm - emit_d32 (masm,0x80000000); // 0x80000000 - emit_opcode(masm,0x75); // JNE around_slow_call - emit_d8 (masm,0x07); // Size of slow_call - // Push src onto stack slow-path - emit_opcode(masm,0xD9 ); // FLD ST(i) - emit_d8 (masm,0xC0-1+$src$$reg ); - // CALL directly to the runtime - __ set_inst_mark(); - emit_opcode(masm,0xE8); // Call into runtime - emit_d32_reloc(masm, (StubRoutines::x86::d2i_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); - __ clear_inst_mark(); - __ post_call_nop(); - // Carry on here... - %} - - enc_class DPR2L_encoding( regDPR src ) %{ - emit_opcode(masm,0xD9); // FLDCW trunc - emit_opcode(masm,0x2D); - emit_d32(masm,(int)StubRoutines::x86::addr_fpu_cntrl_wrd_trunc()); - // Allocate a word - emit_opcode(masm,0x83); // SUB ESP,8 - emit_opcode(masm,0xEC); - emit_d8(masm,0x08); - // Encoding assumes a double has been pushed into FPR0. - // Store down the double as a long, popping the FPU stack - emit_opcode(masm,0xDF); // FISTP [ESP] - emit_opcode(masm,0x3C); - emit_d8(masm,0x24); - // Restore the rounding mode; mask the exception - emit_opcode(masm,0xD9); // FLDCW std/24-bit mode - emit_opcode(masm,0x2D); - emit_d32( masm, Compile::current()->in_24_bit_fp_mode() - ? 
(int)StubRoutines::x86::addr_fpu_cntrl_wrd_24() - : (int)StubRoutines::x86::addr_fpu_cntrl_wrd_std()); - - // Load the converted int; adjust CPU stack - emit_opcode(masm,0x58); // POP EAX - emit_opcode(masm,0x5A); // POP EDX - emit_opcode(masm,0x81); // CMP EDX,imm - emit_d8 (masm,0xFA); // rdx - emit_d32 (masm,0x80000000); // 0x80000000 - emit_opcode(masm,0x75); // JNE around_slow_call - emit_d8 (masm,0x07+4); // Size of slow_call - emit_opcode(masm,0x85); // TEST EAX,EAX - emit_opcode(masm,0xC0); // 2/rax,/rax, - emit_opcode(masm,0x75); // JNE around_slow_call - emit_d8 (masm,0x07); // Size of slow_call - // Push src onto stack slow-path - emit_opcode(masm,0xD9 ); // FLD ST(i) - emit_d8 (masm,0xC0-1+$src$$reg ); - // CALL directly to the runtime - __ set_inst_mark(); - emit_opcode(masm,0xE8); // Call into runtime - emit_d32_reloc(masm, (StubRoutines::x86::d2l_wrapper() - __ pc()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 ); - __ clear_inst_mark(); - __ post_call_nop(); - // Carry on here... 
- %} - - enc_class FMul_ST_reg( eRegFPR src1 ) %{ - // Operand was loaded from memory into fp ST (stack top) - // FMUL ST,$src /* D8 C8+i */ - emit_opcode(masm, 0xD8); - emit_opcode(masm, 0xC8 + $src1$$reg); - %} - - enc_class FAdd_ST_reg( eRegFPR src2 ) %{ - // FADDP ST,src2 /* D8 C0+i */ - emit_opcode(masm, 0xD8); - emit_opcode(masm, 0xC0 + $src2$$reg); - //could use FADDP src2,fpST /* DE C0+i */ - %} - - enc_class FAddP_reg_ST( eRegFPR src2 ) %{ - // FADDP src2,ST /* DE C0+i */ - emit_opcode(masm, 0xDE); - emit_opcode(masm, 0xC0 + $src2$$reg); - %} - - enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{ - // Operand has been loaded into fp ST (stack top) - // FSUB ST,$src1 - emit_opcode(masm, 0xD8); - emit_opcode(masm, 0xE0 + $src1$$reg); - - // FDIV - emit_opcode(masm, 0xD8); - emit_opcode(masm, 0xF0 + $src2$$reg); - %} - - enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{ - // Operand was loaded from memory into fp ST (stack top) - // FADD ST,$src /* D8 C0+i */ - emit_opcode(masm, 0xD8); - emit_opcode(masm, 0xC0 + $src1$$reg); - - // FMUL ST,src2 /* D8 C*+i */ - emit_opcode(masm, 0xD8); - emit_opcode(masm, 0xC8 + $src2$$reg); - %} - - - enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{ - // Operand was loaded from memory into fp ST (stack top) - // FADD ST,$src /* D8 C0+i */ - emit_opcode(masm, 0xD8); - emit_opcode(masm, 0xC0 + $src1$$reg); - - // FMULP src2,ST /* DE C8+i */ - emit_opcode(masm, 0xDE); - emit_opcode(masm, 0xC8 + $src2$$reg); - %} - - // Atomically load the volatile long - enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{ - emit_opcode(masm,0xDF); - int rm_byte_opcode = 0x05; - int base = $mem$$base; - int index = $mem$$index; - int scale = $mem$$scale; - int displace = $mem$$disp; - relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals - encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc); - store_to_stackslot( masm, 0x0DF, 0x07, $dst$$disp 
); - %} - - // Volatile Store Long. Must be atomic, so move it into - // the FP TOS and then do a 64-bit FIST. Has to probe the - // target address before the store (for null-ptr checks) - // so the memory operand is used twice in the encoding. - enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{ - store_to_stackslot( masm, 0x0DF, 0x05, $src$$disp ); - __ set_inst_mark(); // Mark start of FIST in case $mem has an oop - emit_opcode(masm,0xDF); - int rm_byte_opcode = 0x07; - int base = $mem$$base; - int index = $mem$$index; - int scale = $mem$$scale; - int displace = $mem$$disp; - relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals - encode_RegMem(masm, rm_byte_opcode, base, index, scale, displace, disp_reloc); - __ clear_inst_mark(); - %} - -%} - - -//----------FRAME-------------------------------------------------------------- -// Definition of frame structure and management information. -// -// S T A C K L A Y O U T Allocators stack-slot number -// | (to get allocators register number -// G Owned by | | v add OptoReg::stack0()) -// r CALLER | | -// o | +--------+ pad to even-align allocators stack-slot -// w V | pad0 | numbers; owned by CALLER -// t -----------+--------+----> Matcher::_in_arg_limit, unaligned -// h ^ | in | 5 -// | | args | 4 Holes in incoming args owned by SELF -// | | | | 3 -// | | +--------+ -// V | | old out| Empty on Intel, window on Sparc -// | old |preserve| Must be even aligned. -// | SP-+--------+----> Matcher::_old_SP, even aligned -// | | in | 3 area for Intel ret address -// Owned by |preserve| Empty on Sparc. 
-// SELF +--------+ -// | | pad2 | 2 pad to align old SP -// | +--------+ 1 -// | | locks | 0 -// | +--------+----> OptoReg::stack0(), even aligned -// | | pad1 | 11 pad to align new SP -// | +--------+ -// | | | 10 -// | | spills | 9 spills -// V | | 8 (pad0 slot for callee) -// -----------+--------+----> Matcher::_out_arg_limit, unaligned -// ^ | out | 7 -// | | args | 6 Holes in outgoing args owned by CALLEE -// Owned by +--------+ -// CALLEE | new out| 6 Empty on Intel, window on Sparc -// | new |preserve| Must be even-aligned. -// | SP-+--------+----> Matcher::_new_SP, even aligned -// | | | -// -// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is -// known from SELF's arguments and the Java calling convention. -// Region 6-7 is determined per call site. -// Note 2: If the calling convention leaves holes in the incoming argument -// area, those holes are owned by SELF. Holes in the outgoing area -// are owned by the CALLEE. Holes should not be necessary in the -// incoming area, as the Java calling convention is completely under -// the control of the AD file. Doubles can be sorted and packed to -// avoid holes. Holes in the outgoing arguments may be necessary for -// varargs C calling conventions. -// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is -// even aligned with pad0 as needed. -// Region 6 is even aligned. Region 6-7 is NOT even aligned; -// region 6-11 is even aligned; it may be padded out more so that -// the region from SP to FP meets the minimum stack alignment. - -frame %{ - // These three registers define part of the calling convention - // between compiled code and the interpreter. 
- inline_cache_reg(EAX); // Inline Cache Register - - // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] - cisc_spilling_operand_name(indOffset32); - - // Number of stack slots consumed by locking an object - sync_stack_slots(1); - - // Compiled code's Frame Pointer - frame_pointer(ESP); - // Interpreter stores its frame pointer in a register which is - // stored to the stack by I2CAdaptors. - // I2CAdaptors convert from interpreted java to compiled java. - interpreter_frame_pointer(EBP); - - // Stack alignment requirement - // Alignment size in bytes (128-bit -> 16 bytes) - stack_alignment(StackAlignmentInBytes); - - // Number of outgoing stack slots killed above the out_preserve_stack_slots - // for calls to C. Supports the var-args backing area for register parms. - varargs_C_out_slots_killed(0); - - // The after-PROLOG location of the return address. Location of - // return address specifies a type (REG or STACK) and a number - // representing the register number (i.e. - use a register name) or - // stack slot. - // Ret Addr is on stack in slot 0 if no locks or verification or alignment. - // Otherwise, it is above the locks and verification slot and alignment word - return_addr(STACK - 1 + - align_up((Compile::current()->in_preserve_stack_slots() + - Compile::current()->fixed_slots()), - stack_alignment_in_slots())); - - // Location of C & interpreter return values - c_return_value %{ - assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); - static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; - static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; - - // in SSE2+ mode we want to keep the FPU stack clean so pretend - // that C functions return float and double results in XMM0. 
- if( ideal_reg == Op_RegD && UseSSE>=2 ) - return OptoRegPair(XMM0b_num,XMM0_num); - if( ideal_reg == Op_RegF && UseSSE>=2 ) - return OptoRegPair(OptoReg::Bad,XMM0_num); - - return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); - %} - - // Location of return values - return_value %{ - assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); - static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num, EAX_num, FPR1L_num, FPR1L_num, EAX_num }; - static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num }; - if( ideal_reg == Op_RegD && UseSSE>=2 ) - return OptoRegPair(XMM0b_num,XMM0_num); - if( ideal_reg == Op_RegF && UseSSE>=1 ) - return OptoRegPair(OptoReg::Bad,XMM0_num); - return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); - %} - -%} - -//----------ATTRIBUTES--------------------------------------------------------- -//----------Operand Attributes------------------------------------------------- -op_attrib op_cost(0); // Required cost attribute - -//----------Instruction Attributes--------------------------------------------- -ins_attrib ins_cost(100); // Required cost attribute -ins_attrib ins_size(8); // Required size attribute (in bits) -ins_attrib ins_short_branch(0); // Required flag: is this instruction a - // non-matching short branch variant of some - // long branch? -ins_attrib ins_alignment(1); // Required alignment attribute (must be a power of 2) - // specifies the alignment that some part of the instruction (not - // necessarily the start) requires. If > 1, a compute_padding() - // function must be provided for the instruction - -//----------OPERANDS----------------------------------------------------------- -// Operand definitions must precede instruction definitions for correct parsing -// in the ADLC because operands constitute user defined types which are used in -// instruction definitions. 
- -//----------Simple Operands---------------------------------------------------- -// Immediate Operands -// Integer Immediate -operand immI() %{ - match(ConI); - - op_cost(10); - format %{ %} - interface(CONST_INTER); -%} - -// Constant for test vs zero -operand immI_0() %{ - predicate(n->get_int() == 0); - match(ConI); - - op_cost(0); - format %{ %} - interface(CONST_INTER); -%} - -// Constant for increment -operand immI_1() %{ - predicate(n->get_int() == 1); - match(ConI); - - op_cost(0); - format %{ %} - interface(CONST_INTER); -%} - -// Constant for decrement -operand immI_M1() %{ - predicate(n->get_int() == -1); - match(ConI); - - op_cost(0); - format %{ %} - interface(CONST_INTER); -%} - -// Valid scale values for addressing modes -operand immI2() %{ - predicate(0 <= n->get_int() && (n->get_int() <= 3)); - match(ConI); - - format %{ %} - interface(CONST_INTER); -%} - -operand immI8() %{ - predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); - match(ConI); - - op_cost(5); - format %{ %} - interface(CONST_INTER); -%} - -operand immU8() %{ - predicate((0 <= n->get_int()) && (n->get_int() <= 255)); - match(ConI); - - op_cost(5); - format %{ %} - interface(CONST_INTER); -%} - -operand immI16() %{ - predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); - match(ConI); - - op_cost(10); - format %{ %} - interface(CONST_INTER); -%} - -// Int Immediate non-negative -operand immU31() -%{ - predicate(n->get_int() >= 0); - match(ConI); - - op_cost(0); - format %{ %} - interface(CONST_INTER); -%} - -// Constant for long shifts -operand immI_32() %{ - predicate( n->get_int() == 32 ); - match(ConI); - - op_cost(0); - format %{ %} - interface(CONST_INTER); -%} - -operand immI_1_31() %{ - predicate( n->get_int() >= 1 && n->get_int() <= 31 ); - match(ConI); - - op_cost(0); - format %{ %} - interface(CONST_INTER); -%} - -operand immI_32_63() %{ - predicate( n->get_int() >= 32 && n->get_int() <= 63 ); - match(ConI); - op_cost(0); - - format %{ %} - 
interface(CONST_INTER); -%} - -operand immI_2() %{ - predicate( n->get_int() == 2 ); - match(ConI); - - op_cost(0); - format %{ %} - interface(CONST_INTER); -%} - -operand immI_3() %{ - predicate( n->get_int() == 3 ); - match(ConI); - - op_cost(0); - format %{ %} - interface(CONST_INTER); -%} - -operand immI_4() -%{ - predicate(n->get_int() == 4); - match(ConI); - - op_cost(0); - format %{ %} - interface(CONST_INTER); -%} - -operand immI_8() -%{ - predicate(n->get_int() == 8); - match(ConI); - - op_cost(0); - format %{ %} - interface(CONST_INTER); -%} - -// Pointer Immediate -operand immP() %{ - match(ConP); - - op_cost(10); - format %{ %} - interface(CONST_INTER); -%} - -// Null Pointer Immediate -operand immP0() %{ - predicate( n->get_ptr() == 0 ); - match(ConP); - op_cost(0); - - format %{ %} - interface(CONST_INTER); -%} - -// Long Immediate -operand immL() %{ - match(ConL); - - op_cost(20); - format %{ %} - interface(CONST_INTER); -%} - -// Long Immediate zero -operand immL0() %{ - predicate( n->get_long() == 0L ); - match(ConL); - op_cost(0); - - format %{ %} - interface(CONST_INTER); -%} - -// Long Immediate zero -operand immL_M1() %{ - predicate( n->get_long() == -1L ); - match(ConL); - op_cost(0); - - format %{ %} - interface(CONST_INTER); -%} - -// Long immediate from 0 to 127. -// Used for a shorter form of long mul by 10. 
-operand immL_127() %{ - predicate((0 <= n->get_long()) && (n->get_long() <= 127)); - match(ConL); - op_cost(0); - - format %{ %} - interface(CONST_INTER); -%} - -// Long Immediate: low 32-bit mask -operand immL_32bits() %{ - predicate(n->get_long() == 0xFFFFFFFFL); - match(ConL); - op_cost(0); - - format %{ %} - interface(CONST_INTER); -%} - -// Long Immediate: low 32-bit mask -operand immL32() %{ - predicate(n->get_long() == (int)(n->get_long())); - match(ConL); - op_cost(20); - - format %{ %} - interface(CONST_INTER); -%} - -//Double Immediate zero -operand immDPR0() %{ - // Do additional (and counter-intuitive) test against NaN to work around VC++ - // bug that generates code such that NaNs compare equal to 0.0 - predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) ); - match(ConD); - - op_cost(5); - format %{ %} - interface(CONST_INTER); -%} - -// Double Immediate one -operand immDPR1() %{ - predicate( UseSSE<=1 && n->getd() == 1.0 ); - match(ConD); - - op_cost(5); - format %{ %} - interface(CONST_INTER); -%} - -// Double Immediate -operand immDPR() %{ - predicate(UseSSE<=1); - match(ConD); - - op_cost(5); - format %{ %} - interface(CONST_INTER); -%} - -operand immD() %{ - predicate(UseSSE>=2); - match(ConD); - - op_cost(5); - format %{ %} - interface(CONST_INTER); -%} - -// Double Immediate zero -operand immD0() %{ - // Do additional (and counter-intuitive) test against NaN to work around VC++ - // bug that generates code such that NaNs compare equal to 0.0 AND do not - // compare equal to -0.0. 
- predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 ); - match(ConD); - - format %{ %} - interface(CONST_INTER); -%} - -// Float Immediate zero -operand immFPR0() %{ - predicate(UseSSE == 0 && n->getf() == 0.0F); - match(ConF); - - op_cost(5); - format %{ %} - interface(CONST_INTER); -%} - -// Float Immediate one -operand immFPR1() %{ - predicate(UseSSE == 0 && n->getf() == 1.0F); - match(ConF); - - op_cost(5); - format %{ %} - interface(CONST_INTER); -%} - -// Float Immediate -operand immFPR() %{ - predicate( UseSSE == 0 ); - match(ConF); - - op_cost(5); - format %{ %} - interface(CONST_INTER); -%} - -// Float Immediate -operand immF() %{ - predicate(UseSSE >= 1); - match(ConF); - - op_cost(5); - format %{ %} - interface(CONST_INTER); -%} - -// Float Immediate zero. Zero and not -0.0 -operand immF0() %{ - predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 ); - match(ConF); - - op_cost(5); - format %{ %} - interface(CONST_INTER); -%} - -// Immediates for special shifts (sign extend) - -// Constants for increment -operand immI_16() %{ - predicate( n->get_int() == 16 ); - match(ConI); - - format %{ %} - interface(CONST_INTER); -%} - -operand immI_24() %{ - predicate( n->get_int() == 24 ); - match(ConI); - - format %{ %} - interface(CONST_INTER); -%} - -// Constant for byte-wide masking -operand immI_255() %{ - predicate( n->get_int() == 255 ); - match(ConI); - - format %{ %} - interface(CONST_INTER); -%} - -// Constant for short-wide masking -operand immI_65535() %{ - predicate(n->get_int() == 65535); - match(ConI); - - format %{ %} - interface(CONST_INTER); -%} - -operand kReg() -%{ - constraint(ALLOC_IN_RC(vectmask_reg)); - match(RegVectMask); - format %{%} - interface(REG_INTER); -%} - -// Register Operands -// Integer Register -operand rRegI() %{ - constraint(ALLOC_IN_RC(int_reg)); - match(RegI); - match(xRegI); - match(eAXRegI); - match(eBXRegI); - match(eCXRegI); - match(eDXRegI); - match(eDIRegI); - match(eSIRegI); - - format %{ %} - interface(REG_INTER); 
-%} - -// Subset of Integer Register -operand xRegI(rRegI reg) %{ - constraint(ALLOC_IN_RC(int_x_reg)); - match(reg); - match(eAXRegI); - match(eBXRegI); - match(eCXRegI); - match(eDXRegI); - - format %{ %} - interface(REG_INTER); -%} - -// Special Registers -operand eAXRegI(xRegI reg) %{ - constraint(ALLOC_IN_RC(eax_reg)); - match(reg); - match(rRegI); - - format %{ "EAX" %} - interface(REG_INTER); -%} - -// Special Registers -operand eBXRegI(xRegI reg) %{ - constraint(ALLOC_IN_RC(ebx_reg)); - match(reg); - match(rRegI); - - format %{ "EBX" %} - interface(REG_INTER); -%} - -operand eCXRegI(xRegI reg) %{ - constraint(ALLOC_IN_RC(ecx_reg)); - match(reg); - match(rRegI); - - format %{ "ECX" %} - interface(REG_INTER); -%} - -operand eDXRegI(xRegI reg) %{ - constraint(ALLOC_IN_RC(edx_reg)); - match(reg); - match(rRegI); - - format %{ "EDX" %} - interface(REG_INTER); -%} - -operand eDIRegI(xRegI reg) %{ - constraint(ALLOC_IN_RC(edi_reg)); - match(reg); - match(rRegI); - - format %{ "EDI" %} - interface(REG_INTER); -%} - -operand nadxRegI() %{ - constraint(ALLOC_IN_RC(nadx_reg)); - match(RegI); - match(eBXRegI); - match(eCXRegI); - match(eSIRegI); - match(eDIRegI); - - format %{ %} - interface(REG_INTER); -%} - -operand ncxRegI() %{ - constraint(ALLOC_IN_RC(ncx_reg)); - match(RegI); - match(eAXRegI); - match(eDXRegI); - match(eSIRegI); - match(eDIRegI); - - format %{ %} - interface(REG_INTER); -%} - -// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg -// // -operand eSIRegI(xRegI reg) %{ - constraint(ALLOC_IN_RC(esi_reg)); - match(reg); - match(rRegI); - - format %{ "ESI" %} - interface(REG_INTER); -%} - -// Pointer Register -operand anyRegP() %{ - constraint(ALLOC_IN_RC(any_reg)); - match(RegP); - match(eAXRegP); - match(eBXRegP); - match(eCXRegP); - match(eDIRegP); - match(eRegP); - - format %{ %} - interface(REG_INTER); -%} - -operand eRegP() %{ - constraint(ALLOC_IN_RC(int_reg)); - match(RegP); - match(eAXRegP); - match(eBXRegP); - 
match(eCXRegP); - match(eDIRegP); - - format %{ %} - interface(REG_INTER); -%} - -operand rRegP() %{ - constraint(ALLOC_IN_RC(int_reg)); - match(RegP); - match(eAXRegP); - match(eBXRegP); - match(eCXRegP); - match(eDIRegP); - - format %{ %} - interface(REG_INTER); -%} - -// On windows95, EBP is not safe to use for implicit null tests. -operand eRegP_no_EBP() %{ - constraint(ALLOC_IN_RC(int_reg_no_ebp)); - match(RegP); - match(eAXRegP); - match(eBXRegP); - match(eCXRegP); - match(eDIRegP); - - op_cost(100); - format %{ %} - interface(REG_INTER); -%} - -operand pRegP() %{ - constraint(ALLOC_IN_RC(p_reg)); - match(RegP); - match(eBXRegP); - match(eDXRegP); - match(eSIRegP); - match(eDIRegP); - - format %{ %} - interface(REG_INTER); -%} - -// Special Registers -// Return a pointer value -operand eAXRegP(eRegP reg) %{ - constraint(ALLOC_IN_RC(eax_reg)); - match(reg); - format %{ "EAX" %} - interface(REG_INTER); -%} - -// Used in AtomicAdd -operand eBXRegP(eRegP reg) %{ - constraint(ALLOC_IN_RC(ebx_reg)); - match(reg); - format %{ "EBX" %} - interface(REG_INTER); -%} - -// Tail-call (interprocedural jump) to interpreter -operand eCXRegP(eRegP reg) %{ - constraint(ALLOC_IN_RC(ecx_reg)); - match(reg); - format %{ "ECX" %} - interface(REG_INTER); -%} - -operand eDXRegP(eRegP reg) %{ - constraint(ALLOC_IN_RC(edx_reg)); - match(reg); - format %{ "EDX" %} - interface(REG_INTER); -%} - -operand eSIRegP(eRegP reg) %{ - constraint(ALLOC_IN_RC(esi_reg)); - match(reg); - format %{ "ESI" %} - interface(REG_INTER); -%} - -// Used in rep stosw -operand eDIRegP(eRegP reg) %{ - constraint(ALLOC_IN_RC(edi_reg)); - match(reg); - format %{ "EDI" %} - interface(REG_INTER); -%} - -operand eRegL() %{ - constraint(ALLOC_IN_RC(long_reg)); - match(RegL); - match(eADXRegL); - - format %{ %} - interface(REG_INTER); -%} - -operand eADXRegL( eRegL reg ) %{ - constraint(ALLOC_IN_RC(eadx_reg)); - match(reg); - - format %{ "EDX:EAX" %} - interface(REG_INTER); -%} - -operand eBCXRegL( eRegL reg ) %{ - 
constraint(ALLOC_IN_RC(ebcx_reg)); - match(reg); - - format %{ "EBX:ECX" %} - interface(REG_INTER); -%} - -operand eBDPRegL( eRegL reg ) %{ - constraint(ALLOC_IN_RC(ebpd_reg)); - match(reg); - - format %{ "EBP:EDI" %} - interface(REG_INTER); -%} -// Special case for integer high multiply -operand eADXRegL_low_only() %{ - constraint(ALLOC_IN_RC(eadx_reg)); - match(RegL); - - format %{ "EAX" %} - interface(REG_INTER); -%} - -// Flags register, used as output of compare instructions -operand rFlagsReg() %{ - constraint(ALLOC_IN_RC(int_flags)); - match(RegFlags); - - format %{ "EFLAGS" %} - interface(REG_INTER); -%} - -// Flags register, used as output of compare instructions -operand eFlagsReg() %{ - constraint(ALLOC_IN_RC(int_flags)); - match(RegFlags); - - format %{ "EFLAGS" %} - interface(REG_INTER); -%} - -// Flags register, used as output of FLOATING POINT compare instructions -operand eFlagsRegU() %{ - constraint(ALLOC_IN_RC(int_flags)); - match(RegFlags); - - format %{ "EFLAGS_U" %} - interface(REG_INTER); -%} - -operand eFlagsRegUCF() %{ - constraint(ALLOC_IN_RC(int_flags)); - match(RegFlags); - predicate(false); - - format %{ "EFLAGS_U_CF" %} - interface(REG_INTER); -%} - -// Condition Code Register used by long compare -operand flagsReg_long_LTGE() %{ - constraint(ALLOC_IN_RC(int_flags)); - match(RegFlags); - format %{ "FLAGS_LTGE" %} - interface(REG_INTER); -%} -operand flagsReg_long_EQNE() %{ - constraint(ALLOC_IN_RC(int_flags)); - match(RegFlags); - format %{ "FLAGS_EQNE" %} - interface(REG_INTER); -%} -operand flagsReg_long_LEGT() %{ - constraint(ALLOC_IN_RC(int_flags)); - match(RegFlags); - format %{ "FLAGS_LEGT" %} - interface(REG_INTER); -%} - -// Condition Code Register used by unsigned long compare -operand flagsReg_ulong_LTGE() %{ - constraint(ALLOC_IN_RC(int_flags)); - match(RegFlags); - format %{ "FLAGS_U_LTGE" %} - interface(REG_INTER); -%} -operand flagsReg_ulong_EQNE() %{ - constraint(ALLOC_IN_RC(int_flags)); - match(RegFlags); - format %{ 
"FLAGS_U_EQNE" %} - interface(REG_INTER); -%} -operand flagsReg_ulong_LEGT() %{ - constraint(ALLOC_IN_RC(int_flags)); - match(RegFlags); - format %{ "FLAGS_U_LEGT" %} - interface(REG_INTER); -%} - -// Float register operands -operand regDPR() %{ - predicate( UseSSE < 2 ); - constraint(ALLOC_IN_RC(fp_dbl_reg)); - match(RegD); - match(regDPR1); - match(regDPR2); - format %{ %} - interface(REG_INTER); -%} - -operand regDPR1(regDPR reg) %{ - predicate( UseSSE < 2 ); - constraint(ALLOC_IN_RC(fp_dbl_reg0)); - match(reg); - format %{ "FPR1" %} - interface(REG_INTER); -%} - -operand regDPR2(regDPR reg) %{ - predicate( UseSSE < 2 ); - constraint(ALLOC_IN_RC(fp_dbl_reg1)); - match(reg); - format %{ "FPR2" %} - interface(REG_INTER); -%} - -operand regnotDPR1(regDPR reg) %{ - predicate( UseSSE < 2 ); - constraint(ALLOC_IN_RC(fp_dbl_notreg0)); - match(reg); - format %{ %} - interface(REG_INTER); -%} - -// Float register operands -operand regFPR() %{ - predicate( UseSSE < 2 ); - constraint(ALLOC_IN_RC(fp_flt_reg)); - match(RegF); - match(regFPR1); - format %{ %} - interface(REG_INTER); -%} - -// Float register operands -operand regFPR1(regFPR reg) %{ - predicate( UseSSE < 2 ); - constraint(ALLOC_IN_RC(fp_flt_reg0)); - match(reg); - format %{ "FPR1" %} - interface(REG_INTER); -%} - -// XMM Float register operands -operand regF() %{ - predicate( UseSSE>=1 ); - constraint(ALLOC_IN_RC(float_reg_legacy)); - match(RegF); - format %{ %} - interface(REG_INTER); -%} - -operand legRegF() %{ - predicate( UseSSE>=1 ); - constraint(ALLOC_IN_RC(float_reg_legacy)); - match(RegF); - format %{ %} - interface(REG_INTER); -%} - -// Float register operands -operand vlRegF() %{ - constraint(ALLOC_IN_RC(float_reg_vl)); - match(RegF); - - format %{ %} - interface(REG_INTER); -%} - -// XMM Double register operands -operand regD() %{ - predicate( UseSSE>=2 ); - constraint(ALLOC_IN_RC(double_reg_legacy)); - match(RegD); - format %{ %} - interface(REG_INTER); -%} - -// Double register operands -operand 
legRegD() %{ - predicate( UseSSE>=2 ); - constraint(ALLOC_IN_RC(double_reg_legacy)); - match(RegD); - format %{ %} - interface(REG_INTER); -%} - -operand vlRegD() %{ - constraint(ALLOC_IN_RC(double_reg_vl)); - match(RegD); - - format %{ %} - interface(REG_INTER); -%} - -//----------Memory Operands---------------------------------------------------- -// Direct Memory Operand -operand direct(immP addr) %{ - match(addr); - - format %{ "[$addr]" %} - interface(MEMORY_INTER) %{ - base(0xFFFFFFFF); - index(0x4); - scale(0x0); - disp($addr); - %} -%} - -// Indirect Memory Operand -operand indirect(eRegP reg) %{ - constraint(ALLOC_IN_RC(int_reg)); - match(reg); - - format %{ "[$reg]" %} - interface(MEMORY_INTER) %{ - base($reg); - index(0x4); - scale(0x0); - disp(0x0); - %} -%} - -// Indirect Memory Plus Short Offset Operand -operand indOffset8(eRegP reg, immI8 off) %{ - match(AddP reg off); - - format %{ "[$reg + $off]" %} - interface(MEMORY_INTER) %{ - base($reg); - index(0x4); - scale(0x0); - disp($off); - %} -%} - -// Indirect Memory Plus Long Offset Operand -operand indOffset32(eRegP reg, immI off) %{ - match(AddP reg off); - - format %{ "[$reg + $off]" %} - interface(MEMORY_INTER) %{ - base($reg); - index(0x4); - scale(0x0); - disp($off); - %} -%} - -// Indirect Memory Plus Long Offset Operand -operand indOffset32X(rRegI reg, immP off) %{ - match(AddP off reg); - - format %{ "[$reg + $off]" %} - interface(MEMORY_INTER) %{ - base($reg); - index(0x4); - scale(0x0); - disp($off); - %} -%} - -// Indirect Memory Plus Index Register Plus Offset Operand -operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{ - match(AddP (AddP reg ireg) off); - - op_cost(10); - format %{"[$reg + $off + $ireg]" %} - interface(MEMORY_INTER) %{ - base($reg); - index($ireg); - scale(0x0); - disp($off); - %} -%} - -// Indirect Memory Plus Index Register Plus Offset Operand -operand indIndex(eRegP reg, rRegI ireg) %{ - match(AddP reg ireg); - - op_cost(10); - format %{"[$reg + $ireg]" %} - 
interface(MEMORY_INTER) %{ - base($reg); - index($ireg); - scale(0x0); - disp(0x0); - %} -%} - -// // ------------------------------------------------------------------------- -// // 486 architecture doesn't support "scale * index + offset" with out a base -// // ------------------------------------------------------------------------- -// // Scaled Memory Operands -// // Indirect Memory Times Scale Plus Offset Operand -// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{ -// match(AddP off (LShiftI ireg scale)); -// -// op_cost(10); -// format %{"[$off + $ireg << $scale]" %} -// interface(MEMORY_INTER) %{ -// base(0x4); -// index($ireg); -// scale($scale); -// disp($off); -// %} -// %} - -// Indirect Memory Times Scale Plus Index Register -operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{ - match(AddP reg (LShiftI ireg scale)); - - op_cost(10); - format %{"[$reg + $ireg << $scale]" %} - interface(MEMORY_INTER) %{ - base($reg); - index($ireg); - scale($scale); - disp(0x0); - %} -%} - -// Indirect Memory Times Scale Plus Index Register Plus Offset Operand -operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{ - match(AddP (AddP reg (LShiftI ireg scale)) off); - - op_cost(10); - format %{"[$reg + $off + $ireg << $scale]" %} - interface(MEMORY_INTER) %{ - base($reg); - index($ireg); - scale($scale); - disp($off); - %} -%} - -//----------Load Long Memory Operands------------------------------------------ -// The load-long idiom will use it's address expression again after loading -// the first word of the long. If the load-long destination overlaps with -// registers used in the addressing expression, the 2nd half will be loaded -// from a clobbered address. Fix this by requiring that load-long use -// address registers that do not overlap with the load-long target. 
- -// load-long support -operand load_long_RegP() %{ - constraint(ALLOC_IN_RC(esi_reg)); - match(RegP); - match(eSIRegP); - op_cost(100); - format %{ %} - interface(REG_INTER); -%} - -// Indirect Memory Operand Long -operand load_long_indirect(load_long_RegP reg) %{ - constraint(ALLOC_IN_RC(esi_reg)); - match(reg); - - format %{ "[$reg]" %} - interface(MEMORY_INTER) %{ - base($reg); - index(0x4); - scale(0x0); - disp(0x0); - %} -%} - -// Indirect Memory Plus Long Offset Operand -operand load_long_indOffset32(load_long_RegP reg, immI off) %{ - match(AddP reg off); - - format %{ "[$reg + $off]" %} - interface(MEMORY_INTER) %{ - base($reg); - index(0x4); - scale(0x0); - disp($off); - %} -%} - -opclass load_long_memory(load_long_indirect, load_long_indOffset32); - - -//----------Special Memory Operands-------------------------------------------- -// Stack Slot Operand - This operand is used for loading and storing temporary -// values on the stack where a match requires a value to -// flow through memory. 
-operand stackSlotP(sRegP reg) %{ - constraint(ALLOC_IN_RC(stack_slots)); - // No match rule because this operand is only generated in matching - format %{ "[$reg]" %} - interface(MEMORY_INTER) %{ - base(0x4); // ESP - index(0x4); // No Index - scale(0x0); // No Scale - disp($reg); // Stack Offset - %} -%} - -operand stackSlotI(sRegI reg) %{ - constraint(ALLOC_IN_RC(stack_slots)); - // No match rule because this operand is only generated in matching - format %{ "[$reg]" %} - interface(MEMORY_INTER) %{ - base(0x4); // ESP - index(0x4); // No Index - scale(0x0); // No Scale - disp($reg); // Stack Offset - %} -%} - -operand stackSlotF(sRegF reg) %{ - constraint(ALLOC_IN_RC(stack_slots)); - // No match rule because this operand is only generated in matching - format %{ "[$reg]" %} - interface(MEMORY_INTER) %{ - base(0x4); // ESP - index(0x4); // No Index - scale(0x0); // No Scale - disp($reg); // Stack Offset - %} -%} - -operand stackSlotD(sRegD reg) %{ - constraint(ALLOC_IN_RC(stack_slots)); - // No match rule because this operand is only generated in matching - format %{ "[$reg]" %} - interface(MEMORY_INTER) %{ - base(0x4); // ESP - index(0x4); // No Index - scale(0x0); // No Scale - disp($reg); // Stack Offset - %} -%} - -operand stackSlotL(sRegL reg) %{ - constraint(ALLOC_IN_RC(stack_slots)); - // No match rule because this operand is only generated in matching - format %{ "[$reg]" %} - interface(MEMORY_INTER) %{ - base(0x4); // ESP - index(0x4); // No Index - scale(0x0); // No Scale - disp($reg); // Stack Offset - %} -%} - -//----------Conditional Branch Operands---------------------------------------- -// Comparison Op - This is the operation of the comparison, and is limited to -// the following set of codes: -// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) -// -// Other attributes of the comparison, such as unsignedness, are specified -// by the comparison instruction that sets a condition code flags register. 
-// That result is represented by a flags operand whose subtype is appropriate -// to the unsignedness (etc.) of the comparison. -// -// Later, the instruction which matches both the Comparison Op (a Bool) and -// the flags (produced by the Cmp) specifies the coding of the comparison op -// by matching a specific subtype of Bool operand below, such as cmpOpU. - -// Comparison Code -operand cmpOp() %{ - match(Bool); - - format %{ "" %} - interface(COND_INTER) %{ - equal(0x4, "e"); - not_equal(0x5, "ne"); - less(0xC, "l"); - greater_equal(0xD, "ge"); - less_equal(0xE, "le"); - greater(0xF, "g"); - overflow(0x0, "o"); - no_overflow(0x1, "no"); - %} -%} - -// Comparison Code, unsigned compare. Used by FP also, with -// C2 (unordered) turned into GT or LT already. The other bits -// C0 and C3 are turned into Carry & Zero flags. -operand cmpOpU() %{ - match(Bool); - - format %{ "" %} - interface(COND_INTER) %{ - equal(0x4, "e"); - not_equal(0x5, "ne"); - less(0x2, "b"); - greater_equal(0x3, "nb"); - less_equal(0x6, "be"); - greater(0x7, "nbe"); - overflow(0x0, "o"); - no_overflow(0x1, "no"); - %} -%} - -// Floating comparisons that don't require any fixup for the unordered case -operand cmpOpUCF() %{ - match(Bool); - predicate(n->as_Bool()->_test._test == BoolTest::lt || - n->as_Bool()->_test._test == BoolTest::ge || - n->as_Bool()->_test._test == BoolTest::le || - n->as_Bool()->_test._test == BoolTest::gt); - format %{ "" %} - interface(COND_INTER) %{ - equal(0x4, "e"); - not_equal(0x5, "ne"); - less(0x2, "b"); - greater_equal(0x3, "nb"); - less_equal(0x6, "be"); - greater(0x7, "nbe"); - overflow(0x0, "o"); - no_overflow(0x1, "no"); - %} -%} - - -// Floating comparisons that can be fixed up with extra conditional jumps -operand cmpOpUCF2() %{ - match(Bool); - predicate(n->as_Bool()->_test._test == BoolTest::ne || - n->as_Bool()->_test._test == BoolTest::eq); - format %{ "" %} - interface(COND_INTER) %{ - equal(0x4, "e"); - not_equal(0x5, "ne"); - less(0x2, "b"); - 
greater_equal(0x3, "nb"); - less_equal(0x6, "be"); - greater(0x7, "nbe"); - overflow(0x0, "o"); - no_overflow(0x1, "no"); - %} -%} - -// Comparison Code for FP conditional move -operand cmpOp_fcmov() %{ - match(Bool); - - predicate(n->as_Bool()->_test._test != BoolTest::overflow && - n->as_Bool()->_test._test != BoolTest::no_overflow); - format %{ "" %} - interface(COND_INTER) %{ - equal (0x0C8); - not_equal (0x1C8); - less (0x0C0); - greater_equal(0x1C0); - less_equal (0x0D0); - greater (0x1D0); - overflow(0x0, "o"); // not really supported by the instruction - no_overflow(0x1, "no"); // not really supported by the instruction - %} -%} - -// Comparison Code used in long compares -operand cmpOp_commute() %{ - match(Bool); - - format %{ "" %} - interface(COND_INTER) %{ - equal(0x4, "e"); - not_equal(0x5, "ne"); - less(0xF, "g"); - greater_equal(0xE, "le"); - less_equal(0xD, "ge"); - greater(0xC, "l"); - overflow(0x0, "o"); - no_overflow(0x1, "no"); - %} -%} - -// Comparison Code used in unsigned long compares -operand cmpOpU_commute() %{ - match(Bool); - - format %{ "" %} - interface(COND_INTER) %{ - equal(0x4, "e"); - not_equal(0x5, "ne"); - less(0x7, "nbe"); - greater_equal(0x6, "be"); - less_equal(0x3, "nb"); - greater(0x2, "b"); - overflow(0x0, "o"); - no_overflow(0x1, "no"); - %} -%} - -//----------OPERAND CLASSES---------------------------------------------------- -// Operand Classes are groups of operands that are used as to simplify -// instruction definitions by not requiring the AD writer to specify separate -// instructions for every form of operand when the instruction accepts -// multiple operand types with the same basic encoding and format. The classic -// case of this is memory operands. - -opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset, - indIndex, indIndexScale, indIndexScaleOffset); - -// Long memory operations are encoded in 2 instructions and a +4 offset. 
-// This means some kind of offset is always required and you cannot use -// an oop as the offset (done when working on static globals). -opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset, - indIndex, indIndexScale, indIndexScaleOffset); - - -//----------PIPELINE----------------------------------------------------------- -// Rules which define the behavior of the target architectures pipeline. -pipeline %{ - -//----------ATTRIBUTES--------------------------------------------------------- -attributes %{ - variable_size_instructions; // Fixed size instructions - max_instructions_per_bundle = 3; // Up to 3 instructions per bundle - instruction_unit_size = 1; // An instruction is 1 bytes long - instruction_fetch_unit_size = 16; // The processor fetches one line - instruction_fetch_units = 1; // of 16 bytes - - // List of nop instructions - nops( MachNop ); -%} - -//----------RESOURCES---------------------------------------------------------- -// Resources are the functional units available to the machine - -// Generic P2/P3 pipeline -// 3 decoders, only D0 handles big operands; a "bundle" is the limit of -// 3 instructions decoded per cycle. -// 2 load/store ops per cycle, 1 branch, 1 FPU, -// 2 ALU op, only ALU0 handles mul/div instructions. -resources( D0, D1, D2, DECODE = D0 | D1 | D2, - MS0, MS1, MEM = MS0 | MS1, - BR, FPU, - ALU0, ALU1, ALU = ALU0 | ALU1 ); - -//----------PIPELINE DESCRIPTION----------------------------------------------- -// Pipeline Description specifies the stages in the machine's pipeline - -// Generic P2/P3 pipeline -pipe_desc(S0, S1, S2, S3, S4, S5); - -//----------PIPELINE CLASSES--------------------------------------------------- -// Pipeline Classes describe the stages in which input and output are -// referenced by the hardware pipeline. 
- -// Naming convention: ialu or fpu -// Then: _reg -// Then: _reg if there is a 2nd register -// Then: _long if it's a pair of instructions implementing a long -// Then: _fat if it requires the big decoder -// Or: _mem if it requires the big decoder and a memory unit. - -// Integer ALU reg operation -pipe_class ialu_reg(rRegI dst) %{ - single_instruction; - dst : S4(write); - dst : S3(read); - DECODE : S0; // any decoder - ALU : S3; // any alu -%} - -// Long ALU reg operation -pipe_class ialu_reg_long(eRegL dst) %{ - instruction_count(2); - dst : S4(write); - dst : S3(read); - DECODE : S0(2); // any 2 decoders - ALU : S3(2); // both alus -%} - -// Integer ALU reg operation using big decoder -pipe_class ialu_reg_fat(rRegI dst) %{ - single_instruction; - dst : S4(write); - dst : S3(read); - D0 : S0; // big decoder only - ALU : S3; // any alu -%} - -// Long ALU reg operation using big decoder -pipe_class ialu_reg_long_fat(eRegL dst) %{ - instruction_count(2); - dst : S4(write); - dst : S3(read); - D0 : S0(2); // big decoder only; twice - ALU : S3(2); // any 2 alus -%} - -// Integer ALU reg-reg operation -pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{ - single_instruction; - dst : S4(write); - src : S3(read); - DECODE : S0; // any decoder - ALU : S3; // any alu -%} - -// Long ALU reg-reg operation -pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{ - instruction_count(2); - dst : S4(write); - src : S3(read); - DECODE : S0(2); // any 2 decoders - ALU : S3(2); // both alus -%} - -// Integer ALU reg-reg operation -pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{ - single_instruction; - dst : S4(write); - src : S3(read); - D0 : S0; // big decoder only - ALU : S3; // any alu -%} - -// Long ALU reg-reg operation -pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{ - instruction_count(2); - dst : S4(write); - src : S3(read); - D0 : S0(2); // big decoder only; twice - ALU : S3(2); // both alus -%} - -// Integer ALU reg-mem operation -pipe_class 
ialu_reg_mem(rRegI dst, memory mem) %{ - single_instruction; - dst : S5(write); - mem : S3(read); - D0 : S0; // big decoder only - ALU : S4; // any alu - MEM : S3; // any mem -%} - -// Long ALU reg-mem operation -pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{ - instruction_count(2); - dst : S5(write); - mem : S3(read); - D0 : S0(2); // big decoder only; twice - ALU : S4(2); // any 2 alus - MEM : S3(2); // both mems -%} - -// Integer mem operation (prefetch) -pipe_class ialu_mem(memory mem) -%{ - single_instruction; - mem : S3(read); - D0 : S0; // big decoder only - MEM : S3; // any mem -%} - -// Integer Store to Memory -pipe_class ialu_mem_reg(memory mem, rRegI src) %{ - single_instruction; - mem : S3(read); - src : S5(read); - D0 : S0; // big decoder only - ALU : S4; // any alu - MEM : S3; -%} - -// Long Store to Memory -pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{ - instruction_count(2); - mem : S3(read); - src : S5(read); - D0 : S0(2); // big decoder only; twice - ALU : S4(2); // any 2 alus - MEM : S3(2); // Both mems -%} - -// Integer Store to Memory -pipe_class ialu_mem_imm(memory mem) %{ - single_instruction; - mem : S3(read); - D0 : S0; // big decoder only - ALU : S4; // any alu - MEM : S3; -%} - -// Integer ALU0 reg-reg operation -pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{ - single_instruction; - dst : S4(write); - src : S3(read); - D0 : S0; // Big decoder only - ALU0 : S3; // only alu0 -%} - -// Integer ALU0 reg-mem operation -pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{ - single_instruction; - dst : S5(write); - mem : S3(read); - D0 : S0; // big decoder only - ALU0 : S4; // ALU0 only - MEM : S3; // any mem -%} - -// Integer ALU reg-reg operation -pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{ - single_instruction; - cr : S4(write); - src1 : S3(read); - src2 : S3(read); - DECODE : S0; // any decoder - ALU : S3; // any alu -%} - -// Integer ALU reg-imm operation -pipe_class 
ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{ - single_instruction; - cr : S4(write); - src1 : S3(read); - DECODE : S0; // any decoder - ALU : S3; // any alu -%} - -// Integer ALU reg-mem operation -pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{ - single_instruction; - cr : S4(write); - src1 : S3(read); - src2 : S3(read); - D0 : S0; // big decoder only - ALU : S4; // any alu - MEM : S3; -%} - -// Conditional move reg-reg -pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{ - instruction_count(4); - y : S4(read); - q : S3(read); - p : S3(read); - DECODE : S0(4); // any decoder -%} - -// Conditional move reg-reg -pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{ - single_instruction; - dst : S4(write); - src : S3(read); - cr : S3(read); - DECODE : S0; // any decoder -%} - -// Conditional move reg-mem -pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{ - single_instruction; - dst : S4(write); - src : S3(read); - cr : S3(read); - DECODE : S0; // any decoder - MEM : S3; -%} - -// Conditional move reg-reg long -pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{ - single_instruction; - dst : S4(write); - src : S3(read); - cr : S3(read); - DECODE : S0(2); // any 2 decoders -%} - -// Conditional move double reg-reg -pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{ - single_instruction; - dst : S4(write); - src : S3(read); - cr : S3(read); - DECODE : S0; // any decoder -%} - -// Float reg-reg operation -pipe_class fpu_reg(regDPR dst) %{ - instruction_count(2); - dst : S3(read); - DECODE : S0(2); // any 2 decoders - FPU : S3; -%} - -// Float reg-reg operation -pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{ - instruction_count(2); - dst : S4(write); - src : S3(read); - DECODE : S0(2); // any 2 decoders - FPU : S3; -%} - -// Float reg-reg operation -pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{ - instruction_count(3); - dst : S4(write); - src1 : S3(read); - 
src2 : S3(read); - DECODE : S0(3); // any 3 decoders - FPU : S3(2); -%} - -// Float reg-reg operation -pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{ - instruction_count(4); - dst : S4(write); - src1 : S3(read); - src2 : S3(read); - src3 : S3(read); - DECODE : S0(4); // any 3 decoders - FPU : S3(2); -%} - -// Float reg-reg operation -pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{ - instruction_count(4); - dst : S4(write); - src1 : S3(read); - src2 : S3(read); - src3 : S3(read); - DECODE : S1(3); // any 3 decoders - D0 : S0; // Big decoder only - FPU : S3(2); - MEM : S3; -%} - -// Float reg-mem operation -pipe_class fpu_reg_mem(regDPR dst, memory mem) %{ - instruction_count(2); - dst : S5(write); - mem : S3(read); - D0 : S0; // big decoder only - DECODE : S1; // any decoder for FPU POP - FPU : S4; - MEM : S3; // any mem -%} - -// Float reg-mem operation -pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{ - instruction_count(3); - dst : S5(write); - src1 : S3(read); - mem : S3(read); - D0 : S0; // big decoder only - DECODE : S1(2); // any decoder for FPU POP - FPU : S4; - MEM : S3; // any mem -%} - -// Float mem-reg operation -pipe_class fpu_mem_reg(memory mem, regDPR src) %{ - instruction_count(2); - src : S5(read); - mem : S3(read); - DECODE : S0; // any decoder for FPU PUSH - D0 : S1; // big decoder only - FPU : S4; - MEM : S3; // any mem -%} - -pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{ - instruction_count(3); - src1 : S3(read); - src2 : S3(read); - mem : S3(read); - DECODE : S0(2); // any decoder for FPU PUSH - D0 : S1; // big decoder only - FPU : S4; - MEM : S3; // any mem -%} - -pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{ - instruction_count(3); - src1 : S3(read); - src2 : S3(read); - mem : S4(read); - DECODE : S0; // any decoder for FPU PUSH - D0 : S0(2); // big decoder only - FPU : S4; - MEM : S3(2); // any mem -%} - 
-pipe_class fpu_mem_mem(memory dst, memory src1) %{ - instruction_count(2); - src1 : S3(read); - dst : S4(read); - D0 : S0(2); // big decoder only - MEM : S3(2); // any mem -%} - -pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{ - instruction_count(3); - src1 : S3(read); - src2 : S3(read); - dst : S4(read); - D0 : S0(3); // big decoder only - FPU : S4; - MEM : S3(3); // any mem -%} - -pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{ - instruction_count(3); - src1 : S4(read); - mem : S4(read); - DECODE : S0; // any decoder for FPU PUSH - D0 : S0(2); // big decoder only - FPU : S4; - MEM : S3(2); // any mem -%} - -// Float load constant -pipe_class fpu_reg_con(regDPR dst) %{ - instruction_count(2); - dst : S5(write); - D0 : S0; // big decoder only for the load - DECODE : S1; // any decoder for FPU POP - FPU : S4; - MEM : S3; // any mem -%} - -// Float load constant -pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{ - instruction_count(3); - dst : S5(write); - src : S3(read); - D0 : S0; // big decoder only for the load - DECODE : S1(2); // any decoder for FPU POP - FPU : S4; - MEM : S3; // any mem -%} - -// UnConditional branch -pipe_class pipe_jmp( label labl ) %{ - single_instruction; - BR : S3; -%} - -// Conditional branch -pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{ - single_instruction; - cr : S1(read); - BR : S3; -%} - -// Allocation idiom -pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{ - instruction_count(1); force_serialization; - fixed_latency(6); - heap_ptr : S3(read); - DECODE : S0(3); - D0 : S2; - MEM : S3; - ALU : S3(2); - dst : S5(write); - BR : S5; -%} - -// Generic big/slow expanded idiom -pipe_class pipe_slow( ) %{ - instruction_count(10); multiple_bundles; force_serialization; - fixed_latency(100); - D0 : S0(2); - MEM : S3(2); -%} - -// The real do-nothing guy -pipe_class empty( ) %{ - instruction_count(0); -%} - -// Define the class for the Nop node -define %{ - MachNop = empty; -%} - -%} - 
-//----------INSTRUCTIONS------------------------------------------------------- -// -// match -- States which machine-independent subtree may be replaced -// by this instruction. -// ins_cost -- The estimated cost of this instruction is used by instruction -// selection to identify a minimum cost tree of machine -// instructions that matches a tree of machine-independent -// instructions. -// format -- A string providing the disassembly for this instruction. -// The value of an instruction's operand may be inserted -// by referring to it with a '$' prefix. -// opcode -- Three instruction opcodes may be provided. These are referred -// to within an encode class as $primary, $secondary, and $tertiary -// respectively. The primary opcode is commonly used to -// indicate the type of machine instruction, while secondary -// and tertiary are often used for prefix options or addressing -// modes. -// ins_encode -- A list of encode classes with parameters. The encode class -// name must have been defined in an 'enc_class' specification -// in the encode section of the architecture description. - -// Dummy reg-to-reg vector moves. Removed during post-selection cleanup. -// Load Float -instruct MoveF2LEG(legRegF dst, regF src) %{ - match(Set dst src); - format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %} - ins_encode %{ - ShouldNotReachHere(); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Load Float -instruct MoveLEG2F(regF dst, legRegF src) %{ - match(Set dst src); - format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %} - ins_encode %{ - ShouldNotReachHere(); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Load Float -instruct MoveF2VL(vlRegF dst, regF src) %{ - match(Set dst src); - format %{ "movss $dst,$src\t! load float (4 bytes)" %} - ins_encode %{ - ShouldNotReachHere(); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Load Float -instruct MoveVL2F(regF dst, vlRegF src) %{ - match(Set dst src); - format %{ "movss $dst,$src\t! 
load float (4 bytes)" %} - ins_encode %{ - ShouldNotReachHere(); - %} - ins_pipe( fpu_reg_reg ); -%} - - - -// Load Double -instruct MoveD2LEG(legRegD dst, regD src) %{ - match(Set dst src); - format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %} - ins_encode %{ - ShouldNotReachHere(); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Load Double -instruct MoveLEG2D(regD dst, legRegD src) %{ - match(Set dst src); - format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %} - ins_encode %{ - ShouldNotReachHere(); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Load Double -instruct MoveD2VL(vlRegD dst, regD src) %{ - match(Set dst src); - format %{ "movsd $dst,$src\t! load double (8 bytes)" %} - ins_encode %{ - ShouldNotReachHere(); - %} - ins_pipe( fpu_reg_reg ); -%} - -// Load Double -instruct MoveVL2D(regD dst, vlRegD src) %{ - match(Set dst src); - format %{ "movsd $dst,$src\t! load double (8 bytes)" %} - ins_encode %{ - ShouldNotReachHere(); - %} - ins_pipe( fpu_reg_reg ); -%} - -//----------BSWAP-Instruction-------------------------------------------------- -instruct bytes_reverse_int(rRegI dst) %{ - match(Set dst (ReverseBytesI dst)); - - format %{ "BSWAP $dst" %} - opcode(0x0F, 0xC8); - ins_encode( OpcP, OpcSReg(dst) ); - ins_pipe( ialu_reg ); -%} - -instruct bytes_reverse_long(eRegL dst) %{ - match(Set dst (ReverseBytesL dst)); - - format %{ "BSWAP $dst.lo\n\t" - "BSWAP $dst.hi\n\t" - "XCHG $dst.lo $dst.hi" %} - - ins_cost(125); - ins_encode( bswap_long_bytes(dst) ); - ins_pipe( ialu_reg_reg); -%} - -instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{ - match(Set dst (ReverseBytesUS dst)); - effect(KILL cr); - - format %{ "BSWAP $dst\n\t" - "SHR $dst,16\n\t" %} - ins_encode %{ - __ bswapl($dst$$Register); - __ shrl($dst$$Register, 16); - %} - ins_pipe( ialu_reg ); -%} - -instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{ - match(Set dst (ReverseBytesS dst)); - effect(KILL cr); - - format %{ "BSWAP $dst\n\t" - "SAR 
$dst,16\n\t" %} - ins_encode %{ - __ bswapl($dst$$Register); - __ sarl($dst$$Register, 16); - %} - ins_pipe( ialu_reg ); -%} - - -//---------- Zeros Count Instructions ------------------------------------------ - -instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ - predicate(UseCountLeadingZerosInstruction); - match(Set dst (CountLeadingZerosI src)); - effect(KILL cr); - - format %{ "LZCNT $dst, $src\t# count leading zeros (int)" %} - ins_encode %{ - __ lzcntl($dst$$Register, $src$$Register); - %} - ins_pipe(ialu_reg); -%} - -instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{ - predicate(!UseCountLeadingZerosInstruction); - match(Set dst (CountLeadingZerosI src)); - effect(KILL cr); - - format %{ "BSR $dst, $src\t# count leading zeros (int)\n\t" - "JNZ skip\n\t" - "MOV $dst, -1\n" - "skip:\n\t" - "NEG $dst\n\t" - "ADD $dst, 31" %} - ins_encode %{ - Register Rdst = $dst$$Register; - Register Rsrc = $src$$Register; - Label skip; - __ bsrl(Rdst, Rsrc); - __ jccb(Assembler::notZero, skip); - __ movl(Rdst, -1); - __ bind(skip); - __ negl(Rdst); - __ addl(Rdst, BitsPerInt - 1); - %} - ins_pipe(ialu_reg); -%} - -instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ - predicate(UseCountLeadingZerosInstruction); - match(Set dst (CountLeadingZerosL src)); - effect(TEMP dst, KILL cr); - - format %{ "LZCNT $dst, $src.hi\t# count leading zeros (long)\n\t" - "JNC done\n\t" - "LZCNT $dst, $src.lo\n\t" - "ADD $dst, 32\n" - "done:" %} - ins_encode %{ - Register Rdst = $dst$$Register; - Register Rsrc = $src$$Register; - Label done; - __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); - __ jccb(Assembler::carryClear, done); - __ lzcntl(Rdst, Rsrc); - __ addl(Rdst, BitsPerInt); - __ bind(done); - %} - ins_pipe(ialu_reg); -%} - -instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{ - predicate(!UseCountLeadingZerosInstruction); - match(Set dst (CountLeadingZerosL src)); - effect(TEMP dst, KILL cr); - - format %{ "BSR $dst, 
$src.hi\t# count leading zeros (long)\n\t" - "JZ msw_is_zero\n\t" - "ADD $dst, 32\n\t" - "JMP not_zero\n" - "msw_is_zero:\n\t" - "BSR $dst, $src.lo\n\t" - "JNZ not_zero\n\t" - "MOV $dst, -1\n" - "not_zero:\n\t" - "NEG $dst\n\t" - "ADD $dst, 63\n" %} - ins_encode %{ - Register Rdst = $dst$$Register; - Register Rsrc = $src$$Register; - Label msw_is_zero; - Label not_zero; - __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc)); - __ jccb(Assembler::zero, msw_is_zero); - __ addl(Rdst, BitsPerInt); - __ jmpb(not_zero); - __ bind(msw_is_zero); - __ bsrl(Rdst, Rsrc); - __ jccb(Assembler::notZero, not_zero); - __ movl(Rdst, -1); - __ bind(not_zero); - __ negl(Rdst); - __ addl(Rdst, BitsPerLong - 1); - %} - ins_pipe(ialu_reg); -%} - -instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{ - predicate(UseCountTrailingZerosInstruction); - match(Set dst (CountTrailingZerosI src)); - effect(KILL cr); - - format %{ "TZCNT $dst, $src\t# count trailing zeros (int)" %} - ins_encode %{ - __ tzcntl($dst$$Register, $src$$Register); - %} - ins_pipe(ialu_reg); -%} - -instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{ - predicate(!UseCountTrailingZerosInstruction); - match(Set dst (CountTrailingZerosI src)); - effect(KILL cr); - - format %{ "BSF $dst, $src\t# count trailing zeros (int)\n\t" - "JNZ done\n\t" - "MOV $dst, 32\n" - "done:" %} - ins_encode %{ - Register Rdst = $dst$$Register; - Label done; - __ bsfl(Rdst, $src$$Register); - __ jccb(Assembler::notZero, done); - __ movl(Rdst, BitsPerInt); - __ bind(done); - %} - ins_pipe(ialu_reg); -%} - -instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{ - predicate(UseCountTrailingZerosInstruction); - match(Set dst (CountTrailingZerosL src)); - effect(TEMP dst, KILL cr); - - format %{ "TZCNT $dst, $src.lo\t# count trailing zeros (long) \n\t" - "JNC done\n\t" - "TZCNT $dst, $src.hi\n\t" - "ADD $dst, 32\n" - "done:" %} - ins_encode %{ - Register Rdst = $dst$$Register; - Register Rsrc = $src$$Register; - Label 
done; - __ tzcntl(Rdst, Rsrc); - __ jccb(Assembler::carryClear, done); - __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc)); - __ addl(Rdst, BitsPerInt); - __ bind(done); - %} - ins_pipe(ialu_reg); -%} - -instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{ - predicate(!UseCountTrailingZerosInstruction); - match(Set dst (CountTrailingZerosL src)); - effect(TEMP dst, KILL cr); - - format %{ "BSF $dst, $src.lo\t# count trailing zeros (long)\n\t" - "JNZ done\n\t" - "BSF $dst, $src.hi\n\t" - "JNZ msw_not_zero\n\t" - "MOV $dst, 32\n" - "msw_not_zero:\n\t" - "ADD $dst, 32\n" - "done:" %} - ins_encode %{ - Register Rdst = $dst$$Register; - Register Rsrc = $src$$Register; - Label msw_not_zero; - Label done; - __ bsfl(Rdst, Rsrc); - __ jccb(Assembler::notZero, done); - __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc)); - __ jccb(Assembler::notZero, msw_not_zero); - __ movl(Rdst, BitsPerInt); - __ bind(msw_not_zero); - __ addl(Rdst, BitsPerInt); - __ bind(done); - %} - ins_pipe(ialu_reg); -%} - - -//---------- Population Count Instructions ------------------------------------- - -instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{ - predicate(UsePopCountInstruction); - match(Set dst (PopCountI src)); - effect(KILL cr); - - format %{ "POPCNT $dst, $src" %} - ins_encode %{ - __ popcntl($dst$$Register, $src$$Register); - %} - ins_pipe(ialu_reg); -%} - -instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{ - predicate(UsePopCountInstruction); - match(Set dst (PopCountI (LoadI mem))); - effect(KILL cr); - - format %{ "POPCNT $dst, $mem" %} - ins_encode %{ - __ popcntl($dst$$Register, $mem$$Address); - %} - ins_pipe(ialu_reg); -%} - -// Note: Long.bitCount(long) returns an int. 
-instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ - predicate(UsePopCountInstruction); - match(Set dst (PopCountL src)); - effect(KILL cr, TEMP tmp, TEMP dst); - - format %{ "POPCNT $dst, $src.lo\n\t" - "POPCNT $tmp, $src.hi\n\t" - "ADD $dst, $tmp" %} - ins_encode %{ - __ popcntl($dst$$Register, $src$$Register); - __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); - __ addl($dst$$Register, $tmp$$Register); - %} - ins_pipe(ialu_reg); -%} - -// Note: Long.bitCount(long) returns an int. -instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{ - predicate(UsePopCountInstruction); - match(Set dst (PopCountL (LoadL mem))); - effect(KILL cr, TEMP tmp, TEMP dst); - - format %{ "POPCNT $dst, $mem\n\t" - "POPCNT $tmp, $mem+4\n\t" - "ADD $dst, $tmp" %} - ins_encode %{ - //__ popcntl($dst$$Register, $mem$$Address$$first); - //__ popcntl($tmp$$Register, $mem$$Address$$second); - __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none)); - __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none)); - __ addl($dst$$Register, $tmp$$Register); - %} - ins_pipe(ialu_reg); -%} - - -//----------Load/Store/Move Instructions--------------------------------------- -//----------Load Instructions-------------------------------------------------- -// Load Byte (8bit signed) -instruct loadB(xRegI dst, memory mem) %{ - match(Set dst (LoadB mem)); - - ins_cost(125); - format %{ "MOVSX8 $dst,$mem\t# byte" %} - - ins_encode %{ - __ movsbl($dst$$Register, $mem$$Address); - %} - - ins_pipe(ialu_reg_mem); -%} - -// Load Byte (8bit signed) into Long Register -instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{ - match(Set dst (ConvI2L (LoadB mem))); - effect(KILL cr); - - ins_cost(375); - format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t" - "MOV $dst.hi,$dst.lo\n\t" - "SAR $dst.hi,7" %} - - ins_encode %{ - __ movsbl($dst$$Register, 
$mem$$Address); - __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. - __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended. - %} - - ins_pipe(ialu_reg_mem); -%} - -// Load Unsigned Byte (8bit UNsigned) -instruct loadUB(xRegI dst, memory mem) %{ - match(Set dst (LoadUB mem)); - - ins_cost(125); - format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %} - - ins_encode %{ - __ movzbl($dst$$Register, $mem$$Address); - %} - - ins_pipe(ialu_reg_mem); -%} - -// Load Unsigned Byte (8 bit UNsigned) into Long Register -instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{ - match(Set dst (ConvI2L (LoadUB mem))); - effect(KILL cr); - - ins_cost(250); - format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t" - "XOR $dst.hi,$dst.hi" %} - - ins_encode %{ - Register Rdst = $dst$$Register; - __ movzbl(Rdst, $mem$$Address); - __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); - %} - - ins_pipe(ialu_reg_mem); -%} - -// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register -instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ - match(Set dst (ConvI2L (AndI (LoadUB mem) mask))); - effect(KILL cr); - - format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t" - "XOR $dst.hi,$dst.hi\n\t" - "AND $dst.lo,right_n_bits($mask, 8)" %} - ins_encode %{ - Register Rdst = $dst$$Register; - __ movzbl(Rdst, $mem$$Address); - __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); - __ andl(Rdst, $mask$$constant & right_n_bits(8)); - %} - ins_pipe(ialu_reg_mem); -%} - -// Load Short (16bit signed) -instruct loadS(rRegI dst, memory mem) %{ - match(Set dst (LoadS mem)); - - ins_cost(125); - format %{ "MOVSX $dst,$mem\t# short" %} - - ins_encode %{ - __ movswl($dst$$Register, $mem$$Address); - %} - - ins_pipe(ialu_reg_mem); -%} - -// Load Short (16 bit signed) to Byte (8 bit signed) -instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ - match(Set dst (RShiftI (LShiftI (LoadS mem) 
twentyfour) twentyfour)); - - ins_cost(125); - format %{ "MOVSX $dst, $mem\t# short -> byte" %} - ins_encode %{ - __ movsbl($dst$$Register, $mem$$Address); - %} - ins_pipe(ialu_reg_mem); -%} - -// Load Short (16bit signed) into Long Register -instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{ - match(Set dst (ConvI2L (LoadS mem))); - effect(KILL cr); - - ins_cost(375); - format %{ "MOVSX $dst.lo,$mem\t# short -> long\n\t" - "MOV $dst.hi,$dst.lo\n\t" - "SAR $dst.hi,15" %} - - ins_encode %{ - __ movswl($dst$$Register, $mem$$Address); - __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. - __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended. - %} - - ins_pipe(ialu_reg_mem); -%} - -// Load Unsigned Short/Char (16bit unsigned) -instruct loadUS(rRegI dst, memory mem) %{ - match(Set dst (LoadUS mem)); - - ins_cost(125); - format %{ "MOVZX $dst,$mem\t# ushort/char -> int" %} - - ins_encode %{ - __ movzwl($dst$$Register, $mem$$Address); - %} - - ins_pipe(ialu_reg_mem); -%} - -// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) -instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ - match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); - - ins_cost(125); - format %{ "MOVSX $dst, $mem\t# ushort -> byte" %} - ins_encode %{ - __ movsbl($dst$$Register, $mem$$Address); - %} - ins_pipe(ialu_reg_mem); -%} - -// Load Unsigned Short/Char (16 bit UNsigned) into Long Register -instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{ - match(Set dst (ConvI2L (LoadUS mem))); - effect(KILL cr); - - ins_cost(250); - format %{ "MOVZX $dst.lo,$mem\t# ushort/char -> long\n\t" - "XOR $dst.hi,$dst.hi" %} - - ins_encode %{ - __ movzwl($dst$$Register, $mem$$Address); - __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); - %} - - ins_pipe(ialu_reg_mem); -%} - -// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register -instruct 
loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ - match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); - effect(KILL cr); - - format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t" - "XOR $dst.hi,$dst.hi" %} - ins_encode %{ - Register Rdst = $dst$$Register; - __ movzbl(Rdst, $mem$$Address); - __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); - %} - ins_pipe(ialu_reg_mem); -%} - -// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register -instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{ - match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); - effect(KILL cr); - - format %{ "MOVZX $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t" - "XOR $dst.hi,$dst.hi\n\t" - "AND $dst.lo,right_n_bits($mask, 16)" %} - ins_encode %{ - Register Rdst = $dst$$Register; - __ movzwl(Rdst, $mem$$Address); - __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); - __ andl(Rdst, $mask$$constant & right_n_bits(16)); - %} - ins_pipe(ialu_reg_mem); -%} - -// Load Integer -instruct loadI(rRegI dst, memory mem) %{ - match(Set dst (LoadI mem)); - - ins_cost(125); - format %{ "MOV $dst,$mem\t# int" %} - - ins_encode %{ - __ movl($dst$$Register, $mem$$Address); - %} - - ins_pipe(ialu_reg_mem); -%} - -// Load Integer (32 bit signed) to Byte (8 bit signed) -instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{ - match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); - - ins_cost(125); - format %{ "MOVSX $dst, $mem\t# int -> byte" %} - ins_encode %{ - __ movsbl($dst$$Register, $mem$$Address); - %} - ins_pipe(ialu_reg_mem); -%} - -// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) -instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{ - match(Set dst (AndI (LoadI mem) mask)); - - ins_cost(125); - format %{ "MOVZX $dst, $mem\t# int -> ubyte" %} - ins_encode %{ - __ movzbl($dst$$Register, $mem$$Address); - %} - ins_pipe(ialu_reg_mem); -%} - -// Load Integer (32 bit signed) to 
Short (16 bit signed) -instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{ - match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); - - ins_cost(125); - format %{ "MOVSX $dst, $mem\t# int -> short" %} - ins_encode %{ - __ movswl($dst$$Register, $mem$$Address); - %} - ins_pipe(ialu_reg_mem); -%} - -// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) -instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{ - match(Set dst (AndI (LoadI mem) mask)); - - ins_cost(125); - format %{ "MOVZX $dst, $mem\t# int -> ushort/char" %} - ins_encode %{ - __ movzwl($dst$$Register, $mem$$Address); - %} - ins_pipe(ialu_reg_mem); -%} - -// Load Integer into Long Register -instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{ - match(Set dst (ConvI2L (LoadI mem))); - effect(KILL cr); - - ins_cost(375); - format %{ "MOV $dst.lo,$mem\t# int -> long\n\t" - "MOV $dst.hi,$dst.lo\n\t" - "SAR $dst.hi,31" %} - - ins_encode %{ - __ movl($dst$$Register, $mem$$Address); - __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register. 
- __ sarl(HIGH_FROM_LOW($dst$$Register), 31); - %} - - ins_pipe(ialu_reg_mem); -%} - -// Load Integer with mask 0xFF into Long Register -instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{ - match(Set dst (ConvI2L (AndI (LoadI mem) mask))); - effect(KILL cr); - - format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t" - "XOR $dst.hi,$dst.hi" %} - ins_encode %{ - Register Rdst = $dst$$Register; - __ movzbl(Rdst, $mem$$Address); - __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); - %} - ins_pipe(ialu_reg_mem); -%} - -// Load Integer with mask 0xFFFF into Long Register -instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{ - match(Set dst (ConvI2L (AndI (LoadI mem) mask))); - effect(KILL cr); - - format %{ "MOVZX $dst.lo,$mem\t# int & 0xFFFF -> long\n\t" - "XOR $dst.hi,$dst.hi" %} - ins_encode %{ - Register Rdst = $dst$$Register; - __ movzwl(Rdst, $mem$$Address); - __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); - %} - ins_pipe(ialu_reg_mem); -%} - -// Load Integer with 31-bit mask into Long Register -instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{ - match(Set dst (ConvI2L (AndI (LoadI mem) mask))); - effect(KILL cr); - - format %{ "MOV $dst.lo,$mem\t# int & 31-bit mask -> long\n\t" - "XOR $dst.hi,$dst.hi\n\t" - "AND $dst.lo,$mask" %} - ins_encode %{ - Register Rdst = $dst$$Register; - __ movl(Rdst, $mem$$Address); - __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst)); - __ andl(Rdst, $mask$$constant); - %} - ins_pipe(ialu_reg_mem); -%} - -// Load Unsigned Integer into Long Register -instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{ - match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); - effect(KILL cr); - - ins_cost(250); - format %{ "MOV $dst.lo,$mem\t# uint -> long\n\t" - "XOR $dst.hi,$dst.hi" %} - - ins_encode %{ - __ movl($dst$$Register, $mem$$Address); - __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register)); - %} - - 
ins_pipe(ialu_reg_mem); -%} - -// Load Long. Cannot clobber address while loading, so restrict address -// register to ESI -instruct loadL(eRegL dst, load_long_memory mem) %{ - predicate(!((LoadLNode*)n)->require_atomic_access()); - match(Set dst (LoadL mem)); - - ins_cost(250); - format %{ "MOV $dst.lo,$mem\t# long\n\t" - "MOV $dst.hi,$mem+4" %} - - ins_encode %{ - Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none); - Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none); - __ movl($dst$$Register, Amemlo); - __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi); - %} - - ins_pipe(ialu_reg_long_mem); -%} - -// Volatile Load Long. Must be atomic, so do 64-bit FILD -// then store it down to the stack and reload on the int -// side. -instruct loadL_volatile(stackSlotL dst, memory mem) %{ - predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access()); - match(Set dst (LoadL mem)); - - ins_cost(200); - format %{ "FILD $mem\t# Atomic volatile long load\n\t" - "FISTp $dst" %} - ins_encode(enc_loadL_volatile(mem,dst)); - ins_pipe( fpu_reg_mem ); -%} - -instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{ - predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); - match(Set dst (LoadL mem)); - effect(TEMP tmp); - ins_cost(180); - format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" - "MOVSD $dst,$tmp" %} - ins_encode %{ - __ movdbl($tmp$$XMMRegister, $mem$$Address); - __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{ - predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access()); - match(Set dst (LoadL mem)); - effect(TEMP tmp); - ins_cost(160); - format %{ "MOVSD $tmp,$mem\t# Atomic volatile long load\n\t" - "MOVD $dst.lo,$tmp\n\t" - "PSRLQ $tmp,32\n\t" - "MOVD $dst.hi,$tmp" %} - ins_encode %{ - __ movdbl($tmp$$XMMRegister, 
$mem$$Address); - __ movdl($dst$$Register, $tmp$$XMMRegister); - __ psrlq($tmp$$XMMRegister, 32); - __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -// Load Range -instruct loadRange(rRegI dst, memory mem) %{ - match(Set dst (LoadRange mem)); - - ins_cost(125); - format %{ "MOV $dst,$mem" %} - opcode(0x8B); - ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark); - ins_pipe( ialu_reg_mem ); -%} - - -// Load Pointer -instruct loadP(eRegP dst, memory mem) %{ - match(Set dst (LoadP mem)); - - ins_cost(125); - format %{ "MOV $dst,$mem" %} - opcode(0x8B); - ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark); - ins_pipe( ialu_reg_mem ); -%} - -// Load Klass Pointer -instruct loadKlass(eRegP dst, memory mem) %{ - match(Set dst (LoadKlass mem)); - - ins_cost(125); - format %{ "MOV $dst,$mem" %} - opcode(0x8B); - ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark); - ins_pipe( ialu_reg_mem ); -%} - -// Load Double -instruct loadDPR(regDPR dst, memory mem) %{ - predicate(UseSSE<=1); - match(Set dst (LoadD mem)); - - ins_cost(150); - format %{ "FLD_D ST,$mem\n\t" - "FSTP $dst" %} - opcode(0xDD); /* DD /0 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), - Pop_Reg_DPR(dst), ClearInstMark ); - ins_pipe( fpu_reg_mem ); -%} - -// Load Double to XMM -instruct loadD(regD dst, memory mem) %{ - predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); - match(Set dst (LoadD mem)); - ins_cost(145); - format %{ "MOVSD $dst,$mem" %} - ins_encode %{ - __ movdbl ($dst$$XMMRegister, $mem$$Address); - %} - ins_pipe( pipe_slow ); -%} - -instruct loadD_partial(regD dst, memory mem) %{ - predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); - match(Set dst (LoadD mem)); - ins_cost(145); - format %{ "MOVLPD $dst,$mem" %} - ins_encode %{ - __ movdbl ($dst$$XMMRegister, $mem$$Address); - %} - ins_pipe( pipe_slow ); -%} - -// Load to XMM register (single-precision floating point) -// MOVSS instruction -instruct loadF(regF 
dst, memory mem) %{ - predicate(UseSSE>=1); - match(Set dst (LoadF mem)); - ins_cost(145); - format %{ "MOVSS $dst,$mem" %} - ins_encode %{ - __ movflt ($dst$$XMMRegister, $mem$$Address); - %} - ins_pipe( pipe_slow ); -%} - -// Load Float -instruct loadFPR(regFPR dst, memory mem) %{ - predicate(UseSSE==0); - match(Set dst (LoadF mem)); - - ins_cost(150); - format %{ "FLD_S ST,$mem\n\t" - "FSTP $dst" %} - opcode(0xD9); /* D9 /0 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), - Pop_Reg_FPR(dst), ClearInstMark ); - ins_pipe( fpu_reg_mem ); -%} - -// Load Effective Address -instruct leaP8(eRegP dst, indOffset8 mem) %{ - match(Set dst mem); - - ins_cost(110); - format %{ "LEA $dst,$mem" %} - opcode(0x8D); - ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark); - ins_pipe( ialu_reg_reg_fat ); -%} - -instruct leaP32(eRegP dst, indOffset32 mem) %{ - match(Set dst mem); - - ins_cost(110); - format %{ "LEA $dst,$mem" %} - opcode(0x8D); - ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark); - ins_pipe( ialu_reg_reg_fat ); -%} - -instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{ - match(Set dst mem); - - ins_cost(110); - format %{ "LEA $dst,$mem" %} - opcode(0x8D); - ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark); - ins_pipe( ialu_reg_reg_fat ); -%} - -instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{ - match(Set dst mem); - - ins_cost(110); - format %{ "LEA $dst,$mem" %} - opcode(0x8D); - ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark); - ins_pipe( ialu_reg_reg_fat ); -%} - -instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{ - match(Set dst mem); - - ins_cost(110); - format %{ "LEA $dst,$mem" %} - opcode(0x8D); - ins_encode( SetInstMark, OpcP, RegMem(dst,mem), ClearInstMark); - ins_pipe( ialu_reg_reg_fat ); -%} - -// Load Constant -instruct loadConI(rRegI dst, immI src) %{ - match(Set dst src); - - format %{ "MOV $dst,$src" %} - ins_encode( SetInstMark, LdImmI(dst, src), ClearInstMark ); - 
ins_pipe( ialu_reg_fat ); -%} - -// Load Constant zero -instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{ - match(Set dst src); - effect(KILL cr); - - ins_cost(50); - format %{ "XOR $dst,$dst" %} - opcode(0x33); /* + rd */ - ins_encode( OpcP, RegReg( dst, dst ) ); - ins_pipe( ialu_reg ); -%} - -instruct loadConP(eRegP dst, immP src) %{ - match(Set dst src); - - format %{ "MOV $dst,$src" %} - opcode(0xB8); /* + rd */ - ins_encode( SetInstMark, LdImmP(dst, src), ClearInstMark ); - ins_pipe( ialu_reg_fat ); -%} - -instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{ - match(Set dst src); - effect(KILL cr); - ins_cost(200); - format %{ "MOV $dst.lo,$src.lo\n\t" - "MOV $dst.hi,$src.hi" %} - opcode(0xB8); - ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) ); - ins_pipe( ialu_reg_long_fat ); -%} - -instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{ - match(Set dst src); - effect(KILL cr); - ins_cost(150); - format %{ "XOR $dst.lo,$dst.lo\n\t" - "XOR $dst.hi,$dst.hi" %} - opcode(0x33,0x33); - ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) ); - ins_pipe( ialu_reg_long ); -%} - -// The instruction usage is guarded by predicate in operand immFPR(). -instruct loadConFPR(regFPR dst, immFPR con) %{ - match(Set dst con); - ins_cost(125); - format %{ "FLD_S ST,[$constantaddress]\t# load from constant table: float=$con\n\t" - "FSTP $dst" %} - ins_encode %{ - __ fld_s($constantaddress($con)); - __ fstp_d($dst$$reg); - %} - ins_pipe(fpu_reg_con); -%} - -// The instruction usage is guarded by predicate in operand immFPR0(). -instruct loadConFPR0(regFPR dst, immFPR0 con) %{ - match(Set dst con); - ins_cost(125); - format %{ "FLDZ ST\n\t" - "FSTP $dst" %} - ins_encode %{ - __ fldz(); - __ fstp_d($dst$$reg); - %} - ins_pipe(fpu_reg_con); -%} - -// The instruction usage is guarded by predicate in operand immFPR1(). 
-instruct loadConFPR1(regFPR dst, immFPR1 con) %{ - match(Set dst con); - ins_cost(125); - format %{ "FLD1 ST\n\t" - "FSTP $dst" %} - ins_encode %{ - __ fld1(); - __ fstp_d($dst$$reg); - %} - ins_pipe(fpu_reg_con); -%} - -// The instruction usage is guarded by predicate in operand immF(). -instruct loadConF(regF dst, immF con) %{ - match(Set dst con); - ins_cost(125); - format %{ "MOVSS $dst,[$constantaddress]\t# load from constant table: float=$con" %} - ins_encode %{ - __ movflt($dst$$XMMRegister, $constantaddress($con)); - %} - ins_pipe(pipe_slow); -%} - -// The instruction usage is guarded by predicate in operand immF0(). -instruct loadConF0(regF dst, immF0 src) %{ - match(Set dst src); - ins_cost(100); - format %{ "XORPS $dst,$dst\t# float 0.0" %} - ins_encode %{ - __ xorps($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe(pipe_slow); -%} - -// The instruction usage is guarded by predicate in operand immDPR(). -instruct loadConDPR(regDPR dst, immDPR con) %{ - match(Set dst con); - ins_cost(125); - - format %{ "FLD_D ST,[$constantaddress]\t# load from constant table: double=$con\n\t" - "FSTP $dst" %} - ins_encode %{ - __ fld_d($constantaddress($con)); - __ fstp_d($dst$$reg); - %} - ins_pipe(fpu_reg_con); -%} - -// The instruction usage is guarded by predicate in operand immDPR0(). -instruct loadConDPR0(regDPR dst, immDPR0 con) %{ - match(Set dst con); - ins_cost(125); - - format %{ "FLDZ ST\n\t" - "FSTP $dst" %} - ins_encode %{ - __ fldz(); - __ fstp_d($dst$$reg); - %} - ins_pipe(fpu_reg_con); -%} - -// The instruction usage is guarded by predicate in operand immDPR1(). -instruct loadConDPR1(regDPR dst, immDPR1 con) %{ - match(Set dst con); - ins_cost(125); - - format %{ "FLD1 ST\n\t" - "FSTP $dst" %} - ins_encode %{ - __ fld1(); - __ fstp_d($dst$$reg); - %} - ins_pipe(fpu_reg_con); -%} - -// The instruction usage is guarded by predicate in operand immD(). 
-instruct loadConD(regD dst, immD con) %{ - match(Set dst con); - ins_cost(125); - format %{ "MOVSD $dst,[$constantaddress]\t# load from constant table: double=$con" %} - ins_encode %{ - __ movdbl($dst$$XMMRegister, $constantaddress($con)); - %} - ins_pipe(pipe_slow); -%} - -// The instruction usage is guarded by predicate in operand immD0(). -instruct loadConD0(regD dst, immD0 src) %{ - match(Set dst src); - ins_cost(100); - format %{ "XORPD $dst,$dst\t# double 0.0" %} - ins_encode %{ - __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -// Load Stack Slot -instruct loadSSI(rRegI dst, stackSlotI src) %{ - match(Set dst src); - ins_cost(125); - - format %{ "MOV $dst,$src" %} - opcode(0x8B); - ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark); - ins_pipe( ialu_reg_mem ); -%} - -instruct loadSSL(eRegL dst, stackSlotL src) %{ - match(Set dst src); - - ins_cost(200); - format %{ "MOV $dst,$src.lo\n\t" - "MOV $dst+4,$src.hi" %} - opcode(0x8B, 0x8B); - ins_encode( SetInstMark, OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ), ClearInstMark ); - ins_pipe( ialu_mem_long_reg ); -%} - -// Load Stack Slot -instruct loadSSP(eRegP dst, stackSlotP src) %{ - match(Set dst src); - ins_cost(125); - - format %{ "MOV $dst,$src" %} - opcode(0x8B); - ins_encode( SetInstMark, OpcP, RegMem(dst,src), ClearInstMark); - ins_pipe( ialu_reg_mem ); -%} - -// Load Stack Slot -instruct loadSSF(regFPR dst, stackSlotF src) %{ - match(Set dst src); - ins_cost(125); - - format %{ "FLD_S $src\n\t" - "FSTP $dst" %} - opcode(0xD9); /* D9 /0, FLD m32real */ - ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src), - Pop_Reg_FPR(dst), ClearInstMark ); - ins_pipe( fpu_reg_mem ); -%} - -// Load Stack Slot -instruct loadSSD(regDPR dst, stackSlotD src) %{ - match(Set dst src); - ins_cost(125); - - format %{ "FLD_D $src\n\t" - "FSTP $dst" %} - opcode(0xDD); /* DD /0, FLD m64real */ - ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src), - 
Pop_Reg_DPR(dst), ClearInstMark ); - ins_pipe( fpu_reg_mem ); -%} - -// Prefetch instructions for allocation. -// Must be safe to execute with invalid address (cannot fault). - -instruct prefetchAlloc0( memory mem ) %{ - predicate(UseSSE==0 && AllocatePrefetchInstr!=3); - match(PrefetchAllocation mem); - ins_cost(0); - size(0); - format %{ "Prefetch allocation (non-SSE is empty encoding)" %} - ins_encode(); - ins_pipe(empty); -%} - -instruct prefetchAlloc( memory mem ) %{ - predicate(AllocatePrefetchInstr==3); - match( PrefetchAllocation mem ); - ins_cost(100); - - format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %} - ins_encode %{ - __ prefetchw($mem$$Address); - %} - ins_pipe(ialu_mem); -%} - -instruct prefetchAllocNTA( memory mem ) %{ - predicate(UseSSE>=1 && AllocatePrefetchInstr==0); - match(PrefetchAllocation mem); - ins_cost(100); - - format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %} - ins_encode %{ - __ prefetchnta($mem$$Address); - %} - ins_pipe(ialu_mem); -%} - -instruct prefetchAllocT0( memory mem ) %{ - predicate(UseSSE>=1 && AllocatePrefetchInstr==1); - match(PrefetchAllocation mem); - ins_cost(100); - - format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %} - ins_encode %{ - __ prefetcht0($mem$$Address); - %} - ins_pipe(ialu_mem); -%} - -instruct prefetchAllocT2( memory mem ) %{ - predicate(UseSSE>=1 && AllocatePrefetchInstr==2); - match(PrefetchAllocation mem); - ins_cost(100); - - format %{ "PREFETCHT2 $mem\t! 
Prefetch allocation into L2 cache for write" %} - ins_encode %{ - __ prefetcht2($mem$$Address); - %} - ins_pipe(ialu_mem); -%} - -//----------Store Instructions------------------------------------------------- - -// Store Byte -instruct storeB(memory mem, xRegI src) %{ - match(Set mem (StoreB mem src)); - - ins_cost(125); - format %{ "MOV8 $mem,$src" %} - opcode(0x88); - ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark ); - ins_pipe( ialu_mem_reg ); -%} - -// Store Char/Short -instruct storeC(memory mem, rRegI src) %{ - match(Set mem (StoreC mem src)); - - ins_cost(125); - format %{ "MOV16 $mem,$src" %} - opcode(0x89, 0x66); - ins_encode( SetInstMark, OpcS, OpcP, RegMem( src, mem ), ClearInstMark ); - ins_pipe( ialu_mem_reg ); -%} - -// Store Integer -instruct storeI(memory mem, rRegI src) %{ - match(Set mem (StoreI mem src)); - - ins_cost(125); - format %{ "MOV $mem,$src" %} - opcode(0x89); - ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark ); - ins_pipe( ialu_mem_reg ); -%} - -// Store Long -instruct storeL(long_memory mem, eRegL src) %{ - predicate(!((StoreLNode*)n)->require_atomic_access()); - match(Set mem (StoreL mem src)); - - ins_cost(200); - format %{ "MOV $mem,$src.lo\n\t" - "MOV $mem+4,$src.hi" %} - opcode(0x89, 0x89); - ins_encode( SetInstMark, OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ), ClearInstMark ); - ins_pipe( ialu_mem_long_reg ); -%} - -// Store Long to Integer -instruct storeL2I(memory mem, eRegL src) %{ - match(Set mem (StoreI mem (ConvL2I src))); - - format %{ "MOV $mem,$src.lo\t# long -> int" %} - ins_encode %{ - __ movl($mem$$Address, $src$$Register); - %} - ins_pipe(ialu_mem_reg); -%} - -// Volatile Store Long. Must be atomic, so move it into -// the FP TOS and then do a 64-bit FIST. Has to probe the -// target address before the store (for null-ptr checks) -// so the memory operand is used twice in the encoding. 
-instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{ - predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access()); - match(Set mem (StoreL mem src)); - effect( KILL cr ); - ins_cost(400); - format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" - "FILD $src\n\t" - "FISTp $mem\t # 64-bit atomic volatile long store" %} - opcode(0x3B); - ins_encode( SetInstMark, OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src), ClearInstMark); - ins_pipe( fpu_reg_mem ); -%} - -instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{ - predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); - match(Set mem (StoreL mem src)); - effect( TEMP tmp, KILL cr ); - ins_cost(380); - format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" - "MOVSD $tmp,$src\n\t" - "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} - ins_encode %{ - __ cmpl(rax, $mem$$Address); - __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp)); - __ movdbl($mem$$Address, $tmp$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{ - predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access()); - match(Set mem (StoreL mem src)); - effect( TEMP tmp2 , TEMP tmp, KILL cr ); - ins_cost(360); - format %{ "CMP $mem,EAX\t# Probe address for implicit null check\n\t" - "MOVD $tmp,$src.lo\n\t" - "MOVD $tmp2,$src.hi\n\t" - "PUNPCKLDQ $tmp,$tmp2\n\t" - "MOVSD $mem,$tmp\t # 64-bit atomic volatile long store" %} - ins_encode %{ - __ cmpl(rax, $mem$$Address); - __ movdl($tmp$$XMMRegister, $src$$Register); - __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register)); - __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister); - __ movdbl($mem$$Address, $tmp$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -// Store Pointer; for storing unknown oops and raw pointers -instruct storeP(memory mem, anyRegP src) %{ - match(Set mem (StoreP mem src)); - - 
ins_cost(125); - format %{ "MOV $mem,$src" %} - opcode(0x89); - ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark ); - ins_pipe( ialu_mem_reg ); -%} - -// Store Integer Immediate -instruct storeImmI(memory mem, immI src) %{ - match(Set mem (StoreI mem src)); - - ins_cost(150); - format %{ "MOV $mem,$src" %} - opcode(0xC7); /* C7 /0 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32(src), ClearInstMark); - ins_pipe( ialu_mem_imm ); -%} - -// Store Short/Char Immediate -instruct storeImmI16(memory mem, immI16 src) %{ - predicate(UseStoreImmI16); - match(Set mem (StoreC mem src)); - - ins_cost(150); - format %{ "MOV16 $mem,$src" %} - opcode(0xC7); /* C7 /0 Same as 32 store immediate with prefix */ - ins_encode( SetInstMark, SizePrefix, OpcP, RMopc_Mem(0x00,mem), Con16(src), ClearInstMark); - ins_pipe( ialu_mem_imm ); -%} - -// Store Pointer Immediate; null pointers or constant oops that do not -// need card-mark barriers. -instruct storeImmP(memory mem, immP src) %{ - match(Set mem (StoreP mem src)); - - ins_cost(150); - format %{ "MOV $mem,$src" %} - opcode(0xC7); /* C7 /0 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32( src ), ClearInstMark); - ins_pipe( ialu_mem_imm ); -%} - -// Store Byte Immediate -instruct storeImmB(memory mem, immI8 src) %{ - match(Set mem (StoreB mem src)); - - ins_cost(150); - format %{ "MOV8 $mem,$src" %} - opcode(0xC6); /* C6 /0 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con8or32(src), ClearInstMark); - ins_pipe( ialu_mem_imm ); -%} - -// Store Double -instruct storeDPR( memory mem, regDPR1 src) %{ - predicate(UseSSE<=1); - match(Set mem (StoreD mem src)); - - ins_cost(100); - format %{ "FST_D $mem,$src" %} - opcode(0xDD); /* DD /2 */ - ins_encode( enc_FPR_store(mem,src) ); - ins_pipe( fpu_mem_reg ); -%} - -// Store double does rounding on x86 -instruct storeDPR_rounded( memory mem, regDPR1 src) %{ - predicate(UseSSE<=1); - match(Set mem (StoreD mem (RoundDouble src))); - - 
ins_cost(100); - format %{ "FST_D $mem,$src\t# round" %} - opcode(0xDD); /* DD /2 */ - ins_encode( enc_FPR_store(mem,src) ); - ins_pipe( fpu_mem_reg ); -%} - -// Store XMM register to memory (double-precision floating points) -// MOVSD instruction -instruct storeD(memory mem, regD src) %{ - predicate(UseSSE>=2); - match(Set mem (StoreD mem src)); - ins_cost(95); - format %{ "MOVSD $mem,$src" %} - ins_encode %{ - __ movdbl($mem$$Address, $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -// Store XMM register to memory (single-precision floating point) -// MOVSS instruction -instruct storeF(memory mem, regF src) %{ - predicate(UseSSE>=1); - match(Set mem (StoreF mem src)); - ins_cost(95); - format %{ "MOVSS $mem,$src" %} - ins_encode %{ - __ movflt($mem$$Address, $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - - -// Store Float -instruct storeFPR( memory mem, regFPR1 src) %{ - predicate(UseSSE==0); - match(Set mem (StoreF mem src)); - - ins_cost(100); - format %{ "FST_S $mem,$src" %} - opcode(0xD9); /* D9 /2 */ - ins_encode( enc_FPR_store(mem,src) ); - ins_pipe( fpu_mem_reg ); -%} - -// Store Float does rounding on x86 -instruct storeFPR_rounded( memory mem, regFPR1 src) %{ - predicate(UseSSE==0); - match(Set mem (StoreF mem (RoundFloat src))); - - ins_cost(100); - format %{ "FST_S $mem,$src\t# round" %} - opcode(0xD9); /* D9 /2 */ - ins_encode( enc_FPR_store(mem,src) ); - ins_pipe( fpu_mem_reg ); -%} - -// Store Float does rounding on x86 -instruct storeFPR_Drounded( memory mem, regDPR1 src) %{ - predicate(UseSSE<=1); - match(Set mem (StoreF mem (ConvD2F src))); - - ins_cost(100); - format %{ "FST_S $mem,$src\t# D-round" %} - opcode(0xD9); /* D9 /2 */ - ins_encode( enc_FPR_store(mem,src) ); - ins_pipe( fpu_mem_reg ); -%} - -// Store immediate Float value (it is faster than store from FPU register) -// The instruction usage is guarded by predicate in operand immFPR(). 
-instruct storeFPR_imm( memory mem, immFPR src) %{ - match(Set mem (StoreF mem src)); - - ins_cost(50); - format %{ "MOV $mem,$src\t# store float" %} - opcode(0xC7); /* C7 /0 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32FPR_as_bits(src), ClearInstMark); - ins_pipe( ialu_mem_imm ); -%} - -// Store immediate Float value (it is faster than store from XMM register) -// The instruction usage is guarded by predicate in operand immF(). -instruct storeF_imm( memory mem, immF src) %{ - match(Set mem (StoreF mem src)); - - ins_cost(50); - format %{ "MOV $mem,$src\t# store float" %} - opcode(0xC7); /* C7 /0 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), Con32F_as_bits(src), ClearInstMark); - ins_pipe( ialu_mem_imm ); -%} - -// Store Integer to stack slot -instruct storeSSI(stackSlotI dst, rRegI src) %{ - match(Set dst src); - - ins_cost(100); - format %{ "MOV $dst,$src" %} - opcode(0x89); - ins_encode( OpcPRegSS( dst, src ) ); - ins_pipe( ialu_mem_reg ); -%} - -// Store Integer to stack slot -instruct storeSSP(stackSlotP dst, eRegP src) %{ - match(Set dst src); - - ins_cost(100); - format %{ "MOV $dst,$src" %} - opcode(0x89); - ins_encode( OpcPRegSS( dst, src ) ); - ins_pipe( ialu_mem_reg ); -%} - -// Store Long to stack slot -instruct storeSSL(stackSlotL dst, eRegL src) %{ - match(Set dst src); - - ins_cost(200); - format %{ "MOV $dst,$src.lo\n\t" - "MOV $dst+4,$src.hi" %} - opcode(0x89, 0x89); - ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark ); - ins_pipe( ialu_mem_long_reg ); -%} - -//----------MemBar Instructions----------------------------------------------- -// Memory barrier flavors - -instruct membar_acquire() %{ - match(MemBarAcquire); - match(LoadFence); - ins_cost(400); - - size(0); - format %{ "MEMBAR-acquire ! 
(empty encoding)" %} - ins_encode(); - ins_pipe(empty); -%} - -instruct membar_acquire_lock() %{ - match(MemBarAcquireLock); - ins_cost(0); - - size(0); - format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %} - ins_encode( ); - ins_pipe(empty); -%} - -instruct membar_release() %{ - match(MemBarRelease); - match(StoreFence); - ins_cost(400); - - size(0); - format %{ "MEMBAR-release ! (empty encoding)" %} - ins_encode( ); - ins_pipe(empty); -%} - -instruct membar_release_lock() %{ - match(MemBarReleaseLock); - ins_cost(0); - - size(0); - format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %} - ins_encode( ); - ins_pipe(empty); -%} - -instruct membar_volatile(eFlagsReg cr) %{ - match(MemBarVolatile); - effect(KILL cr); - ins_cost(400); - - format %{ - $$template - $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile" - %} - ins_encode %{ - __ membar(Assembler::StoreLoad); - %} - ins_pipe(pipe_slow); -%} - -instruct unnecessary_membar_volatile() %{ - match(MemBarVolatile); - predicate(Matcher::post_store_load_barrier(n)); - ins_cost(0); - - size(0); - format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %} - ins_encode( ); - ins_pipe(empty); -%} - -instruct membar_storestore() %{ - match(MemBarStoreStore); - match(StoreStoreFence); - ins_cost(0); - - size(0); - format %{ "MEMBAR-storestore (empty encoding)" %} - ins_encode( ); - ins_pipe(empty); -%} - -//----------Move Instructions-------------------------------------------------- -instruct castX2P(eAXRegP dst, eAXRegI src) %{ - match(Set dst (CastX2P src)); - format %{ "# X2P $dst, $src" %} - ins_encode( /*empty encoding*/ ); - ins_cost(0); - ins_pipe(empty); -%} - -instruct castP2X(rRegI dst, eRegP src ) %{ - match(Set dst (CastP2X src)); - ins_cost(50); - format %{ "MOV $dst, $src\t# CastP2X" %} - ins_encode( enc_Copy( dst, src) ); - ins_pipe( ialu_reg_reg ); -%} - -//----------Conditional Move--------------------------------------------------- -// Conditional move 
-instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{ - predicate(!VM_Version::supports_cmov() ); - match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "J$cop,us skip\t# signed cmove\n\t" - "MOV $dst,$src\n" - "skip:" %} - ins_encode %{ - Label Lskip; - // Invert sense of branch from sense of CMOV - __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); - __ movl($dst$$Register, $src$$Register); - __ bind(Lskip); - %} - ins_pipe( pipe_cmov_reg ); -%} - -instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{ - predicate(!VM_Version::supports_cmov() ); - match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "J$cop,us skip\t# unsigned cmove\n\t" - "MOV $dst,$src\n" - "skip:" %} - ins_encode %{ - Label Lskip; - // Invert sense of branch from sense of CMOV - __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip); - __ movl($dst$$Register, $src$$Register); - __ bind(Lskip); - %} - ins_pipe( pipe_cmov_reg ); -%} - -instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{ - predicate(VM_Version::supports_cmov() ); - match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "CMOV$cop $dst,$src" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cop), RegReg( dst, src ) ); - ins_pipe( pipe_cmov_reg ); -%} - -instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{ - predicate(VM_Version::supports_cmov() ); - match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "CMOV$cop $dst,$src" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cop), RegReg( dst, src ) ); - ins_pipe( pipe_cmov_reg ); -%} - -instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{ - predicate(VM_Version::supports_cmov() ); - match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); - ins_cost(200); - expand %{ - cmovI_regU(cop, cr, dst, src); - %} -%} - -// Conditional move -instruct 
cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{ - predicate(VM_Version::supports_cmov() ); - match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); - ins_cost(250); - format %{ "CMOV$cop $dst,$src" %} - opcode(0x0F,0x40); - ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark ); - ins_pipe( pipe_cmov_mem ); -%} - -// Conditional move -instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{ - predicate(VM_Version::supports_cmov() ); - match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); - ins_cost(250); - format %{ "CMOV$cop $dst,$src" %} - opcode(0x0F,0x40); - ins_encode( SetInstMark, enc_cmov(cop), RegMem( dst, src ), ClearInstMark ); - ins_pipe( pipe_cmov_mem ); -%} - -instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{ - predicate(VM_Version::supports_cmov() ); - match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); - ins_cost(250); - expand %{ - cmovI_memU(cop, cr, dst, src); - %} -%} - -// Conditional move -instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ - predicate(VM_Version::supports_cmov() ); - match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "CMOV$cop $dst,$src\t# ptr" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cop), RegReg( dst, src ) ); - ins_pipe( pipe_cmov_reg ); -%} - -// Conditional move (non-P6 version) -// Note: a CMoveP is generated for stubs and native wrappers -// regardless of whether we are on a P6, so we -// emulate a cmov here -instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{ - match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); - ins_cost(300); - format %{ "Jn$cop skip\n\t" - "MOV $dst,$src\t# pointer\n" - "skip:" %} - opcode(0x8b); - ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src)); - ins_pipe( pipe_cmov_reg ); -%} - -// Conditional move -instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{ - 
predicate(VM_Version::supports_cmov() ); - match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "CMOV$cop $dst,$src\t# ptr" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cop), RegReg( dst, src ) ); - ins_pipe( pipe_cmov_reg ); -%} - -instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{ - predicate(VM_Version::supports_cmov() ); - match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); - ins_cost(200); - expand %{ - cmovP_regU(cop, cr, dst, src); - %} -%} - -// DISABLED: Requires the ADLC to emit a bottom_type call that -// correctly meets the two pointer arguments; one is an incoming -// register but the other is a memory operand. ALSO appears to -// be buggy with implicit null checks. -// -//// Conditional move -//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{ -// predicate(VM_Version::supports_cmov() ); -// match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src)))); -// ins_cost(250); -// format %{ "CMOV$cop $dst,$src\t# ptr" %} -// opcode(0x0F,0x40); -// ins_encode( enc_cmov(cop), RegMem( dst, src ) ); -// ins_pipe( pipe_cmov_mem ); -//%} -// -//// Conditional move -//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{ -// predicate(VM_Version::supports_cmov() ); -// match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src)))); -// ins_cost(250); -// format %{ "CMOV$cop $dst,$src\t# ptr" %} -// opcode(0x0F,0x40); -// ins_encode( enc_cmov(cop), RegMem( dst, src ) ); -// ins_pipe( pipe_cmov_mem ); -//%} - -// Conditional move -instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{ - predicate(UseSSE<=1); - match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "FCMOV$cop $dst,$src\t# double" %} - opcode(0xDA); - ins_encode( enc_cmov_dpr(cop,src) ); - ins_pipe( pipe_cmovDPR_reg ); -%} - -// Conditional move -instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{ - 
predicate(UseSSE==0); - match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "FCMOV$cop $dst,$src\t# float" %} - opcode(0xDA); - ins_encode( enc_cmov_dpr(cop,src) ); - ins_pipe( pipe_cmovDPR_reg ); -%} - -// Float CMOV on Intel doesn't handle *signed* compares, only unsigned. -instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{ - predicate(UseSSE<=1); - match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "Jn$cop skip\n\t" - "MOV $dst,$src\t# double\n" - "skip:" %} - opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ - ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) ); - ins_pipe( pipe_cmovDPR_reg ); -%} - -// Float CMOV on Intel doesn't handle *signed* compares, only unsigned. -instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{ - predicate(UseSSE==0); - match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "Jn$cop skip\n\t" - "MOV $dst,$src\t# float\n" - "skip:" %} - opcode (0xdd, 0x3); /* DD D8+i or DD /3 */ - ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) ); - ins_pipe( pipe_cmovDPR_reg ); -%} - -// No CMOVE with SSE/SSE2 -instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{ - predicate (UseSSE>=1); - match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "Jn$cop skip\n\t" - "MOVSS $dst,$src\t# float\n" - "skip:" %} - ins_encode %{ - Label skip; - // Invert sense of branch from sense of CMOV - __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); - __ movflt($dst$$XMMRegister, $src$$XMMRegister); - __ bind(skip); - %} - ins_pipe( pipe_slow ); -%} - -// No CMOVE with SSE/SSE2 -instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{ - predicate (UseSSE>=2); - match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "Jn$cop skip\n\t" - "MOVSD $dst,$src\t# float\n" - "skip:" %} 
- ins_encode %{ - Label skip; - // Invert sense of branch from sense of CMOV - __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); - __ movdbl($dst$$XMMRegister, $src$$XMMRegister); - __ bind(skip); - %} - ins_pipe( pipe_slow ); -%} - -// unsigned version -instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{ - predicate (UseSSE>=1); - match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "Jn$cop skip\n\t" - "MOVSS $dst,$src\t# float\n" - "skip:" %} - ins_encode %{ - Label skip; - // Invert sense of branch from sense of CMOV - __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); - __ movflt($dst$$XMMRegister, $src$$XMMRegister); - __ bind(skip); - %} - ins_pipe( pipe_slow ); -%} - -instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{ - predicate (UseSSE>=1); - match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovF_regU(cop, cr, dst, src); - %} -%} - -// unsigned version -instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{ - predicate (UseSSE>=2); - match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "Jn$cop skip\n\t" - "MOVSD $dst,$src\t# float\n" - "skip:" %} - ins_encode %{ - Label skip; - // Invert sense of branch from sense of CMOV - __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip); - __ movdbl($dst$$XMMRegister, $src$$XMMRegister); - __ bind(skip); - %} - ins_pipe( pipe_slow ); -%} - -instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{ - predicate (UseSSE>=2); - match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovD_regU(cop, cr, dst, src); - %} -%} - -instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{ - predicate(VM_Version::supports_cmov() ); - match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "CMOV$cop $dst.lo,$src.lo\n\t" - "CMOV$cop $dst.hi,$src.hi" %} - 
opcode(0x0F,0x40); - ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); - ins_pipe( pipe_cmov_reg_long ); -%} - -instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{ - predicate(VM_Version::supports_cmov() ); - match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); - ins_cost(200); - format %{ "CMOV$cop $dst.lo,$src.lo\n\t" - "CMOV$cop $dst.hi,$src.hi" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) ); - ins_pipe( pipe_cmov_reg_long ); -%} - -instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{ - predicate(VM_Version::supports_cmov() ); - match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); - ins_cost(200); - expand %{ - cmovL_regU(cop, cr, dst, src); - %} -%} - -//----------Arithmetic Instructions-------------------------------------------- -//----------Addition Instructions---------------------------------------------- - -// Integer Addition Instructions -instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ - match(Set dst (AddI dst src)); - effect(KILL cr); - - size(2); - format %{ "ADD $dst,$src" %} - opcode(0x03); - ins_encode( OpcP, RegReg( dst, src) ); - ins_pipe( ialu_reg_reg ); -%} - -instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ - match(Set dst (AddI dst src)); - effect(KILL cr); - - format %{ "ADD $dst,$src" %} - opcode(0x81, 0x00); /* /0 id */ - ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); - ins_pipe( ialu_reg ); -%} - -instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{ - predicate(UseIncDec); - match(Set dst (AddI dst src)); - effect(KILL cr); - - size(1); - format %{ "INC $dst" %} - opcode(0x40); /* */ - ins_encode( Opc_plus( primary, dst ) ); - ins_pipe( ialu_reg ); -%} - -instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{ - match(Set dst (AddI src0 src1)); - ins_cost(110); - - format %{ "LEA $dst,[$src0 + $src1]" %} - opcode(0x8D); /* 0x8D /r */ - 
ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark ); - ins_pipe( ialu_reg_reg ); -%} - -instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{ - match(Set dst (AddP src0 src1)); - ins_cost(110); - - format %{ "LEA $dst,[$src0 + $src1]\t# ptr" %} - opcode(0x8D); /* 0x8D /r */ - ins_encode( SetInstMark, OpcP, RegLea( dst, src0, src1 ), ClearInstMark ); - ins_pipe( ialu_reg_reg ); -%} - -instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{ - predicate(UseIncDec); - match(Set dst (AddI dst src)); - effect(KILL cr); - - size(1); - format %{ "DEC $dst" %} - opcode(0x48); /* */ - ins_encode( Opc_plus( primary, dst ) ); - ins_pipe( ialu_reg ); -%} - -instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{ - match(Set dst (AddP dst src)); - effect(KILL cr); - - size(2); - format %{ "ADD $dst,$src" %} - opcode(0x03); - ins_encode( OpcP, RegReg( dst, src) ); - ins_pipe( ialu_reg_reg ); -%} - -instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{ - match(Set dst (AddP dst src)); - effect(KILL cr); - - format %{ "ADD $dst,$src" %} - opcode(0x81,0x00); /* Opcode 81 /0 id */ - // ins_encode( RegImm( dst, src) ); - ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); - ins_pipe( ialu_reg ); -%} - -instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ - match(Set dst (AddI dst (LoadI src))); - effect(KILL cr); - - ins_cost(150); - format %{ "ADD $dst,$src" %} - opcode(0x03); - ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark ); - ins_pipe( ialu_reg_mem ); -%} - -instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ - match(Set dst (StoreI dst (AddI (LoadI dst) src))); - effect(KILL cr); - - ins_cost(150); - format %{ "ADD $dst,$src" %} - opcode(0x01); /* Opcode 01 /r */ - ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark ); - ins_pipe( ialu_mem_reg ); -%} - -// Add Memory with Immediate -instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ - match(Set dst (StoreI dst (AddI (LoadI dst) 
src))); - effect(KILL cr); - - ins_cost(125); - format %{ "ADD $dst,$src" %} - opcode(0x81); /* Opcode 81 /0 id */ - ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32(src), ClearInstMark ); - ins_pipe( ialu_mem_imm ); -%} - -instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{ - match(Set dst (StoreI dst (AddI (LoadI dst) src))); - effect(KILL cr); - - ins_cost(125); - format %{ "INC $dst" %} - opcode(0xFF); /* Opcode FF /0 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,dst), ClearInstMark); - ins_pipe( ialu_mem_imm ); -%} - -instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{ - match(Set dst (StoreI dst (AddI (LoadI dst) src))); - effect(KILL cr); - - ins_cost(125); - format %{ "DEC $dst" %} - opcode(0xFF); /* Opcode FF /1 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(0x01,dst), ClearInstMark); - ins_pipe( ialu_mem_imm ); -%} - - -instruct checkCastPP( eRegP dst ) %{ - match(Set dst (CheckCastPP dst)); - - size(0); - format %{ "#checkcastPP of $dst" %} - ins_encode( /*empty encoding*/ ); - ins_pipe( empty ); -%} - -instruct castPP( eRegP dst ) %{ - match(Set dst (CastPP dst)); - format %{ "#castPP of $dst" %} - ins_encode( /*empty encoding*/ ); - ins_pipe( empty ); -%} - -instruct castII( rRegI dst ) %{ - match(Set dst (CastII dst)); - format %{ "#castII of $dst" %} - ins_encode( /*empty encoding*/ ); - ins_cost(0); - ins_pipe( empty ); -%} - -instruct castLL( eRegL dst ) %{ - match(Set dst (CastLL dst)); - format %{ "#castLL of $dst" %} - ins_encode( /*empty encoding*/ ); - ins_cost(0); - ins_pipe( empty ); -%} - -instruct castFF( regF dst ) %{ - predicate(UseSSE >= 1); - match(Set dst (CastFF dst)); - format %{ "#castFF of $dst" %} - ins_encode( /*empty encoding*/ ); - ins_cost(0); - ins_pipe( empty ); -%} - -instruct castDD( regD dst ) %{ - predicate(UseSSE >= 2); - match(Set dst (CastDD dst)); - format %{ "#castDD of $dst" %} - ins_encode( /*empty encoding*/ ); - ins_cost(0); - ins_pipe( empty ); -%} - -instruct castFF_PR( 
regFPR dst ) %{ - predicate(UseSSE < 1); - match(Set dst (CastFF dst)); - format %{ "#castFF of $dst" %} - ins_encode( /*empty encoding*/ ); - ins_cost(0); - ins_pipe( empty ); -%} - -instruct castDD_PR( regDPR dst ) %{ - predicate(UseSSE < 2); - match(Set dst (CastDD dst)); - format %{ "#castDD of $dst" %} - ins_encode( /*empty encoding*/ ); - ins_cost(0); - ins_pipe( empty ); -%} - -// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them - -instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ - match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); - match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval))); - effect(KILL cr, KILL oldval); - format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" - "MOV $res,0\n\t" - "JNE,s fail\n\t" - "MOV $res,1\n" - "fail:" %} - ins_encode( enc_cmpxchg8(mem_ptr), - enc_flags_ne_to_boolean(res) ); - ins_pipe( pipe_cmpxchg ); -%} - -instruct compareAndSwapP( rRegI res, pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ - match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); - match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval))); - effect(KILL cr, KILL oldval); - format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" - "MOV $res,0\n\t" - "JNE,s fail\n\t" - "MOV $res,1\n" - "fail:" %} - ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); - ins_pipe( pipe_cmpxchg ); -%} - -instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{ - match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval))); - match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval))); - effect(KILL cr, KILL oldval); - format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" - "MOV $res,0\n\t" - "JNE,s fail\n\t" - "MOV 
$res,1\n" - "fail:" %} - ins_encode( enc_cmpxchgb(mem_ptr), - enc_flags_ne_to_boolean(res) ); - ins_pipe( pipe_cmpxchg ); -%} - -instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{ - match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval))); - match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval))); - effect(KILL cr, KILL oldval); - format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" - "MOV $res,0\n\t" - "JNE,s fail\n\t" - "MOV $res,1\n" - "fail:" %} - ins_encode( enc_cmpxchgw(mem_ptr), - enc_flags_ne_to_boolean(res) ); - ins_pipe( pipe_cmpxchg ); -%} - -instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ - match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); - match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval))); - effect(KILL cr, KILL oldval); - format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" - "MOV $res,0\n\t" - "JNE,s fail\n\t" - "MOV $res,1\n" - "fail:" %} - ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) ); - ins_pipe( pipe_cmpxchg ); -%} - -instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{ - match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval))); - effect(KILL cr); - format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} - ins_encode( enc_cmpxchg8(mem_ptr) ); - ins_pipe( pipe_cmpxchg ); -%} - -instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{ - match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval))); - effect(KILL cr); - format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} - ins_encode( enc_cmpxchg(mem_ptr) ); - ins_pipe( pipe_cmpxchg ); -%} - -instruct compareAndExchangeB( pRegP 
mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ - match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval))); - effect(KILL cr); - format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} - ins_encode( enc_cmpxchgb(mem_ptr) ); - ins_pipe( pipe_cmpxchg ); -%} - -instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ - match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval))); - effect(KILL cr); - format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} - ins_encode( enc_cmpxchgw(mem_ptr) ); - ins_pipe( pipe_cmpxchg ); -%} - -instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{ - match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval))); - effect(KILL cr); - format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %} - ins_encode( enc_cmpxchg(mem_ptr) ); - ins_pipe( pipe_cmpxchg ); -%} - -instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ - predicate(n->as_LoadStore()->result_not_used()); - match(Set dummy (GetAndAddB mem add)); - effect(KILL cr); - format %{ "ADDB [$mem],$add" %} - ins_encode %{ - __ lock(); - __ addb($mem$$Address, $add$$constant); - %} - ins_pipe( pipe_cmpxchg ); -%} - -// Important to match to xRegI: only 8-bit regs. 
-instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{ - match(Set newval (GetAndAddB mem newval)); - effect(KILL cr); - format %{ "XADDB [$mem],$newval" %} - ins_encode %{ - __ lock(); - __ xaddb($mem$$Address, $newval$$Register); - %} - ins_pipe( pipe_cmpxchg ); -%} - -instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ - predicate(n->as_LoadStore()->result_not_used()); - match(Set dummy (GetAndAddS mem add)); - effect(KILL cr); - format %{ "ADDS [$mem],$add" %} - ins_encode %{ - __ lock(); - __ addw($mem$$Address, $add$$constant); - %} - ins_pipe( pipe_cmpxchg ); -%} - -instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{ - match(Set newval (GetAndAddS mem newval)); - effect(KILL cr); - format %{ "XADDS [$mem],$newval" %} - ins_encode %{ - __ lock(); - __ xaddw($mem$$Address, $newval$$Register); - %} - ins_pipe( pipe_cmpxchg ); -%} - -instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{ - predicate(n->as_LoadStore()->result_not_used()); - match(Set dummy (GetAndAddI mem add)); - effect(KILL cr); - format %{ "ADDL [$mem],$add" %} - ins_encode %{ - __ lock(); - __ addl($mem$$Address, $add$$constant); - %} - ins_pipe( pipe_cmpxchg ); -%} - -instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{ - match(Set newval (GetAndAddI mem newval)); - effect(KILL cr); - format %{ "XADDL [$mem],$newval" %} - ins_encode %{ - __ lock(); - __ xaddl($mem$$Address, $newval$$Register); - %} - ins_pipe( pipe_cmpxchg ); -%} - -// Important to match to xRegI: only 8-bit regs. 
-instruct xchgB( memory mem, xRegI newval) %{ - match(Set newval (GetAndSetB mem newval)); - format %{ "XCHGB $newval,[$mem]" %} - ins_encode %{ - __ xchgb($newval$$Register, $mem$$Address); - %} - ins_pipe( pipe_cmpxchg ); -%} - -instruct xchgS( memory mem, rRegI newval) %{ - match(Set newval (GetAndSetS mem newval)); - format %{ "XCHGW $newval,[$mem]" %} - ins_encode %{ - __ xchgw($newval$$Register, $mem$$Address); - %} - ins_pipe( pipe_cmpxchg ); -%} - -instruct xchgI( memory mem, rRegI newval) %{ - match(Set newval (GetAndSetI mem newval)); - format %{ "XCHGL $newval,[$mem]" %} - ins_encode %{ - __ xchgl($newval$$Register, $mem$$Address); - %} - ins_pipe( pipe_cmpxchg ); -%} - -instruct xchgP( memory mem, pRegP newval) %{ - match(Set newval (GetAndSetP mem newval)); - format %{ "XCHGL $newval,[$mem]" %} - ins_encode %{ - __ xchgl($newval$$Register, $mem$$Address); - %} - ins_pipe( pipe_cmpxchg ); -%} - -//----------Subtraction Instructions------------------------------------------- - -// Integer Subtraction Instructions -instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ - match(Set dst (SubI dst src)); - effect(KILL cr); - - size(2); - format %{ "SUB $dst,$src" %} - opcode(0x2B); - ins_encode( OpcP, RegReg( dst, src) ); - ins_pipe( ialu_reg_reg ); -%} - -instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ - match(Set dst (SubI dst src)); - effect(KILL cr); - - format %{ "SUB $dst,$src" %} - opcode(0x81,0x05); /* Opcode 81 /5 */ - // ins_encode( RegImm( dst, src) ); - ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); - ins_pipe( ialu_reg ); -%} - -instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ - match(Set dst (SubI dst (LoadI src))); - effect(KILL cr); - - ins_cost(150); - format %{ "SUB $dst,$src" %} - opcode(0x2B); - ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark ); - ins_pipe( ialu_reg_mem ); -%} - -instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ - match(Set dst (StoreI dst (SubI (LoadI dst) 
src))); - effect(KILL cr); - - ins_cost(150); - format %{ "SUB $dst,$src" %} - opcode(0x29); /* Opcode 29 /r */ - ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark ); - ins_pipe( ialu_mem_reg ); -%} - -// Subtract from a pointer -instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{ - match(Set dst (AddP dst (SubI zero src))); - effect(KILL cr); - - size(2); - format %{ "SUB $dst,$src" %} - opcode(0x2B); - ins_encode( OpcP, RegReg( dst, src) ); - ins_pipe( ialu_reg_reg ); -%} - -instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{ - match(Set dst (SubI zero dst)); - effect(KILL cr); - - size(2); - format %{ "NEG $dst" %} - opcode(0xF7,0x03); // Opcode F7 /3 - ins_encode( OpcP, RegOpc( dst ) ); - ins_pipe( ialu_reg ); -%} - -//----------Multiplication/Division Instructions------------------------------- -// Integer Multiplication Instructions -// Multiply Register -instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ - match(Set dst (MulI dst src)); - effect(KILL cr); - - size(3); - ins_cost(300); - format %{ "IMUL $dst,$src" %} - opcode(0xAF, 0x0F); - ins_encode( OpcS, OpcP, RegReg( dst, src) ); - ins_pipe( ialu_reg_reg_alu0 ); -%} - -// Multiply 32-bit Immediate -instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{ - match(Set dst (MulI src imm)); - effect(KILL cr); - - ins_cost(300); - format %{ "IMUL $dst,$src,$imm" %} - opcode(0x69); /* 69 /r id */ - ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) ); - ins_pipe( ialu_reg_reg_alu0 ); -%} - -instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{ - match(Set dst src); - effect(KILL cr); - - // Note that this is artificially increased to make it more expensive than loadConL - ins_cost(250); - format %{ "MOV EAX,$src\t// low word only" %} - opcode(0xB8); - ins_encode( LdImmL_Lo(dst, src) ); - ins_pipe( ialu_reg_fat ); -%} - -// Multiply by 32-bit Immediate, taking the shifted high order results -// (special case for 
shift by 32) -instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{ - match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); - predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && - _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && - _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); - effect(USE src1, KILL cr); - - // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only - ins_cost(0*100 + 1*400 - 150); - format %{ "IMUL EDX:EAX,$src1" %} - ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); - ins_pipe( pipe_slow ); -%} - -// Multiply by 32-bit Immediate, taking the shifted high order results -instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{ - match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt))); - predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL && - _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint && - _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint ); - effect(USE src1, KILL cr); - - // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only - ins_cost(1*100 + 1*400 - 150); - format %{ "IMUL EDX:EAX,$src1\n\t" - "SAR EDX,$cnt-32" %} - ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) ); - ins_pipe( pipe_slow ); -%} - -// Multiply Memory 32-bit Immediate -instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{ - match(Set dst (MulI (LoadI src) imm)); - effect(KILL cr); - - ins_cost(300); - format %{ "IMUL $dst,$src,$imm" %} - opcode(0x69); /* 69 /r id */ - ins_encode( SetInstMark, OpcSE(imm), RegMem( dst, src ), Con8or32( imm ), ClearInstMark ); - ins_pipe( ialu_reg_mem_alu0 ); -%} - -// Multiply Memory 
-instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{ - match(Set dst (MulI dst (LoadI src))); - effect(KILL cr); - - ins_cost(350); - format %{ "IMUL $dst,$src" %} - opcode(0xAF, 0x0F); - ins_encode( SetInstMark, OpcS, OpcP, RegMem( dst, src), ClearInstMark ); - ins_pipe( ialu_reg_mem_alu0 ); -%} - -instruct mulAddS2I_rReg(rRegI dst, rRegI src1, rRegI src2, rRegI src3, eFlagsReg cr) -%{ - match(Set dst (MulAddS2I (Binary dst src1) (Binary src2 src3))); - effect(KILL cr, KILL src2); - - expand %{ mulI_eReg(dst, src1, cr); - mulI_eReg(src2, src3, cr); - addI_eReg(dst, src2, cr); %} -%} - -// Multiply Register Int to Long -instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{ - // Basic Idea: long = (long)int * (long)int - match(Set dst (MulL (ConvI2L src) (ConvI2L src1))); - effect(DEF dst, USE src, USE src1, KILL flags); - - ins_cost(300); - format %{ "IMUL $dst,$src1" %} - - ins_encode( long_int_multiply( dst, src1 ) ); - ins_pipe( ialu_reg_reg_alu0 ); -%} - -instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{ - // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL) - match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask))); - effect(KILL flags); - - ins_cost(300); - format %{ "MUL $dst,$src1" %} - - ins_encode( long_uint_multiply(dst, src1) ); - ins_pipe( ialu_reg_reg_alu0 ); -%} - -// Multiply Register Long -instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ - match(Set dst (MulL dst src)); - effect(KILL cr, TEMP tmp); - ins_cost(4*100+3*400); -// Basic idea: lo(result) = lo(x_lo * y_lo) -// hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi) - format %{ "MOV $tmp,$src.lo\n\t" - "IMUL $tmp,EDX\n\t" - "MOV EDX,$src.hi\n\t" - "IMUL EDX,EAX\n\t" - "ADD $tmp,EDX\n\t" - "MUL EDX:EAX,$src.lo\n\t" - "ADD EDX,$tmp" %} - ins_encode( long_multiply( dst, src, tmp ) ); - ins_pipe( pipe_slow ); -%} - -// Multiply Register Long where the left 
operand's high 32 bits are zero -instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ - predicate(is_operand_hi32_zero(n->in(1))); - match(Set dst (MulL dst src)); - effect(KILL cr, TEMP tmp); - ins_cost(2*100+2*400); -// Basic idea: lo(result) = lo(x_lo * y_lo) -// hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0 - format %{ "MOV $tmp,$src.hi\n\t" - "IMUL $tmp,EAX\n\t" - "MUL EDX:EAX,$src.lo\n\t" - "ADD EDX,$tmp" %} - ins_encode %{ - __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register)); - __ imull($tmp$$Register, rax); - __ mull($src$$Register); - __ addl(rdx, $tmp$$Register); - %} - ins_pipe( pipe_slow ); -%} - -// Multiply Register Long where the right operand's high 32 bits are zero -instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{ - predicate(is_operand_hi32_zero(n->in(2))); - match(Set dst (MulL dst src)); - effect(KILL cr, TEMP tmp); - ins_cost(2*100+2*400); -// Basic idea: lo(result) = lo(x_lo * y_lo) -// hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0 - format %{ "MOV $tmp,$src.lo\n\t" - "IMUL $tmp,EDX\n\t" - "MUL EDX:EAX,$src.lo\n\t" - "ADD EDX,$tmp" %} - ins_encode %{ - __ movl($tmp$$Register, $src$$Register); - __ imull($tmp$$Register, rdx); - __ mull($src$$Register); - __ addl(rdx, $tmp$$Register); - %} - ins_pipe( pipe_slow ); -%} - -// Multiply Register Long where the left and the right operands' high 32 bits are zero -instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{ - predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2))); - match(Set dst (MulL dst src)); - effect(KILL cr); - ins_cost(1*400); -// Basic idea: lo(result) = lo(x_lo * y_lo) -// hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0 - format %{ "MUL EDX:EAX,$src.lo\n\t" %} - ins_encode %{ - __ mull($src$$Register); - %} - ins_pipe( pipe_slow ); -%} - -// Multiply 
Register Long by small constant -instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{ - match(Set dst (MulL dst src)); - effect(KILL cr, TEMP tmp); - ins_cost(2*100+2*400); - size(12); -// Basic idea: lo(result) = lo(src * EAX) -// hi(result) = hi(src * EAX) + lo(src * EDX) - format %{ "IMUL $tmp,EDX,$src\n\t" - "MOV EDX,$src\n\t" - "MUL EDX\t# EDX*EAX -> EDX:EAX\n\t" - "ADD EDX,$tmp" %} - ins_encode( long_multiply_con( dst, src, tmp ) ); - ins_pipe( pipe_slow ); -%} - -// Integer DIV with Register -instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ - match(Set rax (DivI rax div)); - effect(KILL rdx, KILL cr); - size(26); - ins_cost(30*100+10*100); - format %{ "CMP EAX,0x80000000\n\t" - "JNE,s normal\n\t" - "XOR EDX,EDX\n\t" - "CMP ECX,-1\n\t" - "JE,s done\n" - "normal: CDQ\n\t" - "IDIV $div\n\t" - "done:" %} - opcode(0xF7, 0x7); /* Opcode F7 /7 */ - ins_encode( cdq_enc, OpcP, RegOpc(div) ); - ins_pipe( ialu_reg_reg_alu0 ); -%} - -// Divide Register Long -instruct divL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{ - match(Set dst (DivL src1 src2)); - effect(CALL); - ins_cost(10000); - format %{ "PUSH $src1.hi\n\t" - "PUSH $src1.lo\n\t" - "PUSH $src2.hi\n\t" - "PUSH $src2.lo\n\t" - "CALL SharedRuntime::ldiv\n\t" - "ADD ESP,16" %} - ins_encode( long_div(src1,src2) ); - ins_pipe( pipe_slow ); -%} - -// Integer DIVMOD with Register, both quotient and mod results -instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{ - match(DivModI rax div); - effect(KILL cr); - size(26); - ins_cost(30*100+10*100); - format %{ "CMP EAX,0x80000000\n\t" - "JNE,s normal\n\t" - "XOR EDX,EDX\n\t" - "CMP ECX,-1\n\t" - "JE,s done\n" - "normal: CDQ\n\t" - "IDIV $div\n\t" - "done:" %} - opcode(0xF7, 0x7); /* Opcode F7 /7 */ - ins_encode( cdq_enc, OpcP, RegOpc(div) ); - ins_pipe( pipe_slow ); -%} - -// Integer MOD with Register -instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{ - match(Set 
rdx (ModI rax div)); - effect(KILL rax, KILL cr); - - size(26); - ins_cost(300); - format %{ "CDQ\n\t" - "IDIV $div" %} - opcode(0xF7, 0x7); /* Opcode F7 /7 */ - ins_encode( cdq_enc, OpcP, RegOpc(div) ); - ins_pipe( ialu_reg_reg_alu0 ); -%} - -// Remainder Register Long -instruct modL_eReg(eADXRegL dst, eRegL src1, eRegL src2) %{ - match(Set dst (ModL src1 src2)); - effect(CALL); - ins_cost(10000); - format %{ "PUSH $src1.hi\n\t" - "PUSH $src1.lo\n\t" - "PUSH $src2.hi\n\t" - "PUSH $src2.lo\n\t" - "CALL SharedRuntime::lrem\n\t" - "ADD ESP,16" %} - ins_encode( long_mod(src1,src2) ); - ins_pipe( pipe_slow ); -%} - -// Divide Register Long (no special case since divisor != -1) -instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ - match(Set dst (DivL dst imm)); - effect( TEMP tmp, TEMP tmp2, KILL cr ); - ins_cost(1000); - format %{ "MOV $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t" - "XOR $tmp2,$tmp2\n\t" - "CMP $tmp,EDX\n\t" - "JA,s fast\n\t" - "MOV $tmp2,EAX\n\t" - "MOV EAX,EDX\n\t" - "MOV EDX,0\n\t" - "JLE,s pos\n\t" - "LNEG EAX : $tmp2\n\t" - "DIV $tmp # unsigned division\n\t" - "XCHG EAX,$tmp2\n\t" - "DIV $tmp\n\t" - "LNEG $tmp2 : EAX\n\t" - "JMP,s done\n" - "pos:\n\t" - "DIV $tmp\n\t" - "XCHG EAX,$tmp2\n" - "fast:\n\t" - "DIV $tmp\n" - "done:\n\t" - "MOV EDX,$tmp2\n\t" - "NEG EDX:EAX # if $imm < 0" %} - ins_encode %{ - int con = (int)$imm$$constant; - assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); - int pcon = (con > 0) ? 
con : -con; - Label Lfast, Lpos, Ldone; - - __ movl($tmp$$Register, pcon); - __ xorl($tmp2$$Register,$tmp2$$Register); - __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); - __ jccb(Assembler::above, Lfast); // result fits into 32 bit - - __ movl($tmp2$$Register, $dst$$Register); // save - __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); - __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags - __ jccb(Assembler::lessEqual, Lpos); // result is positive - - // Negative dividend. - // convert value to positive to use unsigned division - __ lneg($dst$$Register, $tmp2$$Register); - __ divl($tmp$$Register); - __ xchgl($dst$$Register, $tmp2$$Register); - __ divl($tmp$$Register); - // revert result back to negative - __ lneg($tmp2$$Register, $dst$$Register); - __ jmpb(Ldone); - - __ bind(Lpos); - __ divl($tmp$$Register); // Use unsigned division - __ xchgl($dst$$Register, $tmp2$$Register); - // Fallthrow for final divide, tmp2 has 32 bit hi result - - __ bind(Lfast); - // fast path: src is positive - __ divl($tmp$$Register); // Use unsigned division - - __ bind(Ldone); - __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register); - if (con < 0) { - __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register); - } - %} - ins_pipe( pipe_slow ); -%} - -// Remainder Register Long (remainder fit into 32 bits) -instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{ - match(Set dst (ModL dst imm)); - effect( TEMP tmp, TEMP tmp2, KILL cr ); - ins_cost(1000); - format %{ "MOV $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t" - "CMP $tmp,EDX\n\t" - "JA,s fast\n\t" - "MOV $tmp2,EAX\n\t" - "MOV EAX,EDX\n\t" - "MOV EDX,0\n\t" - "JLE,s pos\n\t" - "LNEG EAX : $tmp2\n\t" - "DIV $tmp # unsigned division\n\t" - "MOV EAX,$tmp2\n\t" - "DIV $tmp\n\t" - "NEG EDX\n\t" - "JMP,s done\n" - "pos:\n\t" - "DIV $tmp\n\t" - "MOV EAX,$tmp2\n" - "fast:\n\t" - "DIV $tmp\n" - "done:\n\t" - "MOV EAX,EDX\n\t" - "SAR EDX,31\n\t" %} - ins_encode %{ - int con = 
(int)$imm$$constant; - assert(con != 0 && con != -1 && con != min_jint, "wrong divisor"); - int pcon = (con > 0) ? con : -con; - Label Lfast, Lpos, Ldone; - - __ movl($tmp$$Register, pcon); - __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register)); - __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit - - __ movl($tmp2$$Register, $dst$$Register); // save - __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); - __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags - __ jccb(Assembler::lessEqual, Lpos); // result is positive - - // Negative dividend. - // convert value to positive to use unsigned division - __ lneg($dst$$Register, $tmp2$$Register); - __ divl($tmp$$Register); - __ movl($dst$$Register, $tmp2$$Register); - __ divl($tmp$$Register); - // revert remainder back to negative - __ negl(HIGH_FROM_LOW($dst$$Register)); - __ jmpb(Ldone); - - __ bind(Lpos); - __ divl($tmp$$Register); - __ movl($dst$$Register, $tmp2$$Register); - - __ bind(Lfast); - // fast path: src is positive - __ divl($tmp$$Register); - - __ bind(Ldone); - __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register)); - __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign - - %} - ins_pipe( pipe_slow ); -%} - -// Integer Shift Instructions -// Shift Left by one -instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ - match(Set dst (LShiftI dst shift)); - effect(KILL cr); - - size(2); - format %{ "SHL $dst,$shift" %} - opcode(0xD1, 0x4); /* D1 /4 */ - ins_encode( OpcP, RegOpc( dst ) ); - ins_pipe( ialu_reg ); -%} - -// Shift Left by 8-bit immediate -instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ - match(Set dst (LShiftI dst shift)); - effect(KILL cr); - - size(3); - format %{ "SHL $dst,$shift" %} - opcode(0xC1, 0x4); /* C1 /4 ib */ - ins_encode( RegOpcImm( dst, shift) ); - ins_pipe( ialu_reg ); -%} - -// Shift Left by variable -instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ - match(Set dst (LShiftI dst shift)); - 
effect(KILL cr); - - size(2); - format %{ "SHL $dst,$shift" %} - opcode(0xD3, 0x4); /* D3 /4 */ - ins_encode( OpcP, RegOpc( dst ) ); - ins_pipe( ialu_reg_reg ); -%} - -// Arithmetic shift right by one -instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ - match(Set dst (RShiftI dst shift)); - effect(KILL cr); - - size(2); - format %{ "SAR $dst,$shift" %} - opcode(0xD1, 0x7); /* D1 /7 */ - ins_encode( OpcP, RegOpc( dst ) ); - ins_pipe( ialu_reg ); -%} - -// Arithmetic shift right by one -instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{ - match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); - effect(KILL cr); - format %{ "SAR $dst,$shift" %} - opcode(0xD1, 0x7); /* D1 /7 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary,dst), ClearInstMark ); - ins_pipe( ialu_mem_imm ); -%} - -// Arithmetic Shift Right by 8-bit immediate -instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ - match(Set dst (RShiftI dst shift)); - effect(KILL cr); - - size(3); - format %{ "SAR $dst,$shift" %} - opcode(0xC1, 0x7); /* C1 /7 ib */ - ins_encode( RegOpcImm( dst, shift ) ); - ins_pipe( ialu_mem_imm ); -%} - -// Arithmetic Shift Right by 8-bit immediate -instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{ - match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); - effect(KILL cr); - - format %{ "SAR $dst,$shift" %} - opcode(0xC1, 0x7); /* C1 /7 ib */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(secondary, dst ), Con8or32(shift), ClearInstMark ); - ins_pipe( ialu_mem_imm ); -%} - -// Arithmetic Shift Right by variable -instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ - match(Set dst (RShiftI dst shift)); - effect(KILL cr); - - size(2); - format %{ "SAR $dst,$shift" %} - opcode(0xD3, 0x7); /* D3 /7 */ - ins_encode( OpcP, RegOpc( dst ) ); - ins_pipe( ialu_reg_reg ); -%} - -// Logical shift right by one -instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ - match(Set dst (URShiftI dst shift)); - effect(KILL cr); 
- - size(2); - format %{ "SHR $dst,$shift" %} - opcode(0xD1, 0x5); /* D1 /5 */ - ins_encode( OpcP, RegOpc( dst ) ); - ins_pipe( ialu_reg ); -%} - -// Logical Shift Right by 8-bit immediate -instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{ - match(Set dst (URShiftI dst shift)); - effect(KILL cr); - - size(3); - format %{ "SHR $dst,$shift" %} - opcode(0xC1, 0x5); /* C1 /5 ib */ - ins_encode( RegOpcImm( dst, shift) ); - ins_pipe( ialu_reg ); -%} - - -// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. -// This idiom is used by the compiler for the i2b bytecode. -instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{ - match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); - - size(3); - format %{ "MOVSX $dst,$src :8" %} - ins_encode %{ - __ movsbl($dst$$Register, $src$$Register); - %} - ins_pipe(ialu_reg_reg); -%} - -// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. -// This idiom is used by the compiler the i2s bytecode. -instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{ - match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); - - size(3); - format %{ "MOVSX $dst,$src :16" %} - ins_encode %{ - __ movswl($dst$$Register, $src$$Register); - %} - ins_pipe(ialu_reg_reg); -%} - - -// Logical Shift Right by variable -instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{ - match(Set dst (URShiftI dst shift)); - effect(KILL cr); - - size(2); - format %{ "SHR $dst,$shift" %} - opcode(0xD3, 0x5); /* D3 /5 */ - ins_encode( OpcP, RegOpc( dst ) ); - ins_pipe( ialu_reg_reg ); -%} - - -//----------Logical Instructions----------------------------------------------- -//----------Integer Logical Instructions--------------------------------------- -// And Instructions -// And Register with Register -instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ - match(Set dst (AndI dst src)); - effect(KILL cr); - - size(2); - format %{ "AND $dst,$src" %} - opcode(0x23); - ins_encode( OpcP, RegReg( dst, src) ); 
- ins_pipe( ialu_reg_reg ); -%} - -// And Register with Immediate -instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ - match(Set dst (AndI dst src)); - effect(KILL cr); - - format %{ "AND $dst,$src" %} - opcode(0x81,0x04); /* Opcode 81 /4 */ - // ins_encode( RegImm( dst, src) ); - ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); - ins_pipe( ialu_reg ); -%} - -// And Register with Memory -instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ - match(Set dst (AndI dst (LoadI src))); - effect(KILL cr); - - ins_cost(150); - format %{ "AND $dst,$src" %} - opcode(0x23); - ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark ); - ins_pipe( ialu_reg_mem ); -%} - -// And Memory with Register -instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ - match(Set dst (StoreI dst (AndI (LoadI dst) src))); - effect(KILL cr); - - ins_cost(150); - format %{ "AND $dst,$src" %} - opcode(0x21); /* Opcode 21 /r */ - ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark ); - ins_pipe( ialu_mem_reg ); -%} - -// And Memory with Immediate -instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ - match(Set dst (StoreI dst (AndI (LoadI dst) src))); - effect(KILL cr); - - ins_cost(125); - format %{ "AND $dst,$src" %} - opcode(0x81, 0x4); /* Opcode 81 /4 id */ - // ins_encode( MemImm( dst, src) ); - ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark ); - ins_pipe( ialu_mem_imm ); -%} - -// BMI1 instructions -instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{ - match(Set dst (AndI (XorI src1 minus_1) src2)); - predicate(UseBMI1Instructions); - effect(KILL cr); - - format %{ "ANDNL $dst, $src1, $src2" %} - - ins_encode %{ - __ andnl($dst$$Register, $src1$$Register, $src2$$Register); - %} - ins_pipe(ialu_reg); -%} - -instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{ - match(Set dst (AndI (XorI src1 
minus_1) (LoadI src2) )); - predicate(UseBMI1Instructions); - effect(KILL cr); - - ins_cost(125); - format %{ "ANDNL $dst, $src1, $src2" %} - - ins_encode %{ - __ andnl($dst$$Register, $src1$$Register, $src2$$Address); - %} - ins_pipe(ialu_reg_mem); -%} - -instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{ - match(Set dst (AndI (SubI imm_zero src) src)); - predicate(UseBMI1Instructions); - effect(KILL cr); - - format %{ "BLSIL $dst, $src" %} - - ins_encode %{ - __ blsil($dst$$Register, $src$$Register); - %} - ins_pipe(ialu_reg); -%} - -instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{ - match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )); - predicate(UseBMI1Instructions); - effect(KILL cr); - - ins_cost(125); - format %{ "BLSIL $dst, $src" %} - - ins_encode %{ - __ blsil($dst$$Register, $src$$Address); - %} - ins_pipe(ialu_reg_mem); -%} - -instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) -%{ - match(Set dst (XorI (AddI src minus_1) src)); - predicate(UseBMI1Instructions); - effect(KILL cr); - - format %{ "BLSMSKL $dst, $src" %} - - ins_encode %{ - __ blsmskl($dst$$Register, $src$$Register); - %} - - ins_pipe(ialu_reg); -%} - -instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr) -%{ - match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) )); - predicate(UseBMI1Instructions); - effect(KILL cr); - - ins_cost(125); - format %{ "BLSMSKL $dst, $src" %} - - ins_encode %{ - __ blsmskl($dst$$Register, $src$$Address); - %} - - ins_pipe(ialu_reg_mem); -%} - -instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr) -%{ - match(Set dst (AndI (AddI src minus_1) src) ); - predicate(UseBMI1Instructions); - effect(KILL cr); - - format %{ "BLSRL $dst, $src" %} - - ins_encode %{ - __ blsrl($dst$$Register, $src$$Register); - %} - - ins_pipe(ialu_reg); -%} - -instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg 
cr) -%{ - match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) )); - predicate(UseBMI1Instructions); - effect(KILL cr); - - ins_cost(125); - format %{ "BLSRL $dst, $src" %} - - ins_encode %{ - __ blsrl($dst$$Register, $src$$Address); - %} - - ins_pipe(ialu_reg_mem); -%} - -// Or Instructions -// Or Register with Register -instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ - match(Set dst (OrI dst src)); - effect(KILL cr); - - size(2); - format %{ "OR $dst,$src" %} - opcode(0x0B); - ins_encode( OpcP, RegReg( dst, src) ); - ins_pipe( ialu_reg_reg ); -%} - -instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{ - match(Set dst (OrI dst (CastP2X src))); - effect(KILL cr); - - size(2); - format %{ "OR $dst,$src" %} - opcode(0x0B); - ins_encode( OpcP, RegReg( dst, src) ); - ins_pipe( ialu_reg_reg ); -%} - - -// Or Register with Immediate -instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ - match(Set dst (OrI dst src)); - effect(KILL cr); - - format %{ "OR $dst,$src" %} - opcode(0x81,0x01); /* Opcode 81 /1 id */ - // ins_encode( RegImm( dst, src) ); - ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); - ins_pipe( ialu_reg ); -%} - -// Or Register with Memory -instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ - match(Set dst (OrI dst (LoadI src))); - effect(KILL cr); - - ins_cost(150); - format %{ "OR $dst,$src" %} - opcode(0x0B); - ins_encode( SetInstMark, OpcP, RegMem( dst, src), ClearInstMark ); - ins_pipe( ialu_reg_mem ); -%} - -// Or Memory with Register -instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ - match(Set dst (StoreI dst (OrI (LoadI dst) src))); - effect(KILL cr); - - ins_cost(150); - format %{ "OR $dst,$src" %} - opcode(0x09); /* Opcode 09 /r */ - ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark ); - ins_pipe( ialu_mem_reg ); -%} - -// Or Memory with Immediate -instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ - match(Set dst (StoreI dst (OrI (LoadI dst) src))); - 
effect(KILL cr); - - ins_cost(125); - format %{ "OR $dst,$src" %} - opcode(0x81,0x1); /* Opcode 81 /1 id */ - // ins_encode( MemImm( dst, src) ); - ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark ); - ins_pipe( ialu_mem_imm ); -%} - -// ROL/ROR -// ROL expand -instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ - effect(USE_DEF dst, USE shift, KILL cr); - - format %{ "ROL $dst, $shift" %} - opcode(0xD1, 0x0); /* Opcode D1 /0 */ - ins_encode( OpcP, RegOpc( dst )); - ins_pipe( ialu_reg ); -%} - -instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ - effect(USE_DEF dst, USE shift, KILL cr); - - format %{ "ROL $dst, $shift" %} - opcode(0xC1, 0x0); /*Opcode /C1 /0 */ - ins_encode( RegOpcImm(dst, shift) ); - ins_pipe(ialu_reg); -%} - -instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{ - effect(USE_DEF dst, USE shift, KILL cr); - - format %{ "ROL $dst, $shift" %} - opcode(0xD3, 0x0); /* Opcode D3 /0 */ - ins_encode(OpcP, RegOpc(dst)); - ins_pipe( ialu_reg_reg ); -%} -// end of ROL expand - -// ROL 32bit by one once -instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{ - match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); - - expand %{ - rolI_eReg_imm1(dst, lshift, cr); - %} -%} - -// ROL 32bit var by imm8 once -instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{ - predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); - match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift))); - - expand %{ - rolI_eReg_imm8(dst, lshift, cr); - %} -%} - -// ROL 32bit var by var once -instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{ - match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift)))); - - expand %{ - rolI_eReg_CL(dst, shift, cr); - %} -%} - -// ROL 32bit var by var once -instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, 
eFlagsReg cr) %{ - match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift)))); - - expand %{ - rolI_eReg_CL(dst, shift, cr); - %} -%} - -// ROR expand -instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{ - effect(USE_DEF dst, USE shift, KILL cr); - - format %{ "ROR $dst, $shift" %} - opcode(0xD1,0x1); /* Opcode D1 /1 */ - ins_encode( OpcP, RegOpc( dst ) ); - ins_pipe( ialu_reg ); -%} - -instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{ - effect (USE_DEF dst, USE shift, KILL cr); - - format %{ "ROR $dst, $shift" %} - opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */ - ins_encode( RegOpcImm(dst, shift) ); - ins_pipe( ialu_reg ); -%} - -instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{ - effect(USE_DEF dst, USE shift, KILL cr); - - format %{ "ROR $dst, $shift" %} - opcode(0xD3, 0x1); /* Opcode D3 /1 */ - ins_encode(OpcP, RegOpc(dst)); - ins_pipe( ialu_reg_reg ); -%} -// end of ROR expand - -// ROR right once -instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{ - match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); - - expand %{ - rorI_eReg_imm1(dst, rshift, cr); - %} -%} - -// ROR 32bit by immI8 once -instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{ - predicate( 0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); - match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift))); - - expand %{ - rorI_eReg_imm8(dst, rshift, cr); - %} -%} - -// ROR 32bit var by var once -instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{ - match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift)))); - - expand %{ - rorI_eReg_CL(dst, shift, cr); - %} -%} - -// ROR 32bit var by var once -instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{ - match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift)))); - - expand %{ - rorI_eReg_CL(dst, shift, cr); - %} 
-%} - -// Xor Instructions -// Xor Register with Register -instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{ - match(Set dst (XorI dst src)); - effect(KILL cr); - - size(2); - format %{ "XOR $dst,$src" %} - opcode(0x33); - ins_encode( OpcP, RegReg( dst, src) ); - ins_pipe( ialu_reg_reg ); -%} - -// Xor Register with Immediate -1 -instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{ - match(Set dst (XorI dst imm)); - - size(2); - format %{ "NOT $dst" %} - ins_encode %{ - __ notl($dst$$Register); - %} - ins_pipe( ialu_reg ); -%} - -// Xor Register with Immediate -instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{ - match(Set dst (XorI dst src)); - effect(KILL cr); - - format %{ "XOR $dst,$src" %} - opcode(0x81,0x06); /* Opcode 81 /6 id */ - // ins_encode( RegImm( dst, src) ); - ins_encode( OpcSErm( dst, src ), Con8or32( src ) ); - ins_pipe( ialu_reg ); -%} - -// Xor Register with Memory -instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{ - match(Set dst (XorI dst (LoadI src))); - effect(KILL cr); - - ins_cost(150); - format %{ "XOR $dst,$src" %} - opcode(0x33); - ins_encode( SetInstMark, OpcP, RegMem(dst, src), ClearInstMark ); - ins_pipe( ialu_reg_mem ); -%} - -// Xor Memory with Register -instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{ - match(Set dst (StoreI dst (XorI (LoadI dst) src))); - effect(KILL cr); - - ins_cost(150); - format %{ "XOR $dst,$src" %} - opcode(0x31); /* Opcode 31 /r */ - ins_encode( SetInstMark, OpcP, RegMem( src, dst ), ClearInstMark ); - ins_pipe( ialu_mem_reg ); -%} - -// Xor Memory with Immediate -instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{ - match(Set dst (StoreI dst (XorI (LoadI dst) src))); - effect(KILL cr); - - ins_cost(125); - format %{ "XOR $dst,$src" %} - opcode(0x81,0x6); /* Opcode 81 /6 id */ - ins_encode( SetInstMark, OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32(src), ClearInstMark ); - ins_pipe( ialu_mem_imm ); -%} - -//----------Convert Int to 
Boolean--------------------------------------------- - -instruct movI_nocopy(rRegI dst, rRegI src) %{ - effect( DEF dst, USE src ); - format %{ "MOV $dst,$src" %} - ins_encode( enc_Copy( dst, src) ); - ins_pipe( ialu_reg_reg ); -%} - -instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{ - effect( USE_DEF dst, USE src, KILL cr ); - - size(4); - format %{ "NEG $dst\n\t" - "ADC $dst,$src" %} - ins_encode( neg_reg(dst), - OpcRegReg(0x13,dst,src) ); - ins_pipe( ialu_reg_reg_long ); -%} - -instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{ - match(Set dst (Conv2B src)); - - expand %{ - movI_nocopy(dst,src); - ci2b(dst,src,cr); - %} -%} - -instruct movP_nocopy(rRegI dst, eRegP src) %{ - effect( DEF dst, USE src ); - format %{ "MOV $dst,$src" %} - ins_encode( enc_Copy( dst, src) ); - ins_pipe( ialu_reg_reg ); -%} - -instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{ - effect( USE_DEF dst, USE src, KILL cr ); - format %{ "NEG $dst\n\t" - "ADC $dst,$src" %} - ins_encode( neg_reg(dst), - OpcRegReg(0x13,dst,src) ); - ins_pipe( ialu_reg_reg_long ); -%} - -instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{ - match(Set dst (Conv2B src)); - - expand %{ - movP_nocopy(dst,src); - cp2b(dst,src,cr); - %} -%} - -instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{ - match(Set dst (CmpLTMask p q)); - effect(KILL cr); - ins_cost(400); - - // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination - format %{ "XOR $dst,$dst\n\t" - "CMP $p,$q\n\t" - "SETlt $dst\n\t" - "NEG $dst" %} - ins_encode %{ - Register Rp = $p$$Register; - Register Rq = $q$$Register; - Register Rd = $dst$$Register; - Label done; - __ xorl(Rd, Rd); - __ cmpl(Rp, Rq); - __ setb(Assembler::less, Rd); - __ negl(Rd); - %} - - ins_pipe(pipe_slow); -%} - -instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{ - match(Set dst (CmpLTMask dst zero)); - effect(DEF dst, KILL cr); - ins_cost(100); - - format %{ "SAR $dst,31\t# cmpLTMask0" %} - ins_encode %{ - __ 
sarl($dst$$Register, 31); - %} - ins_pipe(ialu_reg); -%} - -/* better to save a register than avoid a branch */ -instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ - match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q))); - effect(KILL cr); - ins_cost(400); - format %{ "SUB $p,$q\t# cadd_cmpLTMask\n\t" - "JGE done\n\t" - "ADD $p,$y\n" - "done: " %} - ins_encode %{ - Register Rp = $p$$Register; - Register Rq = $q$$Register; - Register Ry = $y$$Register; - Label done; - __ subl(Rp, Rq); - __ jccb(Assembler::greaterEqual, done); - __ addl(Rp, Ry); - __ bind(done); - %} - - ins_pipe(pipe_cmplt); -%} - -/* better to save a register than avoid a branch */ -instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{ - match(Set y (AndI (CmpLTMask p q) y)); - effect(KILL cr); - - ins_cost(300); - - format %{ "CMPL $p, $q\t# and_cmpLTMask\n\t" - "JLT done\n\t" - "XORL $y, $y\n" - "done: " %} - ins_encode %{ - Register Rp = $p$$Register; - Register Rq = $q$$Register; - Register Ry = $y$$Register; - Label done; - __ cmpl(Rp, Rq); - __ jccb(Assembler::less, done); - __ xorl(Ry, Ry); - __ bind(done); - %} - - ins_pipe(pipe_cmplt); -%} - -/* If I enable this, I encourage spilling in the inner loop of compress. 
-instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{ - match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q))); -*/ -//----------Overflow Math Instructions----------------------------------------- - -instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2) -%{ - match(Set cr (OverflowAddI op1 op2)); - effect(DEF cr, USE_KILL op1, USE op2); - - format %{ "ADD $op1, $op2\t# overflow check int" %} - - ins_encode %{ - __ addl($op1$$Register, $op2$$Register); - %} - ins_pipe(ialu_reg_reg); -%} - -instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2) -%{ - match(Set cr (OverflowAddI op1 op2)); - effect(DEF cr, USE_KILL op1, USE op2); - - format %{ "ADD $op1, $op2\t# overflow check int" %} - - ins_encode %{ - __ addl($op1$$Register, $op2$$constant); - %} - ins_pipe(ialu_reg_reg); -%} - -instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2) -%{ - match(Set cr (OverflowSubI op1 op2)); - - format %{ "CMP $op1, $op2\t# overflow check int" %} - ins_encode %{ - __ cmpl($op1$$Register, $op2$$Register); - %} - ins_pipe(ialu_reg_reg); -%} - -instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2) -%{ - match(Set cr (OverflowSubI op1 op2)); - - format %{ "CMP $op1, $op2\t# overflow check int" %} - ins_encode %{ - __ cmpl($op1$$Register, $op2$$constant); - %} - ins_pipe(ialu_reg_reg); -%} - -instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2) -%{ - match(Set cr (OverflowSubI zero op2)); - effect(DEF cr, USE_KILL op2); - - format %{ "NEG $op2\t# overflow check int" %} - ins_encode %{ - __ negl($op2$$Register); - %} - ins_pipe(ialu_reg_reg); -%} - -instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2) -%{ - match(Set cr (OverflowMulI op1 op2)); - effect(DEF cr, USE_KILL op1, USE op2); - - format %{ "IMUL $op1, $op2\t# overflow check int" %} - ins_encode %{ - __ imull($op1$$Register, $op2$$Register); - %} - ins_pipe(ialu_reg_reg_alu0); -%} - -instruct 
overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp) -%{ - match(Set cr (OverflowMulI op1 op2)); - effect(DEF cr, TEMP tmp, USE op1, USE op2); - - format %{ "IMUL $tmp, $op1, $op2\t# overflow check int" %} - ins_encode %{ - __ imull($tmp$$Register, $op1$$Register, $op2$$constant); - %} - ins_pipe(ialu_reg_reg_alu0); -%} - -// Integer Absolute Instructions -instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr) -%{ - match(Set dst (AbsI src)); - effect(TEMP dst, TEMP tmp, KILL cr); - format %{ "movl $tmp, $src\n\t" - "sarl $tmp, 31\n\t" - "movl $dst, $src\n\t" - "xorl $dst, $tmp\n\t" - "subl $dst, $tmp\n" - %} - ins_encode %{ - __ movl($tmp$$Register, $src$$Register); - __ sarl($tmp$$Register, 31); - __ movl($dst$$Register, $src$$Register); - __ xorl($dst$$Register, $tmp$$Register); - __ subl($dst$$Register, $tmp$$Register); - %} - - ins_pipe(ialu_reg_reg); -%} - -//----------Long Instructions------------------------------------------------ -// Add Long Register with Register -instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ - match(Set dst (AddL dst src)); - effect(KILL cr); - ins_cost(200); - format %{ "ADD $dst.lo,$src.lo\n\t" - "ADC $dst.hi,$src.hi" %} - opcode(0x03, 0x13); - ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); - ins_pipe( ialu_reg_reg_long ); -%} - -// Add Long Register with Immediate -instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ - match(Set dst (AddL dst src)); - effect(KILL cr); - format %{ "ADD $dst.lo,$src.lo\n\t" - "ADC $dst.hi,$src.hi" %} - opcode(0x81,0x00,0x02); /* Opcode 81 /0, 81 /2 */ - ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); - ins_pipe( ialu_reg_long ); -%} - -// Add Long Register with Memory -instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ - match(Set dst (AddL dst (LoadL mem))); - effect(KILL cr); - ins_cost(125); - format %{ "ADD $dst.lo,$mem\n\t" - "ADC $dst.hi,$mem+4" %} - opcode(0x03, 0x13); - ins_encode( SetInstMark, 
OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark ); - ins_pipe( ialu_reg_long_mem ); -%} - -// Subtract Long Register with Register. -instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ - match(Set dst (SubL dst src)); - effect(KILL cr); - ins_cost(200); - format %{ "SUB $dst.lo,$src.lo\n\t" - "SBB $dst.hi,$src.hi" %} - opcode(0x2B, 0x1B); - ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) ); - ins_pipe( ialu_reg_reg_long ); -%} - -// Subtract Long Register with Immediate -instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ - match(Set dst (SubL dst src)); - effect(KILL cr); - format %{ "SUB $dst.lo,$src.lo\n\t" - "SBB $dst.hi,$src.hi" %} - opcode(0x81,0x05,0x03); /* Opcode 81 /5, 81 /3 */ - ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); - ins_pipe( ialu_reg_long ); -%} - -// Subtract Long Register with Memory -instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ - match(Set dst (SubL dst (LoadL mem))); - effect(KILL cr); - ins_cost(125); - format %{ "SUB $dst.lo,$mem\n\t" - "SBB $dst.hi,$mem+4" %} - opcode(0x2B, 0x1B); - ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark ); - ins_pipe( ialu_reg_long_mem ); -%} - -instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{ - match(Set dst (SubL zero dst)); - effect(KILL cr); - ins_cost(300); - format %{ "NEG $dst.hi\n\tNEG $dst.lo\n\tSBB $dst.hi,0" %} - ins_encode( neg_long(dst) ); - ins_pipe( ialu_reg_reg_long ); -%} - -// And Long Register with Register -instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ - match(Set dst (AndL dst src)); - effect(KILL cr); - format %{ "AND $dst.lo,$src.lo\n\t" - "AND $dst.hi,$src.hi" %} - opcode(0x23,0x23); - ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); - ins_pipe( ialu_reg_reg_long ); -%} - -// And Long Register with Immediate -instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ - match(Set dst (AndL dst src)); - effect(KILL cr); - 
format %{ "AND $dst.lo,$src.lo\n\t" - "AND $dst.hi,$src.hi" %} - opcode(0x81,0x04,0x04); /* Opcode 81 /4, 81 /4 */ - ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); - ins_pipe( ialu_reg_long ); -%} - -// And Long Register with Memory -instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ - match(Set dst (AndL dst (LoadL mem))); - effect(KILL cr); - ins_cost(125); - format %{ "AND $dst.lo,$mem\n\t" - "AND $dst.hi,$mem+4" %} - opcode(0x23, 0x23); - ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark ); - ins_pipe( ialu_reg_long_mem ); -%} - -// BMI1 instructions -instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{ - match(Set dst (AndL (XorL src1 minus_1) src2)); - predicate(UseBMI1Instructions); - effect(KILL cr, TEMP dst); - - format %{ "ANDNL $dst.lo, $src1.lo, $src2.lo\n\t" - "ANDNL $dst.hi, $src1.hi, $src2.hi" - %} - - ins_encode %{ - Register Rdst = $dst$$Register; - Register Rsrc1 = $src1$$Register; - Register Rsrc2 = $src2$$Register; - __ andnl(Rdst, Rsrc1, Rsrc2); - __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2)); - %} - ins_pipe(ialu_reg_reg_long); -%} - -instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{ - match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) )); - predicate(UseBMI1Instructions); - effect(KILL cr, TEMP dst); - - ins_cost(125); - format %{ "ANDNL $dst.lo, $src1.lo, $src2\n\t" - "ANDNL $dst.hi, $src1.hi, $src2+4" - %} - - ins_encode %{ - Register Rdst = $dst$$Register; - Register Rsrc1 = $src1$$Register; - Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none); - - __ andnl(Rdst, Rsrc1, $src2$$Address); - __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi); - %} - ins_pipe(ialu_reg_mem); -%} - -instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{ - 
match(Set dst (AndL (SubL imm_zero src) src)); - predicate(UseBMI1Instructions); - effect(KILL cr, TEMP dst); - - format %{ "MOVL $dst.hi, 0\n\t" - "BLSIL $dst.lo, $src.lo\n\t" - "JNZ done\n\t" - "BLSIL $dst.hi, $src.hi\n" - "done:" - %} - - ins_encode %{ - Label done; - Register Rdst = $dst$$Register; - Register Rsrc = $src$$Register; - __ movl(HIGH_FROM_LOW(Rdst), 0); - __ blsil(Rdst, Rsrc); - __ jccb(Assembler::notZero, done); - __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); - __ bind(done); - %} - ins_pipe(ialu_reg); -%} - -instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{ - match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); - predicate(UseBMI1Instructions); - effect(KILL cr, TEMP dst); - - ins_cost(125); - format %{ "MOVL $dst.hi, 0\n\t" - "BLSIL $dst.lo, $src\n\t" - "JNZ done\n\t" - "BLSIL $dst.hi, $src+4\n" - "done:" - %} - - ins_encode %{ - Label done; - Register Rdst = $dst$$Register; - Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); - - __ movl(HIGH_FROM_LOW(Rdst), 0); - __ blsil(Rdst, $src$$Address); - __ jccb(Assembler::notZero, done); - __ blsil(HIGH_FROM_LOW(Rdst), src_hi); - __ bind(done); - %} - ins_pipe(ialu_reg_mem); -%} - -instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) -%{ - match(Set dst (XorL (AddL src minus_1) src)); - predicate(UseBMI1Instructions); - effect(KILL cr, TEMP dst); - - format %{ "MOVL $dst.hi, 0\n\t" - "BLSMSKL $dst.lo, $src.lo\n\t" - "JNC done\n\t" - "BLSMSKL $dst.hi, $src.hi\n" - "done:" - %} - - ins_encode %{ - Label done; - Register Rdst = $dst$$Register; - Register Rsrc = $src$$Register; - __ movl(HIGH_FROM_LOW(Rdst), 0); - __ blsmskl(Rdst, Rsrc); - __ jccb(Assembler::carryClear, done); - __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); - __ bind(done); - %} - - ins_pipe(ialu_reg); -%} - -instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) -%{ - 
match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) )); - predicate(UseBMI1Instructions); - effect(KILL cr, TEMP dst); - - ins_cost(125); - format %{ "MOVL $dst.hi, 0\n\t" - "BLSMSKL $dst.lo, $src\n\t" - "JNC done\n\t" - "BLSMSKL $dst.hi, $src+4\n" - "done:" - %} - - ins_encode %{ - Label done; - Register Rdst = $dst$$Register; - Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); - - __ movl(HIGH_FROM_LOW(Rdst), 0); - __ blsmskl(Rdst, $src$$Address); - __ jccb(Assembler::carryClear, done); - __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi); - __ bind(done); - %} - - ins_pipe(ialu_reg_mem); -%} - -instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr) -%{ - match(Set dst (AndL (AddL src minus_1) src) ); - predicate(UseBMI1Instructions); - effect(KILL cr, TEMP dst); - - format %{ "MOVL $dst.hi, $src.hi\n\t" - "BLSRL $dst.lo, $src.lo\n\t" - "JNC done\n\t" - "BLSRL $dst.hi, $src.hi\n" - "done:" - %} - - ins_encode %{ - Label done; - Register Rdst = $dst$$Register; - Register Rsrc = $src$$Register; - __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); - __ blsrl(Rdst, Rsrc); - __ jccb(Assembler::carryClear, done); - __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc)); - __ bind(done); - %} - - ins_pipe(ialu_reg); -%} - -instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr) -%{ - match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) )); - predicate(UseBMI1Instructions); - effect(KILL cr, TEMP dst); - - ins_cost(125); - format %{ "MOVL $dst.hi, $src+4\n\t" - "BLSRL $dst.lo, $src\n\t" - "JNC done\n\t" - "BLSRL $dst.hi, $src+4\n" - "done:" - %} - - ins_encode %{ - Label done; - Register Rdst = $dst$$Register; - Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none); - __ movl(HIGH_FROM_LOW(Rdst), src_hi); - __ blsrl(Rdst, $src$$Address); - __ jccb(Assembler::carryClear, done); - __ blsrl(HIGH_FROM_LOW(Rdst), src_hi); - __ 
bind(done); - %} - - ins_pipe(ialu_reg_mem); -%} - -// Or Long Register with Register -instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ - match(Set dst (OrL dst src)); - effect(KILL cr); - format %{ "OR $dst.lo,$src.lo\n\t" - "OR $dst.hi,$src.hi" %} - opcode(0x0B,0x0B); - ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); - ins_pipe( ialu_reg_reg_long ); -%} - -// Or Long Register with Immediate -instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ - match(Set dst (OrL dst src)); - effect(KILL cr); - format %{ "OR $dst.lo,$src.lo\n\t" - "OR $dst.hi,$src.hi" %} - opcode(0x81,0x01,0x01); /* Opcode 81 /1, 81 /1 */ - ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); - ins_pipe( ialu_reg_long ); -%} - -// Or Long Register with Memory -instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ - match(Set dst (OrL dst (LoadL mem))); - effect(KILL cr); - ins_cost(125); - format %{ "OR $dst.lo,$mem\n\t" - "OR $dst.hi,$mem+4" %} - opcode(0x0B,0x0B); - ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark ); - ins_pipe( ialu_reg_long_mem ); -%} - -// Xor Long Register with Register -instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{ - match(Set dst (XorL dst src)); - effect(KILL cr); - format %{ "XOR $dst.lo,$src.lo\n\t" - "XOR $dst.hi,$src.hi" %} - opcode(0x33,0x33); - ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) ); - ins_pipe( ialu_reg_reg_long ); -%} - -// Xor Long Register with Immediate -1 -instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{ - match(Set dst (XorL dst imm)); - format %{ "NOT $dst.lo\n\t" - "NOT $dst.hi" %} - ins_encode %{ - __ notl($dst$$Register); - __ notl(HIGH_FROM_LOW($dst$$Register)); - %} - ins_pipe( ialu_reg_long ); -%} - -// Xor Long Register with Immediate -instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{ - match(Set dst (XorL dst src)); - effect(KILL cr); - format %{ "XOR $dst.lo,$src.lo\n\t" - "XOR $dst.hi,$src.hi" %} - 
opcode(0x81,0x06,0x06); /* Opcode 81 /6, 81 /6 */ - ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) ); - ins_pipe( ialu_reg_long ); -%} - -// Xor Long Register with Memory -instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{ - match(Set dst (XorL dst (LoadL mem))); - effect(KILL cr); - ins_cost(125); - format %{ "XOR $dst.lo,$mem\n\t" - "XOR $dst.hi,$mem+4" %} - opcode(0x33,0x33); - ins_encode( SetInstMark, OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem), ClearInstMark ); - ins_pipe( ialu_reg_long_mem ); -%} - -// Shift Left Long by 1 -instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{ - predicate(UseNewLongLShift); - match(Set dst (LShiftL dst cnt)); - effect(KILL cr); - ins_cost(100); - format %{ "ADD $dst.lo,$dst.lo\n\t" - "ADC $dst.hi,$dst.hi" %} - ins_encode %{ - __ addl($dst$$Register,$dst$$Register); - __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); - %} - ins_pipe( ialu_reg_long ); -%} - -// Shift Left Long by 2 -instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{ - predicate(UseNewLongLShift); - match(Set dst (LShiftL dst cnt)); - effect(KILL cr); - ins_cost(100); - format %{ "ADD $dst.lo,$dst.lo\n\t" - "ADC $dst.hi,$dst.hi\n\t" - "ADD $dst.lo,$dst.lo\n\t" - "ADC $dst.hi,$dst.hi" %} - ins_encode %{ - __ addl($dst$$Register,$dst$$Register); - __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); - __ addl($dst$$Register,$dst$$Register); - __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); - %} - ins_pipe( ialu_reg_long ); -%} - -// Shift Left Long by 3 -instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{ - predicate(UseNewLongLShift); - match(Set dst (LShiftL dst cnt)); - effect(KILL cr); - ins_cost(100); - format %{ "ADD $dst.lo,$dst.lo\n\t" - "ADC $dst.hi,$dst.hi\n\t" - "ADD $dst.lo,$dst.lo\n\t" - "ADC $dst.hi,$dst.hi\n\t" - "ADD $dst.lo,$dst.lo\n\t" - "ADC $dst.hi,$dst.hi" %} - ins_encode %{ - __ 
addl($dst$$Register,$dst$$Register); - __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); - __ addl($dst$$Register,$dst$$Register); - __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); - __ addl($dst$$Register,$dst$$Register); - __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register)); - %} - ins_pipe( ialu_reg_long ); -%} - -// Shift Left Long by 1-31 -instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ - match(Set dst (LShiftL dst cnt)); - effect(KILL cr); - ins_cost(200); - format %{ "SHLD $dst.hi,$dst.lo,$cnt\n\t" - "SHL $dst.lo,$cnt" %} - opcode(0xC1, 0x4, 0xA4); /* 0F/A4, then C1 /4 ib */ - ins_encode( move_long_small_shift(dst,cnt) ); - ins_pipe( ialu_reg_long ); -%} - -// Shift Left Long by 32-63 -instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ - match(Set dst (LShiftL dst cnt)); - effect(KILL cr); - ins_cost(300); - format %{ "MOV $dst.hi,$dst.lo\n" - "\tSHL $dst.hi,$cnt-32\n" - "\tXOR $dst.lo,$dst.lo" %} - opcode(0xC1, 0x4); /* C1 /4 ib */ - ins_encode( move_long_big_shift_clr(dst,cnt) ); - ins_pipe( ialu_reg_long ); -%} - -// Shift Left Long by variable -instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ - match(Set dst (LShiftL dst shift)); - effect(KILL cr); - ins_cost(500+200); - size(17); - format %{ "TEST $shift,32\n\t" - "JEQ,s small\n\t" - "MOV $dst.hi,$dst.lo\n\t" - "XOR $dst.lo,$dst.lo\n" - "small:\tSHLD $dst.hi,$dst.lo,$shift\n\t" - "SHL $dst.lo,$shift" %} - ins_encode( shift_left_long( dst, shift ) ); - ins_pipe( pipe_slow ); -%} - -// Shift Right Long by 1-31 -instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ - match(Set dst (URShiftL dst cnt)); - effect(KILL cr); - ins_cost(200); - format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" - "SHR $dst.hi,$cnt" %} - opcode(0xC1, 0x5, 0xAC); /* 0F/AC, then C1 /5 ib */ - ins_encode( move_long_small_shift(dst,cnt) ); - ins_pipe( ialu_reg_long ); -%} - -// Shift Right Long by 32-63 -instruct 
shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ - match(Set dst (URShiftL dst cnt)); - effect(KILL cr); - ins_cost(300); - format %{ "MOV $dst.lo,$dst.hi\n" - "\tSHR $dst.lo,$cnt-32\n" - "\tXOR $dst.hi,$dst.hi" %} - opcode(0xC1, 0x5); /* C1 /5 ib */ - ins_encode( move_long_big_shift_clr(dst,cnt) ); - ins_pipe( ialu_reg_long ); -%} - -// Shift Right Long by variable -instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ - match(Set dst (URShiftL dst shift)); - effect(KILL cr); - ins_cost(600); - size(17); - format %{ "TEST $shift,32\n\t" - "JEQ,s small\n\t" - "MOV $dst.lo,$dst.hi\n\t" - "XOR $dst.hi,$dst.hi\n" - "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" - "SHR $dst.hi,$shift" %} - ins_encode( shift_right_long( dst, shift ) ); - ins_pipe( pipe_slow ); -%} - -// Shift Right Long by 1-31 -instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{ - match(Set dst (RShiftL dst cnt)); - effect(KILL cr); - ins_cost(200); - format %{ "SHRD $dst.lo,$dst.hi,$cnt\n\t" - "SAR $dst.hi,$cnt" %} - opcode(0xC1, 0x7, 0xAC); /* 0F/AC, then C1 /7 ib */ - ins_encode( move_long_small_shift(dst,cnt) ); - ins_pipe( ialu_reg_long ); -%} - -// Shift Right Long by 32-63 -instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{ - match(Set dst (RShiftL dst cnt)); - effect(KILL cr); - ins_cost(300); - format %{ "MOV $dst.lo,$dst.hi\n" - "\tSAR $dst.lo,$cnt-32\n" - "\tSAR $dst.hi,31" %} - opcode(0xC1, 0x7); /* C1 /7 ib */ - ins_encode( move_long_big_shift_sign(dst,cnt) ); - ins_pipe( ialu_reg_long ); -%} - -// Shift Right arithmetic Long by variable -instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{ - match(Set dst (RShiftL dst shift)); - effect(KILL cr); - ins_cost(600); - size(18); - format %{ "TEST $shift,32\n\t" - "JEQ,s small\n\t" - "MOV $dst.lo,$dst.hi\n\t" - "SAR $dst.hi,31\n" - "small:\tSHRD $dst.lo,$dst.hi,$shift\n\t" - "SAR $dst.hi,$shift" %} - ins_encode( shift_right_arith_long( dst, shift ) ); - ins_pipe( pipe_slow ); -%} - 
- -//----------Double Instructions------------------------------------------------ -// Double Math - -// Compare & branch - -// P6 version of float compare, sets condition codes in EFLAGS -instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ - predicate(VM_Version::supports_cmov() && UseSSE <=1); - match(Set cr (CmpD src1 src2)); - effect(KILL rax); - ins_cost(150); - format %{ "FLD $src1\n\t" - "FUCOMIP ST,$src2 // P6 instruction\n\t" - "JNP exit\n\t" - "MOV ah,1 // saw a NaN, set CF\n\t" - "SAHF\n" - "exit:\tNOP // avoid branch to branch" %} - opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ - ins_encode( Push_Reg_DPR(src1), - OpcP, RegOpc(src2), - cmpF_P6_fixup ); - ins_pipe( pipe_slow ); -%} - -instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{ - predicate(VM_Version::supports_cmov() && UseSSE <=1); - match(Set cr (CmpD src1 src2)); - ins_cost(150); - format %{ "FLD $src1\n\t" - "FUCOMIP ST,$src2 // P6 instruction" %} - opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ - ins_encode( Push_Reg_DPR(src1), - OpcP, RegOpc(src2)); - ins_pipe( pipe_slow ); -%} - -// Compare & branch -instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{ - predicate(UseSSE<=1); - match(Set cr (CmpD src1 src2)); - effect(KILL rax); - ins_cost(200); - format %{ "FLD $src1\n\t" - "FCOMp $src2\n\t" - "FNSTSW AX\n\t" - "TEST AX,0x400\n\t" - "JZ,s flags\n\t" - "MOV AH,1\t# unordered treat as LT\n" - "flags:\tSAHF" %} - opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ - ins_encode( Push_Reg_DPR(src1), - OpcP, RegOpc(src2), - fpu_flags); - ins_pipe( pipe_slow ); -%} - -// Compare vs zero into -1,0,1 -instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{ - predicate(UseSSE<=1); - match(Set dst (CmpD3 src1 zero)); - effect(KILL cr, KILL rax); - ins_cost(280); - format %{ "FTSTD $dst,$src1" %} - opcode(0xE4, 0xD9); - ins_encode( Push_Reg_DPR(src1), - OpcS, OpcP, PopFPU, - CmpF_Result(dst)); - ins_pipe( pipe_slow ); -%} 
- -// Compare into -1,0,1 -instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{ - predicate(UseSSE<=1); - match(Set dst (CmpD3 src1 src2)); - effect(KILL cr, KILL rax); - ins_cost(300); - format %{ "FCMPD $dst,$src1,$src2" %} - opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ - ins_encode( Push_Reg_DPR(src1), - OpcP, RegOpc(src2), - CmpF_Result(dst)); - ins_pipe( pipe_slow ); -%} - -// float compare and set condition codes in EFLAGS by XMM regs -instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{ - predicate(UseSSE>=2); - match(Set cr (CmpD src1 src2)); - ins_cost(145); - format %{ "UCOMISD $src1,$src2\n\t" - "JNP,s exit\n\t" - "PUSHF\t# saw NaN, set CF\n\t" - "AND [rsp], #0xffffff2b\n\t" - "POPF\n" - "exit:" %} - ins_encode %{ - __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); - emit_cmpfp_fixup(masm); - %} - ins_pipe( pipe_slow ); -%} - -instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{ - predicate(UseSSE>=2); - match(Set cr (CmpD src1 src2)); - ins_cost(100); - format %{ "UCOMISD $src1,$src2" %} - ins_encode %{ - __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -// float compare and set condition codes in EFLAGS by XMM regs -instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{ - predicate(UseSSE>=2); - match(Set cr (CmpD src1 (LoadD src2))); - ins_cost(145); - format %{ "UCOMISD $src1,$src2\n\t" - "JNP,s exit\n\t" - "PUSHF\t# saw NaN, set CF\n\t" - "AND [rsp], #0xffffff2b\n\t" - "POPF\n" - "exit:" %} - ins_encode %{ - __ ucomisd($src1$$XMMRegister, $src2$$Address); - emit_cmpfp_fixup(masm); - %} - ins_pipe( pipe_slow ); -%} - -instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{ - predicate(UseSSE>=2); - match(Set cr (CmpD src1 (LoadD src2))); - ins_cost(100); - format %{ "UCOMISD $src1,$src2" %} - ins_encode %{ - __ ucomisd($src1$$XMMRegister, $src2$$Address); - %} - ins_pipe( pipe_slow ); -%} - -// Compare into -1,0,1 in XMM -instruct cmpD_reg(xRegI dst, 
regD src1, regD src2, eFlagsReg cr) %{ - predicate(UseSSE>=2); - match(Set dst (CmpD3 src1 src2)); - effect(KILL cr); - ins_cost(255); - format %{ "UCOMISD $src1, $src2\n\t" - "MOV $dst, #-1\n\t" - "JP,s done\n\t" - "JB,s done\n\t" - "SETNE $dst\n\t" - "MOVZB $dst, $dst\n" - "done:" %} - ins_encode %{ - __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister); - emit_cmpfp3(masm, $dst$$Register); - %} - ins_pipe( pipe_slow ); -%} - -// Compare into -1,0,1 in XMM and memory -instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{ - predicate(UseSSE>=2); - match(Set dst (CmpD3 src1 (LoadD src2))); - effect(KILL cr); - ins_cost(275); - format %{ "UCOMISD $src1, $src2\n\t" - "MOV $dst, #-1\n\t" - "JP,s done\n\t" - "JB,s done\n\t" - "SETNE $dst\n\t" - "MOVZB $dst, $dst\n" - "done:" %} - ins_encode %{ - __ ucomisd($src1$$XMMRegister, $src2$$Address); - emit_cmpfp3(masm, $dst$$Register); - %} - ins_pipe( pipe_slow ); -%} - - -instruct subDPR_reg(regDPR dst, regDPR src) %{ - predicate (UseSSE <=1); - match(Set dst (SubD dst src)); - - format %{ "FLD $src\n\t" - "DSUBp $dst,ST" %} - opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ - ins_cost(150); - ins_encode( Push_Reg_DPR(src), - OpcP, RegOpc(dst) ); - ins_pipe( fpu_reg_reg ); -%} - -instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ - predicate (UseSSE <=1); - match(Set dst (RoundDouble (SubD src1 src2))); - ins_cost(250); - - format %{ "FLD $src2\n\t" - "DSUB ST,$src1\n\t" - "FSTP_D $dst\t# D-round" %} - opcode(0xD8, 0x5); - ins_encode( Push_Reg_DPR(src2), - OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); - ins_pipe( fpu_mem_reg_reg ); -%} - - -instruct subDPR_reg_mem(regDPR dst, memory src) %{ - predicate (UseSSE <=1); - match(Set dst (SubD dst (LoadD src))); - ins_cost(150); - - format %{ "FLD $src\n\t" - "DSUBp $dst,ST" %} - opcode(0xDE, 0x5, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ - ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src), - OpcP, RegOpc(dst), ClearInstMark ); - ins_pipe( 
fpu_reg_mem ); -%} - -instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{ - predicate (UseSSE<=1); - match(Set dst (AbsD src)); - ins_cost(100); - format %{ "FABS" %} - opcode(0xE1, 0xD9); - ins_encode( OpcS, OpcP ); - ins_pipe( fpu_reg_reg ); -%} - -instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{ - predicate(UseSSE<=1); - match(Set dst (NegD src)); - ins_cost(100); - format %{ "FCHS" %} - opcode(0xE0, 0xD9); - ins_encode( OpcS, OpcP ); - ins_pipe( fpu_reg_reg ); -%} - -instruct addDPR_reg(regDPR dst, regDPR src) %{ - predicate(UseSSE<=1); - match(Set dst (AddD dst src)); - format %{ "FLD $src\n\t" - "DADD $dst,ST" %} - size(4); - ins_cost(150); - opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ - ins_encode( Push_Reg_DPR(src), - OpcP, RegOpc(dst) ); - ins_pipe( fpu_reg_reg ); -%} - - -instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{ - predicate(UseSSE<=1); - match(Set dst (RoundDouble (AddD src1 src2))); - ins_cost(250); - - format %{ "FLD $src2\n\t" - "DADD ST,$src1\n\t" - "FSTP_D $dst\t# D-round" %} - opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/ - ins_encode( Push_Reg_DPR(src2), - OpcP, RegOpc(src1), Pop_Mem_DPR(dst) ); - ins_pipe( fpu_mem_reg_reg ); -%} - - -instruct addDPR_reg_mem(regDPR dst, memory src) %{ - predicate(UseSSE<=1); - match(Set dst (AddD dst (LoadD src))); - ins_cost(150); - - format %{ "FLD $src\n\t" - "DADDp $dst,ST" %} - opcode(0xDE, 0x0, 0xDD); /* DE C0+i */ /* LoadD DD /0 */ - ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src), - OpcP, RegOpc(dst), ClearInstMark ); - ins_pipe( fpu_reg_mem ); -%} - -// add-to-memory -instruct addDPR_mem_reg(memory dst, regDPR src) %{ - predicate(UseSSE<=1); - match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src)))); - ins_cost(150); - - format %{ "FLD_D $dst\n\t" - "DADD ST,$src\n\t" - "FST_D $dst" %} - opcode(0xDD, 0x0); - ins_encode( SetInstMark, Opcode(0xDD), RMopc_Mem(0x00,dst), - Opcode(0xD8), RegOpc(src), ClearInstMark, - SetInstMark, - Opcode(0xDD), RMopc_Mem(0x03,dst), - 
ClearInstMark); - ins_pipe( fpu_reg_mem ); -%} - -instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{ - predicate(UseSSE<=1); - match(Set dst (AddD dst con)); - ins_cost(125); - format %{ "FLD1\n\t" - "DADDp $dst,ST" %} - ins_encode %{ - __ fld1(); - __ faddp($dst$$reg); - %} - ins_pipe(fpu_reg); -%} - -instruct addDPR_reg_imm(regDPR dst, immDPR con) %{ - predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); - match(Set dst (AddD dst con)); - ins_cost(200); - format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" - "DADDp $dst,ST" %} - ins_encode %{ - __ fld_d($constantaddress($con)); - __ faddp($dst$$reg); - %} - ins_pipe(fpu_reg_mem); -%} - -instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{ - predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 ); - match(Set dst (RoundDouble (AddD src con))); - ins_cost(200); - format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" - "DADD ST,$src\n\t" - "FSTP_D $dst\t# D-round" %} - ins_encode %{ - __ fld_d($constantaddress($con)); - __ fadd($src$$reg); - __ fstp_d(Address(rsp, $dst$$disp)); - %} - ins_pipe(fpu_mem_reg_con); -%} - -instruct mulDPR_reg(regDPR dst, regDPR src) %{ - predicate(UseSSE<=1); - match(Set dst (MulD dst src)); - format %{ "FLD $src\n\t" - "DMULp $dst,ST" %} - opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ - ins_cost(150); - ins_encode( Push_Reg_DPR(src), - OpcP, RegOpc(dst) ); - ins_pipe( fpu_reg_reg ); -%} - -// Strict FP instruction biases argument before multiply then -// biases result to avoid double rounding of subnormals. 
-// -// scale arg1 by multiplying arg1 by 2^(-15360) -// load arg2 -// multiply scaled arg1 by arg2 -// rescale product by 2^(15360) -// -instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{ - predicate( UseSSE<=1 && Compile::current()->has_method() ); - match(Set dst (MulD dst src)); - ins_cost(1); // Select this instruction for all FP double multiplies - - format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t" - "DMULp $dst,ST\n\t" - "FLD $src\n\t" - "DMULp $dst,ST\n\t" - "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t" - "DMULp $dst,ST\n\t" %} - opcode(0xDE, 0x1); /* DE C8+i or DE /1*/ - ins_encode( strictfp_bias1(dst), - Push_Reg_DPR(src), - OpcP, RegOpc(dst), - strictfp_bias2(dst) ); - ins_pipe( fpu_reg_reg ); -%} - -instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{ - predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 ); - match(Set dst (MulD dst con)); - ins_cost(200); - format %{ "FLD_D [$constantaddress]\t# load from constant table: double=$con\n\t" - "DMULp $dst,ST" %} - ins_encode %{ - __ fld_d($constantaddress($con)); - __ fmulp($dst$$reg); - %} - ins_pipe(fpu_reg_mem); -%} - - -instruct mulDPR_reg_mem(regDPR dst, memory src) %{ - predicate( UseSSE<=1 ); - match(Set dst (MulD dst (LoadD src))); - ins_cost(200); - format %{ "FLD_D $src\n\t" - "DMULp $dst,ST" %} - opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/ /* LoadD DD /0 */ - ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src), - OpcP, RegOpc(dst), ClearInstMark ); - ins_pipe( fpu_reg_mem ); -%} - -// -// Cisc-alternate to reg-reg multiply -instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{ - predicate( UseSSE<=1 ); - match(Set dst (MulD src (LoadD mem))); - ins_cost(250); - format %{ "FLD_D $mem\n\t" - "DMUL ST,$src\n\t" - "FSTP_D $dst" %} - opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadD D9 /0 */ - ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem), - OpcReg_FPR(src), - Pop_Reg_DPR(dst), ClearInstMark ); - 
ins_pipe( fpu_reg_reg_mem ); -%} - - -// MACRO3 -- addDPR a mulDPR -// This instruction is a '2-address' instruction in that the result goes -// back to src2. This eliminates a move from the macro; possibly the -// register allocator will have to add it back (and maybe not). -instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ - predicate( UseSSE<=1 ); - match(Set src2 (AddD (MulD src0 src1) src2)); - format %{ "FLD $src0\t# ===MACRO3d===\n\t" - "DMUL ST,$src1\n\t" - "DADDp $src2,ST" %} - ins_cost(250); - opcode(0xDD); /* LoadD DD /0 */ - ins_encode( Push_Reg_FPR(src0), - FMul_ST_reg(src1), - FAddP_reg_ST(src2) ); - ins_pipe( fpu_reg_reg_reg ); -%} - - -// MACRO3 -- subDPR a mulDPR -instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{ - predicate( UseSSE<=1 ); - match(Set src2 (SubD (MulD src0 src1) src2)); - format %{ "FLD $src0\t# ===MACRO3d===\n\t" - "DMUL ST,$src1\n\t" - "DSUBRp $src2,ST" %} - ins_cost(250); - ins_encode( Push_Reg_FPR(src0), - FMul_ST_reg(src1), - Opcode(0xDE), Opc_plus(0xE0,src2)); - ins_pipe( fpu_reg_reg_reg ); -%} - - -instruct divDPR_reg(regDPR dst, regDPR src) %{ - predicate( UseSSE<=1 ); - match(Set dst (DivD dst src)); - - format %{ "FLD $src\n\t" - "FDIVp $dst,ST" %} - opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ - ins_cost(150); - ins_encode( Push_Reg_DPR(src), - OpcP, RegOpc(dst) ); - ins_pipe( fpu_reg_reg ); -%} - -// Strict FP instruction biases argument before division then -// biases result, to avoid double rounding of subnormals. 
-// -// scale dividend by multiplying dividend by 2^(-15360) -// load divisor -// divide scaled dividend by divisor -// rescale quotient by 2^(15360) -// -instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{ - predicate (UseSSE<=1); - match(Set dst (DivD dst src)); - predicate( UseSSE<=1 && Compile::current()->has_method() ); - ins_cost(01); - - format %{ "FLD StubRoutines::x86::_fpu_subnormal_bias1\n\t" - "DMULp $dst,ST\n\t" - "FLD $src\n\t" - "FDIVp $dst,ST\n\t" - "FLD StubRoutines::x86::_fpu_subnormal_bias2\n\t" - "DMULp $dst,ST\n\t" %} - opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ - ins_encode( strictfp_bias1(dst), - Push_Reg_DPR(src), - OpcP, RegOpc(dst), - strictfp_bias2(dst) ); - ins_pipe( fpu_reg_reg ); -%} - -instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{ - predicate(UseSSE<=1); - match(Set dst (ModD dst src)); - effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS - - format %{ "DMOD $dst,$src" %} - ins_cost(250); - ins_encode(Push_Reg_Mod_DPR(dst, src), - emitModDPR(), - Push_Result_Mod_DPR(src), - Pop_Reg_DPR(dst)); - ins_pipe( pipe_slow ); -%} - -instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{ - predicate(UseSSE>=2); - match(Set dst (ModD src0 src1)); - effect(KILL rax, KILL cr); - - format %{ "SUB ESP,8\t # DMOD\n" - "\tMOVSD [ESP+0],$src1\n" - "\tFLD_D [ESP+0]\n" - "\tMOVSD [ESP+0],$src0\n" - "\tFLD_D [ESP+0]\n" - "loop:\tFPREM\n" - "\tFWAIT\n" - "\tFNSTSW AX\n" - "\tSAHF\n" - "\tJP loop\n" - "\tFSTP_D [ESP+0]\n" - "\tMOVSD $dst,[ESP+0]\n" - "\tADD ESP,8\n" - "\tFSTP ST0\t # Restore FPU Stack" - %} - ins_cost(250); - ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU); - ins_pipe( pipe_slow ); -%} - -instruct atanDPR_reg(regDPR dst, regDPR src) %{ - predicate (UseSSE<=1); - match(Set dst(AtanD dst src)); - format %{ "DATA $dst,$src" %} - opcode(0xD9, 0xF3); - ins_encode( Push_Reg_DPR(src), - OpcP, OpcS, RegOpc(dst) ); - ins_pipe( pipe_slow ); 
-%} - -instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{ - predicate (UseSSE>=2); - match(Set dst(AtanD dst src)); - effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8" - format %{ "DATA $dst,$src" %} - opcode(0xD9, 0xF3); - ins_encode( Push_SrcD(src), - OpcP, OpcS, Push_ResultD(dst) ); - ins_pipe( pipe_slow ); -%} - -instruct sqrtDPR_reg(regDPR dst, regDPR src) %{ - predicate (UseSSE<=1); - match(Set dst (SqrtD src)); - format %{ "DSQRT $dst,$src" %} - opcode(0xFA, 0xD9); - ins_encode( Push_Reg_DPR(src), - OpcS, OpcP, Pop_Reg_DPR(dst) ); - ins_pipe( pipe_slow ); -%} - -//-------------Float Instructions------------------------------- -// Float Math - -// Code for float compare: -// fcompp(); -// fwait(); fnstsw_ax(); -// sahf(); -// movl(dst, unordered_result); -// jcc(Assembler::parity, exit); -// movl(dst, less_result); -// jcc(Assembler::below, exit); -// movl(dst, equal_result); -// jcc(Assembler::equal, exit); -// movl(dst, greater_result); -// exit: - -// P6 version of float compare, sets condition codes in EFLAGS -instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ - predicate(VM_Version::supports_cmov() && UseSSE == 0); - match(Set cr (CmpF src1 src2)); - effect(KILL rax); - ins_cost(150); - format %{ "FLD $src1\n\t" - "FUCOMIP ST,$src2 // P6 instruction\n\t" - "JNP exit\n\t" - "MOV ah,1 // saw a NaN, set CF (treat as LT)\n\t" - "SAHF\n" - "exit:\tNOP // avoid branch to branch" %} - opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ - ins_encode( Push_Reg_DPR(src1), - OpcP, RegOpc(src2), - cmpF_P6_fixup ); - ins_pipe( pipe_slow ); -%} - -instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{ - predicate(VM_Version::supports_cmov() && UseSSE == 0); - match(Set cr (CmpF src1 src2)); - ins_cost(100); - format %{ "FLD $src1\n\t" - "FUCOMIP ST,$src2 // P6 instruction" %} - opcode(0xDF, 0x05); /* DF E8+i or DF /5 */ - ins_encode( Push_Reg_DPR(src1), - OpcP, RegOpc(src2)); - ins_pipe( pipe_slow ); -%} - - -// 
Compare & branch -instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{ - predicate(UseSSE == 0); - match(Set cr (CmpF src1 src2)); - effect(KILL rax); - ins_cost(200); - format %{ "FLD $src1\n\t" - "FCOMp $src2\n\t" - "FNSTSW AX\n\t" - "TEST AX,0x400\n\t" - "JZ,s flags\n\t" - "MOV AH,1\t# unordered treat as LT\n" - "flags:\tSAHF" %} - opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ - ins_encode( Push_Reg_DPR(src1), - OpcP, RegOpc(src2), - fpu_flags); - ins_pipe( pipe_slow ); -%} - -// Compare vs zero into -1,0,1 -instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{ - predicate(UseSSE == 0); - match(Set dst (CmpF3 src1 zero)); - effect(KILL cr, KILL rax); - ins_cost(280); - format %{ "FTSTF $dst,$src1" %} - opcode(0xE4, 0xD9); - ins_encode( Push_Reg_DPR(src1), - OpcS, OpcP, PopFPU, - CmpF_Result(dst)); - ins_pipe( pipe_slow ); -%} - -// Compare into -1,0,1 -instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ - predicate(UseSSE == 0); - match(Set dst (CmpF3 src1 src2)); - effect(KILL cr, KILL rax); - ins_cost(300); - format %{ "FCMPF $dst,$src1,$src2" %} - opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */ - ins_encode( Push_Reg_DPR(src1), - OpcP, RegOpc(src2), - CmpF_Result(dst)); - ins_pipe( pipe_slow ); -%} - -// float compare and set condition codes in EFLAGS by XMM regs -instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{ - predicate(UseSSE>=1); - match(Set cr (CmpF src1 src2)); - ins_cost(145); - format %{ "UCOMISS $src1,$src2\n\t" - "JNP,s exit\n\t" - "PUSHF\t# saw NaN, set CF\n\t" - "AND [rsp], #0xffffff2b\n\t" - "POPF\n" - "exit:" %} - ins_encode %{ - __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); - emit_cmpfp_fixup(masm); - %} - ins_pipe( pipe_slow ); -%} - -instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{ - predicate(UseSSE>=1); - match(Set cr (CmpF src1 src2)); - ins_cost(100); - format %{ "UCOMISS $src1,$src2" %} - ins_encode %{ - __ 
ucomiss($src1$$XMMRegister, $src2$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -// float compare and set condition codes in EFLAGS by XMM regs -instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{ - predicate(UseSSE>=1); - match(Set cr (CmpF src1 (LoadF src2))); - ins_cost(165); - format %{ "UCOMISS $src1,$src2\n\t" - "JNP,s exit\n\t" - "PUSHF\t# saw NaN, set CF\n\t" - "AND [rsp], #0xffffff2b\n\t" - "POPF\n" - "exit:" %} - ins_encode %{ - __ ucomiss($src1$$XMMRegister, $src2$$Address); - emit_cmpfp_fixup(masm); - %} - ins_pipe( pipe_slow ); -%} - -instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{ - predicate(UseSSE>=1); - match(Set cr (CmpF src1 (LoadF src2))); - ins_cost(100); - format %{ "UCOMISS $src1,$src2" %} - ins_encode %{ - __ ucomiss($src1$$XMMRegister, $src2$$Address); - %} - ins_pipe( pipe_slow ); -%} - -// Compare into -1,0,1 in XMM -instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{ - predicate(UseSSE>=1); - match(Set dst (CmpF3 src1 src2)); - effect(KILL cr); - ins_cost(255); - format %{ "UCOMISS $src1, $src2\n\t" - "MOV $dst, #-1\n\t" - "JP,s done\n\t" - "JB,s done\n\t" - "SETNE $dst\n\t" - "MOVZB $dst, $dst\n" - "done:" %} - ins_encode %{ - __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister); - emit_cmpfp3(masm, $dst$$Register); - %} - ins_pipe( pipe_slow ); -%} - -// Compare into -1,0,1 in XMM and memory -instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{ - predicate(UseSSE>=1); - match(Set dst (CmpF3 src1 (LoadF src2))); - effect(KILL cr); - ins_cost(275); - format %{ "UCOMISS $src1, $src2\n\t" - "MOV $dst, #-1\n\t" - "JP,s done\n\t" - "JB,s done\n\t" - "SETNE $dst\n\t" - "MOVZB $dst, $dst\n" - "done:" %} - ins_encode %{ - __ ucomiss($src1$$XMMRegister, $src2$$Address); - emit_cmpfp3(masm, $dst$$Register); - %} - ins_pipe( pipe_slow ); -%} - -// Spill to obtain 24-bit precision -instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ - predicate(UseSSE==0 && 
Compile::current()->select_24_bit_instr()); - match(Set dst (SubF src1 src2)); - - format %{ "FSUB $dst,$src1 - $src2" %} - opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */ - ins_encode( Push_Reg_FPR(src1), - OpcReg_FPR(src2), - Pop_Mem_FPR(dst) ); - ins_pipe( fpu_mem_reg_reg ); -%} -// -// This instruction does not round to 24-bits -instruct subFPR_reg(regFPR dst, regFPR src) %{ - predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set dst (SubF dst src)); - - format %{ "FSUB $dst,$src" %} - opcode(0xDE, 0x5); /* DE E8+i or DE /5 */ - ins_encode( Push_Reg_FPR(src), - OpcP, RegOpc(dst) ); - ins_pipe( fpu_reg_reg ); -%} - -// Spill to obtain 24-bit precision -instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ - predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); - match(Set dst (AddF src1 src2)); - - format %{ "FADD $dst,$src1,$src2" %} - opcode(0xD8, 0x0); /* D8 C0+i */ - ins_encode( Push_Reg_FPR(src2), - OpcReg_FPR(src1), - Pop_Mem_FPR(dst) ); - ins_pipe( fpu_mem_reg_reg ); -%} -// -// This instruction does not round to 24-bits -instruct addFPR_reg(regFPR dst, regFPR src) %{ - predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set dst (AddF dst src)); - - format %{ "FLD $src\n\t" - "FADDp $dst,ST" %} - opcode(0xDE, 0x0); /* DE C0+i or DE /0*/ - ins_encode( Push_Reg_FPR(src), - OpcP, RegOpc(dst) ); - ins_pipe( fpu_reg_reg ); -%} - -instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{ - predicate(UseSSE==0); - match(Set dst (AbsF src)); - ins_cost(100); - format %{ "FABS" %} - opcode(0xE1, 0xD9); - ins_encode( OpcS, OpcP ); - ins_pipe( fpu_reg_reg ); -%} - -instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{ - predicate(UseSSE==0); - match(Set dst (NegF src)); - ins_cost(100); - format %{ "FCHS" %} - opcode(0xE0, 0xD9); - ins_encode( OpcS, OpcP ); - ins_pipe( fpu_reg_reg ); -%} - -// Cisc-alternate to addFPR_reg -// Spill to obtain 24-bit precision -instruct 
addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ - predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); - match(Set dst (AddF src1 (LoadF src2))); - - format %{ "FLD $src2\n\t" - "FADD ST,$src1\n\t" - "FSTP_S $dst" %} - opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ - ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2), - OpcReg_FPR(src1), - Pop_Mem_FPR(dst), ClearInstMark ); - ins_pipe( fpu_mem_reg_mem ); -%} -// -// Cisc-alternate to addFPR_reg -// This instruction does not round to 24-bits -instruct addFPR_reg_mem(regFPR dst, memory src) %{ - predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set dst (AddF dst (LoadF src))); - - format %{ "FADD $dst,$src" %} - opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/ /* LoadF D9 /0 */ - ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src), - OpcP, RegOpc(dst), ClearInstMark ); - ins_pipe( fpu_reg_mem ); -%} - -// // Following two instructions for _222_mpegaudio -// Spill to obtain 24-bit precision -instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{ - predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); - match(Set dst (AddF src1 src2)); - - format %{ "FADD $dst,$src1,$src2" %} - opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ - ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src1), - OpcReg_FPR(src2), - Pop_Mem_FPR(dst), ClearInstMark ); - ins_pipe( fpu_mem_reg_mem ); -%} - -// Cisc-spill variant -// Spill to obtain 24-bit precision -instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{ - predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); - match(Set dst (AddF src1 (LoadF src2))); - - format %{ "FADD $dst,$src1,$src2 cisc" %} - opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */ /* LoadF D9 /0 */ - ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2), - OpcP, RMopc_Mem(secondary,src1), - Pop_Mem_FPR(dst), - ClearInstMark); - ins_pipe( fpu_mem_mem_mem ); 
-%} - -// Spill to obtain 24-bit precision -instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ - predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); - match(Set dst (AddF src1 src2)); - - format %{ "FADD $dst,$src1,$src2" %} - opcode(0xD8, 0x0, 0xD9); /* D8 /0 */ /* LoadF D9 /0 */ - ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2), - OpcP, RMopc_Mem(secondary,src1), - Pop_Mem_FPR(dst), - ClearInstMark); - ins_pipe( fpu_mem_mem_mem ); -%} - - -// Spill to obtain 24-bit precision -instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ - predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); - match(Set dst (AddF src con)); - format %{ "FLD $src\n\t" - "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" - "FSTP_S $dst" %} - ins_encode %{ - __ fld_s($src$$reg - 1); // FLD ST(i-1) - __ fadd_s($constantaddress($con)); - __ fstp_s(Address(rsp, $dst$$disp)); - %} - ins_pipe(fpu_mem_reg_con); -%} -// -// This instruction does not round to 24-bits -instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ - predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set dst (AddF src con)); - format %{ "FLD $src\n\t" - "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t" - "FSTP $dst" %} - ins_encode %{ - __ fld_s($src$$reg - 1); // FLD ST(i-1) - __ fadd_s($constantaddress($con)); - __ fstp_d($dst$$reg); - %} - ins_pipe(fpu_reg_reg_con); -%} - -// Spill to obtain 24-bit precision -instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ - predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); - match(Set dst (MulF src1 src2)); - - format %{ "FLD $src1\n\t" - "FMUL $src2\n\t" - "FSTP_S $dst" %} - opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */ - ins_encode( Push_Reg_FPR(src1), - OpcReg_FPR(src2), - Pop_Mem_FPR(dst) ); - ins_pipe( fpu_mem_reg_reg ); -%} -// -// This instruction does not round to 24-bits 
-instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{ - predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set dst (MulF src1 src2)); - - format %{ "FLD $src1\n\t" - "FMUL $src2\n\t" - "FSTP_S $dst" %} - opcode(0xD8, 0x1); /* D8 C8+i */ - ins_encode( Push_Reg_FPR(src2), - OpcReg_FPR(src1), - Pop_Reg_FPR(dst) ); - ins_pipe( fpu_reg_reg_reg ); -%} - - -// Spill to obtain 24-bit precision -// Cisc-alternate to reg-reg multiply -instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{ - predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); - match(Set dst (MulF src1 (LoadF src2))); - - format %{ "FLD_S $src2\n\t" - "FMUL $src1\n\t" - "FSTP_S $dst" %} - opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/ /* LoadF D9 /0 */ - ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2), - OpcReg_FPR(src1), - Pop_Mem_FPR(dst), ClearInstMark ); - ins_pipe( fpu_mem_reg_mem ); -%} -// -// This instruction does not round to 24-bits -// Cisc-alternate to reg-reg multiply -instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{ - predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set dst (MulF src1 (LoadF src2))); - - format %{ "FMUL $dst,$src1,$src2" %} - opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */ /* LoadF D9 /0 */ - ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2), - OpcReg_FPR(src1), - Pop_Reg_FPR(dst), ClearInstMark ); - ins_pipe( fpu_reg_reg_mem ); -%} - -// Spill to obtain 24-bit precision -instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{ - predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); - match(Set dst (MulF src1 src2)); - - format %{ "FMUL $dst,$src1,$src2" %} - opcode(0xD8, 0x1, 0xD9); /* D8 /1 */ /* LoadF D9 /0 */ - ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,src2), - OpcP, RMopc_Mem(secondary,src1), - Pop_Mem_FPR(dst), - ClearInstMark ); - ins_pipe( fpu_mem_mem_mem ); -%} - -// Spill to obtain 24-bit precision 
-instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{ - predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); - match(Set dst (MulF src con)); - - format %{ "FLD $src\n\t" - "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" - "FSTP_S $dst" %} - ins_encode %{ - __ fld_s($src$$reg - 1); // FLD ST(i-1) - __ fmul_s($constantaddress($con)); - __ fstp_s(Address(rsp, $dst$$disp)); - %} - ins_pipe(fpu_mem_reg_con); -%} -// -// This instruction does not round to 24-bits -instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{ - predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set dst (MulF src con)); - - format %{ "FLD $src\n\t" - "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t" - "FSTP $dst" %} - ins_encode %{ - __ fld_s($src$$reg - 1); // FLD ST(i-1) - __ fmul_s($constantaddress($con)); - __ fstp_d($dst$$reg); - %} - ins_pipe(fpu_reg_reg_con); -%} - - -// -// MACRO1 -- subsume unshared load into mulFPR -// This instruction does not round to 24-bits -instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{ - predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set dst (MulF (LoadF mem1) src)); - - format %{ "FLD $mem1 ===MACRO1===\n\t" - "FMUL ST,$src\n\t" - "FSTP $dst" %} - opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */ /* LoadF D9 /0 */ - ins_encode( SetInstMark, Opcode(tertiary), RMopc_Mem(0x00,mem1), - OpcReg_FPR(src), - Pop_Reg_FPR(dst), ClearInstMark ); - ins_pipe( fpu_reg_reg_mem ); -%} -// -// MACRO2 -- addFPR a mulFPR which subsumed an unshared load -// This instruction does not round to 24-bits -instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{ - predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set dst (AddF (MulF (LoadF mem1) src1) src2)); - ins_cost(95); - - format %{ "FLD $mem1 ===MACRO2===\n\t" - "FMUL ST,$src1 subsume mulFPR left load\n\t" - "FADD 
ST,$src2\n\t" - "FSTP $dst" %} - opcode(0xD9); /* LoadF D9 /0 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem1), - FMul_ST_reg(src1), - FAdd_ST_reg(src2), - Pop_Reg_FPR(dst), ClearInstMark ); - ins_pipe( fpu_reg_mem_reg_reg ); -%} - -// MACRO3 -- addFPR a mulFPR -// This instruction does not round to 24-bits. It is a '2-address' -// instruction in that the result goes back to src2. This eliminates -// a move from the macro; possibly the register allocator will have -// to add it back (and maybe not). -instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{ - predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set src2 (AddF (MulF src0 src1) src2)); - - format %{ "FLD $src0 ===MACRO3===\n\t" - "FMUL ST,$src1\n\t" - "FADDP $src2,ST" %} - opcode(0xD9); /* LoadF D9 /0 */ - ins_encode( Push_Reg_FPR(src0), - FMul_ST_reg(src1), - FAddP_reg_ST(src2) ); - ins_pipe( fpu_reg_reg_reg ); -%} - -// MACRO4 -- divFPR subFPR -// This instruction does not round to 24-bits -instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{ - predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set dst (DivF (SubF src2 src1) src3)); - - format %{ "FLD $src2 ===MACRO4===\n\t" - "FSUB ST,$src1\n\t" - "FDIV ST,$src3\n\t" - "FSTP $dst" %} - opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ - ins_encode( Push_Reg_FPR(src2), - subFPR_divFPR_encode(src1,src3), - Pop_Reg_FPR(dst) ); - ins_pipe( fpu_reg_reg_reg_reg ); -%} - -// Spill to obtain 24-bit precision -instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{ - predicate(UseSSE==0 && Compile::current()->select_24_bit_instr()); - match(Set dst (DivF src1 src2)); - - format %{ "FDIV $dst,$src1,$src2" %} - opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/ - ins_encode( Push_Reg_FPR(src1), - OpcReg_FPR(src2), - Pop_Mem_FPR(dst) ); - ins_pipe( fpu_mem_reg_reg ); -%} -// -// This instruction does not round to 24-bits -instruct divFPR_reg(regFPR dst, regFPR src) %{ - 
predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set dst (DivF dst src)); - - format %{ "FDIV $dst,$src" %} - opcode(0xDE, 0x7); /* DE F8+i or DE /7*/ - ins_encode( Push_Reg_FPR(src), - OpcP, RegOpc(dst) ); - ins_pipe( fpu_reg_reg ); -%} - - -// Spill to obtain 24-bit precision -instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{ - predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); - match(Set dst (ModF src1 src2)); - effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS - - format %{ "FMOD $dst,$src1,$src2" %} - ins_encode( Push_Reg_Mod_DPR(src1, src2), - emitModDPR(), - Push_Result_Mod_DPR(src2), - Pop_Mem_FPR(dst)); - ins_pipe( pipe_slow ); -%} -// -// This instruction does not round to 24-bits -instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{ - predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set dst (ModF dst src)); - effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS - - format %{ "FMOD $dst,$src" %} - ins_encode(Push_Reg_Mod_DPR(dst, src), - emitModDPR(), - Push_Result_Mod_DPR(src), - Pop_Reg_FPR(dst)); - ins_pipe( pipe_slow ); -%} - -instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{ - predicate(UseSSE>=1); - match(Set dst (ModF src0 src1)); - effect(KILL rax, KILL cr); - format %{ "SUB ESP,4\t # FMOD\n" - "\tMOVSS [ESP+0],$src1\n" - "\tFLD_S [ESP+0]\n" - "\tMOVSS [ESP+0],$src0\n" - "\tFLD_S [ESP+0]\n" - "loop:\tFPREM\n" - "\tFWAIT\n" - "\tFNSTSW AX\n" - "\tSAHF\n" - "\tJP loop\n" - "\tFSTP_S [ESP+0]\n" - "\tMOVSS $dst,[ESP+0]\n" - "\tADD ESP,4\n" - "\tFSTP ST0\t # Restore FPU Stack" - %} - ins_cost(250); - ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU); - ins_pipe( pipe_slow ); -%} - - -//----------Arithmetic Conversion Instructions--------------------------------- -// The conversions operations are all Alpha sorted. 
Please keep it that way! - -instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{ - predicate(UseSSE==0); - match(Set dst (RoundFloat src)); - ins_cost(125); - format %{ "FST_S $dst,$src\t# F-round" %} - ins_encode( Pop_Mem_Reg_FPR(dst, src) ); - ins_pipe( fpu_mem_reg ); -%} - -instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{ - predicate(UseSSE<=1); - match(Set dst (RoundDouble src)); - ins_cost(125); - format %{ "FST_D $dst,$src\t# D-round" %} - ins_encode( Pop_Mem_Reg_DPR(dst, src) ); - ins_pipe( fpu_mem_reg ); -%} - -// Force rounding to 24-bit precision and 6-bit exponent -instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{ - predicate(UseSSE==0); - match(Set dst (ConvD2F src)); - format %{ "FST_S $dst,$src\t# F-round" %} - expand %{ - roundFloat_mem_reg(dst,src); - %} -%} - -// Force rounding to 24-bit precision and 6-bit exponent -instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{ - predicate(UseSSE==1); - match(Set dst (ConvD2F src)); - effect( KILL cr ); - format %{ "SUB ESP,4\n\t" - "FST_S [ESP],$src\t# F-round\n\t" - "MOVSS $dst,[ESP]\n\t" - "ADD ESP,4" %} - ins_encode %{ - __ subptr(rsp, 4); - if ($src$$reg != FPR1L_enc) { - __ fld_s($src$$reg-1); - __ fstp_s(Address(rsp, 0)); - } else { - __ fst_s(Address(rsp, 0)); - } - __ movflt($dst$$XMMRegister, Address(rsp, 0)); - __ addptr(rsp, 4); - %} - ins_pipe( pipe_slow ); -%} - -// Force rounding double precision to single precision -instruct convD2F_reg(regF dst, regD src) %{ - predicate(UseSSE>=2); - match(Set dst (ConvD2F src)); - format %{ "CVTSD2SS $dst,$src\t# F-round" %} - ins_encode %{ - __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{ - predicate(UseSSE==0); - match(Set dst (ConvF2D src)); - format %{ "FST_S $dst,$src\t# D-round" %} - ins_encode( Pop_Reg_Reg_DPR(dst, src)); - ins_pipe( fpu_reg_reg ); -%} - -instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{ - 
predicate(UseSSE==1); - match(Set dst (ConvF2D src)); - format %{ "FST_D $dst,$src\t# D-round" %} - expand %{ - roundDouble_mem_reg(dst,src); - %} -%} - -instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{ - predicate(UseSSE==1); - match(Set dst (ConvF2D src)); - effect( KILL cr ); - format %{ "SUB ESP,4\n\t" - "MOVSS [ESP] $src\n\t" - "FLD_S [ESP]\n\t" - "ADD ESP,4\n\t" - "FSTP $dst\t# D-round" %} - ins_encode %{ - __ subptr(rsp, 4); - __ movflt(Address(rsp, 0), $src$$XMMRegister); - __ fld_s(Address(rsp, 0)); - __ addptr(rsp, 4); - __ fstp_d($dst$$reg); - %} - ins_pipe( pipe_slow ); -%} - -instruct convF2D_reg(regD dst, regF src) %{ - predicate(UseSSE>=2); - match(Set dst (ConvF2D src)); - format %{ "CVTSS2SD $dst,$src\t# D-round" %} - ins_encode %{ - __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -// Convert a double to an int. If the double is a NAN, stuff a zero in instead. -instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{ - predicate(UseSSE<=1); - match(Set dst (ConvD2I src)); - effect( KILL tmp, KILL cr ); - format %{ "FLD $src\t# Convert double to int \n\t" - "FLDCW trunc mode\n\t" - "SUB ESP,4\n\t" - "FISTp [ESP + #0]\n\t" - "FLDCW std/24-bit mode\n\t" - "POP EAX\n\t" - "CMP EAX,0x80000000\n\t" - "JNE,s fast\n\t" - "FLD_D $src\n\t" - "CALL d2i_wrapper\n" - "fast:" %} - ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) ); - ins_pipe( pipe_slow ); -%} - -// Convert a double to an int. If the double is a NAN, stuff a zero in instead. 
-instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{ - predicate(UseSSE>=2); - match(Set dst (ConvD2I src)); - effect( KILL tmp, KILL cr ); - format %{ "CVTTSD2SI $dst, $src\n\t" - "CMP $dst,0x80000000\n\t" - "JNE,s fast\n\t" - "SUB ESP, 8\n\t" - "MOVSD [ESP], $src\n\t" - "FLD_D [ESP]\n\t" - "ADD ESP, 8\n\t" - "CALL d2i_wrapper\n" - "fast:" %} - ins_encode %{ - Label fast; - __ cvttsd2sil($dst$$Register, $src$$XMMRegister); - __ cmpl($dst$$Register, 0x80000000); - __ jccb(Assembler::notEqual, fast); - __ subptr(rsp, 8); - __ movdbl(Address(rsp, 0), $src$$XMMRegister); - __ fld_d(Address(rsp, 0)); - __ addptr(rsp, 8); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper()))); - __ post_call_nop(); - __ bind(fast); - %} - ins_pipe( pipe_slow ); -%} - -instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{ - predicate(UseSSE<=1); - match(Set dst (ConvD2L src)); - effect( KILL cr ); - format %{ "FLD $src\t# Convert double to long\n\t" - "FLDCW trunc mode\n\t" - "SUB ESP,8\n\t" - "FISTp [ESP + #0]\n\t" - "FLDCW std/24-bit mode\n\t" - "POP EAX\n\t" - "POP EDX\n\t" - "CMP EDX,0x80000000\n\t" - "JNE,s fast\n\t" - "TEST EAX,EAX\n\t" - "JNE,s fast\n\t" - "FLD $src\n\t" - "CALL d2l_wrapper\n" - "fast:" %} - ins_encode( Push_Reg_DPR(src), DPR2L_encoding(src) ); - ins_pipe( pipe_slow ); -%} - -// XMM lacks a float/double->long conversion, so use the old FPU stack. 
-instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{ - predicate (UseSSE>=2); - match(Set dst (ConvD2L src)); - effect( KILL cr ); - format %{ "SUB ESP,8\t# Convert double to long\n\t" - "MOVSD [ESP],$src\n\t" - "FLD_D [ESP]\n\t" - "FLDCW trunc mode\n\t" - "FISTp [ESP + #0]\n\t" - "FLDCW std/24-bit mode\n\t" - "POP EAX\n\t" - "POP EDX\n\t" - "CMP EDX,0x80000000\n\t" - "JNE,s fast\n\t" - "TEST EAX,EAX\n\t" - "JNE,s fast\n\t" - "SUB ESP,8\n\t" - "MOVSD [ESP],$src\n\t" - "FLD_D [ESP]\n\t" - "ADD ESP,8\n\t" - "CALL d2l_wrapper\n" - "fast:" %} - ins_encode %{ - Label fast; - __ subptr(rsp, 8); - __ movdbl(Address(rsp, 0), $src$$XMMRegister); - __ fld_d(Address(rsp, 0)); - __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc())); - __ fistp_d(Address(rsp, 0)); - // Restore the rounding mode, mask the exception - if (Compile::current()->in_24_bit_fp_mode()) { - __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); - } else { - __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); - } - // Load the converted long, adjust CPU stack - __ pop(rax); - __ pop(rdx); - __ cmpl(rdx, 0x80000000); - __ jccb(Assembler::notEqual, fast); - __ testl(rax, rax); - __ jccb(Assembler::notEqual, fast); - __ subptr(rsp, 8); - __ movdbl(Address(rsp, 0), $src$$XMMRegister); - __ fld_d(Address(rsp, 0)); - __ addptr(rsp, 8); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper()))); - __ post_call_nop(); - __ bind(fast); - %} - ins_pipe( pipe_slow ); -%} - -// Convert a double to an int. Java semantics require we do complex -// manglations in the corner cases. So we set the rounding mode to -// 'zero', store the darned double down as an int, and reset the -// rounding mode to 'nearest'. The hardware stores a flag value down -// if we would overflow or converted a NAN; we check for this and -// and go the slow path if needed. 
-instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{ - predicate(UseSSE==0); - match(Set dst (ConvF2I src)); - effect( KILL tmp, KILL cr ); - format %{ "FLD $src\t# Convert float to int \n\t" - "FLDCW trunc mode\n\t" - "SUB ESP,4\n\t" - "FISTp [ESP + #0]\n\t" - "FLDCW std/24-bit mode\n\t" - "POP EAX\n\t" - "CMP EAX,0x80000000\n\t" - "JNE,s fast\n\t" - "FLD $src\n\t" - "CALL d2i_wrapper\n" - "fast:" %} - // DPR2I_encoding works for FPR2I - ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) ); - ins_pipe( pipe_slow ); -%} - -// Convert a float in xmm to an int reg. -instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{ - predicate(UseSSE>=1); - match(Set dst (ConvF2I src)); - effect( KILL tmp, KILL cr ); - format %{ "CVTTSS2SI $dst, $src\n\t" - "CMP $dst,0x80000000\n\t" - "JNE,s fast\n\t" - "SUB ESP, 4\n\t" - "MOVSS [ESP], $src\n\t" - "FLD [ESP]\n\t" - "ADD ESP, 4\n\t" - "CALL d2i_wrapper\n" - "fast:" %} - ins_encode %{ - Label fast; - __ cvttss2sil($dst$$Register, $src$$XMMRegister); - __ cmpl($dst$$Register, 0x80000000); - __ jccb(Assembler::notEqual, fast); - __ subptr(rsp, 4); - __ movflt(Address(rsp, 0), $src$$XMMRegister); - __ fld_s(Address(rsp, 0)); - __ addptr(rsp, 4); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2i_wrapper()))); - __ post_call_nop(); - __ bind(fast); - %} - ins_pipe( pipe_slow ); -%} - -instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{ - predicate(UseSSE==0); - match(Set dst (ConvF2L src)); - effect( KILL cr ); - format %{ "FLD $src\t# Convert float to long\n\t" - "FLDCW trunc mode\n\t" - "SUB ESP,8\n\t" - "FISTp [ESP + #0]\n\t" - "FLDCW std/24-bit mode\n\t" - "POP EAX\n\t" - "POP EDX\n\t" - "CMP EDX,0x80000000\n\t" - "JNE,s fast\n\t" - "TEST EAX,EAX\n\t" - "JNE,s fast\n\t" - "FLD $src\n\t" - "CALL d2l_wrapper\n" - "fast:" %} - // DPR2L_encoding works for FPR2L - ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) ); - ins_pipe( pipe_slow ); 
-%} - -// XMM lacks a float/double->long conversion, so use the old FPU stack. -instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{ - predicate (UseSSE>=1); - match(Set dst (ConvF2L src)); - effect( KILL cr ); - format %{ "SUB ESP,8\t# Convert float to long\n\t" - "MOVSS [ESP],$src\n\t" - "FLD_S [ESP]\n\t" - "FLDCW trunc mode\n\t" - "FISTp [ESP + #0]\n\t" - "FLDCW std/24-bit mode\n\t" - "POP EAX\n\t" - "POP EDX\n\t" - "CMP EDX,0x80000000\n\t" - "JNE,s fast\n\t" - "TEST EAX,EAX\n\t" - "JNE,s fast\n\t" - "SUB ESP,4\t# Convert float to long\n\t" - "MOVSS [ESP],$src\n\t" - "FLD_S [ESP]\n\t" - "ADD ESP,4\n\t" - "CALL d2l_wrapper\n" - "fast:" %} - ins_encode %{ - Label fast; - __ subptr(rsp, 8); - __ movflt(Address(rsp, 0), $src$$XMMRegister); - __ fld_s(Address(rsp, 0)); - __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_trunc())); - __ fistp_d(Address(rsp, 0)); - // Restore the rounding mode, mask the exception - if (Compile::current()->in_24_bit_fp_mode()) { - __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_24())); - } else { - __ fldcw(ExternalAddress(StubRoutines::x86::addr_fpu_cntrl_wrd_std())); - } - // Load the converted long, adjust CPU stack - __ pop(rax); - __ pop(rdx); - __ cmpl(rdx, 0x80000000); - __ jccb(Assembler::notEqual, fast); - __ testl(rax, rax); - __ jccb(Assembler::notEqual, fast); - __ subptr(rsp, 4); - __ movflt(Address(rsp, 0), $src$$XMMRegister); - __ fld_s(Address(rsp, 0)); - __ addptr(rsp, 4); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::x86::d2l_wrapper()))); - __ post_call_nop(); - __ bind(fast); - %} - ins_pipe( pipe_slow ); -%} - -instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{ - predicate( UseSSE<=1 ); - match(Set dst (ConvI2D src)); - format %{ "FILD $src\n\t" - "FSTP $dst" %} - opcode(0xDB, 0x0); /* DB /0 */ - ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst)); - ins_pipe( fpu_reg_mem ); -%} - -instruct convI2D_reg(regD dst, rRegI src) %{ - predicate( UseSSE>=2 && 
!UseXmmI2D ); - match(Set dst (ConvI2D src)); - format %{ "CVTSI2SD $dst,$src" %} - ins_encode %{ - __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register); - %} - ins_pipe( pipe_slow ); -%} - -instruct convI2D_mem(regD dst, memory mem) %{ - predicate( UseSSE>=2 ); - match(Set dst (ConvI2D (LoadI mem))); - format %{ "CVTSI2SD $dst,$mem" %} - ins_encode %{ - __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address); - %} - ins_pipe( pipe_slow ); -%} - -instruct convXI2D_reg(regD dst, rRegI src) -%{ - predicate( UseSSE>=2 && UseXmmI2D ); - match(Set dst (ConvI2D src)); - - format %{ "MOVD $dst,$src\n\t" - "CVTDQ2PD $dst,$dst\t# i2d" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe(pipe_slow); // XXX -%} - -instruct convI2DPR_mem(regDPR dst, memory mem) %{ - predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr()); - match(Set dst (ConvI2D (LoadI mem))); - format %{ "FILD $mem\n\t" - "FSTP $dst" %} - opcode(0xDB); /* DB /0 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), - Pop_Reg_DPR(dst), ClearInstMark); - ins_pipe( fpu_reg_mem ); -%} - -// Convert a byte to a float; no rounding step needed. 
-instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{ - predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 ); - match(Set dst (ConvI2F src)); - format %{ "FILD $src\n\t" - "FSTP $dst" %} - - opcode(0xDB, 0x0); /* DB /0 */ - ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst)); - ins_pipe( fpu_reg_mem ); -%} - -// In 24-bit mode, force exponent rounding by storing back out -instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{ - predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); - match(Set dst (ConvI2F src)); - ins_cost(200); - format %{ "FILD $src\n\t" - "FSTP_S $dst" %} - opcode(0xDB, 0x0); /* DB /0 */ - ins_encode( Push_Mem_I(src), - Pop_Mem_FPR(dst)); - ins_pipe( fpu_mem_mem ); -%} - -// In 24-bit mode, force exponent rounding by storing back out -instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{ - predicate( UseSSE==0 && Compile::current()->select_24_bit_instr()); - match(Set dst (ConvI2F (LoadI mem))); - ins_cost(200); - format %{ "FILD $mem\n\t" - "FSTP_S $dst" %} - opcode(0xDB); /* DB /0 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), - Pop_Mem_FPR(dst), ClearInstMark); - ins_pipe( fpu_mem_mem ); -%} - -// This instruction does not round to 24-bits -instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{ - predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set dst (ConvI2F src)); - format %{ "FILD $src\n\t" - "FSTP $dst" %} - opcode(0xDB, 0x0); /* DB /0 */ - ins_encode( Push_Mem_I(src), - Pop_Reg_FPR(dst)); - ins_pipe( fpu_reg_mem ); -%} - -// This instruction does not round to 24-bits -instruct convI2FPR_mem(regFPR dst, memory mem) %{ - predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr()); - match(Set dst (ConvI2F (LoadI mem))); - format %{ "FILD $mem\n\t" - "FSTP $dst" %} - opcode(0xDB); /* DB /0 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,mem), - Pop_Reg_FPR(dst), ClearInstMark); - ins_pipe( fpu_reg_mem ); 
-%} - -// Convert an int to a float in xmm; no rounding step needed. -instruct convI2F_reg(regF dst, rRegI src) %{ - predicate( UseSSE==1 || ( UseSSE>=2 && !UseXmmI2F )); - match(Set dst (ConvI2F src)); - format %{ "CVTSI2SS $dst, $src" %} - ins_encode %{ - __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register); - %} - ins_pipe( pipe_slow ); -%} - - instruct convXI2F_reg(regF dst, rRegI src) -%{ - predicate( UseSSE>=2 && UseXmmI2F ); - match(Set dst (ConvI2F src)); - - format %{ "MOVD $dst,$src\n\t" - "CVTDQ2PS $dst,$dst\t# i2f" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister); - %} - ins_pipe(pipe_slow); // XXX -%} - -instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{ - match(Set dst (ConvI2L src)); - effect(KILL cr); - ins_cost(375); - format %{ "MOV $dst.lo,$src\n\t" - "MOV $dst.hi,$src\n\t" - "SAR $dst.hi,31" %} - ins_encode(convert_int_long(dst,src)); - ins_pipe( ialu_reg_reg_long ); -%} - -// Zero-extend convert int to long -instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{ - match(Set dst (AndL (ConvI2L src) mask) ); - effect( KILL flags ); - ins_cost(250); - format %{ "MOV $dst.lo,$src\n\t" - "XOR $dst.hi,$dst.hi" %} - opcode(0x33); // XOR - ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); - ins_pipe( ialu_reg_reg_long ); -%} - -// Zero-extend long -instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{ - match(Set dst (AndL src mask) ); - effect( KILL flags ); - ins_cost(250); - format %{ "MOV $dst.lo,$src.lo\n\t" - "XOR $dst.hi,$dst.hi\n\t" %} - opcode(0x33); // XOR - ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) ); - ins_pipe( ialu_reg_reg_long ); -%} - -instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{ - predicate (UseSSE<=1); - match(Set dst (ConvL2D src)); - effect( KILL cr ); - format %{ "PUSH $src.hi\t# Convert long to double\n\t" - "PUSH $src.lo\n\t" - "FILD ST,[ESP + #0]\n\t" - 
"ADD ESP,8\n\t" - "FSTP_D $dst\t# D-round" %} - opcode(0xDF, 0x5); /* DF /5 */ - ins_encode(convert_long_double(src), Pop_Mem_DPR(dst)); - ins_pipe( pipe_slow ); -%} - -instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{ - predicate (UseSSE>=2); - match(Set dst (ConvL2D src)); - effect( KILL cr ); - format %{ "PUSH $src.hi\t# Convert long to double\n\t" - "PUSH $src.lo\n\t" - "FILD_D [ESP]\n\t" - "FSTP_D [ESP]\n\t" - "MOVSD $dst,[ESP]\n\t" - "ADD ESP,8" %} - opcode(0xDF, 0x5); /* DF /5 */ - ins_encode(convert_long_double2(src), Push_ResultD(dst)); - ins_pipe( pipe_slow ); -%} - -instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{ - predicate (UseSSE>=1); - match(Set dst (ConvL2F src)); - effect( KILL cr ); - format %{ "PUSH $src.hi\t# Convert long to single float\n\t" - "PUSH $src.lo\n\t" - "FILD_D [ESP]\n\t" - "FSTP_S [ESP]\n\t" - "MOVSS $dst,[ESP]\n\t" - "ADD ESP,8" %} - opcode(0xDF, 0x5); /* DF /5 */ - ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8)); - ins_pipe( pipe_slow ); -%} - -instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{ - match(Set dst (ConvL2F src)); - effect( KILL cr ); - format %{ "PUSH $src.hi\t# Convert long to single float\n\t" - "PUSH $src.lo\n\t" - "FILD ST,[ESP + #0]\n\t" - "ADD ESP,8\n\t" - "FSTP_S $dst\t# F-round" %} - opcode(0xDF, 0x5); /* DF /5 */ - ins_encode(convert_long_double(src), Pop_Mem_FPR(dst)); - ins_pipe( pipe_slow ); -%} - -instruct convL2I_reg( rRegI dst, eRegL src ) %{ - match(Set dst (ConvL2I src)); - effect( DEF dst, USE src ); - format %{ "MOV $dst,$src.lo" %} - ins_encode(enc_CopyL_Lo(dst,src)); - ins_pipe( ialu_reg_reg ); -%} - -instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{ - match(Set dst (MoveF2I src)); - effect( DEF dst, USE src ); - ins_cost(100); - format %{ "MOV $dst,$src\t# MoveF2I_stack_reg" %} - ins_encode %{ - __ movl($dst$$Register, Address(rsp, $src$$disp)); - %} - ins_pipe( ialu_reg_mem ); -%} - -instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR 
src) %{ - predicate(UseSSE==0); - match(Set dst (MoveF2I src)); - effect( DEF dst, USE src ); - - ins_cost(125); - format %{ "FST_S $dst,$src\t# MoveF2I_reg_stack" %} - ins_encode( Pop_Mem_Reg_FPR(dst, src) ); - ins_pipe( fpu_mem_reg ); -%} - -instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{ - predicate(UseSSE>=1); - match(Set dst (MoveF2I src)); - effect( DEF dst, USE src ); - - ins_cost(95); - format %{ "MOVSS $dst,$src\t# MoveF2I_reg_stack_sse" %} - ins_encode %{ - __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{ - predicate(UseSSE>=2); - match(Set dst (MoveF2I src)); - effect( DEF dst, USE src ); - ins_cost(85); - format %{ "MOVD $dst,$src\t# MoveF2I_reg_reg_sse" %} - ins_encode %{ - __ movdl($dst$$Register, $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{ - match(Set dst (MoveI2F src)); - effect( DEF dst, USE src ); - - ins_cost(100); - format %{ "MOV $dst,$src\t# MoveI2F_reg_stack" %} - ins_encode %{ - __ movl(Address(rsp, $dst$$disp), $src$$Register); - %} - ins_pipe( ialu_mem_reg ); -%} - - -instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{ - predicate(UseSSE==0); - match(Set dst (MoveI2F src)); - effect(DEF dst, USE src); - - ins_cost(125); - format %{ "FLD_S $src\n\t" - "FSTP $dst\t# MoveI2F_stack_reg" %} - opcode(0xD9); /* D9 /0, FLD m32real */ - ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src), - Pop_Reg_FPR(dst), ClearInstMark ); - ins_pipe( fpu_reg_mem ); -%} - -instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{ - predicate(UseSSE>=1); - match(Set dst (MoveI2F src)); - effect( DEF dst, USE src ); - - ins_cost(95); - format %{ "MOVSS $dst,$src\t# MoveI2F_stack_reg_sse" %} - ins_encode %{ - __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp)); - %} - ins_pipe( pipe_slow ); -%} - -instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{ - 
predicate(UseSSE>=2); - match(Set dst (MoveI2F src)); - effect( DEF dst, USE src ); - - ins_cost(85); - format %{ "MOVD $dst,$src\t# MoveI2F_reg_reg_sse" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - %} - ins_pipe( pipe_slow ); -%} - -instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{ - match(Set dst (MoveD2L src)); - effect(DEF dst, USE src); - - ins_cost(250); - format %{ "MOV $dst.lo,$src\n\t" - "MOV $dst.hi,$src+4\t# MoveD2L_stack_reg" %} - opcode(0x8B, 0x8B); - ins_encode( SetInstMark, OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src), ClearInstMark); - ins_pipe( ialu_mem_long_reg ); -%} - -instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{ - predicate(UseSSE<=1); - match(Set dst (MoveD2L src)); - effect(DEF dst, USE src); - - ins_cost(125); - format %{ "FST_D $dst,$src\t# MoveD2L_reg_stack" %} - ins_encode( Pop_Mem_Reg_DPR(dst, src) ); - ins_pipe( fpu_mem_reg ); -%} - -instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{ - predicate(UseSSE>=2); - match(Set dst (MoveD2L src)); - effect(DEF dst, USE src); - ins_cost(95); - format %{ "MOVSD $dst,$src\t# MoveD2L_reg_stack_sse" %} - ins_encode %{ - __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{ - predicate(UseSSE>=2); - match(Set dst (MoveD2L src)); - effect(DEF dst, USE src, TEMP tmp); - ins_cost(85); - format %{ "MOVD $dst.lo,$src\n\t" - "PSHUFLW $tmp,$src,0x4E\n\t" - "MOVD $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %} - ins_encode %{ - __ movdl($dst$$Register, $src$$XMMRegister); - __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e); - __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{ - match(Set dst (MoveL2D src)); - effect(DEF dst, USE src); - - ins_cost(200); - format %{ "MOV $dst,$src.lo\n\t" - "MOV $dst+4,$src.hi\t# MoveL2D_reg_stack" %} - opcode(0x89, 0x89); 
- ins_encode( SetInstMark, OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ), ClearInstMark ); - ins_pipe( ialu_mem_long_reg ); -%} - - -instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{ - predicate(UseSSE<=1); - match(Set dst (MoveL2D src)); - effect(DEF dst, USE src); - ins_cost(125); - - format %{ "FLD_D $src\n\t" - "FSTP $dst\t# MoveL2D_stack_reg" %} - opcode(0xDD); /* DD /0, FLD m64real */ - ins_encode( SetInstMark, OpcP, RMopc_Mem_no_oop(0x00,src), - Pop_Reg_DPR(dst), ClearInstMark ); - ins_pipe( fpu_reg_mem ); -%} - - -instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{ - predicate(UseSSE>=2 && UseXmmLoadAndClearUpper); - match(Set dst (MoveL2D src)); - effect(DEF dst, USE src); - - ins_cost(95); - format %{ "MOVSD $dst,$src\t# MoveL2D_stack_reg_sse" %} - ins_encode %{ - __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); - %} - ins_pipe( pipe_slow ); -%} - -instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{ - predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper); - match(Set dst (MoveL2D src)); - effect(DEF dst, USE src); - - ins_cost(95); - format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %} - ins_encode %{ - __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp)); - %} - ins_pipe( pipe_slow ); -%} - -instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{ - predicate(UseSSE>=2); - match(Set dst (MoveL2D src)); - effect(TEMP dst, USE src, TEMP tmp); - ins_cost(85); - format %{ "MOVD $dst,$src.lo\n\t" - "MOVD $tmp,$src.hi\n\t" - "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %} - ins_encode %{ - __ movdl($dst$$XMMRegister, $src$$Register); - __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); - __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); - %} - ins_pipe( pipe_slow ); -%} - -//----------------------------- CompressBits/ExpandBits ------------------------ - -instruct compressBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{ - 
predicate(n->bottom_type()->isa_long()); - match(Set dst (CompressBits src mask)); - effect(TEMP rtmp, TEMP xtmp, KILL cr); - format %{ "compress_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %} - ins_encode %{ - Label exit, partail_result; - // Parallely extract both upper and lower 32 bits of source into destination register pair. - // Merge the results of upper and lower destination registers such that upper destination - // results are contiguously laid out after the lower destination result. - __ pextl($dst$$Register, $src$$Register, $mask$$Register); - __ pextl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register)); - __ popcntl($rtmp$$Register, $mask$$Register); - // Skip merging if bit count of lower mask register is equal to 32 (register size). - __ cmpl($rtmp$$Register, 32); - __ jccb(Assembler::equal, exit); - // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot. - __ movdl($xtmp$$XMMRegister, $rtmp$$Register); - // Shift left the contents of upper destination register by true bit count of lower mask register - // and merge with lower destination register. - __ shlxl($rtmp$$Register, HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); - __ orl($dst$$Register, $rtmp$$Register); - __ movdl($rtmp$$Register, $xtmp$$XMMRegister); - // Zero out upper destination register if true bit count of lower 32 bit mask is zero - // since contents of upper destination have already been copied to lower destination - // register. - __ cmpl($rtmp$$Register, 0); - __ jccb(Assembler::greater, partail_result); - __ movl(HIGH_FROM_LOW($dst$$Register), 0); - __ jmp(exit); - __ bind(partail_result); - // Perform right shift over upper destination register to move out bits already copied - // to lower destination register. 
- __ subl($rtmp$$Register, 32); - __ negl($rtmp$$Register); - __ shrxl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); - __ bind(exit); - %} - ins_pipe( pipe_slow ); -%} - -instruct expandBitsL_reg(eADXRegL dst, eBCXRegL src, eBDPRegL mask, eSIRegI rtmp, regF xtmp, eFlagsReg cr) %{ - predicate(n->bottom_type()->isa_long()); - match(Set dst (ExpandBits src mask)); - effect(TEMP rtmp, TEMP xtmp, KILL cr); - format %{ "expand_bits $dst, $src, $mask\t! using $rtmp and $xtmp as TEMP" %} - ins_encode %{ - // Extraction operation sequentially reads the bits from source register starting from LSB - // and lays them out into destination register at bit locations corresponding to true bits - // in mask register. Thus number of source bits read are equal to combined true bit count - // of mask register pair. - Label exit, mask_clipping; - __ pdepl($dst$$Register, $src$$Register, $mask$$Register); - __ pdepl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($src$$Register), HIGH_FROM_LOW($mask$$Register)); - __ popcntl($rtmp$$Register, $mask$$Register); - // If true bit count of lower mask register is 32 then none of bit of lower source register - // will feed to upper destination register. - __ cmpl($rtmp$$Register, 32); - __ jccb(Assembler::equal, exit); - // Due to constraint on number of GPRs on 32 bit target, using XMM register as potential spill slot. - __ movdl($xtmp$$XMMRegister, $rtmp$$Register); - // Shift right the contents of lower source register to remove already consumed bits. - __ shrxl($rtmp$$Register, $src$$Register, $rtmp$$Register); - // Extract the bits from lower source register starting from LSB under the influence - // of upper mask register. 
- __ pdepl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register, HIGH_FROM_LOW($mask$$Register)); - __ movdl($rtmp$$Register, $xtmp$$XMMRegister); - __ subl($rtmp$$Register, 32); - __ negl($rtmp$$Register); - __ movdl($xtmp$$XMMRegister, $mask$$Register); - __ movl($mask$$Register, HIGH_FROM_LOW($mask$$Register)); - // Clear the set bits in upper mask register which have been used to extract the contents - // from lower source register. - __ bind(mask_clipping); - __ blsrl($mask$$Register, $mask$$Register); - __ decrementl($rtmp$$Register, 1); - __ jccb(Assembler::greater, mask_clipping); - // Starting from LSB extract the bits from upper source register under the influence of - // remaining set bits in upper mask register. - __ pdepl($rtmp$$Register, HIGH_FROM_LOW($src$$Register), $mask$$Register); - // Merge the partial results extracted from lower and upper source register bits. - __ orl(HIGH_FROM_LOW($dst$$Register), $rtmp$$Register); - __ movdl($mask$$Register, $xtmp$$XMMRegister); - __ bind(exit); - %} - ins_pipe( pipe_slow ); -%} - -// ======================================================================= -// Fast clearing of an array -// Small non-constant length ClearArray for non-AVX512 targets. 
-instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ - predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2)); - match(Set dummy (ClearArray cnt base)); - effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr); - - format %{ $$template - $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" - $$emit$$"CMP InitArrayShortSize,rcx\n\t" - $$emit$$"JG LARGE\n\t" - $$emit$$"SHL ECX, 1\n\t" - $$emit$$"DEC ECX\n\t" - $$emit$$"JS DONE\t# Zero length\n\t" - $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t" - $$emit$$"DEC ECX\n\t" - $$emit$$"JGE LOOP\n\t" - $$emit$$"JMP DONE\n\t" - $$emit$$"# LARGE:\n\t" - if (UseFastStosb) { - $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" - $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" - } else if (UseXMMForObjInit) { - $$emit$$"MOV RDI,RAX\n\t" - $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t" - $$emit$$"JMPQ L_zero_64_bytes\n\t" - $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" - $$emit$$"VMOVDQU YMM0,(RAX)\n\t" - $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t" - $$emit$$"ADD 0x40,RAX\n\t" - $$emit$$"# L_zero_64_bytes:\n\t" - $$emit$$"SUB 0x8,RCX\n\t" - $$emit$$"JGE L_loop\n\t" - $$emit$$"ADD 0x4,RCX\n\t" - $$emit$$"JL L_tail\n\t" - $$emit$$"VMOVDQU YMM0,(RAX)\n\t" - $$emit$$"ADD 0x20,RAX\n\t" - $$emit$$"SUB 0x4,RCX\n\t" - $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" - $$emit$$"ADD 0x4,RCX\n\t" - $$emit$$"JLE L_end\n\t" - $$emit$$"DEC RCX\n\t" - $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" - $$emit$$"VMOVQ XMM0,(RAX)\n\t" - $$emit$$"ADD 0x8,RAX\n\t" - $$emit$$"DEC RCX\n\t" - $$emit$$"JGE L_sloop\n\t" - $$emit$$"# L_end:\n\t" - } else { - $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" - $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" - } - $$emit$$"# DONE" - %} - ins_encode %{ - __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, - $tmp$$XMMRegister, false, knoreg); - %} - ins_pipe( pipe_slow ); -%} - -// Small non-constant length ClearArray for AVX512 targets. 
-instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ - predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2)); - match(Set dummy (ClearArray cnt base)); - ins_cost(125); - effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr); - - format %{ $$template - $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" - $$emit$$"CMP InitArrayShortSize,rcx\n\t" - $$emit$$"JG LARGE\n\t" - $$emit$$"SHL ECX, 1\n\t" - $$emit$$"DEC ECX\n\t" - $$emit$$"JS DONE\t# Zero length\n\t" - $$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t" - $$emit$$"DEC ECX\n\t" - $$emit$$"JGE LOOP\n\t" - $$emit$$"JMP DONE\n\t" - $$emit$$"# LARGE:\n\t" - if (UseFastStosb) { - $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" - $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" - } else if (UseXMMForObjInit) { - $$emit$$"MOV RDI,RAX\n\t" - $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t" - $$emit$$"JMPQ L_zero_64_bytes\n\t" - $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" - $$emit$$"VMOVDQU YMM0,(RAX)\n\t" - $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t" - $$emit$$"ADD 0x40,RAX\n\t" - $$emit$$"# L_zero_64_bytes:\n\t" - $$emit$$"SUB 0x8,RCX\n\t" - $$emit$$"JGE L_loop\n\t" - $$emit$$"ADD 0x4,RCX\n\t" - $$emit$$"JL L_tail\n\t" - $$emit$$"VMOVDQU YMM0,(RAX)\n\t" - $$emit$$"ADD 0x20,RAX\n\t" - $$emit$$"SUB 0x4,RCX\n\t" - $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" - $$emit$$"ADD 0x4,RCX\n\t" - $$emit$$"JLE L_end\n\t" - $$emit$$"DEC RCX\n\t" - $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" - $$emit$$"VMOVQ XMM0,(RAX)\n\t" - $$emit$$"ADD 0x8,RAX\n\t" - $$emit$$"DEC RCX\n\t" - $$emit$$"JGE L_sloop\n\t" - $$emit$$"# L_end:\n\t" - } else { - $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" - $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" - } - $$emit$$"# DONE" - %} - ins_encode %{ - __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, - $tmp$$XMMRegister, false, $ktmp$$KRegister); - %} - ins_pipe( pipe_slow ); -%} - -// 
Large non-constant length ClearArray for non-AVX512 targets. -instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ - predicate((UseAVX <= 2) && ((ClearArrayNode*)n)->is_large()); - match(Set dummy (ClearArray cnt base)); - effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr); - format %{ $$template - if (UseFastStosb) { - $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" - $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" - $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" - } else if (UseXMMForObjInit) { - $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t" - $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t" - $$emit$$"JMPQ L_zero_64_bytes\n\t" - $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" - $$emit$$"VMOVDQU YMM0,(RAX)\n\t" - $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t" - $$emit$$"ADD 0x40,RAX\n\t" - $$emit$$"# L_zero_64_bytes:\n\t" - $$emit$$"SUB 0x8,RCX\n\t" - $$emit$$"JGE L_loop\n\t" - $$emit$$"ADD 0x4,RCX\n\t" - $$emit$$"JL L_tail\n\t" - $$emit$$"VMOVDQU YMM0,(RAX)\n\t" - $$emit$$"ADD 0x20,RAX\n\t" - $$emit$$"SUB 0x4,RCX\n\t" - $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" - $$emit$$"ADD 0x4,RCX\n\t" - $$emit$$"JLE L_end\n\t" - $$emit$$"DEC RCX\n\t" - $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" - $$emit$$"VMOVQ XMM0,(RAX)\n\t" - $$emit$$"ADD 0x8,RAX\n\t" - $$emit$$"DEC RCX\n\t" - $$emit$$"JGE L_sloop\n\t" - $$emit$$"# L_end:\n\t" - } else { - $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" - $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" - $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" - } - $$emit$$"# DONE" - %} - ins_encode %{ - __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, - $tmp$$XMMRegister, true, knoreg); - %} - ins_pipe( pipe_slow ); -%} - -// Large non-constant length ClearArray for AVX512 targets. 
-instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, legRegD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ - predicate((UseAVX > 2) && ((ClearArrayNode*)n)->is_large()); - match(Set dummy (ClearArray cnt base)); - effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr); - format %{ $$template - if (UseFastStosb) { - $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" - $$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t" - $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t" - } else if (UseXMMForObjInit) { - $$emit$$"MOV RDI,RAX\t# ClearArray:\n\t" - $$emit$$"VPXOR YMM0,YMM0,YMM0\n\t" - $$emit$$"JMPQ L_zero_64_bytes\n\t" - $$emit$$"# L_loop:\t# 64-byte LOOP\n\t" - $$emit$$"VMOVDQU YMM0,(RAX)\n\t" - $$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t" - $$emit$$"ADD 0x40,RAX\n\t" - $$emit$$"# L_zero_64_bytes:\n\t" - $$emit$$"SUB 0x8,RCX\n\t" - $$emit$$"JGE L_loop\n\t" - $$emit$$"ADD 0x4,RCX\n\t" - $$emit$$"JL L_tail\n\t" - $$emit$$"VMOVDQU YMM0,(RAX)\n\t" - $$emit$$"ADD 0x20,RAX\n\t" - $$emit$$"SUB 0x4,RCX\n\t" - $$emit$$"# L_tail:\t# Clearing tail bytes\n\t" - $$emit$$"ADD 0x4,RCX\n\t" - $$emit$$"JLE L_end\n\t" - $$emit$$"DEC RCX\n\t" - $$emit$$"# L_sloop:\t# 8-byte short loop\n\t" - $$emit$$"VMOVQ XMM0,(RAX)\n\t" - $$emit$$"ADD 0x8,RAX\n\t" - $$emit$$"DEC RCX\n\t" - $$emit$$"JGE L_sloop\n\t" - $$emit$$"# L_end:\n\t" - } else { - $$emit$$"XOR EAX,EAX\t# ClearArray:\n\t" - $$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t" - $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t" - } - $$emit$$"# DONE" - %} - ins_encode %{ - __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, - $tmp$$XMMRegister, true, $ktmp$$KRegister); - %} - ins_pipe( pipe_slow ); -%} - -// Small constant length ClearArray for AVX512 targets. 
-instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr) -%{ - predicate(!((ClearArrayNode*)n)->is_large() && (MaxVectorSize >= 32) && VM_Version::supports_avx512vl()); - match(Set dummy (ClearArray cnt base)); - ins_cost(100); - effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr); - format %{ "clear_mem_imm $base , $cnt \n\t" %} - ins_encode %{ - __ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister); - %} - ins_pipe(pipe_slow); -%} - -instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, - eAXRegI result, regD tmp1, eFlagsReg cr) %{ - predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); - match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); - effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - - format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} - ins_encode %{ - __ string_compare($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, $result$$Register, - $tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg); - %} - ins_pipe( pipe_slow ); -%} - -instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, - eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{ - predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); - match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); - effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - - format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} - ins_encode %{ - __ string_compare($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, $result$$Register, - $tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct string_compareU(eDIRegP 
str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, - eAXRegI result, regD tmp1, eFlagsReg cr) %{ - predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); - match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); - effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - - format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} - ins_encode %{ - __ string_compare($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, $result$$Register, - $tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg); - %} - ins_pipe( pipe_slow ); -%} - -instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, - eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{ - predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); - match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); - effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - - format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} - ins_encode %{ - __ string_compare($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, $result$$Register, - $tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, - eAXRegI result, regD tmp1, eFlagsReg cr) %{ - predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); - match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); - effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - - format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} - ins_encode %{ - __ string_compare($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, 
$result$$Register, - $tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg); - %} - ins_pipe( pipe_slow ); -%} - -instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2, - eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{ - predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); - match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); - effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - - format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} - ins_encode %{ - __ string_compare($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, $result$$Register, - $tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2, - eAXRegI result, regD tmp1, eFlagsReg cr) %{ - predicate(!VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); - match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); - effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - - format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} - ins_encode %{ - __ string_compare($str2$$Register, $str1$$Register, - $cnt2$$Register, $cnt1$$Register, $result$$Register, - $tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg); - %} - ins_pipe( pipe_slow ); -%} - -instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2, - eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{ - predicate(VM_Version::supports_avx512vlbw() && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); - match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); - effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); - - format %{ "String Compare byte[] 
$str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %} - ins_encode %{ - __ string_compare($str2$$Register, $str1$$Register, - $cnt2$$Register, $cnt1$$Register, $result$$Register, - $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister); - %} - ins_pipe( pipe_slow ); -%} - -// fast string equals -instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, - regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{ - predicate(!VM_Version::supports_avx512vlbw()); - match(Set result (StrEquals (Binary str1 str2) cnt)); - effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); - - format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} - ins_encode %{ - __ arrays_equals(false, $str1$$Register, $str2$$Register, - $cnt$$Register, $result$$Register, $tmp3$$Register, - $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg); - %} - - ins_pipe( pipe_slow ); -%} - -instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result, - regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{ - predicate(VM_Version::supports_avx512vlbw()); - match(Set result (StrEquals (Binary str1 str2) cnt)); - effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr); - - format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %} - ins_encode %{ - __ arrays_equals(false, $str1$$Register, $str2$$Register, - $cnt$$Register, $result$$Register, $tmp3$$Register, - $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister); - %} - - ins_pipe( pipe_slow ); -%} - - -// fast search of substring with known size. 
-instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, - eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ - predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); - match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); - effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); - - format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %} - ins_encode %{ - int icnt2 = (int)$int_cnt2$$constant; - if (icnt2 >= 16) { - // IndexOf for constant substrings with size >= 16 elements - // which don't need to be loaded through stack. - __ string_indexofC8($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, - icnt2, $result$$Register, - $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); - } else { - // Small strings are loaded through stack if they cross page boundary. - __ string_indexof($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, - icnt2, $result$$Register, - $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); - } - %} - ins_pipe( pipe_slow ); -%} - -// fast search of substring with known size. -instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, - eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ - predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU)); - match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); - effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); - - format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %} - ins_encode %{ - int icnt2 = (int)$int_cnt2$$constant; - if (icnt2 >= 8) { - // IndexOf for constant substrings with size >= 8 elements - // which don't need to be loaded through stack. 
- __ string_indexofC8($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, - icnt2, $result$$Register, - $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); - } else { - // Small strings are loaded through stack if they cross page boundary. - __ string_indexof($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, - icnt2, $result$$Register, - $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); - } - %} - ins_pipe( pipe_slow ); -%} - -// fast search of substring with known size. -instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2, - eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{ - predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL)); - match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); - effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr); - - format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result // KILL $vec1, $cnt1, $cnt2, $tmp" %} - ins_encode %{ - int icnt2 = (int)$int_cnt2$$constant; - if (icnt2 >= 8) { - // IndexOf for constant substrings with size >= 8 elements - // which don't need to be loaded through stack. - __ string_indexofC8($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, - icnt2, $result$$Register, - $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); - } else { - // Small strings are loaded through stack if they cross page boundary. 
- __ string_indexof($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, - icnt2, $result$$Register, - $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); - } - %} - ins_pipe( pipe_slow ); -%} - -instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, - eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{ - predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL)); - match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); - effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); - - format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} - ins_encode %{ - __ string_indexof($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, - (-1), $result$$Register, - $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL); - %} - ins_pipe( pipe_slow ); -%} - -instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, - eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{ - predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU)); - match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); - effect(TEMP vec1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); - - format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} - ins_encode %{ - __ string_indexof($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, - (-1), $result$$Register, - $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU); - %} - ins_pipe( pipe_slow ); -%} - -instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2, - eBXRegI result, regD vec1, eCXRegI tmp, eFlagsReg cr) %{ - predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL)); - match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); - effect(TEMP vec1, 
USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr); - - format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL all" %} - ins_encode %{ - __ string_indexof($str1$$Register, $str2$$Register, - $cnt1$$Register, $cnt2$$Register, - (-1), $result$$Register, - $vec1$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL); - %} - ins_pipe( pipe_slow ); -%} - -instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, - eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ - predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U)); - match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); - effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); - format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} - ins_encode %{ - __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, - $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); - %} - ins_pipe( pipe_slow ); -%} - -instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch, - eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{ - predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L)); - match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); - effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr); - format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %} - ins_encode %{ - __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register, - $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register); - %} - ins_pipe( pipe_slow ); -%} - - -// fast array equals -instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result, - regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg 
cr) -%{ - predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); - match(Set result (AryEq ary1 ary2)); - effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); - //ins_cost(300); - - format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} - ins_encode %{ - __ arrays_equals(true, $ary1$$Register, $ary2$$Register, - $tmp3$$Register, $result$$Register, $tmp4$$Register, - $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg); - %} - ins_pipe( pipe_slow ); -%} - -instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result, - regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) -%{ - predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); - match(Set result (AryEq ary1 ary2)); - effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); - //ins_cost(300); - - format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} - ins_encode %{ - __ arrays_equals(true, $ary1$$Register, $ary2$$Register, - $tmp3$$Register, $result$$Register, $tmp4$$Register, - $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result, - regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) -%{ - predicate(!VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); - match(Set result (AryEq ary1 ary2)); - effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); - //ins_cost(300); - - format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} - ins_encode %{ - __ arrays_equals(true, $ary1$$Register, $ary2$$Register, - $tmp3$$Register, $result$$Register, $tmp4$$Register, - $tmp1$$XMMRegister, 
$tmp2$$XMMRegister, true /* char */, knoreg); - %} - ins_pipe( pipe_slow ); -%} - -instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result, - regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) -%{ - predicate(VM_Version::supports_avx512vlbw() && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); - match(Set result (AryEq ary1 ary2)); - effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); - //ins_cost(300); - - format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %} - ins_encode %{ - __ arrays_equals(true, $ary1$$Register, $ary2$$Register, - $tmp3$$Register, $result$$Register, $tmp4$$Register, - $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister); - %} - ins_pipe( pipe_slow ); -%} - -instruct count_positives(eSIRegP ary1, eCXRegI len, eAXRegI result, - regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) -%{ - predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2()); - match(Set result (CountPositives ary1 len)); - effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr); - - format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %} - ins_encode %{ - __ count_positives($ary1$$Register, $len$$Register, - $result$$Register, $tmp3$$Register, - $tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg); - %} - ins_pipe( pipe_slow ); -%} - -instruct count_positives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result, - regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr) -%{ - predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()); - match(Set result (CountPositives ary1 len)); - effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr); - - format %{ "countPositives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %} - ins_encode %{ - __ count_positives($ary1$$Register, 
$len$$Register, - $result$$Register, $tmp3$$Register, - $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); - %} - ins_pipe( pipe_slow ); -%} - - -// fast char[] to byte[] compression -instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, - regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ - predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2()); - match(Set result (StrCompressedCopy src (Binary dst len))); - effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); - - format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %} - ins_encode %{ - __ char_array_compress($src$$Register, $dst$$Register, $len$$Register, - $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, - $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, - knoreg, knoreg); - %} - ins_pipe( pipe_slow ); -%} - -instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, - regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ - predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()); - match(Set result (StrCompressedCopy src (Binary dst len))); - effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); - - format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %} - ins_encode %{ - __ char_array_compress($src$$Register, $dst$$Register, $len$$Register, - $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, - $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, - $ktmp1$$KRegister, $ktmp2$$KRegister); - %} - ins_pipe( pipe_slow ); -%} - -// fast byte[] to char[] inflation -instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len, - regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{ - 
predicate(!VM_Version::supports_avx512vlbw() || !VM_Version::supports_bmi2()); - match(Set dummy (StrInflatedCopy src (Binary dst len))); - effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); - - format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %} - ins_encode %{ - __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, - $tmp1$$XMMRegister, $tmp2$$Register, knoreg); - %} - ins_pipe( pipe_slow ); -%} - -instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len, - regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{ - predicate(VM_Version::supports_avx512vlbw() && VM_Version::supports_bmi2()); - match(Set dummy (StrInflatedCopy src (Binary dst len))); - effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); - - format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %} - ins_encode %{ - __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, - $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister); - %} - ins_pipe( pipe_slow ); -%} - -// encode char[] to byte[] in ISO_8859_1 -instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len, - regD tmp1, regD tmp2, regD tmp3, regD tmp4, - eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ - predicate(!((EncodeISOArrayNode*)n)->is_ascii()); - match(Set result (EncodeISOArray src (Binary dst len))); - effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); - - format %{ "Encode iso array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} - ins_encode %{ - __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, - $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, - $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, false); - %} - ins_pipe( pipe_slow ); -%} - -// encode char[] to byte[] in ASCII -instruct encode_ascii_array(eSIRegP src, eDIRegP dst, eDXRegI len, - regD tmp1, regD 
tmp2, regD tmp3, regD tmp4, - eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{ - predicate(((EncodeISOArrayNode*)n)->is_ascii()); - match(Set result (EncodeISOArray src (Binary dst len))); - effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr); - - format %{ "Encode ascii array $src,$dst,$len -> $result // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %} - ins_encode %{ - __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, - $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, - $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register, true); - %} - ins_pipe( pipe_slow ); -%} - -//----------Control Flow Instructions------------------------------------------ -// Signed compare Instructions -instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{ - match(Set cr (CmpI op1 op2)); - effect( DEF cr, USE op1, USE op2 ); - format %{ "CMP $op1,$op2" %} - opcode(0x3B); /* Opcode 3B /r */ - ins_encode( OpcP, RegReg( op1, op2) ); - ins_pipe( ialu_cr_reg_reg ); -%} - -instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{ - match(Set cr (CmpI op1 op2)); - effect( DEF cr, USE op1 ); - format %{ "CMP $op1,$op2" %} - opcode(0x81,0x07); /* Opcode 81 /7 */ - // ins_encode( RegImm( op1, op2) ); /* Was CmpImm */ - ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); - ins_pipe( ialu_cr_reg_imm ); -%} - -// Cisc-spilled version of cmpI_eReg -instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{ - match(Set cr (CmpI op1 (LoadI op2))); - - format %{ "CMP $op1,$op2" %} - ins_cost(500); - opcode(0x3B); /* Opcode 3B /r */ - ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark ); - ins_pipe( ialu_cr_reg_mem ); -%} - -instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{ - match(Set cr (CmpI src zero)); - effect( DEF cr, USE src ); - - format %{ "TEST $src,$src" %} - opcode(0x85); - ins_encode( OpcP, RegReg( src, src ) ); - ins_pipe( ialu_cr_reg_imm ); -%} - -instruct 
testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{ - match(Set cr (CmpI (AndI src con) zero)); - - format %{ "TEST $src,$con" %} - opcode(0xF7,0x00); - ins_encode( OpcP, RegOpc(src), Con32(con) ); - ins_pipe( ialu_cr_reg_imm ); -%} - -instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{ - match(Set cr (CmpI (AndI src mem) zero)); - - format %{ "TEST $src,$mem" %} - opcode(0x85); - ins_encode( SetInstMark, OpcP, RegMem( src, mem ), ClearInstMark ); - ins_pipe( ialu_cr_reg_mem ); -%} - -// Unsigned compare Instructions; really, same as signed except they -// produce an eFlagsRegU instead of eFlagsReg. -instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{ - match(Set cr (CmpU op1 op2)); - - format %{ "CMPu $op1,$op2" %} - opcode(0x3B); /* Opcode 3B /r */ - ins_encode( OpcP, RegReg( op1, op2) ); - ins_pipe( ialu_cr_reg_reg ); -%} - -instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{ - match(Set cr (CmpU op1 op2)); - - format %{ "CMPu $op1,$op2" %} - opcode(0x81,0x07); /* Opcode 81 /7 */ - ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) ); - ins_pipe( ialu_cr_reg_imm ); -%} - -// // Cisc-spilled version of cmpU_eReg -instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{ - match(Set cr (CmpU op1 (LoadI op2))); - - format %{ "CMPu $op1,$op2" %} - ins_cost(500); - opcode(0x3B); /* Opcode 3B /r */ - ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark ); - ins_pipe( ialu_cr_reg_mem ); -%} - -// // Cisc-spilled version of cmpU_eReg -//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{ -// match(Set cr (CmpU (LoadI op1) op2)); -// -// format %{ "CMPu $op1,$op2" %} -// ins_cost(500); -// opcode(0x39); /* Opcode 39 /r */ -// ins_encode( OpcP, RegMem( op1, op2) ); -//%} - -instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{ - match(Set cr (CmpU src zero)); - - format %{ "TESTu $src,$src" %} - opcode(0x85); - ins_encode( OpcP, RegReg( src, src ) ); - ins_pipe( 
ialu_cr_reg_imm ); -%} - -// Unsigned pointer compare Instructions -instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{ - match(Set cr (CmpP op1 op2)); - - format %{ "CMPu $op1,$op2" %} - opcode(0x3B); /* Opcode 3B /r */ - ins_encode( OpcP, RegReg( op1, op2) ); - ins_pipe( ialu_cr_reg_reg ); -%} - -instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{ - match(Set cr (CmpP op1 op2)); - - format %{ "CMPu $op1,$op2" %} - opcode(0x81,0x07); /* Opcode 81 /7 */ - ins_encode( SetInstMark, OpcSErm( op1, op2 ), Con8or32( op2 ), ClearInstMark ); - ins_pipe( ialu_cr_reg_imm ); -%} - -// // Cisc-spilled version of cmpP_eReg -instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{ - match(Set cr (CmpP op1 (LoadP op2))); - - format %{ "CMPu $op1,$op2" %} - ins_cost(500); - opcode(0x3B); /* Opcode 3B /r */ - ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark ); - ins_pipe( ialu_cr_reg_mem ); -%} - -// // Cisc-spilled version of cmpP_eReg -//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{ -// match(Set cr (CmpP (LoadP op1) op2)); -// -// format %{ "CMPu $op1,$op2" %} -// ins_cost(500); -// opcode(0x39); /* Opcode 39 /r */ -// ins_encode( OpcP, RegMem( op1, op2) ); -//%} - -// Compare raw pointer (used in out-of-heap check). -// Only works because non-oop pointers must be raw pointers -// and raw pointers have no anti-dependencies. -instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{ - predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none ); - match(Set cr (CmpP op1 (LoadP op2))); - - format %{ "CMPu $op1,$op2" %} - opcode(0x3B); /* Opcode 3B /r */ - ins_encode( SetInstMark, OpcP, RegMem( op1, op2), ClearInstMark ); - ins_pipe( ialu_cr_reg_mem ); -%} - -// -// This will generate a signed flags result. This should be ok -// since any compare to a zero should be eq/neq. 
-instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{ - match(Set cr (CmpP src zero)); - - format %{ "TEST $src,$src" %} - opcode(0x85); - ins_encode( OpcP, RegReg( src, src ) ); - ins_pipe( ialu_cr_reg_imm ); -%} - -// Cisc-spilled version of testP_reg -// This will generate a signed flags result. This should be ok -// since any compare to a zero should be eq/neq. -instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{ - match(Set cr (CmpP (LoadP op) zero)); - - format %{ "TEST $op,0xFFFFFFFF" %} - ins_cost(500); - opcode(0xF7); /* Opcode F7 /0 */ - ins_encode( SetInstMark, OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF), ClearInstMark ); - ins_pipe( ialu_cr_reg_imm ); -%} - -// Yanked all unsigned pointer compare operations. -// Pointer compares are done with CmpP which is already unsigned. - -//----------Max and Min-------------------------------------------------------- -// Min Instructions -//// -// *** Min and Max using the conditional move are slower than the -// *** branch version on a Pentium III. -// // Conditional move for min -//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ -// effect( USE_DEF op2, USE op1, USE cr ); -// format %{ "CMOVlt $op2,$op1\t! 
min" %} -// opcode(0x4C,0x0F); -// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); -// ins_pipe( pipe_cmov_reg ); -//%} -// -//// Min Register with Register (P6 version) -//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{ -// predicate(VM_Version::supports_cmov() ); -// match(Set op2 (MinI op1 op2)); -// ins_cost(200); -// expand %{ -// eFlagsReg cr; -// compI_eReg(cr,op1,op2); -// cmovI_reg_lt(op2,op1,cr); -// %} -//%} - -// Min Register with Register (generic version) -instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ - match(Set dst (MinI dst src)); - effect(KILL flags); - ins_cost(300); - - format %{ "MIN $dst,$src" %} - opcode(0xCC); - ins_encode( min_enc(dst,src) ); - ins_pipe( pipe_slow ); -%} - -// Max Register with Register -// *** Min and Max using the conditional move are slower than the -// *** branch version on a Pentium III. -// // Conditional move for max -//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{ -// effect( USE_DEF op2, USE op1, USE cr ); -// format %{ "CMOVgt $op2,$op1\t! max" %} -// opcode(0x4F,0x0F); -// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); -// ins_pipe( pipe_cmov_reg ); -//%} -// -// // Max Register with Register (P6 version) -//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{ -// predicate(VM_Version::supports_cmov() ); -// match(Set op2 (MaxI op1 op2)); -// ins_cost(200); -// expand %{ -// eFlagsReg cr; -// compI_eReg(cr,op1,op2); -// cmovI_reg_gt(op2,op1,cr); -// %} -//%} - -// Max Register with Register (generic version) -instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{ - match(Set dst (MaxI dst src)); - effect(KILL flags); - ins_cost(300); - - format %{ "MAX $dst,$src" %} - opcode(0xCC); - ins_encode( max_enc(dst,src) ); - ins_pipe( pipe_slow ); -%} - -// ============================================================================ -// Counted Loop limit node which represents exact final iterator value. 
-// Note: the resulting value should fit into integer range since -// counted loops have limit check on overflow. -instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{ - match(Set limit (LoopLimit (Binary init limit) stride)); - effect(TEMP limit_hi, TEMP tmp, KILL flags); - ins_cost(300); - - format %{ "loopLimit $init,$limit,$stride # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %} - ins_encode %{ - int strd = (int)$stride$$constant; - assert(strd != 1 && strd != -1, "sanity"); - int m1 = (strd > 0) ? 1 : -1; - // Convert limit to long (EAX:EDX) - __ cdql(); - // Convert init to long (init:tmp) - __ movl($tmp$$Register, $init$$Register); - __ sarl($tmp$$Register, 31); - // $limit - $init - __ subl($limit$$Register, $init$$Register); - __ sbbl($limit_hi$$Register, $tmp$$Register); - // + ($stride - 1) - if (strd > 0) { - __ addl($limit$$Register, (strd - 1)); - __ adcl($limit_hi$$Register, 0); - __ movl($tmp$$Register, strd); - } else { - __ addl($limit$$Register, (strd + 1)); - __ adcl($limit_hi$$Register, -1); - __ lneg($limit_hi$$Register, $limit$$Register); - __ movl($tmp$$Register, -strd); - } - // signed division: (EAX:EDX) / pos_stride - __ idivl($tmp$$Register); - if (strd < 0) { - // restore sign - __ negl($tmp$$Register); - } - // (EAX) * stride - __ mull($tmp$$Register); - // + init (ignore upper bits) - __ addl($limit$$Register, $init$$Register); - %} - ins_pipe( pipe_slow ); -%} - -// ============================================================================ -// Branch Instructions -// Jump Table -instruct jumpXtnd(rRegI switch_val) %{ - match(Jump switch_val); - ins_cost(350); - format %{ "JMP [$constantaddress](,$switch_val,1)\n\t" %} - ins_encode %{ - // Jump to Address(table_base + switch_reg) - Address index(noreg, $switch_val$$Register, Address::times_1); - __ jump(ArrayAddress($constantaddress, index), noreg); - %} - ins_pipe(pipe_jmp); 
-%} - -// Jump Direct - Label defines a relative address from JMP+1 -instruct jmpDir(label labl) %{ - match(Goto); - effect(USE labl); - - ins_cost(300); - format %{ "JMP $labl" %} - size(5); - ins_encode %{ - Label* L = $labl$$label; - __ jmp(*L, false); // Always long jump - %} - ins_pipe( pipe_jmp ); -%} - -// Jump Direct Conditional - Label defines a relative address from Jcc+1 -instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{ - match(If cop cr); - effect(USE labl); - - ins_cost(300); - format %{ "J$cop $labl" %} - size(6); - ins_encode %{ - Label* L = $labl$$label; - __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump - %} - ins_pipe( pipe_jcc ); -%} - -// Jump Direct Conditional - Label defines a relative address from Jcc+1 -instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{ - match(CountedLoopEnd cop cr); - effect(USE labl); - - ins_cost(300); - format %{ "J$cop $labl\t# Loop end" %} - size(6); - ins_encode %{ - Label* L = $labl$$label; - __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump - %} - ins_pipe( pipe_jcc ); -%} - -// Jump Direct Conditional - using unsigned comparison -instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{ - match(If cop cmp); - effect(USE labl); - - ins_cost(300); - format %{ "J$cop,u $labl" %} - size(6); - ins_encode %{ - Label* L = $labl$$label; - __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump - %} - ins_pipe(pipe_jcc); -%} - -instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ - match(If cop cmp); - effect(USE labl); - - ins_cost(200); - format %{ "J$cop,u $labl" %} - size(6); - ins_encode %{ - Label* L = $labl$$label; - __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump - %} - ins_pipe(pipe_jcc); -%} - -instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{ - match(If cop cmp); - effect(USE labl); - - ins_cost(200); - format %{ $$template - if ($cop$$cmpcode == 
Assembler::notEqual) { - $$emit$$"JP,u $labl\n\t" - $$emit$$"J$cop,u $labl" - } else { - $$emit$$"JP,u done\n\t" - $$emit$$"J$cop,u $labl\n\t" - $$emit$$"done:" - } - %} - ins_encode %{ - Label* l = $labl$$label; - if ($cop$$cmpcode == Assembler::notEqual) { - __ jcc(Assembler::parity, *l, false); - __ jcc(Assembler::notEqual, *l, false); - } else if ($cop$$cmpcode == Assembler::equal) { - Label done; - __ jccb(Assembler::parity, done); - __ jcc(Assembler::equal, *l, false); - __ bind(done); - } else { - ShouldNotReachHere(); - } - %} - ins_pipe(pipe_jcc); -%} - -// ============================================================================ -// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass -// array for an instance of the superklass. Set a hidden internal cache on a -// hit (cache is checked with exposed code in gen_subtype_check()). Return -// NZ for a miss or zero for a hit. The encoding ALSO sets flags. -instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{ - match(Set result (PartialSubtypeCheck sub super)); - effect( KILL rcx, KILL cr ); - - ins_cost(1100); // slightly larger than the next version - format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" - "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" - "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" - "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" - "JNE,s miss\t\t# Missed: EDI not-zero\n\t" - "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t" - "XOR $result,$result\t\t Hit: EDI zero\n\t" - "miss:\t" %} - - opcode(0x1); // Force a XOR of EDI - ins_encode( enc_PartialSubtypeCheck() ); - ins_pipe( pipe_slow ); -%} - -instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{ - match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); - effect( KILL rcx, KILL result 
); - - ins_cost(1000); - format %{ "MOV EDI,[$sub+Klass::secondary_supers]\n\t" - "MOV ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t" - "ADD EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t" - "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t" - "JNE,s miss\t\t# Missed: flags NZ\n\t" - "MOV [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t" - "miss:\t" %} - - opcode(0x0); // No need to XOR EDI - ins_encode( enc_PartialSubtypeCheck() ); - ins_pipe( pipe_slow ); -%} - -// ============================================================================ -// Branch Instructions -- short offset versions -// -// These instructions are used to replace jumps of a long offset (the default -// match) with jumps of a shorter offset. These instructions are all tagged -// with the ins_short_branch attribute, which causes the ADLC to suppress the -// match rules in general matching. Instead, the ADLC generates a conversion -// method in the MachNode which can be used to do in-place replacement of the -// long variant with the shorter variant. The compiler will determine if a -// branch can be taken by the is_short_branch_offset() predicate in the machine -// specific code section of the file. 
- -// Jump Direct - Label defines a relative address from JMP+1 -instruct jmpDir_short(label labl) %{ - match(Goto); - effect(USE labl); - - ins_cost(300); - format %{ "JMP,s $labl" %} - size(2); - ins_encode %{ - Label* L = $labl$$label; - __ jmpb(*L); - %} - ins_pipe( pipe_jmp ); - ins_short_branch(1); -%} - -// Jump Direct Conditional - Label defines a relative address from Jcc+1 -instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{ - match(If cop cr); - effect(USE labl); - - ins_cost(300); - format %{ "J$cop,s $labl" %} - size(2); - ins_encode %{ - Label* L = $labl$$label; - __ jccb((Assembler::Condition)($cop$$cmpcode), *L); - %} - ins_pipe( pipe_jcc ); - ins_short_branch(1); -%} - -// Jump Direct Conditional - Label defines a relative address from Jcc+1 -instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{ - match(CountedLoopEnd cop cr); - effect(USE labl); - - ins_cost(300); - format %{ "J$cop,s $labl\t# Loop end" %} - size(2); - ins_encode %{ - Label* L = $labl$$label; - __ jccb((Assembler::Condition)($cop$$cmpcode), *L); - %} - ins_pipe( pipe_jcc ); - ins_short_branch(1); -%} - -// Jump Direct Conditional - using unsigned comparison -instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{ - match(If cop cmp); - effect(USE labl); - - ins_cost(300); - format %{ "J$cop,us $labl" %} - size(2); - ins_encode %{ - Label* L = $labl$$label; - __ jccb((Assembler::Condition)($cop$$cmpcode), *L); - %} - ins_pipe( pipe_jcc ); - ins_short_branch(1); -%} - -instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{ - match(If cop cmp); - effect(USE labl); - - ins_cost(300); - format %{ "J$cop,us $labl" %} - size(2); - ins_encode %{ - Label* L = $labl$$label; - __ jccb((Assembler::Condition)($cop$$cmpcode), *L); - %} - ins_pipe( pipe_jcc ); - ins_short_branch(1); -%} - -instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{ - match(If cop cmp); - effect(USE labl); - - ins_cost(300); - format %{ $$template - 
if ($cop$$cmpcode == Assembler::notEqual) { - $$emit$$"JP,u,s $labl\n\t" - $$emit$$"J$cop,u,s $labl" - } else { - $$emit$$"JP,u,s done\n\t" - $$emit$$"J$cop,u,s $labl\n\t" - $$emit$$"done:" - } - %} - size(4); - ins_encode %{ - Label* l = $labl$$label; - if ($cop$$cmpcode == Assembler::notEqual) { - __ jccb(Assembler::parity, *l); - __ jccb(Assembler::notEqual, *l); - } else if ($cop$$cmpcode == Assembler::equal) { - Label done; - __ jccb(Assembler::parity, done); - __ jccb(Assembler::equal, *l); - __ bind(done); - } else { - ShouldNotReachHere(); - } - %} - ins_pipe(pipe_jcc); - ins_short_branch(1); -%} - -// ============================================================================ -// Long Compare -// -// Currently we hold longs in 2 registers. Comparing such values efficiently -// is tricky. The flavor of compare used depends on whether we are testing -// for LT, LE, or EQ. For a simple LT test we can check just the sign bit. -// The GE test is the negated LT test. The LE test can be had by commuting -// the operands (yielding a GE test) and then negating; negate again for the -// GT test. The EQ test is done by ORcc'ing the high and low halves, and the -// NE test is negated from that. - -// Due to a shortcoming in the ADLC, it mixes up expressions like: -// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)). Note the -// difference between 'Y' and '0L'. The tree-matches for the CmpI sections -// are collapsed internally in the ADLC's dfa-gen code. The match for -// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the -// foo match ends up with the wrong leaf. One fix is to not match both -// reg-reg and reg-zero forms of long-compare. This is unfortunate because -// both forms beat the trinary form of long-compare and both are very useful -// on Intel which has so few registers. - -// Manifest a CmpL result in an integer register. Very painful. -// This is the test to avoid. 
-instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{ - match(Set dst (CmpL3 src1 src2)); - effect( KILL flags ); - ins_cost(1000); - format %{ "XOR $dst,$dst\n\t" - "CMP $src1.hi,$src2.hi\n\t" - "JLT,s m_one\n\t" - "JGT,s p_one\n\t" - "CMP $src1.lo,$src2.lo\n\t" - "JB,s m_one\n\t" - "JEQ,s done\n" - "p_one:\tINC $dst\n\t" - "JMP,s done\n" - "m_one:\tDEC $dst\n" - "done:" %} - ins_encode %{ - Label p_one, m_one, done; - __ xorptr($dst$$Register, $dst$$Register); - __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register)); - __ jccb(Assembler::less, m_one); - __ jccb(Assembler::greater, p_one); - __ cmpl($src1$$Register, $src2$$Register); - __ jccb(Assembler::below, m_one); - __ jccb(Assembler::equal, done); - __ bind(p_one); - __ incrementl($dst$$Register); - __ jmpb(done); - __ bind(m_one); - __ decrementl($dst$$Register); - __ bind(done); - %} - ins_pipe( pipe_slow ); -%} - -//====== -// Manifest a CmpL result in the normal flags. Only good for LT or GE -// compares. Can be used for LE or GT compares by reversing arguments. -// NOT GOOD FOR EQ/NE tests. -instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{ - match( Set flags (CmpL src zero )); - ins_cost(100); - format %{ "TEST $src.hi,$src.hi" %} - opcode(0x85); - ins_encode( OpcP, RegReg_Hi2( src, src ) ); - ins_pipe( ialu_cr_reg_reg ); -%} - -// Manifest a CmpL result in the normal flags. Only good for LT or GE -// compares. Can be used for LE or GT compares by reversing arguments. -// NOT GOOD FOR EQ/NE tests. -instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{ - match( Set flags (CmpL src1 src2 )); - effect( TEMP tmp ); - ins_cost(300); - format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t" - "MOV $tmp,$src1.hi\n\t" - "SBB $tmp,$src2.hi\t! 
Compute flags for long compare" %} - ins_encode( long_cmp_flags2( src1, src2, tmp ) ); - ins_pipe( ialu_cr_reg_reg ); -%} - -// Long compares reg < zero/req OR reg >= zero/req. -// Just a wrapper for a normal branch, plus the predicate test. -instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{ - match(If cmp flags); - effect(USE labl); - predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ); - expand %{ - jmpCon(cmp,flags,labl); // JLT or JGE... - %} -%} - -//====== -// Manifest a CmpUL result in the normal flags. Only good for LT or GE -// compares. Can be used for LE or GT compares by reversing arguments. -// NOT GOOD FOR EQ/NE tests. -instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{ - match(Set flags (CmpUL src zero)); - ins_cost(100); - format %{ "TEST $src.hi,$src.hi" %} - opcode(0x85); - ins_encode(OpcP, RegReg_Hi2(src, src)); - ins_pipe(ialu_cr_reg_reg); -%} - -// Manifest a CmpUL result in the normal flags. Only good for LT or GE -// compares. Can be used for LE or GT compares by reversing arguments. -// NOT GOOD FOR EQ/NE tests. -instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{ - match(Set flags (CmpUL src1 src2)); - effect(TEMP tmp); - ins_cost(300); - format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t" - "MOV $tmp,$src1.hi\n\t" - "SBB $tmp,$src2.hi\t! Compute flags for unsigned long compare" %} - ins_encode(long_cmp_flags2(src1, src2, tmp)); - ins_pipe(ialu_cr_reg_reg); -%} - -// Unsigned long compares reg < zero/req OR reg >= zero/req. -// Just a wrapper for a normal branch, plus the predicate test. 
-instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{ - match(If cmp flags); - effect(USE labl); - predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge); - expand %{ - jmpCon(cmp, flags, labl); // JLT or JGE... - %} -%} - -// Compare 2 longs and CMOVE longs. -instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{ - match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); - ins_cost(400); - format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" - "CMOV$cmp $dst.hi,$src.hi" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); - ins_pipe( pipe_cmov_reg_long ); -%} - -instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{ - match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); - ins_cost(500); - format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" - "CMOV$cmp $dst.hi,$src.hi" %} - opcode(0x0F,0x40); - ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark ); - ins_pipe( pipe_cmov_reg_long ); -%} - -instruct cmovLL_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, eRegL src) %{ - match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); - ins_cost(400); - expand %{ - cmovLL_reg_LTGE(cmp, flags, dst, src); - %} -%} - -instruct 
cmovLL_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegL dst, load_long_memory src) %{ - match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); - ins_cost(500); - expand %{ - cmovLL_mem_LTGE(cmp, flags, dst, src); - %} -%} - -// Compare 2 longs and CMOVE ints. -instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); - match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); - ins_cost(200); - format %{ "CMOV$cmp $dst,$src" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); - ins_pipe( pipe_cmov_reg ); -%} - -instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); - match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); - ins_cost(250); - format %{ "CMOV$cmp $dst,$src" %} - opcode(0x0F,0x40); - ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark ); - ins_pipe( pipe_cmov_mem ); -%} - -instruct cmovII_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, rRegI src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); - match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - cmovII_reg_LTGE(cmp, flags, dst, src); - %} -%} - -instruct cmovII_mem_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, rRegI dst, memory 
src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); - match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); - ins_cost(250); - expand %{ - cmovII_mem_LTGE(cmp, flags, dst, src); - %} -%} - -// Compare 2 longs and CMOVE ptrs. -instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); - match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); - ins_cost(200); - format %{ "CMOV$cmp $dst,$src" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); - ins_pipe( pipe_cmov_reg ); -%} - -// Compare 2 unsigned longs and CMOVE ptrs. -instruct cmovPP_reg_LTGE_U(cmpOpU cmp, flagsReg_ulong_LTGE flags, eRegP dst, eRegP src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); - match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - cmovPP_reg_LTGE(cmp,flags,dst,src); - %} -%} - -// Compare 2 longs and CMOVE doubles -instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{ - predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); - match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovDPR_regS(cmp,flags,dst,src); - %} -%} - -// Compare 2 longs and CMOVE doubles -instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{ - predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || 
_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); - match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovD_regS(cmp,flags,dst,src); - %} -%} - -instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{ - predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); - match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovFPR_regS(cmp,flags,dst,src); - %} -%} - -instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{ - predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge )); - match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovF_regS(cmp,flags,dst,src); - %} -%} - -//====== -// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares. -instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{ - match( Set flags (CmpL src zero )); - effect(TEMP tmp); - ins_cost(200); - format %{ "MOV $tmp,$src.lo\n\t" - "OR $tmp,$src.hi\t! Long is EQ/NE 0?" %} - ins_encode( long_cmp_flags0( src, tmp ) ); - ins_pipe( ialu_reg_reg_long ); -%} - -// Manifest a CmpL result in the normal flags. Only good for EQ/NE compares. -instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{ - match( Set flags (CmpL src1 src2 )); - ins_cost(200+300); - format %{ "CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t" - "JNE,s skip\n\t" - "CMP $src1.hi,$src2.hi\n\t" - "skip:\t" %} - ins_encode( long_cmp_flags1( src1, src2 ) ); - ins_pipe( ialu_cr_reg_reg ); -%} - -// Long compare reg == zero/reg OR reg != zero/reg -// Just a wrapper for a normal branch, plus the predicate test. 
-instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{ - match(If cmp flags); - effect(USE labl); - predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ); - expand %{ - jmpCon(cmp,flags,labl); // JEQ or JNE... - %} -%} - -//====== -// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares. -instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{ - match(Set flags (CmpUL src zero)); - effect(TEMP tmp); - ins_cost(200); - format %{ "MOV $tmp,$src.lo\n\t" - "OR $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %} - ins_encode(long_cmp_flags0(src, tmp)); - ins_pipe(ialu_reg_reg_long); -%} - -// Manifest a CmpUL result in the normal flags. Only good for EQ/NE compares. -instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{ - match(Set flags (CmpUL src1 src2)); - ins_cost(200+300); - format %{ "CMP $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t" - "JNE,s skip\n\t" - "CMP $src1.hi,$src2.hi\n\t" - "skip:\t" %} - ins_encode(long_cmp_flags1(src1, src2)); - ins_pipe(ialu_cr_reg_reg); -%} - -// Unsigned long compare reg == zero/reg OR reg != zero/reg -// Just a wrapper for a normal branch, plus the predicate test. -instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{ - match(If cmp flags); - effect(USE labl); - predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne); - expand %{ - jmpCon(cmp, flags, labl); // JEQ or JNE... - %} -%} - -// Compare 2 longs and CMOVE longs. 
-instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{ - match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); - ins_cost(400); - format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" - "CMOV$cmp $dst.hi,$src.hi" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); - ins_pipe( pipe_cmov_reg_long ); -%} - -instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{ - match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); - ins_cost(500); - format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" - "CMOV$cmp $dst.hi,$src.hi" %} - opcode(0x0F,0x40); - ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark ); - ins_pipe( pipe_cmov_reg_long ); -%} - -// Compare 2 longs and CMOVE ints. 
-instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); - match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); - ins_cost(200); - format %{ "CMOV$cmp $dst,$src" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); - ins_pipe( pipe_cmov_reg ); -%} - -instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); - match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); - ins_cost(250); - format %{ "CMOV$cmp $dst,$src" %} - opcode(0x0F,0x40); - ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark ); - ins_pipe( pipe_cmov_mem ); -%} - -instruct cmovII_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, rRegI src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); - match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - cmovII_reg_EQNE(cmp, flags, dst, src); - %} -%} - -instruct cmovII_mem_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, rRegI dst, memory src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); - match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); - ins_cost(250); - expand %{ - cmovII_mem_EQNE(cmp, flags, dst, src); - %} -%} - -// Compare 2 longs and CMOVE ptrs. 
-instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); - match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); - ins_cost(200); - format %{ "CMOV$cmp $dst,$src" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); - ins_pipe( pipe_cmov_reg ); -%} - -// Compare 2 unsigned longs and CMOVE ptrs. -instruct cmovPP_reg_EQNE_U(cmpOpU cmp, flagsReg_ulong_EQNE flags, eRegP dst, eRegP src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); - match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - cmovPP_reg_EQNE(cmp,flags,dst,src); - %} -%} - -// Compare 2 longs and CMOVE doubles -instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{ - predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); - match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovDPR_regS(cmp,flags,dst,src); - %} -%} - -// Compare 2 longs and CMOVE doubles -instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{ - predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); - match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovD_regS(cmp,flags,dst,src); - %} -%} - -instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{ - predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == 
BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); - match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovFPR_regS(cmp,flags,dst,src); - %} -%} - -instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{ - predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne )); - match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovF_regS(cmp,flags,dst,src); - %} -%} - -//====== -// Manifest a CmpL result in the normal flags. Only good for LE or GT compares. -// Same as cmpL_reg_flags_LEGT except must negate src -instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{ - match( Set flags (CmpL src zero )); - effect( TEMP tmp ); - ins_cost(300); - format %{ "XOR $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t" - "CMP $tmp,$src.lo\n\t" - "SBB $tmp,$src.hi\n\t" %} - ins_encode( long_cmp_flags3(src, tmp) ); - ins_pipe( ialu_reg_reg_long ); -%} - -// Manifest a CmpL result in the normal flags. Only good for LE or GT compares. -// Same as cmpL_reg_flags_LTGE except operands swapped. Swapping operands -// requires a commuted test to get the same result. -instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{ - match( Set flags (CmpL src1 src2 )); - effect( TEMP tmp ); - ins_cost(300); - format %{ "CMP $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t" - "MOV $tmp,$src2.hi\n\t" - "SBB $tmp,$src1.hi\t! Compute flags for long compare" %} - ins_encode( long_cmp_flags2( src2, src1, tmp ) ); - ins_pipe( ialu_cr_reg_reg ); -%} - -// Long compares reg < zero/req OR reg >= zero/req. 
-// Just a wrapper for a normal branch, plus the predicate test -instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{ - match(If cmp flags); - effect(USE labl); - predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le ); - ins_cost(300); - expand %{ - jmpCon(cmp,flags,labl); // JGT or JLE... - %} -%} - -//====== -// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares. -// Same as cmpUL_reg_flags_LEGT except must negate src -instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{ - match(Set flags (CmpUL src zero)); - effect(TEMP tmp); - ins_cost(300); - format %{ "XOR $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t" - "CMP $tmp,$src.lo\n\t" - "SBB $tmp,$src.hi\n\t" %} - ins_encode(long_cmp_flags3(src, tmp)); - ins_pipe(ialu_reg_reg_long); -%} - -// Manifest a CmpUL result in the normal flags. Only good for LE or GT compares. -// Same as cmpUL_reg_flags_LTGE except operands swapped. Swapping operands -// requires a commuted test to get the same result. -instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{ - match(Set flags (CmpUL src1 src2)); - effect(TEMP tmp); - ins_cost(300); - format %{ "CMP $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t" - "MOV $tmp,$src2.hi\n\t" - "SBB $tmp,$src1.hi\t! Compute flags for unsigned long compare" %} - ins_encode(long_cmp_flags2( src2, src1, tmp)); - ins_pipe(ialu_cr_reg_reg); -%} - -// Unsigned long compares reg < zero/req OR reg >= zero/req. 
-// Just a wrapper for a normal branch, plus the predicate test -instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{ - match(If cmp flags); - effect(USE labl); - predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le); - ins_cost(300); - expand %{ - jmpCon(cmp, flags, labl); // JGT or JLE... - %} -%} - -// Compare 2 longs and CMOVE longs. -instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{ - match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); - ins_cost(400); - format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" - "CMOV$cmp $dst.hi,$src.hi" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) ); - ins_pipe( pipe_cmov_reg_long ); -%} - -instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{ - match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); - ins_cost(500); - format %{ "CMOV$cmp $dst.lo,$src.lo\n\t" - "CMOV$cmp $dst.hi,$src.hi+4" %} - opcode(0x0F,0x40); - ins_encode( SetInstMark, enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src), ClearInstMark ); - ins_pipe( pipe_cmov_reg_long ); -%} - -instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{ - match(Set dst (CMoveL (Binary cmp flags) (Binary dst src))); - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == 
BoolTest::gt )); - ins_cost(400); - expand %{ - cmovLL_reg_LEGT(cmp, flags, dst, src); - %} -%} - -instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{ - match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src)))); - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); - ins_cost(500); - expand %{ - cmovLL_mem_LEGT(cmp, flags, dst, src); - %} -%} - -// Compare 2 longs and CMOVE ints. -instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); - match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); - ins_cost(200); - format %{ "CMOV$cmp $dst,$src" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); - ins_pipe( pipe_cmov_reg ); -%} - -instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); - match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); - ins_cost(250); - format %{ "CMOV$cmp $dst,$src" %} - opcode(0x0F,0x40); - ins_encode( SetInstMark, enc_cmov(cmp), RegMem( dst, src ), ClearInstMark ); - ins_pipe( pipe_cmov_mem ); -%} - -instruct cmovII_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, rRegI src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); - match(Set dst (CMoveI (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ 
- cmovII_reg_LEGT(cmp, flags, dst, src); - %} -%} - -instruct cmovII_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, rRegI dst, memory src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); - match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src)))); - ins_cost(250); - expand %{ - cmovII_mem_LEGT(cmp, flags, dst, src); - %} -%} - -// Compare 2 longs and CMOVE ptrs. -instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); - match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); - ins_cost(200); - format %{ "CMOV$cmp $dst,$src" %} - opcode(0x0F,0x40); - ins_encode( enc_cmov(cmp), RegReg( dst, src ) ); - ins_pipe( pipe_cmov_reg ); -%} - -// Compare 2 unsigned longs and CMOVE ptrs. 
-instruct cmovPP_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegP dst, eRegP src) %{ - predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); - match(Set dst (CMoveP (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - cmovPP_reg_LEGT(cmp,flags,dst,src); - %} -%} - -// Compare 2 longs and CMOVE doubles -instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{ - predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); - match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovDPR_regS(cmp,flags,dst,src); - %} -%} - -// Compare 2 longs and CMOVE doubles -instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{ - predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); - match(Set dst (CMoveD (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovD_regS(cmp,flags,dst,src); - %} -%} - -instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{ - predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); - match(Set dst (CMoveF (Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovFPR_regS(cmp,flags,dst,src); - %} -%} - - -instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{ - predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt )); - match(Set dst (CMoveF 
(Binary cmp flags) (Binary dst src))); - ins_cost(200); - expand %{ - fcmovF_regS(cmp,flags,dst,src); - %} -%} - - -// ============================================================================ -// Procedure Call/Return Instructions -// Call Java Static Instruction -// Note: If this code changes, the corresponding ret_addr_offset() and -// compute_padding() functions will have to be adjusted. -instruct CallStaticJavaDirect(method meth) %{ - match(CallStaticJava); - effect(USE meth); - - ins_cost(300); - format %{ "CALL,static " %} - opcode(0xE8); /* E8 cd */ - ins_encode( pre_call_resets, - Java_Static_Call( meth ), - call_epilog, - post_call_FPU ); - ins_pipe( pipe_slow ); - ins_alignment(4); -%} - -// Call Java Dynamic Instruction -// Note: If this code changes, the corresponding ret_addr_offset() and -// compute_padding() functions will have to be adjusted. -instruct CallDynamicJavaDirect(method meth) %{ - match(CallDynamicJava); - effect(USE meth); - - ins_cost(300); - format %{ "MOV EAX,(oop)-1\n\t" - "CALL,dynamic" %} - opcode(0xE8); /* E8 cd */ - ins_encode( pre_call_resets, - Java_Dynamic_Call( meth ), - call_epilog, - post_call_FPU ); - ins_pipe( pipe_slow ); - ins_alignment(4); -%} - -// Call Runtime Instruction -instruct CallRuntimeDirect(method meth) %{ - match(CallRuntime ); - effect(USE meth); - - ins_cost(300); - format %{ "CALL,runtime " %} - opcode(0xE8); /* E8 cd */ - // Use FFREEs to clear entries in float stack - ins_encode( pre_call_resets, - FFree_Float_Stack_All, - Java_To_Runtime( meth ), - post_call_FPU ); - ins_pipe( pipe_slow ); -%} - -// Call runtime without safepoint -instruct CallLeafDirect(method meth) %{ - match(CallLeaf); - effect(USE meth); - - ins_cost(300); - format %{ "CALL_LEAF,runtime " %} - opcode(0xE8); /* E8 cd */ - ins_encode( pre_call_resets, - FFree_Float_Stack_All, - Java_To_Runtime( meth ), - Verify_FPU_For_Leaf, post_call_FPU ); - ins_pipe( pipe_slow ); -%} - -instruct CallLeafNoFPDirect(method meth) %{ - 
match(CallLeafNoFP); - effect(USE meth); - - ins_cost(300); - format %{ "CALL_LEAF_NOFP,runtime " %} - opcode(0xE8); /* E8 cd */ - ins_encode(pre_call_resets, Java_To_Runtime(meth)); - ins_pipe( pipe_slow ); -%} - - -// Return Instruction -// Remove the return address & jump to it. -instruct Ret() %{ - match(Return); - format %{ "RET" %} - opcode(0xC3); - ins_encode(OpcP); - ins_pipe( pipe_jmp ); -%} - -// Tail Call; Jump from runtime stub to Java code. -// Also known as an 'interprocedural jump'. -// Target of jump will eventually return to caller. -// TailJump below removes the return address. -// Don't use ebp for 'jump_target' because a MachEpilogNode has already been -// emitted just above the TailCall which has reset ebp to the caller state. -instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_ptr) %{ - match(TailCall jump_target method_ptr); - ins_cost(300); - format %{ "JMP $jump_target \t# EBX holds method" %} - opcode(0xFF, 0x4); /* Opcode FF /4 */ - ins_encode( OpcP, RegOpc(jump_target) ); - ins_pipe( pipe_jmp ); -%} - - -// Tail Jump; remove the return address; jump to target. -// TailCall above leaves the return address around. -instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{ - match( TailJump jump_target ex_oop ); - ins_cost(300); - format %{ "POP EDX\t# pop return address into dummy\n\t" - "JMP $jump_target " %} - opcode(0xFF, 0x4); /* Opcode FF /4 */ - ins_encode( enc_pop_rdx, - OpcP, RegOpc(jump_target) ); - ins_pipe( pipe_jmp ); -%} - -// Forward exception. -instruct ForwardExceptionjmp() -%{ - match(ForwardException); - - format %{ "JMP forward_exception_stub" %} - ins_encode %{ - __ jump(RuntimeAddress(StubRoutines::forward_exception_entry()), noreg); - %} - ins_pipe(pipe_jmp); -%} - -// Create exception oop: created by stack-crawling runtime code. -// Created exception is now available to this handler, and is setup -// just prior to jumping to this handler. No code emitted. 
-instruct CreateException( eAXRegP ex_oop ) -%{ - match(Set ex_oop (CreateEx)); - - size(0); - // use the following format syntax - format %{ "# exception oop is in EAX; no code emitted" %} - ins_encode(); - ins_pipe( empty ); -%} - - -// Rethrow exception: -// The exception oop will come in the first argument position. -// Then JUMP (not call) to the rethrow stub code. -instruct RethrowException() -%{ - match(Rethrow); - - // use the following format syntax - format %{ "JMP rethrow_stub" %} - ins_encode(enc_rethrow); - ins_pipe( pipe_jmp ); -%} - -// inlined locking and unlocking - -instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr, eRegP thread) %{ - predicate(LockingMode != LM_LIGHTWEIGHT); - match(Set cr (FastLock object box)); - effect(TEMP tmp, TEMP scr, USE_KILL box, TEMP thread); - ins_cost(300); - format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %} - ins_encode %{ - __ get_thread($thread$$Register); - __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, - $scr$$Register, noreg, noreg, $thread$$Register, nullptr); - %} - ins_pipe(pipe_slow); -%} - -instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{ - predicate(LockingMode != LM_LIGHTWEIGHT); - match(Set cr (FastUnlock object box)); - effect(TEMP tmp, USE_KILL box); - ins_cost(300); - format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %} - ins_encode %{ - __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register); - %} - ins_pipe(pipe_slow); -%} - -instruct cmpFastLockLightweight(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI eax_reg, eRegP tmp, eRegP thread) %{ - predicate(LockingMode == LM_LIGHTWEIGHT); - match(Set cr (FastLock object box)); - effect(TEMP eax_reg, TEMP tmp, USE_KILL box, TEMP thread); - ins_cost(300); - format %{ "FASTLOCK $object,$box\t! 
kills $box,$eax_reg,$tmp" %} - ins_encode %{ - __ get_thread($thread$$Register); - __ fast_lock_lightweight($object$$Register, $box$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register); - %} - ins_pipe(pipe_slow); -%} - -instruct cmpFastUnlockLightweight(eFlagsReg cr, eRegP object, eAXRegP eax_reg, eRegP tmp, eRegP thread) %{ - predicate(LockingMode == LM_LIGHTWEIGHT); - match(Set cr (FastUnlock object eax_reg)); - effect(TEMP tmp, USE_KILL eax_reg, TEMP thread); - ins_cost(300); - format %{ "FASTUNLOCK $object,$eax_reg\t! kills $eax_reg,$tmp" %} - ins_encode %{ - __ get_thread($thread$$Register); - __ fast_unlock_lightweight($object$$Register, $eax_reg$$Register, $tmp$$Register, $thread$$Register); - %} - ins_pipe(pipe_slow); -%} - -instruct mask_all_evexL_LT32(kReg dst, eRegL src) %{ - predicate(Matcher::vector_length(n) <= 32); - match(Set dst (MaskAll src)); - format %{ "mask_all_evexL_LE32 $dst, $src \t" %} - ins_encode %{ - int mask_len = Matcher::vector_length(this); - __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct mask_all_evexL_GT32(kReg dst, eRegL src, kReg ktmp) %{ - predicate(Matcher::vector_length(n) > 32); - match(Set dst (MaskAll src)); - effect(TEMP ktmp); - format %{ "mask_all_evexL_GT32 $dst, $src \t! using $ktmp as TEMP " %} - ins_encode %{ - int mask_len = Matcher::vector_length(this); - __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len); - %} - ins_pipe( pipe_slow ); -%} - -instruct mask_all_evexI_GT32(kReg dst, rRegI src, kReg ktmp) %{ - predicate(Matcher::vector_length(n) > 32); - match(Set dst (MaskAll src)); - effect(TEMP ktmp); - format %{ "mask_all_evexI_GT32 $dst, $src \t! 
using $ktmp as TEMP" %} - ins_encode %{ - int mask_len = Matcher::vector_length(this); - __ vector_maskall_operation32($dst$$KRegister, $src$$Register, $ktmp$$KRegister, mask_len); - %} - ins_pipe( pipe_slow ); -%} - -// ============================================================================ -// Safepoint Instruction -instruct safePoint_poll_tls(eFlagsReg cr, eRegP_no_EBP poll) %{ - match(SafePoint poll); - effect(KILL cr, USE poll); - - format %{ "TSTL #EAX,[$poll]\t! Safepoint: poll for GC" %} - ins_cost(125); - // EBP would need size(3) - size(2); /* setting an explicit size will cause debug builds to assert if size is incorrect */ - ins_encode %{ - __ set_inst_mark(); - __ relocate(relocInfo::poll_type); - __ clear_inst_mark(); - address pre_pc = __ pc(); - __ testl(rax, Address($poll$$Register, 0)); - address post_pc = __ pc(); - guarantee(pre_pc[0] == 0x85, "must emit test-ax [reg]"); - %} - ins_pipe(ialu_reg_mem); -%} - - -// ============================================================================ -// This name is KNOWN by the ADLC and cannot be changed. -// The ADLC forces a 'TypeRawPtr::BOTTOM' output type -// for this guy. -instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{ - match(Set dst (ThreadLocal)); - effect(DEF dst, KILL cr); - - format %{ "MOV $dst, Thread::current()" %} - ins_encode %{ - Register dstReg = as_Register($dst$$reg); - __ get_thread(dstReg); - %} - ins_pipe( ialu_reg_fat ); -%} - - - -//----------PEEPHOLE RULES----------------------------------------------------- -// These must follow all instruction definitions as they use the names -// defined in the instructions definitions. -// -// peepmatch ( root_instr_name [preceding_instruction]* ); -// -// peepconstraint %{ -// (instruction_number.operand_name relational_op instruction_number.operand_name -// [, ...] 
); -// // instruction numbers are zero-based using left to right order in peepmatch -// -// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); -// // provide an instruction_number.operand_name for each operand that appears -// // in the replacement instruction's match rule -// -// ---------VM FLAGS--------------------------------------------------------- -// -// All peephole optimizations can be turned off using -XX:-OptoPeephole -// -// Each peephole rule is given an identifying number starting with zero and -// increasing by one in the order seen by the parser. An individual peephole -// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# -// on the command-line. -// -// ---------CURRENT LIMITATIONS---------------------------------------------- -// -// Only match adjacent instructions in same basic block -// Only equality constraints -// Only constraints between operands, not (0.dest_reg == EAX_enc) -// Only one replacement instruction -// -// ---------EXAMPLE---------------------------------------------------------- -// -// // pertinent parts of existing instructions in architecture description -// instruct movI(rRegI dst, rRegI src) %{ -// match(Set dst (CopyI src)); -// %} -// -// instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{ -// match(Set dst (AddI dst src)); -// effect(KILL cr); -// %} -// -// // Change (inc mov) to lea -// peephole %{ -// // increment preceded by register-register move -// peepmatch ( incI_eReg movI ); -// // require that the destination register of the increment -// // match the destination register of the move -// peepconstraint ( 0.dst == 1.dst ); -// // construct a replacement instruction that sets -// // the destination to ( move's source register + one ) -// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); -// %} -// -// Implementation no longer uses movX instructions since -// machine-independent system no longer uses CopyX nodes. 
-// -// peephole %{ -// peepmatch ( incI_eReg movI ); -// peepconstraint ( 0.dst == 1.dst ); -// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); -// %} -// -// peephole %{ -// peepmatch ( decI_eReg movI ); -// peepconstraint ( 0.dst == 1.dst ); -// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); -// %} -// -// peephole %{ -// peepmatch ( addI_eReg_imm movI ); -// peepconstraint ( 0.dst == 1.dst ); -// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); -// %} -// -// peephole %{ -// peepmatch ( addP_eReg_imm movP ); -// peepconstraint ( 0.dst == 1.dst ); -// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); -// %} - -// // Change load of spilled value to only a spill -// instruct storeI(memory mem, rRegI src) %{ -// match(Set mem (StoreI mem src)); -// %} -// -// instruct loadI(rRegI dst, memory mem) %{ -// match(Set dst (LoadI mem)); -// %} -// -peephole %{ - peepmatch ( loadI storeI ); - peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); - peepreplace ( storeI( 1.mem 1.mem 1.src ) ); -%} - -//----------SMARTSPILL RULES--------------------------------------------------- -// These must follow all instruction definitions as they use the names -// defined in the instructions definitions. 
diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad index 4667922505cbc..5407235b4a3f2 100644 --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad @@ -852,7 +852,7 @@ void MachPrologNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc *ra_) const { __ bind(L_skip_barrier); } - __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, false, C->stub_function() != nullptr); + __ verified_entry(framesize, C->output()->need_stack_bang(bangsize)?bangsize:0, C->stub_function() != nullptr); C->output()->set_frame_complete(__ offset()); diff --git a/src/hotspot/os/linux/os_linux.hpp b/src/hotspot/os/linux/os_linux.hpp index 1cd445138899c..46b1b9a91725c 100644 --- a/src/hotspot/os/linux/os_linux.hpp +++ b/src/hotspot/os/linux/os_linux.hpp @@ -101,8 +101,6 @@ class os::Linux { static const char *dll_path(void* lib); static void init_thread_fpu_state(); - static int get_fpu_control_word(); - static void set_fpu_control_word(int fpu_control); static pthread_t main_thread(void) { return _main_thread; } // returns kernel thread id (similar to LWP id on Solaris), which can be // used to access /proc diff --git a/src/hotspot/os_cpu/bsd_x86/atomic_bsd_x86.hpp b/src/hotspot/os_cpu/bsd_x86/atomic_bsd_x86.hpp index 9ba246f553d88..db10904a5d90c 100644 --- a/src/hotspot/os_cpu/bsd_x86/atomic_bsd_x86.hpp +++ b/src/hotspot/os_cpu/bsd_x86/atomic_bsd_x86.hpp @@ -93,7 +93,6 @@ inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest, return exchange_value; } -#ifdef AMD64 template<> template inline D Atomic::PlatformAdd<8>::fetch_then_add(D volatile* dest, I add_value, @@ -135,51 +134,6 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest, return exchange_value; } -#else // !AMD64 - -extern "C" { - // defined in bsd_x86.s - int64_t _Atomic_cmpxchg_long(int64_t, volatile int64_t*, int64_t); - void _Atomic_move_long(const volatile int64_t* src, volatile int64_t* dst); -} - -template<> -template -inline T 
Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest, - T compare_value, - T exchange_value, - atomic_memory_order /* order */) const { - STATIC_ASSERT(8 == sizeof(T)); - return cmpxchg_using_helper(_Atomic_cmpxchg_long, dest, compare_value, exchange_value); -} - -// No direct support for 8-byte xchg; emulate using cmpxchg. -template<> -struct Atomic::PlatformXchg<8> : Atomic::XchgUsingCmpxchg<8> {}; - -// No direct support for 8-byte add; emulate using cmpxchg. -template<> -struct Atomic::PlatformAdd<8> : Atomic::AddUsingCmpxchg<8> {}; - -template<> -template -inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const { - STATIC_ASSERT(8 == sizeof(T)); - volatile int64_t dest; - _Atomic_move_long(reinterpret_cast(src), reinterpret_cast(&dest)); - return PrimitiveConversions::cast(dest); -} - -template<> -template -inline void Atomic::PlatformStore<8>::operator()(T volatile* dest, - T store_value) const { - STATIC_ASSERT(8 == sizeof(T)); - _Atomic_move_long(reinterpret_cast(&store_value), reinterpret_cast(dest)); -} - -#endif // AMD64 - template<> struct Atomic::PlatformOrderedStore<1, RELEASE_X_FENCE> { @@ -216,7 +170,6 @@ struct Atomic::PlatformOrderedStore<4, RELEASE_X_FENCE> } }; -#ifdef AMD64 template<> struct Atomic::PlatformOrderedStore<8, RELEASE_X_FENCE> { @@ -228,6 +181,5 @@ struct Atomic::PlatformOrderedStore<8, RELEASE_X_FENCE> : "memory"); } }; -#endif // AMD64 #endif // OS_CPU_BSD_X86_ATOMIC_BSD_X86_HPP diff --git a/src/hotspot/os_cpu/bsd_x86/bsd_x86_32.S b/src/hotspot/os_cpu/bsd_x86/bsd_x86_32.S deleted file mode 100644 index 7d8892bcd8798..0000000000000 --- a/src/hotspot/os_cpu/bsd_x86/bsd_x86_32.S +++ /dev/null @@ -1,525 +0,0 @@ -# -# Copyright (c) 2004, 2024, Oracle and/or its affiliates. All rights reserved. -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
-# -# This code is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License version 2 only, as -# published by the Free Software Foundation. -# -# This code is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -# version 2 for more details (a copy is included in the LICENSE file that -# accompanied this code). -# -# You should have received a copy of the GNU General Public License version -# 2 along with this work; if not, write to the Free Software Foundation, -# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -# -# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -# or visit www.oracle.com if you need additional information or have any -# questions. -# - -#include "defs.S.inc" - - # NOTE WELL! The _Copy functions are called directly - # from server-compiler-generated code via CallLeafNoFP, - # which means that they *must* either not use floating - # point or use it in the same manner as does the server - # compiler. - - .text - -# Set fpu to 53 bit precision. This happens too early to use a stub. 
- .p2align 4,,15 -DECLARE_FUNC(fixcw): - pushl $0x27f - fldcw 0(%esp) - popl %eax - ret - - .p2align 4,,15 -DECLARE_FUNC(SpinPause): - rep - nop - movl $1, %eax - ret - - # Support for void Copy::arrayof_conjoint_bytes(void* from, - # void* to, - # size_t count) - # - .p2align 4,,15 -DECLARE_FUNC(_Copy_arrayof_conjoint_bytes): - pushl %esi - movl 4+12(%esp),%ecx # count - pushl %edi - movl 8+ 4(%esp),%esi # from - movl 8+ 8(%esp),%edi # to - cmpl %esi,%edi - leal -1(%esi,%ecx),%eax # from + count - 1 - jbe acb_CopyRight - cmpl %eax,%edi - jbe acb_CopyLeft - # copy from low to high -acb_CopyRight: - cmpl $3,%ecx - jbe 5f -1: movl %ecx,%eax - shrl $2,%ecx - jz 4f - cmpl $32,%ecx - ja 3f - # copy aligned dwords - subl %esi,%edi - .p2align 4,,15 -2: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - addl $4,%esi - subl $1,%ecx - jnz 2b - addl %esi,%edi - jmp 4f - # copy aligned dwords -3: rep; smovl -4: movl %eax,%ecx -5: andl $3,%ecx - jz 7f - # copy suffix - xorl %eax,%eax -6: movb (%esi,%eax,1),%dl - movb %dl,(%edi,%eax,1) - addl $1,%eax - subl $1,%ecx - jnz 6b -7: popl %edi - popl %esi - ret -acb_CopyLeft: - std - leal -4(%edi,%ecx),%edi # to + count - 4 - movl %eax,%esi # from + count - 1 - movl %ecx,%eax - subl $3,%esi # from + count - 4 - cmpl $3,%ecx - jbe 5f -1: shrl $2,%ecx - jz 4f - cmpl $32,%ecx - jbe 2f # <= 32 dwords - rep; smovl - jmp 4f - .space 8 -2: subl %esi,%edi - .p2align 4,,15 -3: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - subl $4,%esi - subl $1,%ecx - jnz 3b - addl %esi,%edi -4: movl %eax,%ecx -5: andl $3,%ecx - jz 7f - subl %esi,%edi - addl $3,%esi -6: movb (%esi),%dl - movb %dl,(%edi,%esi,1) - subl $1,%esi - subl $1,%ecx - jnz 6b -7: cld - popl %edi - popl %esi - ret - - # Support for void Copy::conjoint_jshorts_atomic(void* from, - # void* to, - # size_t count) - .p2align 4,,15 -DECLARE_FUNC(_Copy_conjoint_jshorts_atomic): - pushl %esi - movl 4+12(%esp),%ecx # count - pushl %edi - movl 8+ 4(%esp),%esi # from - movl 8+ 8(%esp),%edi # to - cmpl 
%esi,%edi - leal -2(%esi,%ecx,2),%eax # from + count*2 - 2 - jbe cs_CopyRight - cmpl %eax,%edi - jbe cs_CopyLeft - # copy from low to high -cs_CopyRight: - # align source address at dword address boundary - movl %esi,%eax # original from - andl $3,%eax # either 0 or 2 - jz 1f # no prefix - # copy prefix - subl $1,%ecx - jl 5f # zero count - movw (%esi),%dx - movw %dx,(%edi) - addl %eax,%esi # %eax == 2 - addl %eax,%edi -1: movl %ecx,%eax # word count less prefix - sarl %ecx # dword count - jz 4f # no dwords to move - cmpl $32,%ecx - jbe 2f # <= 32 dwords - # copy aligned dwords - rep; smovl - jmp 4f - # copy aligned dwords -2: subl %esi,%edi - .p2align 4,,15 -3: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - addl $4,%esi - subl $1,%ecx - jnz 3b - addl %esi,%edi -4: andl $1,%eax # suffix count - jz 5f # no suffix - # copy suffix - movw (%esi),%dx - movw %dx,(%edi) -5: popl %edi - popl %esi - ret - # copy from high to low -cs_CopyLeft: - std - leal -4(%edi,%ecx,2),%edi # to + count*2 - 4 - movl %eax,%esi # from + count*2 - 2 - movl %ecx,%eax - subl $2,%esi # from + count*2 - 4 -1: sarl %ecx # dword count - jz 4f # no dwords to move - cmpl $32,%ecx - ja 3f # > 32 dwords - subl %esi,%edi - .p2align 4,,15 -2: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - subl $4,%esi - subl $1,%ecx - jnz 2b - addl %esi,%edi - jmp 4f -3: rep; smovl -4: andl $1,%eax # suffix count - jz 5f # no suffix - # copy suffix - addl $2,%esi - addl $2,%edi - movw (%esi),%dx - movw %dx,(%edi) -5: cld - popl %edi - popl %esi - ret - - # Support for void Copy::arrayof_conjoint_jshorts(void* from, - # void* to, - # size_t count) - .p2align 4,,15 -DECLARE_FUNC(_Copy_arrayof_conjoint_jshorts): - pushl %esi - movl 4+12(%esp),%ecx # count - pushl %edi - movl 8+ 4(%esp),%esi # from - movl 8+ 8(%esp),%edi # to - cmpl %esi,%edi - leal -2(%esi,%ecx,2),%eax # from + count*2 - 2 - jbe acs_CopyRight - cmpl %eax,%edi - jbe acs_CopyLeft -acs_CopyRight: - movl %ecx,%eax # word count - sarl %ecx # dword count - jz 4f # 
no dwords to move - cmpl $32,%ecx - jbe 2f # <= 32 dwords - # copy aligned dwords - rep; smovl - jmp 4f - # copy aligned dwords - .space 5 -2: subl %esi,%edi - .p2align 4,,15 -3: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - addl $4,%esi - subl $1,%ecx - jnz 3b - addl %esi,%edi -4: andl $1,%eax # suffix count - jz 5f # no suffix - # copy suffix - movw (%esi),%dx - movw %dx,(%edi) -5: popl %edi - popl %esi - ret -acs_CopyLeft: - std - leal -4(%edi,%ecx,2),%edi # to + count*2 - 4 - movl %eax,%esi # from + count*2 - 2 - movl %ecx,%eax - subl $2,%esi # from + count*2 - 4 - sarl %ecx # dword count - jz 4f # no dwords to move - cmpl $32,%ecx - ja 3f # > 32 dwords - subl %esi,%edi - .p2align 4,,15 -2: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - subl $4,%esi - subl $1,%ecx - jnz 2b - addl %esi,%edi - jmp 4f -3: rep; smovl -4: andl $1,%eax # suffix count - jz 5f # no suffix - # copy suffix - addl $2,%esi - addl $2,%edi - movw (%esi),%dx - movw %dx,(%edi) -5: cld - popl %edi - popl %esi - ret - - # Support for void Copy::conjoint_jints_atomic(void* from, - # void* to, - # size_t count) - # Equivalent to - # arrayof_conjoint_jints - .p2align 4,,15 -DECLARE_FUNC(_Copy_conjoint_jints_atomic): -DECLARE_FUNC(_Copy_arrayof_conjoint_jints): - pushl %esi - movl 4+12(%esp),%ecx # count - pushl %edi - movl 8+ 4(%esp),%esi # from - movl 8+ 8(%esp),%edi # to - cmpl %esi,%edi - leal -4(%esi,%ecx,4),%eax # from + count*4 - 4 - jbe ci_CopyRight - cmpl %eax,%edi - jbe ci_CopyLeft -ci_CopyRight: - cmpl $32,%ecx - jbe 2f # <= 32 dwords - rep; smovl - popl %edi - popl %esi - ret - .space 10 -2: subl %esi,%edi - jmp 4f - .p2align 4,,15 -3: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - addl $4,%esi -4: subl $1,%ecx - jge 3b - popl %edi - popl %esi - ret -ci_CopyLeft: - std - leal -4(%edi,%ecx,4),%edi # to + count*4 - 4 - cmpl $32,%ecx - ja 4f # > 32 dwords - subl %eax,%edi # eax == from + count*4 - 4 - jmp 3f - .p2align 4,,15 -2: movl (%eax),%edx - movl %edx,(%edi,%eax,1) - subl $4,%eax -3: subl 
$1,%ecx - jge 2b - cld - popl %edi - popl %esi - ret -4: movl %eax,%esi # from + count*4 - 4 - rep; smovl - cld - popl %edi - popl %esi - ret - - # Support for void Copy::conjoint_jlongs_atomic(jlong* from, - # jlong* to, - # size_t count) - # - # 32-bit - # - # count treated as signed - # - # // if (from > to) { - # while (--count >= 0) { - # *to++ = *from++; - # } - # } else { - # while (--count >= 0) { - # to[count] = from[count]; - # } - # } - .p2align 4,,15 -DECLARE_FUNC(_Copy_conjoint_jlongs_atomic): - movl 4+8(%esp),%ecx # count - movl 4+0(%esp),%eax # from - movl 4+4(%esp),%edx # to - cmpl %eax,%edx - jae cla_CopyLeft -cla_CopyRight: - subl %eax,%edx - jmp 2f - .p2align 4,,15 -1: fildll (%eax) - fistpll (%edx,%eax,1) - addl $8,%eax -2: subl $1,%ecx - jge 1b - ret - .p2align 4,,15 -3: fildll (%eax,%ecx,8) - fistpll (%edx,%ecx,8) -cla_CopyLeft: - subl $1,%ecx - jge 3b - ret - - # Support for void Copy::arrayof_conjoint_jshorts(void* from, - # void* to, - # size_t count) - .p2align 4,,15 -DECLARE_FUNC(_mmx_Copy_arrayof_conjoint_jshorts): - pushl %esi - movl 4+12(%esp),%ecx - pushl %edi - movl 8+ 4(%esp),%esi - movl 8+ 8(%esp),%edi - cmpl %esi,%edi - leal -2(%esi,%ecx,2),%eax - jbe mmx_acs_CopyRight - cmpl %eax,%edi - jbe mmx_acs_CopyLeft -mmx_acs_CopyRight: - movl %ecx,%eax - sarl %ecx - je 5f - cmpl $33,%ecx - jae 3f -1: subl %esi,%edi - .p2align 4,,15 -2: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - addl $4,%esi - subl $1,%ecx - jnz 2b - addl %esi,%edi - jmp 5f -3: smovl # align to 8 bytes, we know we are 4 byte aligned to start - subl $1,%ecx -4: .p2align 4,,15 - movq 0(%esi),%mm0 - addl $64,%edi - movq 8(%esi),%mm1 - subl $16,%ecx - movq 16(%esi),%mm2 - movq %mm0,-64(%edi) - movq 24(%esi),%mm0 - movq %mm1,-56(%edi) - movq 32(%esi),%mm1 - movq %mm2,-48(%edi) - movq 40(%esi),%mm2 - movq %mm0,-40(%edi) - movq 48(%esi),%mm0 - movq %mm1,-32(%edi) - movq 56(%esi),%mm1 - movq %mm2,-24(%edi) - movq %mm0,-16(%edi) - addl $64,%esi - movq %mm1,-8(%edi) - cmpl 
$16,%ecx - jge 4b - emms - testl %ecx,%ecx - ja 1b -5: andl $1,%eax - je 7f -6: movw (%esi),%dx - movw %dx,(%edi) -7: popl %edi - popl %esi - ret -mmx_acs_CopyLeft: - std - leal -4(%edi,%ecx,2),%edi - movl %eax,%esi - movl %ecx,%eax - subl $2,%esi - sarl %ecx - je 4f - cmpl $32,%ecx - ja 3f - subl %esi,%edi - .p2align 4,,15 -2: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - subl $4,%esi - subl $1,%ecx - jnz 2b - addl %esi,%edi - jmp 4f -3: rep; smovl -4: andl $1,%eax - je 6f - addl $2,%esi - addl $2,%edi -5: movw (%esi),%dx - movw %dx,(%edi) -6: cld - popl %edi - popl %esi - ret - - - # Support for int64_t Atomic::cmpxchg(int64_t compare_value, - # volatile int64_t* dest, - # int64_t exchange_value) - # - .p2align 4,,15 -DECLARE_FUNC(_Atomic_cmpxchg_long): - # 8(%esp) : return PC - pushl %ebx # 4(%esp) : old %ebx - pushl %edi # 0(%esp) : old %edi - movl 12(%esp), %ebx # 12(%esp) : exchange_value (low) - movl 16(%esp), %ecx # 16(%esp) : exchange_value (high) - movl 24(%esp), %eax # 24(%esp) : compare_value (low) - movl 28(%esp), %edx # 28(%esp) : compare_value (high) - movl 20(%esp), %edi # 20(%esp) : dest - lock - cmpxchg8b (%edi) - popl %edi - popl %ebx - ret - - - # Support for int64_t Atomic::load and Atomic::store. - # void _Atomic_move_long(const volatile int64_t* src, volatile int64_t* dst) - .p2align 4,,15 -DECLARE_FUNC(_Atomic_move_long): - movl 4(%esp), %eax # src - fildll (%eax) - movl 8(%esp), %eax # dest - fistpll (%eax) - ret diff --git a/src/hotspot/os_cpu/bsd_x86/globals_bsd_x86.hpp b/src/hotspot/os_cpu/bsd_x86/globals_bsd_x86.hpp index f67bb15c69ef4..292759198ec8b 100644 --- a/src/hotspot/os_cpu/bsd_x86/globals_bsd_x86.hpp +++ b/src/hotspot/os_cpu/bsd_x86/globals_bsd_x86.hpp @@ -29,19 +29,9 @@ // Sets the default values for platform dependent flags used by the runtime system. 
// (see globals.hpp) // -#ifdef AMD64 define_pd_global(intx, CompilerThreadStackSize, 1024); define_pd_global(intx, ThreadStackSize, 1024); // 0 => use system default define_pd_global(intx, VMThreadStackSize, 1024); -#else -define_pd_global(intx, CompilerThreadStackSize, 512); -// ThreadStackSize 320 allows a couple of test cases to run while -// keeping the number of threads that can be created high. System -// default ThreadStackSize appears to be 512 which is too big. -define_pd_global(intx, ThreadStackSize, 320); -define_pd_global(intx, VMThreadStackSize, 512); -#endif // AMD64 - define_pd_global(size_t, JVMInvokeMethodSlack, 8192); diff --git a/src/hotspot/os_cpu/bsd_x86/orderAccess_bsd_x86.hpp b/src/hotspot/os_cpu/bsd_x86/orderAccess_bsd_x86.hpp index 90e2574abf2a2..c1d9849c8d0ec 100644 --- a/src/hotspot/os_cpu/bsd_x86/orderAccess_bsd_x86.hpp +++ b/src/hotspot/os_cpu/bsd_x86/orderAccess_bsd_x86.hpp @@ -51,11 +51,7 @@ inline void OrderAccess::release() { compiler_barrier(); } inline void OrderAccess::fence() { // always use locked addl since mfence is sometimes expensive -#ifdef AMD64 __asm__ volatile ("lock; addl $0,0(%%rsp)" : : : "cc", "memory"); -#else - __asm__ volatile ("lock; addl $0,0(%%esp)" : : : "cc", "memory"); -#endif compiler_barrier(); } diff --git a/src/hotspot/os_cpu/bsd_x86/os_bsd_x86.cpp b/src/hotspot/os_cpu/bsd_x86/os_bsd_x86.cpp index 153c5ad7e2b76..1a6bee07e2d8c 100644 --- a/src/hotspot/os_cpu/bsd_x86/os_bsd_x86.cpp +++ b/src/hotspot/os_cpu/bsd_x86/os_bsd_x86.cpp @@ -87,17 +87,11 @@ # define OS_X_10_9_0_KERNEL_MAJOR_VERSION 13 #endif -#ifdef AMD64 #define SPELL_REG_SP "rsp" #define SPELL_REG_FP "rbp" -#else -#define SPELL_REG_SP "esp" -#define SPELL_REG_FP "ebp" -#endif // AMD64 #ifdef __FreeBSD__ # define context_trapno uc_mcontext.mc_trapno -# ifdef AMD64 # define context_pc uc_mcontext.mc_rip # define context_sp uc_mcontext.mc_rsp # define context_fp uc_mcontext.mc_rbp @@ -120,22 +114,6 @@ # define context_r15 uc_mcontext.mc_r15 # 
define context_flags uc_mcontext.mc_flags # define context_err uc_mcontext.mc_err -# else -# define context_pc uc_mcontext.mc_eip -# define context_sp uc_mcontext.mc_esp -# define context_fp uc_mcontext.mc_ebp -# define context_eip uc_mcontext.mc_eip -# define context_esp uc_mcontext.mc_esp -# define context_eax uc_mcontext.mc_eax -# define context_ebx uc_mcontext.mc_ebx -# define context_ecx uc_mcontext.mc_ecx -# define context_edx uc_mcontext.mc_edx -# define context_ebp uc_mcontext.mc_ebp -# define context_esi uc_mcontext.mc_esi -# define context_edi uc_mcontext.mc_edi -# define context_eflags uc_mcontext.mc_eflags -# define context_trapno uc_mcontext.mc_trapno -# endif #endif #ifdef __APPLE__ @@ -146,7 +124,6 @@ #define DU3_PREFIX(s, m) s ## . ## m # endif -# ifdef AMD64 # define context_pc context_rip # define context_sp context_rsp # define context_fp context_rbp @@ -170,27 +147,10 @@ # define context_flags uc_mcontext->DU3_PREFIX(ss,rflags) # define context_trapno uc_mcontext->DU3_PREFIX(es,trapno) # define context_err uc_mcontext->DU3_PREFIX(es,err) -# else -# define context_pc context_eip -# define context_sp context_esp -# define context_fp context_ebp -# define context_eip uc_mcontext->DU3_PREFIX(ss,eip) -# define context_esp uc_mcontext->DU3_PREFIX(ss,esp) -# define context_eax uc_mcontext->DU3_PREFIX(ss,eax) -# define context_ebx uc_mcontext->DU3_PREFIX(ss,ebx) -# define context_ecx uc_mcontext->DU3_PREFIX(ss,ecx) -# define context_edx uc_mcontext->DU3_PREFIX(ss,edx) -# define context_ebp uc_mcontext->DU3_PREFIX(ss,ebp) -# define context_esi uc_mcontext->DU3_PREFIX(ss,esi) -# define context_edi uc_mcontext->DU3_PREFIX(ss,edi) -# define context_eflags uc_mcontext->DU3_PREFIX(ss,eflags) -# define context_trapno uc_mcontext->DU3_PREFIX(es,trapno) -# endif #endif #ifdef __OpenBSD__ # define context_trapno sc_trapno -# ifdef AMD64 # define context_pc sc_rip # define context_sp sc_rsp # define context_fp sc_rbp @@ -213,27 +173,10 @@ # define context_r15 
sc_r15 # define context_flags sc_rflags # define context_err sc_err -# else -# define context_pc sc_eip -# define context_sp sc_esp -# define context_fp sc_ebp -# define context_eip sc_eip -# define context_esp sc_esp -# define context_eax sc_eax -# define context_ebx sc_ebx -# define context_ecx sc_ecx -# define context_edx sc_edx -# define context_ebp sc_ebp -# define context_esi sc_esi -# define context_edi sc_edi -# define context_eflags sc_eflags -# define context_trapno sc_trapno -# endif #endif #ifdef __NetBSD__ # define context_trapno uc_mcontext.__gregs[_REG_TRAPNO] -# ifdef AMD64 # define __register_t __greg_t # define context_pc uc_mcontext.__gregs[_REG_RIP] # define context_sp uc_mcontext.__gregs[_REG_URSP] @@ -257,22 +200,6 @@ # define context_r15 uc_mcontext.__gregs[_REG_R15] # define context_flags uc_mcontext.__gregs[_REG_RFL] # define context_err uc_mcontext.__gregs[_REG_ERR] -# else -# define context_pc uc_mcontext.__gregs[_REG_EIP] -# define context_sp uc_mcontext.__gregs[_REG_UESP] -# define context_fp uc_mcontext.__gregs[_REG_EBP] -# define context_eip uc_mcontext.__gregs[_REG_EIP] -# define context_esp uc_mcontext.__gregs[_REG_UESP] -# define context_eax uc_mcontext.__gregs[_REG_EAX] -# define context_ebx uc_mcontext.__gregs[_REG_EBX] -# define context_ecx uc_mcontext.__gregs[_REG_ECX] -# define context_edx uc_mcontext.__gregs[_REG_EDX] -# define context_ebp uc_mcontext.__gregs[_REG_EBP] -# define context_esi uc_mcontext.__gregs[_REG_ESI] -# define context_edi uc_mcontext.__gregs[_REG_EDI] -# define context_eflags uc_mcontext.__gregs[_REG_EFL] -# define context_trapno uc_mcontext.__gregs[_REG_TRAPNO] -# endif #endif address os::current_stack_pointer() { @@ -422,7 +349,7 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, stub = VM_Version::cpuinfo_cont_addr(); } -#if !defined(PRODUCT) && defined(_LP64) +#if !defined(PRODUCT) if ((sig == SIGSEGV || sig == SIGBUS) && VM_Version::is_cpuinfo_segv_addr_apx(pc)) { // Verify that 
OS save/restore APX registers. stub = VM_Version::cpuinfo_cont_addr_apx(); @@ -463,9 +390,7 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, } stub = SharedRuntime::handle_unsafe_access(thread, next_pc); } - } else -#ifdef AMD64 - if (sig == SIGFPE && + } else if (sig == SIGFPE && (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV // Workaround for macOS ARM incorrectly reporting FPE_FLTINV for "div by 0" // instead of the expected FPE_FLTDIV when running x86_64 binary under Rosetta emulation @@ -498,33 +423,6 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, } #endif /* __APPLE__ */ -#else - if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) { - // HACK: si_code does not work on bsd 2.2.12-20!!! - int op = pc[0]; - if (op == 0xDB) { - // FIST - // TODO: The encoding of D2I in x86_32.ad can cause an exception - // prior to the fist instruction if there was an invalid operation - // pending. We want to dismiss that exception. From the win_32 - // side it also seems that if it really was the fist causing - // the exception that we do the d2i by hand with different - // rounding. Seems kind of weird. - // NOTE: that we take the exception at the NEXT floating point instruction. - assert(pc[0] == 0xDB, "not a FIST opcode"); - assert(pc[1] == 0x14, "not a FIST opcode"); - assert(pc[2] == 0x24, "not a FIST opcode"); - return true; - } else if (op == 0xF7) { - // IDIV - stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); - } else { - // TODO: handle more cases if we are using other x86 instructions - // that can generate SIGFPE signal on bsd. 
- tty->print_cr("unknown opcode 0x%X with SIGFPE.", op); - fatal("please update this code."); - } -#endif // AMD64 } else if ((sig == SIGSEGV || sig == SIGBUS) && MacroAssembler::uses_implicit_null_check(info->si_addr)) { // Determination of interpreter/vtable stub/compiled code null exception @@ -551,81 +449,6 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, } } -#ifndef AMD64 - // Execution protection violation - // - // This should be kept as the last step in the triage. We don't - // have a dedicated trap number for a no-execute fault, so be - // conservative and allow other handlers the first shot. - // - // Note: We don't test that info->si_code == SEGV_ACCERR here. - // this si_code is so generic that it is almost meaningless; and - // the si_code for this condition may change in the future. - // Furthermore, a false-positive should be harmless. - if (UnguardOnExecutionViolation > 0 && - stub == nullptr && - (sig == SIGSEGV || sig == SIGBUS) && - uc->context_trapno == trap_page_fault) { - size_t page_size = os::vm_page_size(); - address addr = (address) info->si_addr; - address pc = os::Posix::ucontext_get_pc(uc); - // Make sure the pc and the faulting address are sane. - // - // If an instruction spans a page boundary, and the page containing - // the beginning of the instruction is executable but the following - // page is not, the pc and the faulting address might be slightly - // different - we still want to unguard the 2nd page in this case. - // - // 15 bytes seems to be a (very) safe value for max instruction size. 
- bool pc_is_near_addr = - (pointer_delta((void*) addr, (void*) pc, sizeof(char)) < 15); - bool instr_spans_page_boundary = - (align_down((intptr_t) pc ^ (intptr_t) addr, - (intptr_t) page_size) > 0); - - if (pc == addr || (pc_is_near_addr && instr_spans_page_boundary)) { - static volatile address last_addr = - (address) os::non_memory_address_word(); - - // In conservative mode, don't unguard unless the address is in the VM - if (addr != last_addr && - (UnguardOnExecutionViolation > 1 || os::address_is_in_vm(addr))) { - - // Set memory to RWX and retry - address page_start = align_down(addr, page_size); - bool res = os::protect_memory((char*) page_start, page_size, - os::MEM_PROT_RWX); - - log_debug(os)("Execution protection violation " - "at " INTPTR_FORMAT - ", unguarding " INTPTR_FORMAT ": %s, errno=%d", p2i(addr), - p2i(page_start), (res ? "success" : "failed"), errno); - stub = pc; - - // Set last_addr so if we fault again at the same address, we don't end - // up in an endless loop. - // - // There are two potential complications here. Two threads trapping at - // the same address at the same time could cause one of the threads to - // think it already unguarded, and abort the VM. Likely very rare. - // - // The other race involves two threads alternately trapping at - // different addresses and failing to unguard the page, resulting in - // an endless loop. This condition is probably even more unlikely than - // the first. - // - // Although both cases could be avoided by using locks or thread local - // last_addr, these solutions are unnecessary complication: this - // handler is a best-effort safety net, not a complete solution. It is - // disabled by default and should only be used as a workaround in case - // we missed any no-execute-unsafe VM code. 
- - last_addr = addr; - } - } - } -#endif // !AMD64 - if (stub != nullptr) { // save all thread context in case we need to restore it if (thread != nullptr) thread->set_saved_exception_pc(pc); @@ -641,10 +464,7 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, extern "C" void fixcw(); void os::Bsd::init_thread_fpu_state(void) { -#ifndef AMD64 - // Set fpu to 53 bit precision. This happens too early to use a stub. - fixcw(); -#endif // !AMD64 + // Nothing to do. } juint os::cpu_microcode_revision() { @@ -666,26 +486,16 @@ juint os::cpu_microcode_revision() { // HotSpot guard pages is added later. size_t os::_compiler_thread_min_stack_allowed = 48 * K; size_t os::_java_thread_min_stack_allowed = 48 * K; -#ifdef _LP64 size_t os::_vm_internal_thread_min_stack_allowed = 64 * K; -#else -size_t os::_vm_internal_thread_min_stack_allowed = (48 DEBUG_ONLY(+ 4)) * K; -#endif // _LP64 -#ifndef AMD64 #ifdef __GNUC__ #define GET_GS() ({int gs; __asm__ volatile("movw %%gs, %w0":"=q"(gs)); gs&0xffff;}) #endif -#endif // AMD64 // return default stack size for thr_type size_t os::Posix::default_stack_size(os::ThreadType thr_type) { // default stack size (compiler thread needs larger stack) -#ifdef AMD64 size_t s = (thr_type == os::compiler_thread ? 4 * M : 1 * M); -#else - size_t s = (thr_type == os::compiler_thread ? 
2 * M : 512 * K); -#endif // AMD64 return s; } @@ -798,7 +608,6 @@ void os::print_context(outputStream *st, const void *context) { const ucontext_t *uc = (const ucontext_t*)context; st->print_cr("Registers:"); -#ifdef AMD64 st->print( "RAX=" INTPTR_FORMAT, (intptr_t)uc->context_rax); st->print(", RBX=" INTPTR_FORMAT, (intptr_t)uc->context_rbx); st->print(", RCX=" INTPTR_FORMAT, (intptr_t)uc->context_rcx); @@ -824,26 +633,12 @@ void os::print_context(outputStream *st, const void *context) { st->print(", ERR=" INTPTR_FORMAT, (intptr_t)uc->context_err); st->cr(); st->print(" TRAPNO=" INTPTR_FORMAT, (intptr_t)uc->context_trapno); -#else - st->print( "EAX=" INTPTR_FORMAT, (intptr_t)uc->context_eax); - st->print(", EBX=" INTPTR_FORMAT, (intptr_t)uc->context_ebx); - st->print(", ECX=" INTPTR_FORMAT, (intptr_t)uc->context_ecx); - st->print(", EDX=" INTPTR_FORMAT, (intptr_t)uc->context_edx); - st->cr(); - st->print( "ESP=" INTPTR_FORMAT, (intptr_t)uc->context_esp); - st->print(", EBP=" INTPTR_FORMAT, (intptr_t)uc->context_ebp); - st->print(", ESI=" INTPTR_FORMAT, (intptr_t)uc->context_esi); - st->print(", EDI=" INTPTR_FORMAT, (intptr_t)uc->context_edi); - st->cr(); - st->print( "EIP=" INTPTR_FORMAT, (intptr_t)uc->context_eip); - st->print(", EFLAGS=" INTPTR_FORMAT, (intptr_t)uc->context_eflags); -#endif // AMD64 st->cr(); st->cr(); } void os::print_register_info(outputStream *st, const void *context, int& continuation) { - const int register_count = AMD64_ONLY(16) NOT_AMD64(8); + const int register_count = 16; int n = continuation; assert(n >= 0 && n <= register_count, "Invalid continuation value"); if (context == nullptr || n == register_count) { @@ -856,7 +651,6 @@ void os::print_register_info(outputStream *st, const void *context, int& continu continuation = n + 1; # define CASE_PRINT_REG(n, str, id) case n: st->print(str); print_location(st, uc->context_##id); switch (n) { -#ifdef AMD64 CASE_PRINT_REG( 0, "RAX=", rax); break; CASE_PRINT_REG( 1, "RBX=", rbx); break; 
CASE_PRINT_REG( 2, "RCX=", rcx); break; @@ -873,16 +667,6 @@ void os::print_register_info(outputStream *st, const void *context, int& continu CASE_PRINT_REG(13, "R13=", r13); break; CASE_PRINT_REG(14, "R14=", r14); break; CASE_PRINT_REG(15, "R15=", r15); break; -#else - CASE_PRINT_REG(0, "EAX=", eax); break; - CASE_PRINT_REG(1, "EBX=", ebx); break; - CASE_PRINT_REG(2, "ECX=", ecx); break; - CASE_PRINT_REG(3, "EDX=", edx); break; - CASE_PRINT_REG(4, "ESP=", esp); break; - CASE_PRINT_REG(5, "EBP=", ebp); break; - CASE_PRINT_REG(6, "ESI=", esi); break; - CASE_PRINT_REG(7, "EDI=", edi); break; -#endif // AMD64 } # undef CASE_PRINT_REG ++n; @@ -890,11 +674,7 @@ void os::print_register_info(outputStream *st, const void *context, int& continu } void os::setup_fpu() { -#ifndef AMD64 - address fpu_cntrl = StubRoutines::addr_fpu_cntrl_wrd_std(); - __asm__ volatile ( "fldcw (%0)" : - : "r" (fpu_cntrl) : "memory"); -#endif // !AMD64 + // Nothing to do. } #ifndef PRODUCT diff --git a/src/hotspot/os_cpu/bsd_x86/os_bsd_x86.inline.hpp b/src/hotspot/os_cpu/bsd_x86/os_bsd_x86.inline.hpp index 5398a642d8460..bcaece10070c3 100644 --- a/src/hotspot/os_cpu/bsd_x86/os_bsd_x86.inline.hpp +++ b/src/hotspot/os_cpu/bsd_x86/os_bsd_x86.inline.hpp @@ -39,18 +39,11 @@ inline size_t os::cds_core_region_alignment() { // See http://www.technovelty.org/code/c/reading-rdtsc.htl for details inline jlong os::rdtsc() { -#ifndef AMD64 - // 64 bit result in edx:eax - uint64_t res; - __asm__ __volatile__ ("rdtsc" : "=A" (res)); - return (jlong)res; -#else uint64_t res; uint32_t ts1, ts2; __asm__ __volatile__ ("rdtsc" : "=a" (ts1), "=d" (ts2)); res = ((uint64_t)ts1 | (uint64_t)ts2 << 32); return (jlong)res; -#endif // AMD64 } #endif // OS_CPU_BSD_X86_OS_BSD_X86_INLINE_HPP diff --git a/src/hotspot/os_cpu/bsd_x86/prefetch_bsd_x86.inline.hpp b/src/hotspot/os_cpu/bsd_x86/prefetch_bsd_x86.inline.hpp index cb0db2f360c76..fdd1d5cf86f05 100644 --- a/src/hotspot/os_cpu/bsd_x86/prefetch_bsd_x86.inline.hpp +++ 
b/src/hotspot/os_cpu/bsd_x86/prefetch_bsd_x86.inline.hpp @@ -29,19 +29,13 @@ inline void Prefetch::read (const void *loc, intx interval) { -#ifdef AMD64 __asm__ ("prefetcht0 (%0,%1,1)" : : "r" (loc), "r" (interval)); -#endif // AMD64 } inline void Prefetch::write(void *loc, intx interval) { -#ifdef AMD64 - // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. // __asm__ ("prefetchw (%0,%1,1)" : : "r" (loc), "r" (interval)); __asm__ ("prefetcht0 (%0,%1,1)" : : "r" (loc), "r" (interval)); - -#endif // AMD64 } #endif // OS_CPU_BSD_X86_PREFETCH_BSD_X86_INLINE_HPP diff --git a/src/hotspot/os_cpu/linux_aarch64/os_linux_aarch64.cpp b/src/hotspot/os_cpu/linux_aarch64/os_linux_aarch64.cpp index a7ec163f78553..2cb79269675c8 100644 --- a/src/hotspot/os_cpu/linux_aarch64/os_linux_aarch64.cpp +++ b/src/hotspot/os_cpu/linux_aarch64/os_linux_aarch64.cpp @@ -316,13 +316,6 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, void os::Linux::init_thread_fpu_state(void) { } -int os::Linux::get_fpu_control_word(void) { - return 0; -} - -void os::Linux::set_fpu_control_word(int fpu_control) { -} - //////////////////////////////////////////////////////////////////////////////// // thread stack diff --git a/src/hotspot/os_cpu/linux_arm/os_linux_arm.cpp b/src/hotspot/os_cpu/linux_arm/os_linux_arm.cpp index 861d0d20153f7..81bb0a2315564 100644 --- a/src/hotspot/os_cpu/linux_arm/os_linux_arm.cpp +++ b/src/hotspot/os_cpu/linux_arm/os_linux_arm.cpp @@ -425,14 +425,6 @@ void os::Linux::init_thread_fpu_state(void) { os::setup_fpu(); } -int os::Linux::get_fpu_control_word(void) { - return 0; -} - -void os::Linux::set_fpu_control_word(int fpu_control) { - // Nothing to do -} - void os::setup_fpu() { #if !defined(__SOFTFP__) && defined(__VFP_FP__) // Turn on IEEE-754 compliant VFP mode diff --git a/src/hotspot/os_cpu/linux_ppc/os_linux_ppc.cpp b/src/hotspot/os_cpu/linux_ppc/os_linux_ppc.cpp index f3f9a3a88df67..c43a942e20c57 100644 --- 
a/src/hotspot/os_cpu/linux_ppc/os_linux_ppc.cpp +++ b/src/hotspot/os_cpu/linux_ppc/os_linux_ppc.cpp @@ -419,16 +419,6 @@ void os::Linux::init_thread_fpu_state(void) { __asm__ __volatile__ ("mtfsfi 6,0"); } -int os::Linux::get_fpu_control_word(void) { - // x86 has problems with FPU precision after pthread_cond_timedwait(). - // nothing to do on ppc64. - return 0; -} - -void os::Linux::set_fpu_control_word(int fpu_control) { - // x86 has problems with FPU precision after pthread_cond_timedwait(). - // nothing to do on ppc64. -} //////////////////////////////////////////////////////////////////////////////// // thread stack diff --git a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp index a00659f37cb42..f15870b704d0a 100644 --- a/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp +++ b/src/hotspot/os_cpu/linux_riscv/os_linux_riscv.cpp @@ -304,12 +304,6 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, void os::Linux::init_thread_fpu_state(void) { } -int os::Linux::get_fpu_control_word(void) { - return 0; -} - -void os::Linux::set_fpu_control_word(int fpu_control) { -} //////////////////////////////////////////////////////////////////////////////// // thread stack diff --git a/src/hotspot/os_cpu/linux_s390/os_linux_s390.cpp b/src/hotspot/os_cpu/linux_s390/os_linux_s390.cpp index d599359d52903..57a94d7c730e1 100644 --- a/src/hotspot/os_cpu/linux_s390/os_linux_s390.cpp +++ b/src/hotspot/os_cpu/linux_s390/os_linux_s390.cpp @@ -375,15 +375,6 @@ void os::Linux::init_thread_fpu_state(void) { // Nothing to do on z/Architecture. } -int os::Linux::get_fpu_control_word(void) { - // Nothing to do on z/Architecture. - return 0; -} - -void os::Linux::set_fpu_control_word(int fpu_control) { - // Nothing to do on z/Architecture. 
-} - //////////////////////////////////////////////////////////////////////////////// // thread stack diff --git a/src/hotspot/os_cpu/linux_x86/atomic_linux_x86.hpp b/src/hotspot/os_cpu/linux_x86/atomic_linux_x86.hpp index 0156546ba9b77..b7d42f4b4fb58 100644 --- a/src/hotspot/os_cpu/linux_x86/atomic_linux_x86.hpp +++ b/src/hotspot/os_cpu/linux_x86/atomic_linux_x86.hpp @@ -93,8 +93,6 @@ inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest, return exchange_value; } -#ifdef AMD64 - template<> template inline D Atomic::PlatformAdd<8>::fetch_then_add(D volatile* dest, I add_value, @@ -135,51 +133,6 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest, return exchange_value; } -#else // !AMD64 - -extern "C" { - // defined in linux_x86.s - int64_t _Atomic_cmpxchg_long(int64_t, volatile int64_t*, int64_t); - void _Atomic_move_long(const volatile int64_t* src, volatile int64_t* dst); -} - -template<> -template -inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest, - T compare_value, - T exchange_value, - atomic_memory_order order) const { - STATIC_ASSERT(8 == sizeof(T)); - return cmpxchg_using_helper(_Atomic_cmpxchg_long, dest, compare_value, exchange_value); -} - -// No direct support for 8-byte xchg; emulate using cmpxchg. -template<> -struct Atomic::PlatformXchg<8> : Atomic::XchgUsingCmpxchg<8> {}; - -// No direct support for 8-byte add; emulate using cmpxchg. 
-template<> -struct Atomic::PlatformAdd<8> : Atomic::AddUsingCmpxchg<8> {}; - -template<> -template -inline T Atomic::PlatformLoad<8>::operator()(T const volatile* src) const { - STATIC_ASSERT(8 == sizeof(T)); - volatile int64_t dest; - _Atomic_move_long(reinterpret_cast(src), reinterpret_cast(&dest)); - return PrimitiveConversions::cast(dest); -} - -template<> -template -inline void Atomic::PlatformStore<8>::operator()(T volatile* dest, - T store_value) const { - STATIC_ASSERT(8 == sizeof(T)); - _Atomic_move_long(reinterpret_cast(&store_value), reinterpret_cast(dest)); -} - -#endif // AMD64 - template<> struct Atomic::PlatformOrderedStore<1, RELEASE_X_FENCE> { @@ -216,7 +169,6 @@ struct Atomic::PlatformOrderedStore<4, RELEASE_X_FENCE> } }; -#ifdef AMD64 template<> struct Atomic::PlatformOrderedStore<8, RELEASE_X_FENCE> { @@ -228,6 +180,5 @@ struct Atomic::PlatformOrderedStore<8, RELEASE_X_FENCE> : "memory"); } }; -#endif // AMD64 #endif // OS_CPU_LINUX_X86_ATOMIC_LINUX_X86_HPP diff --git a/src/hotspot/os_cpu/linux_x86/globals_linux_x86.hpp b/src/hotspot/os_cpu/linux_x86/globals_linux_x86.hpp index 97a5732d00a17..ef21b2b9157cd 100644 --- a/src/hotspot/os_cpu/linux_x86/globals_linux_x86.hpp +++ b/src/hotspot/os_cpu/linux_x86/globals_linux_x86.hpp @@ -28,24 +28,9 @@ // Sets the default values for platform dependent flags used by the runtime system. // (see globals.hpp) -#ifdef AMD64 define_pd_global(intx, CompilerThreadStackSize, 1024); define_pd_global(intx, ThreadStackSize, 1024); // 0 => use system default define_pd_global(intx, VMThreadStackSize, 1024); -#else -// Some tests in debug VM mode run out of compile thread stack. -// Observed on some x86_32 VarHandles tests during escape analysis. -#ifdef ASSERT -define_pd_global(intx, CompilerThreadStackSize, 768); -#else -define_pd_global(intx, CompilerThreadStackSize, 512); -#endif -// ThreadStackSize 320 allows a couple of test cases to run while -// keeping the number of threads that can be created high. 
System -// default ThreadStackSize appears to be 512 which is too big. -define_pd_global(intx, ThreadStackSize, 320); -define_pd_global(intx, VMThreadStackSize, 512); -#endif // AMD64 define_pd_global(size_t, JVMInvokeMethodSlack, 8192); diff --git a/src/hotspot/os_cpu/linux_x86/linux_x86_32.S b/src/hotspot/os_cpu/linux_x86/linux_x86_32.S deleted file mode 100644 index 43a9a38e57f1d..0000000000000 --- a/src/hotspot/os_cpu/linux_x86/linux_x86_32.S +++ /dev/null @@ -1,518 +0,0 @@ -# -# Copyright (c) 2004, 2024, Oracle and/or its affiliates. All rights reserved. -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -# -# This code is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License version 2 only, as -# published by the Free Software Foundation. -# -# This code is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -# version 2 for more details (a copy is included in the LICENSE file that -# accompanied this code). -# -# You should have received a copy of the GNU General Public License version -# 2 along with this work; if not, write to the Free Software Foundation, -# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -# -# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -# or visit www.oracle.com if you need additional information or have any -# questions. -# - -#include "defs.S.inc" - - # NOTE WELL! The _Copy functions are called directly - # from server-compiler-generated code via CallLeafNoFP, - # which means that they *must* either not use floating - # point or use it in the same manner as does the server - # compiler. 
- - .text - - .p2align 4,,15 -DECLARE_FUNC(SpinPause): - rep - nop - movl $1, %eax - ret - - # Support for void Copy::arrayof_conjoint_bytes(void* from, - # void* to, - # size_t count) - # - .p2align 4,,15 -DECLARE_FUNC(_Copy_arrayof_conjoint_bytes): - pushl %esi - movl 4+12(%esp),%ecx # count - pushl %edi - movl 8+ 4(%esp),%esi # from - movl 8+ 8(%esp),%edi # to - cmpl %esi,%edi - leal -1(%esi,%ecx),%eax # from + count - 1 - jbe acb_CopyRight - cmpl %eax,%edi - jbe acb_CopyLeft - # copy from low to high -acb_CopyRight: - cmpl $3,%ecx - jbe 5f -1: movl %ecx,%eax - shrl $2,%ecx - jz 4f - cmpl $32,%ecx - ja 3f - # copy aligned dwords - subl %esi,%edi - .p2align 4,,15 -2: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - addl $4,%esi - subl $1,%ecx - jnz 2b - addl %esi,%edi - jmp 4f - # copy aligned dwords -3: rep; smovl -4: movl %eax,%ecx -5: andl $3,%ecx - jz 7f - # copy suffix - xorl %eax,%eax -6: movb (%esi,%eax,1),%dl - movb %dl,(%edi,%eax,1) - addl $1,%eax - subl $1,%ecx - jnz 6b -7: popl %edi - popl %esi - ret -acb_CopyLeft: - std - leal -4(%edi,%ecx),%edi # to + count - 4 - movl %eax,%esi # from + count - 1 - movl %ecx,%eax - subl $3,%esi # from + count - 4 - cmpl $3,%ecx - jbe 5f -1: shrl $2,%ecx - jz 4f - cmpl $32,%ecx - jbe 2f # <= 32 dwords - rep; smovl - jmp 4f - .space 8 -2: subl %esi,%edi - .p2align 4,,15 -3: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - subl $4,%esi - subl $1,%ecx - jnz 3b - addl %esi,%edi -4: movl %eax,%ecx -5: andl $3,%ecx - jz 7f - subl %esi,%edi - addl $3,%esi -6: movb (%esi),%dl - movb %dl,(%edi,%esi,1) - subl $1,%esi - subl $1,%ecx - jnz 6b -7: cld - popl %edi - popl %esi - ret - - # Support for void Copy::conjoint_jshorts_atomic(void* from, - # void* to, - # size_t count) - .p2align 4,,15 -DECLARE_FUNC(_Copy_conjoint_jshorts_atomic): - pushl %esi - movl 4+12(%esp),%ecx # count - pushl %edi - movl 8+ 4(%esp),%esi # from - movl 8+ 8(%esp),%edi # to - cmpl %esi,%edi - leal -2(%esi,%ecx,2),%eax # from + count*2 - 2 - jbe cs_CopyRight - 
cmpl %eax,%edi - jbe cs_CopyLeft - # copy from low to high -cs_CopyRight: - # align source address at dword address boundary - movl %esi,%eax # original from - andl $3,%eax # either 0 or 2 - jz 1f # no prefix - # copy prefix - subl $1,%ecx - jl 5f # zero count - movw (%esi),%dx - movw %dx,(%edi) - addl %eax,%esi # %eax == 2 - addl %eax,%edi -1: movl %ecx,%eax # word count less prefix - sarl %ecx # dword count - jz 4f # no dwords to move - cmpl $32,%ecx - jbe 2f # <= 32 dwords - # copy aligned dwords - rep; smovl - jmp 4f - # copy aligned dwords -2: subl %esi,%edi - .p2align 4,,15 -3: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - addl $4,%esi - subl $1,%ecx - jnz 3b - addl %esi,%edi -4: andl $1,%eax # suffix count - jz 5f # no suffix - # copy suffix - movw (%esi),%dx - movw %dx,(%edi) -5: popl %edi - popl %esi - ret - # copy from high to low -cs_CopyLeft: - std - leal -4(%edi,%ecx,2),%edi # to + count*2 - 4 - movl %eax,%esi # from + count*2 - 2 - movl %ecx,%eax - subl $2,%esi # from + count*2 - 4 -1: sarl %ecx # dword count - jz 4f # no dwords to move - cmpl $32,%ecx - ja 3f # > 32 dwords - subl %esi,%edi - .p2align 4,,15 -2: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - subl $4,%esi - subl $1,%ecx - jnz 2b - addl %esi,%edi - jmp 4f -3: rep; smovl -4: andl $1,%eax # suffix count - jz 5f # no suffix - # copy suffix - addl $2,%esi - addl $2,%edi - movw (%esi),%dx - movw %dx,(%edi) -5: cld - popl %edi - popl %esi - ret - - # Support for void Copy::arrayof_conjoint_jshorts(void* from, - # void* to, - # size_t count) - .p2align 4,,15 -DECLARE_FUNC(_Copy_arrayof_conjoint_jshorts): - pushl %esi - movl 4+12(%esp),%ecx # count - pushl %edi - movl 8+ 4(%esp),%esi # from - movl 8+ 8(%esp),%edi # to - cmpl %esi,%edi - leal -2(%esi,%ecx,2),%eax # from + count*2 - 2 - jbe acs_CopyRight - cmpl %eax,%edi - jbe acs_CopyLeft -acs_CopyRight: - movl %ecx,%eax # word count - sarl %ecx # dword count - jz 4f # no dwords to move - cmpl $32,%ecx - jbe 2f # <= 32 dwords - # copy aligned dwords 
- rep; smovl - jmp 4f - # copy aligned dwords - .space 5 -2: subl %esi,%edi - .p2align 4,,15 -3: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - addl $4,%esi - subl $1,%ecx - jnz 3b - addl %esi,%edi -4: andl $1,%eax # suffix count - jz 5f # no suffix - # copy suffix - movw (%esi),%dx - movw %dx,(%edi) -5: popl %edi - popl %esi - ret -acs_CopyLeft: - std - leal -4(%edi,%ecx,2),%edi # to + count*2 - 4 - movl %eax,%esi # from + count*2 - 2 - movl %ecx,%eax - subl $2,%esi # from + count*2 - 4 - sarl %ecx # dword count - jz 4f # no dwords to move - cmpl $32,%ecx - ja 3f # > 32 dwords - subl %esi,%edi - .p2align 4,,15 -2: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - subl $4,%esi - subl $1,%ecx - jnz 2b - addl %esi,%edi - jmp 4f -3: rep; smovl -4: andl $1,%eax # suffix count - jz 5f # no suffix - # copy suffix - addl $2,%esi - addl $2,%edi - movw (%esi),%dx - movw %dx,(%edi) -5: cld - popl %edi - popl %esi - ret - - # Support for void Copy::conjoint_jints_atomic(void* from, - # void* to, - # size_t count) - # Equivalent to - # arrayof_conjoint_jints - .p2align 4,,15 -DECLARE_FUNC(_Copy_conjoint_jints_atomic): -DECLARE_FUNC(_Copy_arrayof_conjoint_jints): - pushl %esi - movl 4+12(%esp),%ecx # count - pushl %edi - movl 8+ 4(%esp),%esi # from - movl 8+ 8(%esp),%edi # to - cmpl %esi,%edi - leal -4(%esi,%ecx,4),%eax # from + count*4 - 4 - jbe ci_CopyRight - cmpl %eax,%edi - jbe ci_CopyLeft -ci_CopyRight: - cmpl $32,%ecx - jbe 2f # <= 32 dwords - rep; smovl - popl %edi - popl %esi - ret - .space 10 -2: subl %esi,%edi - jmp 4f - .p2align 4,,15 -3: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - addl $4,%esi -4: subl $1,%ecx - jge 3b - popl %edi - popl %esi - ret -ci_CopyLeft: - std - leal -4(%edi,%ecx,4),%edi # to + count*4 - 4 - cmpl $32,%ecx - ja 4f # > 32 dwords - subl %eax,%edi # eax == from + count*4 - 4 - jmp 3f - .p2align 4,,15 -2: movl (%eax),%edx - movl %edx,(%edi,%eax,1) - subl $4,%eax -3: subl $1,%ecx - jge 2b - cld - popl %edi - popl %esi - ret -4: movl %eax,%esi # from + 
count*4 - 4 - rep; smovl - cld - popl %edi - popl %esi - ret - - # Support for void Copy::conjoint_jlongs_atomic(jlong* from, - # jlong* to, - # size_t count) - # - # 32-bit - # - # count treated as signed - /* - # - # if (from > to) { - # while (--count >= 0) { - # *to++ = *from++; - # } - # } else { - # while (--count >= 0) { - # to[count] = from[count]; - # } - # } - */ - .p2align 4,,15 -DECLARE_FUNC(_Copy_conjoint_jlongs_atomic): - movl 4+8(%esp),%ecx # count - movl 4+0(%esp),%eax # from - movl 4+4(%esp),%edx # to - cmpl %eax,%edx - jae cla_CopyLeft -cla_CopyRight: - subl %eax,%edx - jmp 2f - .p2align 4,,15 -1: fildll (%eax) - fistpll (%edx,%eax,1) - addl $8,%eax -2: subl $1,%ecx - jge 1b - ret - .p2align 4,,15 -3: fildll (%eax,%ecx,8) - fistpll (%edx,%ecx,8) -cla_CopyLeft: - subl $1,%ecx - jge 3b - ret - - # Support for void Copy::arrayof_conjoint_jshorts(void* from, - # void* to, - # size_t count) - .p2align 4,,15 -DECLARE_FUNC(_mmx_Copy_arrayof_conjoint_jshorts): - pushl %esi - movl 4+12(%esp),%ecx - pushl %edi - movl 8+ 4(%esp),%esi - movl 8+ 8(%esp),%edi - cmpl %esi,%edi - leal -2(%esi,%ecx,2),%eax - jbe mmx_acs_CopyRight - cmpl %eax,%edi - jbe mmx_acs_CopyLeft -mmx_acs_CopyRight: - movl %ecx,%eax - sarl %ecx - je 5f - cmpl $33,%ecx - jae 3f -1: subl %esi,%edi - .p2align 4,,15 -2: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - addl $4,%esi - subl $1,%ecx - jnz 2b - addl %esi,%edi - jmp 5f -3: smovl # align to 8 bytes, we know we are 4 byte aligned to start - subl $1,%ecx -4: .p2align 4,,15 - movq 0(%esi),%mm0 - addl $64,%edi - movq 8(%esi),%mm1 - subl $16,%ecx - movq 16(%esi),%mm2 - movq %mm0,-64(%edi) - movq 24(%esi),%mm0 - movq %mm1,-56(%edi) - movq 32(%esi),%mm1 - movq %mm2,-48(%edi) - movq 40(%esi),%mm2 - movq %mm0,-40(%edi) - movq 48(%esi),%mm0 - movq %mm1,-32(%edi) - movq 56(%esi),%mm1 - movq %mm2,-24(%edi) - movq %mm0,-16(%edi) - addl $64,%esi - movq %mm1,-8(%edi) - cmpl $16,%ecx - jge 4b - emms - testl %ecx,%ecx - ja 1b -5: andl $1,%eax - je 7f -6: 
movw (%esi),%dx - movw %dx,(%edi) -7: popl %edi - popl %esi - ret -mmx_acs_CopyLeft: - std - leal -4(%edi,%ecx,2),%edi - movl %eax,%esi - movl %ecx,%eax - subl $2,%esi - sarl %ecx - je 4f - cmpl $32,%ecx - ja 3f - subl %esi,%edi - .p2align 4,,15 -2: movl (%esi),%edx - movl %edx,(%edi,%esi,1) - subl $4,%esi - subl $1,%ecx - jnz 2b - addl %esi,%edi - jmp 4f -3: rep; smovl -4: andl $1,%eax - je 6f - addl $2,%esi - addl $2,%edi -5: movw (%esi),%dx - movw %dx,(%edi) -6: cld - popl %edi - popl %esi - ret - - - # Support for jlong Atomic::cmpxchg(volatile jlong* dest, - # jlong compare_value, - # jlong exchange_value) - # - .p2align 4,,15 -DECLARE_FUNC(_Atomic_cmpxchg_long): - # 8(%esp) : return PC - pushl %ebx # 4(%esp) : old %ebx - pushl %edi # 0(%esp) : old %edi - movl 12(%esp), %ebx # 12(%esp) : exchange_value (low) - movl 16(%esp), %ecx # 16(%esp) : exchange_value (high) - movl 24(%esp), %eax # 24(%esp) : compare_value (low) - movl 28(%esp), %edx # 28(%esp) : compare_value (high) - movl 20(%esp), %edi # 20(%esp) : dest - lock cmpxchg8b (%edi) - popl %edi - popl %ebx - ret - - - # Support for jlong Atomic::load and Atomic::store. 
- # void _Atomic_move_long(const volatile jlong* src, volatile jlong* dst) - .p2align 4,,15 -DECLARE_FUNC(_Atomic_move_long): - movl 4(%esp), %eax # src - fildll (%eax) - movl 8(%esp), %eax # dest - fistpll (%eax) - ret diff --git a/src/hotspot/os_cpu/linux_x86/orderAccess_linux_x86.hpp b/src/hotspot/os_cpu/linux_x86/orderAccess_linux_x86.hpp index a22f547c071be..1902d527ea66d 100644 --- a/src/hotspot/os_cpu/linux_x86/orderAccess_linux_x86.hpp +++ b/src/hotspot/os_cpu/linux_x86/orderAccess_linux_x86.hpp @@ -47,11 +47,7 @@ inline void OrderAccess::release() { compiler_barrier(); } inline void OrderAccess::fence() { // always use locked addl since mfence is sometimes expensive -#ifdef AMD64 __asm__ volatile ("lock; addl $0,0(%%rsp)" : : : "cc", "memory"); -#else - __asm__ volatile ("lock; addl $0,0(%%esp)" : : : "cc", "memory"); -#endif compiler_barrier(); } @@ -60,13 +56,7 @@ inline void OrderAccess::cross_modify_fence_impl() { __asm__ volatile (".byte 0x0f, 0x01, 0xe8\n\t" : : :); //serialize } else { int idx = 0; -#ifdef AMD64 __asm__ volatile ("cpuid " : "+a" (idx) : : "ebx", "ecx", "edx", "memory"); -#else - // On some x86 systems EBX is a reserved register that cannot be - // clobbered, so we must protect it around the CPUID. 
- __asm__ volatile ("xchg %%esi, %%ebx; cpuid; xchg %%esi, %%ebx " : "+a" (idx) : : "esi", "ecx", "edx", "memory"); -#endif } } diff --git a/src/hotspot/os_cpu/linux_x86/os_linux_x86.cpp b/src/hotspot/os_cpu/linux_x86/os_linux_x86.cpp index e357747bfea46..9bac5ad59a2f0 100644 --- a/src/hotspot/os_cpu/linux_x86/os_linux_x86.cpp +++ b/src/hotspot/os_cpu/linux_x86/os_linux_x86.cpp @@ -73,23 +73,12 @@ # include # include # include -#ifndef AMD64 -# include -#endif -#ifdef AMD64 #define REG_SP REG_RSP #define REG_PC REG_RIP #define REG_FP REG_RBP #define SPELL_REG_SP "rsp" #define SPELL_REG_FP "rbp" -#else -#define REG_SP REG_UESP -#define REG_PC REG_EIP -#define REG_FP REG_EBP -#define SPELL_REG_SP "esp" -#define SPELL_REG_FP "ebp" -#endif // AMD64 address os::current_stack_pointer() { return (address)__builtin_frame_address(0); @@ -248,7 +237,7 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, stub = VM_Version::cpuinfo_cont_addr(); } -#if !defined(PRODUCT) && defined(_LP64) +#if !defined(PRODUCT) if ((sig == SIGSEGV) && VM_Version::is_cpuinfo_segv_addr_apx(pc)) { // Verify that OS save/restore APX registers. stub = VM_Version::cpuinfo_cont_addr_apx(); @@ -276,9 +265,7 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, } stub = SharedRuntime::handle_unsafe_access(thread, next_pc); } - } else -#ifdef AMD64 - if (sig == SIGFPE && + } else if (sig == SIGFPE && (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { stub = SharedRuntime:: @@ -286,33 +273,6 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, pc, SharedRuntime:: IMPLICIT_DIVIDE_BY_ZERO); -#else - if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) { - // HACK: si_code does not work on linux 2.2.12-20!!! - int op = pc[0]; - if (op == 0xDB) { - // FIST - // TODO: The encoding of D2I in x86_32.ad can cause an exception - // prior to the fist instruction if there was an invalid operation - // pending. 
We want to dismiss that exception. From the win_32 - // side it also seems that if it really was the fist causing - // the exception that we do the d2i by hand with different - // rounding. Seems kind of weird. - // NOTE: that we take the exception at the NEXT floating point instruction. - assert(pc[0] == 0xDB, "not a FIST opcode"); - assert(pc[1] == 0x14, "not a FIST opcode"); - assert(pc[2] == 0x24, "not a FIST opcode"); - return true; - } else if (op == 0xF7) { - // IDIV - stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); - } else { - // TODO: handle more cases if we are using other x86 instructions - // that can generate SIGFPE signal on linux. - tty->print_cr("unknown opcode 0x%X with SIGFPE.", op); - fatal("please update this code."); - } -#endif // AMD64 } else if (sig == SIGSEGV && MacroAssembler::uses_implicit_null_check(info->si_addr)) { // Determination of interpreter/vtable stub/compiled code null exception @@ -339,81 +299,6 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, } } -#ifndef AMD64 - // Execution protection violation - // - // This should be kept as the last step in the triage. We don't - // have a dedicated trap number for a no-execute fault, so be - // conservative and allow other handlers the first shot. - // - // Note: We don't test that info->si_code == SEGV_ACCERR here. - // this si_code is so generic that it is almost meaningless; and - // the si_code for this condition may change in the future. - // Furthermore, a false-positive should be harmless. - if (UnguardOnExecutionViolation > 0 && - stub == nullptr && - (sig == SIGSEGV || sig == SIGBUS) && - uc->uc_mcontext.gregs[REG_TRAPNO] == trap_page_fault) { - size_t page_size = os::vm_page_size(); - address addr = (address) info->si_addr; - address pc = os::Posix::ucontext_get_pc(uc); - // Make sure the pc and the faulting address are sane. 
- // - // If an instruction spans a page boundary, and the page containing - // the beginning of the instruction is executable but the following - // page is not, the pc and the faulting address might be slightly - // different - we still want to unguard the 2nd page in this case. - // - // 15 bytes seems to be a (very) safe value for max instruction size. - bool pc_is_near_addr = - (pointer_delta((void*) addr, (void*) pc, sizeof(char)) < 15); - bool instr_spans_page_boundary = - (align_down((intptr_t) pc ^ (intptr_t) addr, - (intptr_t) page_size) > 0); - - if (pc == addr || (pc_is_near_addr && instr_spans_page_boundary)) { - static volatile address last_addr = - (address) os::non_memory_address_word(); - - // In conservative mode, don't unguard unless the address is in the VM - if (addr != last_addr && - (UnguardOnExecutionViolation > 1 || os::address_is_in_vm(addr))) { - - // Set memory to RWX and retry - address page_start = align_down(addr, page_size); - bool res = os::protect_memory((char*) page_start, page_size, - os::MEM_PROT_RWX); - - log_debug(os)("Execution protection violation " - "at " INTPTR_FORMAT - ", unguarding " INTPTR_FORMAT ": %s, errno=%d", p2i(addr), - p2i(page_start), (res ? "success" : "failed"), errno); - stub = pc; - - // Set last_addr so if we fault again at the same address, we don't end - // up in an endless loop. - // - // There are two potential complications here. Two threads trapping at - // the same address at the same time could cause one of the threads to - // think it already unguarded, and abort the VM. Likely very rare. - // - // The other race involves two threads alternately trapping at - // different addresses and failing to unguard the page, resulting in - // an endless loop. This condition is probably even more unlikely than - // the first. 
- // - // Although both cases could be avoided by using locks or thread local - // last_addr, these solutions are unnecessary complication: this - // handler is a best-effort safety net, not a complete solution. It is - // disabled by default and should only be used as a workaround in case - // we missed any no-execute-unsafe VM code. - - last_addr = addr; - } - } - } -#endif // !AMD64 - if (stub != nullptr) { // save all thread context in case we need to restore it if (thread != nullptr) thread->set_saved_exception_pc(pc); @@ -426,26 +311,7 @@ bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info, } void os::Linux::init_thread_fpu_state(void) { -#ifndef AMD64 - // set fpu to 53 bit precision - set_fpu_control_word(0x27f); -#endif // !AMD64 -} - -int os::Linux::get_fpu_control_word(void) { -#ifdef AMD64 - return 0; -#else - int fpu_control; - _FPU_GETCW(fpu_control); - return fpu_control & 0xffff; -#endif // AMD64 -} - -void os::Linux::set_fpu_control_word(int fpu_control) { -#ifndef AMD64 - _FPU_SETCW(fpu_control); -#endif // !AMD64 + // Nothing to do. } juint os::cpu_microcode_revision() { @@ -491,21 +357,12 @@ juint os::cpu_microcode_revision() { // HotSpot guard pages is added later. size_t os::_compiler_thread_min_stack_allowed = 48 * K; size_t os::_java_thread_min_stack_allowed = 40 * K; -#ifdef _LP64 size_t os::_vm_internal_thread_min_stack_allowed = 64 * K; -#else -size_t os::_vm_internal_thread_min_stack_allowed = (48 DEBUG_ONLY(+ 4)) * K; -#endif // _LP64 // return default stack size for thr_type size_t os::Posix::default_stack_size(os::ThreadType thr_type) { // default stack size (compiler thread needs larger stack) -#ifdef AMD64 - size_t s = (thr_type == os::compiler_thread ? 4 * M : 1 * M); -#else - size_t s = (thr_type == os::compiler_thread ? 2 * M : 512 * K); -#endif // AMD64 - return s; + return (thr_type == os::compiler_thread ? 
4 * M : 1 * M); } ///////////////////////////////////////////////////////////////////////////// @@ -517,7 +374,6 @@ void os::print_context(outputStream *st, const void *context) { const ucontext_t *uc = (const ucontext_t*)context; st->print_cr("Registers:"); -#ifdef AMD64 st->print( "RAX=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[REG_RAX]); st->print(", RBX=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[REG_RBX]); st->print(", RCX=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[REG_RCX]); @@ -559,27 +415,12 @@ void os::print_context(outputStream *st, const void *context) { } st->print(" MXCSR=" UINT32_FORMAT_X_0, uc->uc_mcontext.fpregs->mxcsr); } -#else - st->print( "EAX=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_EAX]); - st->print(", EBX=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_EBX]); - st->print(", ECX=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_ECX]); - st->print(", EDX=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_EDX]); - st->cr(); - st->print( "ESP=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_UESP]); - st->print(", EBP=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_EBP]); - st->print(", ESI=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_ESI]); - st->print(", EDI=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_EDI]); - st->cr(); - st->print( "EIP=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_EIP]); - st->print(", EFLAGS=" INTPTR_FORMAT, uc->uc_mcontext.gregs[REG_EFL]); - st->print(", CR2=" UINT64_FORMAT_X_0, (uint64_t)uc->uc_mcontext.cr2); -#endif // AMD64 st->cr(); st->cr(); } void os::print_register_info(outputStream *st, const void *context, int& continuation) { - const int register_count = AMD64_ONLY(16) NOT_AMD64(8); + const int register_count = 16; int n = continuation; assert(n >= 0 && n <= register_count, "Invalid continuation value"); if (context == nullptr || n == register_count) { @@ -592,7 +433,6 @@ void os::print_register_info(outputStream *st, const void *context, int& continu continuation = n + 1; # define CASE_PRINT_REG(n, str, id) case n: st->print(str); 
print_location(st, uc->uc_mcontext.gregs[REG_##id]); switch (n) { -#ifdef AMD64 CASE_PRINT_REG( 0, "RAX=", RAX); break; CASE_PRINT_REG( 1, "RBX=", RBX); break; CASE_PRINT_REG( 2, "RCX=", RCX); break; @@ -609,16 +449,6 @@ void os::print_register_info(outputStream *st, const void *context, int& continu CASE_PRINT_REG(13, "R13=", R13); break; CASE_PRINT_REG(14, "R14=", R14); break; CASE_PRINT_REG(15, "R15=", R15); break; -#else - CASE_PRINT_REG(0, "EAX=", EAX); break; - CASE_PRINT_REG(1, "EBX=", EBX); break; - CASE_PRINT_REG(2, "ECX=", ECX); break; - CASE_PRINT_REG(3, "EDX=", EDX); break; - CASE_PRINT_REG(4, "ESP=", ESP); break; - CASE_PRINT_REG(5, "EBP=", EBP); break; - CASE_PRINT_REG(6, "ESI=", ESI); break; - CASE_PRINT_REG(7, "EDI=", EDI); break; -#endif // AMD64 } # undef CASE_PRINT_REG ++n; @@ -626,18 +456,12 @@ void os::print_register_info(outputStream *st, const void *context, int& continu } void os::setup_fpu() { -#ifndef AMD64 - address fpu_cntrl = StubRoutines::x86::addr_fpu_cntrl_wrd_std(); - __asm__ volatile ( "fldcw (%0)" : - : "r" (fpu_cntrl) : "memory"); -#endif // !AMD64 + // Nothing to do. 
} #ifndef PRODUCT void os::verify_stack_alignment() { -#ifdef AMD64 assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); -#endif } #endif diff --git a/src/hotspot/os_cpu/linux_x86/os_linux_x86.inline.hpp b/src/hotspot/os_cpu/linux_x86/os_linux_x86.inline.hpp index 535f318cfbd6e..19782767504f8 100644 --- a/src/hotspot/os_cpu/linux_x86/os_linux_x86.inline.hpp +++ b/src/hotspot/os_cpu/linux_x86/os_linux_x86.inline.hpp @@ -29,18 +29,11 @@ // See http://www.technovelty.org/code/c/reading-rdtsc.htl for details inline jlong os::rdtsc() { -#ifndef AMD64 - // 64 bit result in edx:eax - uint64_t res; - __asm__ __volatile__ ("rdtsc" : "=A" (res)); - return (jlong)res; -#else uint64_t res; uint32_t ts1, ts2; __asm__ __volatile__ ("rdtsc" : "=a" (ts1), "=d" (ts2)); res = ((uint64_t)ts1 | (uint64_t)ts2 << 32); return (jlong)res; -#endif // AMD64 } #endif // OS_CPU_LINUX_X86_OS_LINUX_X86_INLINE_HPP diff --git a/src/hotspot/os_cpu/linux_x86/prefetch_linux_x86.inline.hpp b/src/hotspot/os_cpu/linux_x86/prefetch_linux_x86.inline.hpp index cf60c2cbd6b17..9b6f118950499 100644 --- a/src/hotspot/os_cpu/linux_x86/prefetch_linux_x86.inline.hpp +++ b/src/hotspot/os_cpu/linux_x86/prefetch_linux_x86.inline.hpp @@ -29,19 +29,13 @@ inline void Prefetch::read (const void *loc, intx interval) { -#ifdef AMD64 __asm__ ("prefetcht0 (%0,%1,1)" : : "r" (loc), "r" (interval)); -#endif // AMD64 } inline void Prefetch::write(void *loc, intx interval) { -#ifdef AMD64 - // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. 
// __asm__ ("prefetchw (%0,%1,1)" : : "r" (loc), "r" (interval)); __asm__ ("prefetcht0 (%0,%1,1)" : : "r" (loc), "r" (interval)); - -#endif // AMD64 } #endif // OS_CPU_LINUX_X86_PREFETCH_LINUX_X86_INLINE_HPP diff --git a/src/hotspot/os_cpu/linux_x86/safefetch_linux_x86_32.S b/src/hotspot/os_cpu/linux_x86/safefetch_linux_x86_32.S deleted file mode 100644 index 73f6cdf38c9cc..0000000000000 --- a/src/hotspot/os_cpu/linux_x86/safefetch_linux_x86_32.S +++ /dev/null @@ -1,41 +0,0 @@ -# -# Copyright (c) 2022 SAP SE. All rights reserved. -# Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved. -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -# -# This code is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License version 2 only, as -# published by the Free Software Foundation. -# -# This code is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -# version 2 for more details (a copy is included in the LICENSE file that -# accompanied this code). -# -# You should have received a copy of the GNU General Public License version -# 2 along with this work; if not, write to the Free Software Foundation, -# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -# -# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -# or visit www.oracle.com if you need additional information or have any -# questions. 
-# - -#include "defs.S.inc" - - .text - - # Support for int SafeFetch32(int* address, int defaultval); - # - # 8(%esp) : default value - # 4(%esp) : crash address - # 0(%esp) : return pc -DECLARE_FUNC(SafeFetch32_impl): - movl 4(%esp),%ecx # load address from stack -DECLARE_FUNC(_SafeFetch32_fault): - movl (%ecx), %eax # load target value, may fault - ret -DECLARE_FUNC(_SafeFetch32_continuation): - movl 8(%esp),%eax # load default value from stack - ret diff --git a/src/hotspot/os_cpu/linux_zero/os_linux_zero.cpp b/src/hotspot/os_cpu/linux_zero/os_linux_zero.cpp index e8d67bcdddc28..b0bcad10150ca 100644 --- a/src/hotspot/os_cpu/linux_zero/os_linux_zero.cpp +++ b/src/hotspot/os_cpu/linux_zero/os_linux_zero.cpp @@ -280,14 +280,6 @@ void os::Linux::init_thread_fpu_state(void) { // Nothing to do } -int os::Linux::get_fpu_control_word() { - ShouldNotCallThis(); - return -1; // silence compile warnings -} - -void os::Linux::set_fpu_control_word(int fpu) { - ShouldNotCallThis(); -} /////////////////////////////////////////////////////////////////////////////// // thread stack diff --git a/src/hotspot/share/adlc/archDesc.cpp b/src/hotspot/share/adlc/archDesc.cpp index f084f506bf587..58cc2a7ec00b4 100644 --- a/src/hotspot/share/adlc/archDesc.cpp +++ b/src/hotspot/share/adlc/archDesc.cpp @@ -756,7 +756,7 @@ bool ArchDesc::check_usage() { callback.do_form_by_name("vecA"); callback.do_form_by_name("vecD"); callback.do_form_by_name("vecX"); -#elif defined(IA32) || defined(AMD64) +#elif defined(AMD64) callback.do_form_by_name("vecS"); callback.do_form_by_name("vecD"); callback.do_form_by_name("vecX"); diff --git a/src/hotspot/share/adlc/dfa.cpp b/src/hotspot/share/adlc/dfa.cpp index 9b3b87e13a90b..9c3d76cba55d5 100644 --- a/src/hotspot/share/adlc/dfa.cpp +++ b/src/hotspot/share/adlc/dfa.cpp @@ -461,7 +461,7 @@ void ArchDesc::buildDFA(FILE* fp) { class dfa_shared_preds { - enum { count = 3 IA32_ONLY( + 1 ) }; + enum { count = 3 }; static bool _found[count]; static const char* 
_type [count]; @@ -572,10 +572,10 @@ class dfa_shared_preds { } }; // shared predicates, _var and _pred entry should be the same length -bool dfa_shared_preds::_found[dfa_shared_preds::count] = { false, false, false IA32_ONLY(COMMA false) }; -const char* dfa_shared_preds::_type [dfa_shared_preds::count] = { "int", "jlong", "intptr_t" IA32_ONLY(COMMA "bool") }; -const char* dfa_shared_preds::_var [dfa_shared_preds::count] = { "_n_get_int__", "_n_get_long__", "_n_get_intptr_t__" IA32_ONLY(COMMA "Compile__current____select_24_bit_instr__") }; -const char* dfa_shared_preds::_pred [dfa_shared_preds::count] = { "n->get_int()", "n->get_long()", "n->get_intptr_t()" IA32_ONLY(COMMA "Compile::current()->select_24_bit_instr()") }; +bool dfa_shared_preds::_found[dfa_shared_preds::count] = { false, false, false }; +const char* dfa_shared_preds::_type [dfa_shared_preds::count] = { "int", "jlong", "intptr_t" }; +const char* dfa_shared_preds::_var [dfa_shared_preds::count] = { "_n_get_int__", "_n_get_long__", "_n_get_intptr_t__" }; +const char* dfa_shared_preds::_pred [dfa_shared_preds::count] = { "n->get_int()", "n->get_long()", "n->get_intptr_t()" }; // Helper method to check whether a node is vector unary operation. 
static bool is_vector_unary_op_name(const char* op_name) { diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp index dfa414ef56484..739cc6261feca 100644 --- a/src/hotspot/share/adlc/formssel.cpp +++ b/src/hotspot/share/adlc/formssel.cpp @@ -4219,9 +4219,7 @@ int MatchRule::is_expensive() const { strcmp(opType,"DecodeNKlass")==0 || strcmp(opType,"FmaD") == 0 || strcmp(opType,"FmaF") == 0 || - strcmp(opType,"RoundDouble")==0 || strcmp(opType,"RoundDoubleMode")==0 || - strcmp(opType,"RoundFloat")==0 || strcmp(opType,"ReverseBytesI")==0 || strcmp(opType,"ReverseBytesL")==0 || strcmp(opType,"ReverseBytesUS")==0 || diff --git a/src/hotspot/share/adlc/output_c.cpp b/src/hotspot/share/adlc/output_c.cpp index cc6ed278b4901..2e9ad70049ab8 100644 --- a/src/hotspot/share/adlc/output_c.cpp +++ b/src/hotspot/share/adlc/output_c.cpp @@ -2350,7 +2350,7 @@ class DefineEmitState { if (strcmp(rep_var,"$Register") == 0) return "as_Register"; if (strcmp(rep_var,"$KRegister") == 0) return "as_KRegister"; if (strcmp(rep_var,"$FloatRegister") == 0) return "as_FloatRegister"; -#if defined(IA32) || defined(AMD64) +#if defined(AMD64) if (strcmp(rep_var,"$XMMRegister") == 0) return "as_XMMRegister"; #endif if (strcmp(rep_var,"$CondRegister") == 0) return "as_ConditionRegister"; diff --git a/src/hotspot/share/c1/c1_Canonicalizer.cpp b/src/hotspot/share/c1/c1_Canonicalizer.cpp index 87657038a4ce0..f97844981ebf3 100644 --- a/src/hotspot/share/c1/c1_Canonicalizer.cpp +++ b/src/hotspot/share/c1/c1_Canonicalizer.cpp @@ -868,7 +868,6 @@ void Canonicalizer::do_Throw (Throw* x) {} void Canonicalizer::do_Base (Base* x) {} void Canonicalizer::do_OsrEntry (OsrEntry* x) {} void Canonicalizer::do_ExceptionObject(ExceptionObject* x) {} -void Canonicalizer::do_RoundFP (RoundFP* x) {} void Canonicalizer::do_UnsafeGet (UnsafeGet* x) {} void Canonicalizer::do_UnsafePut (UnsafePut* x) {} void Canonicalizer::do_UnsafeGetAndSet(UnsafeGetAndSet* x) {} diff --git 
a/src/hotspot/share/c1/c1_Canonicalizer.hpp b/src/hotspot/share/c1/c1_Canonicalizer.hpp index 8c7651256e95d..f1c99d4996c0e 100644 --- a/src/hotspot/share/c1/c1_Canonicalizer.hpp +++ b/src/hotspot/share/c1/c1_Canonicalizer.hpp @@ -88,7 +88,6 @@ class Canonicalizer: InstructionVisitor { virtual void do_Base (Base* x); virtual void do_OsrEntry (OsrEntry* x); virtual void do_ExceptionObject(ExceptionObject* x); - virtual void do_RoundFP (RoundFP* x); virtual void do_UnsafeGet (UnsafeGet* x); virtual void do_UnsafePut (UnsafePut* x); virtual void do_UnsafeGetAndSet(UnsafeGetAndSet* x); diff --git a/src/hotspot/share/c1/c1_CodeStubs.hpp b/src/hotspot/share/c1/c1_CodeStubs.hpp index 9abfa45785bda..72f6c69a71c1d 100644 --- a/src/hotspot/share/c1/c1_CodeStubs.hpp +++ b/src/hotspot/share/c1/c1_CodeStubs.hpp @@ -127,35 +127,6 @@ class CounterOverflowStub: public CodeStub { }; -class ConversionStub: public CodeStub { - private: - Bytecodes::Code _bytecode; - LIR_Opr _input; - LIR_Opr _result; - - static float float_zero; - static double double_zero; - public: - ConversionStub(Bytecodes::Code bytecode, LIR_Opr input, LIR_Opr result) - : _bytecode(bytecode), _input(input), _result(result) { - NOT_IA32( ShouldNotReachHere(); ) // used only on x86-32 - } - - Bytecodes::Code bytecode() { return _bytecode; } - LIR_Opr input() { return _input; } - LIR_Opr result() { return _result; } - - virtual void emit_code(LIR_Assembler* e); - virtual void visit(LIR_OpVisitState* visitor) { - visitor->do_slow_case(); - visitor->do_input(_input); - visitor->do_output(_result); - } -#ifndef PRODUCT - virtual void print_name(outputStream* out) const { out->print("ConversionStub"); } -#endif // PRODUCT -}; - // Throws ArrayIndexOutOfBoundsException by default but can be // configured to throw IndexOutOfBoundsException in constructor diff --git a/src/hotspot/share/c1/c1_Defs.hpp b/src/hotspot/share/c1/c1_Defs.hpp index 0e7b120ef8d6a..ec687f6a905fc 100644 --- a/src/hotspot/share/c1/c1_Defs.hpp +++ 
b/src/hotspot/share/c1/c1_Defs.hpp @@ -45,12 +45,6 @@ enum { }; -// the processor may require explicit rounding operations to implement the strictFP mode -enum { - strict_fp_requires_explicit_rounding = pd_strict_fp_requires_explicit_rounding -}; - - // for debug info: a float value in a register may be saved in double precision by runtime stubs enum { float_saved_as_double = pd_float_saved_as_double diff --git a/src/hotspot/share/c1/c1_FpuStackSim.hpp b/src/hotspot/share/c1/c1_FpuStackSim.hpp deleted file mode 100644 index 3fcf1103eb55b..0000000000000 --- a/src/hotspot/share/c1/c1_FpuStackSim.hpp +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). - * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. 
- * - */ - -#ifndef SHARE_C1_C1_FPUSTACKSIM_HPP -#define SHARE_C1_C1_FPUSTACKSIM_HPP - -#include "c1/c1_FrameMap.hpp" -#include "utilities/macros.hpp" - -// Provides location for forward declaration of this class, which is -// only implemented on Intel -class FpuStackSim; - -#include CPU_HEADER(c1_FpuStackSim) - -#endif // SHARE_C1_C1_FPUSTACKSIM_HPP diff --git a/src/hotspot/share/c1/c1_GraphBuilder.cpp b/src/hotspot/share/c1/c1_GraphBuilder.cpp index 9925c592f6f10..234f26c54b962 100644 --- a/src/hotspot/share/c1/c1_GraphBuilder.cpp +++ b/src/hotspot/share/c1/c1_GraphBuilder.cpp @@ -674,17 +674,6 @@ class MemoryBuffer: public CompilationResourceObj { return load; } - if (strict_fp_requires_explicit_rounding && load->type()->is_float_kind()) { -#ifdef IA32 - if (UseSSE < 2) { - // can't skip load since value might get rounded as a side effect - return load; - } -#else - Unimplemented(); -#endif // IA32 - } - ciField* field = load->field(); Value object = load->obj(); if (field->holder()->is_loaded() && !field->is_volatile()) { @@ -1053,7 +1042,7 @@ void GraphBuilder::store_local(ValueStack* state, Value x, int index) { } } - state->store_local(index, round_fp(x)); + state->store_local(index, x); } @@ -1204,11 +1193,7 @@ void GraphBuilder::stack_op(Bytecodes::Code code) { void GraphBuilder::arithmetic_op(ValueType* type, Bytecodes::Code code, ValueStack* state_before) { Value y = pop(type); Value x = pop(type); - Value res = new ArithmeticOp(code, x, y, state_before); - // Note: currently single-precision floating-point rounding on Intel is handled at the LIRGenerator level - res = append(res); - res = round_fp(res); - push(type, res); + push(type, append(new ArithmeticOp(code, x, y, state_before))); } @@ -2229,7 +2214,7 @@ void GraphBuilder::invoke(Bytecodes::Code code) { append_split(result); if (result_type != voidType) { - push(result_type, round_fp(result)); + push(result_type, result); } if (profile_return() && result_type->is_object_kind()) { 
profile_return_type(result, target); @@ -2356,30 +2341,6 @@ void GraphBuilder::throw_op(int bci) { append_with_bci(t, bci); } - -Value GraphBuilder::round_fp(Value fp_value) { - if (strict_fp_requires_explicit_rounding) { -#ifdef IA32 - // no rounding needed if SSE2 is used - if (UseSSE < 2) { - // Must currently insert rounding node for doubleword values that - // are results of expressions (i.e., not loads from memory or - // constants) - if (fp_value->type()->tag() == doubleTag && - fp_value->as_Constant() == nullptr && - fp_value->as_Local() == nullptr && // method parameters need no rounding - fp_value->as_RoundFP() == nullptr) { - return append(new RoundFP(fp_value)); - } - } -#else - Unimplemented(); -#endif // IA32 - } - return fp_value; -} - - Instruction* GraphBuilder::append_with_bci(Instruction* instr, int bci) { Canonicalizer canon(compilation(), instr, bci); Instruction* i1 = canon.canonical(); diff --git a/src/hotspot/share/c1/c1_GraphBuilder.hpp b/src/hotspot/share/c1/c1_GraphBuilder.hpp index 270c344833ef8..9b9ee0072ad63 100644 --- a/src/hotspot/share/c1/c1_GraphBuilder.hpp +++ b/src/hotspot/share/c1/c1_GraphBuilder.hpp @@ -266,7 +266,6 @@ class GraphBuilder { void monitorexit(Value x, int bci); void new_multi_array(int dimensions); void throw_op(int bci); - Value round_fp(Value fp_value); // stack/code manipulation helpers Instruction* append_with_bci(Instruction* instr, int bci); diff --git a/src/hotspot/share/c1/c1_Instruction.hpp b/src/hotspot/share/c1/c1_Instruction.hpp index e950afc981d19..af22d5b8fc9aa 100644 --- a/src/hotspot/share/c1/c1_Instruction.hpp +++ b/src/hotspot/share/c1/c1_Instruction.hpp @@ -91,7 +91,6 @@ class LookupSwitch; class Return; class Throw; class Base; -class RoundFP; class UnsafeOp; class UnsafeGet; class UnsafePut; @@ -187,7 +186,6 @@ class InstructionVisitor: public StackObj { virtual void do_Base (Base* x) = 0; virtual void do_OsrEntry (OsrEntry* x) = 0; virtual void do_ExceptionObject(ExceptionObject* x) = 0; - 
virtual void do_RoundFP (RoundFP* x) = 0; virtual void do_UnsafeGet (UnsafeGet* x) = 0; virtual void do_UnsafePut (UnsafePut* x) = 0; virtual void do_UnsafeGetAndSet(UnsafeGetAndSet* x) = 0; @@ -556,7 +554,6 @@ class Instruction: public CompilationResourceObj { virtual Return* as_Return() { return nullptr; } virtual Throw* as_Throw() { return nullptr; } virtual Base* as_Base() { return nullptr; } - virtual RoundFP* as_RoundFP() { return nullptr; } virtual ExceptionObject* as_ExceptionObject() { return nullptr; } virtual UnsafeOp* as_UnsafeOp() { return nullptr; } virtual ProfileInvoke* as_ProfileInvoke() { return nullptr; } @@ -2142,30 +2139,6 @@ LEAF(ExceptionObject, Instruction) }; -// Models needed rounding for floating-point values on Intel. -// Currently only used to represent rounding of double-precision -// values stored into local variables, but could be used to model -// intermediate rounding of single-precision values as well. -LEAF(RoundFP, Instruction) - private: - Value _input; // floating-point value to be rounded - - public: - RoundFP(Value input) - : Instruction(input->type()) // Note: should not be used for constants - , _input(input) - { - ASSERT_VALUES - } - - // accessors - Value input() const { return _input; } - - // generic - virtual void input_values_do(ValueVisitor* f) { f->visit(&_input); } -}; - - BASE(UnsafeOp, Instruction) private: Value _object; // Object to be fetched from or mutated diff --git a/src/hotspot/share/c1/c1_InstructionPrinter.cpp b/src/hotspot/share/c1/c1_InstructionPrinter.cpp index 5f865ae518d00..3a0eab20f58bf 100644 --- a/src/hotspot/share/c1/c1_InstructionPrinter.cpp +++ b/src/hotspot/share/c1/c1_InstructionPrinter.cpp @@ -780,12 +780,6 @@ void InstructionPrinter::do_ExceptionObject(ExceptionObject* x) { output()->print("incoming exception"); } - -void InstructionPrinter::do_RoundFP(RoundFP* x) { - output()->print("round_fp "); - print_value(x->input()); -} - void InstructionPrinter::do_UnsafeGet(UnsafeGet* x) { 
print_unsafe_op(x, x->is_raw() ? "UnsafeGet (raw)" : "UnsafeGet"); output()->put(')'); diff --git a/src/hotspot/share/c1/c1_InstructionPrinter.hpp b/src/hotspot/share/c1/c1_InstructionPrinter.hpp index 0e5ba78bdc762..6bc31d3fe55d8 100644 --- a/src/hotspot/share/c1/c1_InstructionPrinter.hpp +++ b/src/hotspot/share/c1/c1_InstructionPrinter.hpp @@ -120,7 +120,6 @@ class InstructionPrinter: public InstructionVisitor { virtual void do_Base (Base* x); virtual void do_OsrEntry (OsrEntry* x); virtual void do_ExceptionObject(ExceptionObject* x); - virtual void do_RoundFP (RoundFP* x); virtual void do_UnsafeGet (UnsafeGet* x); virtual void do_UnsafePut (UnsafePut* x); virtual void do_UnsafeGetAndSet(UnsafeGetAndSet* x); diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp index 048eb6047ede9..8e191ab5b5b88 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -404,7 +404,6 @@ void LIR_OpVisitState::visit(LIR_Op* op) { switch (op->code()) { // LIR_Op0 - case lir_fpop_raw: // result and info always invalid case lir_breakpoint: // result and info always invalid case lir_membar: // result and info always invalid case lir_membar_acquire: // result and info always invalid @@ -444,8 +443,6 @@ void LIR_OpVisitState::visit(LIR_Op* op) { // LIR_Op1 - case lir_fxch: // input always valid, result and info always invalid - case lir_fld: // input always valid, result and info always invalid case lir_push: // input always valid, result and info always invalid case lir_pop: // input always valid, result and info always invalid case lir_leal: // input and result always valid, info always invalid @@ -497,7 +494,6 @@ void LIR_OpVisitState::visit(LIR_Op* op) { assert(opConvert->_info == nullptr, "must be"); if (opConvert->_opr->is_valid()) do_input(opConvert->_opr); if (opConvert->_result->is_valid()) do_output(opConvert->_result); - do_stub(opConvert->_stub); break; } @@ -544,20 +540,6 @@ void LIR_OpVisitState::visit(LIR_Op* op) { } -// 
LIR_OpRoundFP; - case lir_roundfp: { - assert(op->as_OpRoundFP() != nullptr, "must be"); - LIR_OpRoundFP* opRoundFP = (LIR_OpRoundFP*)op; - - assert(op->_info == nullptr, "info not used by this instruction"); - assert(opRoundFP->_tmp->is_illegal(), "not used"); - do_input(opRoundFP->_opr); - do_output(opRoundFP->_result); - - break; - } - - // LIR_Op2 case lir_cmp: case lir_cmp_l2i: @@ -1037,9 +1019,6 @@ void LIR_OpBranch::emit_code(LIR_Assembler* masm) { void LIR_OpConvert::emit_code(LIR_Assembler* masm) { masm->emit_opConvert(this); - if (stub() != nullptr) { - masm->append_code_stub(stub()); - } } void LIR_Op2::emit_code(LIR_Assembler* masm) { @@ -1714,12 +1693,9 @@ const char * LIR_Op::name() const { case lir_on_spin_wait: s = "on_spin_wait"; break; case lir_std_entry: s = "std_entry"; break; case lir_osr_entry: s = "osr_entry"; break; - case lir_fpop_raw: s = "fpop_raw"; break; case lir_breakpoint: s = "breakpoint"; break; case lir_get_thread: s = "get_thread"; break; // LIR_Op1 - case lir_fxch: s = "fxch"; break; - case lir_fld: s = "fld"; break; case lir_push: s = "push"; break; case lir_pop: s = "pop"; break; case lir_null_check: s = "null_check"; break; @@ -1729,7 +1705,6 @@ const char * LIR_Op::name() const { case lir_branch: s = "branch"; break; case lir_cond_float_branch: s = "flt_cond_br"; break; case lir_move: s = "move"; break; - case lir_roundfp: s = "roundfp"; break; case lir_rtcall: s = "rtcall"; break; case lir_throw: s = "throw"; break; case lir_unwind: s = "unwind"; break; @@ -1974,12 +1949,6 @@ void LIR_OpAllocObj::print_instr(outputStream* out) const { out->print("[lbl:" INTPTR_FORMAT "]", p2i(stub()->entry())); } -void LIR_OpRoundFP::print_instr(outputStream* out) const { - _opr->print(out); out->print(" "); - tmp()->print(out); out->print(" "); - result_opr()->print(out); out->print(" "); -} - // LIR_Op2 void LIR_Op2::print_instr(outputStream* out) const { if (code() == lir_cmp || code() == lir_branch || code() == lir_cond_float_branch) { 
diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp index c568caeca4b30..ebb43aed9c821 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -43,7 +43,6 @@ class LIR_Op; class ciType; class ValueType; class LIR_OpVisitState; -class FpuStackSim; //--------------------------------------------------------------------- // LIR Operands @@ -884,7 +883,6 @@ class LIR_OpBranch; class LIR_OpConvert; class LIR_OpAllocObj; class LIR_OpReturn; -class LIR_OpRoundFP; class LIR_Op2; class LIR_OpDelay; class LIR_Op3; @@ -913,7 +911,6 @@ enum LIR_Code { , lir_nop , lir_std_entry , lir_osr_entry - , lir_fpop_raw , lir_breakpoint , lir_rtcall , lir_membar @@ -927,8 +924,6 @@ enum LIR_Code { , lir_on_spin_wait , end_op0 , begin_op1 - , lir_fxch - , lir_fld , lir_push , lir_pop , lir_null_check @@ -938,7 +933,6 @@ enum LIR_Code { , lir_convert , lir_alloc_object , lir_monaddr - , lir_roundfp , lir_safepoint , lir_unwind , lir_load_klass @@ -1149,7 +1143,6 @@ class LIR_Op: public CompilationResourceObj { virtual LIR_OpLock* as_OpLock() { return nullptr; } virtual LIR_OpAllocArray* as_OpAllocArray() { return nullptr; } virtual LIR_OpAllocObj* as_OpAllocObj() { return nullptr; } - virtual LIR_OpRoundFP* as_OpRoundFP() { return nullptr; } virtual LIR_OpBranch* as_OpBranch() { return nullptr; } virtual LIR_OpReturn* as_OpReturn() { return nullptr; } virtual LIR_OpRTCall* as_OpRTCall() { return nullptr; } @@ -1446,23 +1439,18 @@ class LIR_OpReturn: public LIR_Op1 { virtual LIR_OpReturn* as_OpReturn() { return this; } }; -class ConversionStub; - class LIR_OpConvert: public LIR_Op1 { friend class LIR_OpVisitState; private: Bytecodes::Code _bytecode; - ConversionStub* _stub; public: - LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, LIR_Opr result, ConversionStub* stub) + LIR_OpConvert(Bytecodes::Code code, LIR_Opr opr, LIR_Opr result) : LIR_Op1(lir_convert, opr, result) - , _bytecode(code) - , _stub(stub) {} + , _bytecode(code) {} 
Bytecodes::Code bytecode() const { return _bytecode; } - ConversionStub* stub() const { return _stub; } virtual void emit_code(LIR_Assembler* masm); virtual LIR_OpConvert* as_OpConvert() { return this; } @@ -1517,23 +1505,6 @@ class LIR_OpAllocObj : public LIR_Op1 { }; -// LIR_OpRoundFP -class LIR_OpRoundFP : public LIR_Op1 { - friend class LIR_OpVisitState; - - private: - LIR_Opr _tmp; - - public: - LIR_OpRoundFP(LIR_Opr reg, LIR_Opr stack_loc_temp, LIR_Opr result) - : LIR_Op1(lir_roundfp, reg, result) - , _tmp(stack_loc_temp) {} - - LIR_Opr tmp() const { return _tmp; } - virtual LIR_OpRoundFP* as_OpRoundFP() { return this; } - void print_instr(outputStream* out) const PRODUCT_RETURN; -}; - // LIR_OpTypeCheck class LIR_OpTypeCheck: public LIR_Op { friend class LIR_OpVisitState; @@ -2202,9 +2173,6 @@ class LIR_List: public CompilationResourceObj { void leal(LIR_Opr from, LIR_Opr result_reg, LIR_PatchCode patch_code = lir_patch_none, CodeEmitInfo* info = nullptr) { append(new LIR_Op1(lir_leal, from, result_reg, T_ILLEGAL, patch_code, info)); } - // result is a stack location for old backend and vreg for UseLinearScan - // stack_loc_temp is an illegal register for old backend - void roundfp(LIR_Opr reg, LIR_Opr stack_loc_temp, LIR_Opr result) { append(new LIR_OpRoundFP(reg, stack_loc_temp, result)); } void move(LIR_Opr src, LIR_Opr dst, CodeEmitInfo* info = nullptr) { append(new LIR_Op1(lir_move, src, dst, dst->type(), lir_patch_none, info)); } void move(LIR_Address* src, LIR_Opr dst, CodeEmitInfo* info = nullptr) { append(new LIR_Op1(lir_move, LIR_OprFact::address(src), dst, src->type(), lir_patch_none, info)); } void move(LIR_Opr src, LIR_Address* dst, CodeEmitInfo* info = nullptr) { append(new LIR_Op1(lir_move, src, LIR_OprFact::address(dst), dst->type(), lir_patch_none, info)); } @@ -2233,7 +2201,7 @@ class LIR_List: public CompilationResourceObj { void safepoint(LIR_Opr tmp, CodeEmitInfo* info) { append(new LIR_Op1(lir_safepoint, tmp, info)); } void 
return_op(LIR_Opr result) { append(new LIR_OpReturn(result)); } - void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, ConversionStub* stub = nullptr/*, bool is_32bit = false*/) { append(new LIR_OpConvert(code, left, dst, stub)); } + void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst) { append(new LIR_OpConvert(code, left, dst)); } void logical_and (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_and, left, right, dst)); } void logical_or (LIR_Opr left, LIR_Opr right, LIR_Opr dst) { append(new LIR_Op2(lir_logic_or, left, right, dst)); } diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp index 0fa4b3a4c93d7..1f3704458c3c3 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.cpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp @@ -486,19 +486,6 @@ void LIR_Assembler::emit_call(LIR_OpJavaCall* op) { if (op->is_method_handle_invoke()) { compilation()->set_has_method_handle_invokes(true); } - -#if defined(IA32) && defined(COMPILER2) - // C2 leave fpu stack dirty clean it - if (UseSSE < 2 && !CompilerConfig::is_c1_only_no_jvmci()) { - int i; - for ( i = 1; i <= 7 ; i++ ) { - ffree(i); - } - if (!op->result_opr()->is_float_kind()) { - ffree(0); - } - } -#endif // IA32 && COMPILER2 } @@ -520,12 +507,6 @@ void LIR_Assembler::emit_op1(LIR_Op1* op) { } break; - case lir_roundfp: { - LIR_OpRoundFP* round_op = op->as_OpRoundFP(); - roundfp_op(round_op->in_opr(), round_op->tmp(), round_op->result_opr(), round_op->pop_fpu_stack()); - break; - } - case lir_return: { assert(op->as_OpReturn() != nullptr, "sanity"); LIR_OpReturn *ret_op = (LIR_OpReturn*)op; @@ -543,16 +524,6 @@ void LIR_Assembler::emit_op1(LIR_Op1* op) { safepoint_poll(op->in_opr(), op->info()); break; -#ifdef IA32 - case lir_fxch: - fxch(op->in_opr()->as_jint()); - break; - - case lir_fld: - fld(op->in_opr()->as_jint()); - break; -#endif // IA32 - case lir_branch: break; @@ -628,12 +599,6 @@ void LIR_Assembler::emit_op0(LIR_Op0* op) 
{ osr_entry(); break; -#ifdef IA32 - case lir_fpop_raw: - fpop(); - break; -#endif // IA32 - case lir_breakpoint: breakpoint(); break; @@ -777,17 +742,6 @@ void LIR_Assembler::build_frame() { _masm->build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); } - -void LIR_Assembler::roundfp_op(LIR_Opr src, LIR_Opr tmp, LIR_Opr dest, bool pop_fpu_stack) { - assert(strict_fp_requires_explicit_rounding, "not required"); - assert((src->is_single_fpu() && dest->is_single_stack()) || - (src->is_double_fpu() && dest->is_double_stack()), - "round_fp: rounds register -> stack location"); - - reg2stack (src, dest, src->type(), pop_fpu_stack); -} - - void LIR_Assembler::move_op(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide) { if (src->is_register()) { if (dest->is_register()) { diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp index eb89e3ea24870..6d8ca4b64ed36 100644 --- a/src/hotspot/share/c1/c1_LIRAssembler.hpp +++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp @@ -215,7 +215,6 @@ class LIR_Assembler: public CompilationResourceObj { void logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest); - void roundfp_op(LIR_Opr src, LIR_Opr tmp, LIR_Opr dest, bool pop_fpu_stack); void move_op(LIR_Opr src, LIR_Opr result, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide); void volatile_move_op(LIR_Opr src, LIR_Opr result, BasicType type, CodeEmitInfo* info); diff --git a/src/hotspot/share/c1/c1_LIRGenerator.cpp b/src/hotspot/share/c1/c1_LIRGenerator.cpp index 74fdf7a5b76a3..499358354e5bc 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.cpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.cpp @@ -881,28 +881,6 @@ void LIRGenerator::arraycopy_helper(Intrinsic* x, int* flagsp, ciArrayKlass** ex *expected_typep = (ciArrayKlass*)expected_type; } - -LIR_Opr LIRGenerator::round_item(LIR_Opr opr) { - 
assert(opr->is_register(), "why spill if item is not register?"); - - if (strict_fp_requires_explicit_rounding) { -#ifdef IA32 - if (UseSSE < 1 && opr->is_single_fpu()) { - LIR_Opr result = new_register(T_FLOAT); - set_vreg_flag(result, must_start_in_memory); - assert(opr->is_register(), "only a register can be spilled"); - assert(opr->value_type()->is_float(), "rounding only for floats available"); - __ roundfp(opr, LIR_OprFact::illegalOpr, result); - return result; - } -#else - Unimplemented(); -#endif // IA32 - } - return opr; -} - - LIR_Opr LIRGenerator::force_to_spill(LIR_Opr value, BasicType t) { assert(type2size[t] == type2size[value->type()], "size mismatch: t=%s, value->type()=%s", type2name(t), type2name(value->type())); @@ -1927,20 +1905,6 @@ void LIRGenerator::do_PreconditionsCheckIndex(Intrinsic* x, BasicType type) { // is neither lir_cond_equal nor lir_cond_notEqual, see LIR_Assembler::comp_op. LIR_Opr zero_reg = new_register(type); __ move(zero, zero_reg); -#if defined(X86) && !defined(_LP64) - // BEWARE! On 32-bit x86 cmp clobbers its left argument so we need a temp copy. 
- LIR_Opr index_copy = new_register(index.type()); - // index >= 0 - __ move(index.result(), index_copy); - __ cmp(lir_cond_less, index_copy, zero_reg); - __ branch(lir_cond_less, new DeoptimizeStub(info, Deoptimization::Reason_range_check, - Deoptimization::Action_make_not_entrant)); - // index < length - __ move(index.result(), index_copy); - __ cmp(lir_cond_greaterEqual, index_copy, len); - __ branch(lir_cond_greaterEqual, new DeoptimizeStub(info, Deoptimization::Reason_range_check, - Deoptimization::Action_make_not_entrant)); -#else // index >= 0 __ cmp(lir_cond_less, index.result(), zero_reg); __ branch(lir_cond_less, new DeoptimizeStub(info, Deoptimization::Reason_range_check, @@ -1949,7 +1913,6 @@ void LIRGenerator::do_PreconditionsCheckIndex(Intrinsic* x, BasicType type) { __ cmp(lir_cond_greaterEqual, index.result(), len); __ branch(lir_cond_greaterEqual, new DeoptimizeStub(info, Deoptimization::Reason_range_check, Deoptimization::Action_make_not_entrant)); -#endif __ move(index.result(), result); } @@ -2115,25 +2078,6 @@ void LIRGenerator::do_Throw(Throw* x) { } -void LIRGenerator::do_RoundFP(RoundFP* x) { - assert(strict_fp_requires_explicit_rounding, "not required"); - - LIRItem input(x->input(), this); - input.load_item(); - LIR_Opr input_opr = input.result(); - assert(input_opr->is_register(), "why round if value is not in a register?"); - assert(input_opr->is_single_fpu() || input_opr->is_double_fpu(), "input should be floating-point value"); - if (input_opr->is_single_fpu()) { - set_result(x, round_item(input_opr)); // This code path not currently taken - } else { - LIR_Opr result = new_register(T_DOUBLE); - set_vreg_flag(result, must_start_in_memory); - __ roundfp(input_opr, LIR_OprFact::illegalOpr, result); - set_result(x, result); - } -} - - void LIRGenerator::do_UnsafeGet(UnsafeGet* x) { BasicType type = x->basic_type(); LIRItem src(x->object(), this); @@ -3227,14 +3171,7 @@ void LIRGenerator::do_ProfileInvoke(ProfileInvoke* x) { void 
LIRGenerator::increment_backedge_counter_conditionally(LIR_Condition cond, LIR_Opr left, LIR_Opr right, CodeEmitInfo* info, int left_bci, int right_bci, int bci) { if (compilation()->is_profiling()) { -#if defined(X86) && !defined(_LP64) - // BEWARE! On 32-bit x86 cmp clobbers its left argument so we need a temp copy. - LIR_Opr left_copy = new_register(left->type()); - __ move(left, left_copy); - __ cmp(cond, left_copy, right); -#else __ cmp(cond, left, right); -#endif LIR_Opr step = new_register(T_INT); LIR_Opr plus_one = LIR_OprFact::intConst(InvocationCounter::count_increment); LIR_Opr zero = LIR_OprFact::intConst(0); diff --git a/src/hotspot/share/c1/c1_LIRGenerator.hpp b/src/hotspot/share/c1/c1_LIRGenerator.hpp index a66758054d7d4..049fdbe9d1fca 100644 --- a/src/hotspot/share/c1/c1_LIRGenerator.hpp +++ b/src/hotspot/share/c1/c1_LIRGenerator.hpp @@ -233,7 +233,6 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { friend class LIRItem; - LIR_Opr round_item(LIR_Opr opr); LIR_Opr force_to_spill(LIR_Opr value, BasicType t); PhiResolverState& resolver_state() { return _resolver_state; } @@ -578,7 +577,6 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { virtual void do_Base (Base* x); virtual void do_OsrEntry (OsrEntry* x); virtual void do_ExceptionObject(ExceptionObject* x); - virtual void do_RoundFP (RoundFP* x); virtual void do_UnsafeGet (UnsafeGet* x); virtual void do_UnsafePut (UnsafePut* x); virtual void do_UnsafeGetAndSet(UnsafeGetAndSet* x); diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp index a4d955e52a004..60e0fb1dfd9da 100644 --- a/src/hotspot/share/c1/c1_LinearScan.cpp +++ b/src/hotspot/share/c1/c1_LinearScan.cpp @@ -92,9 +92,6 @@ LinearScan::LinearScan(IR* ir, LIRGenerator* gen, FrameMap* frame_map) , _has_call(0) , _interval_in_loop(0) // initialized later with correct length , _scope_value_cache(0) // initialized later with correct length -#ifdef IA32 - , 
_fpu_stack_allocator(nullptr) -#endif { assert(this->ir() != nullptr, "check if valid"); assert(this->compilation() != nullptr, "check if valid"); @@ -1091,43 +1088,23 @@ IntervalUseKind LinearScan::use_kind_of_input_operand(LIR_Op* op, LIR_Opr opr) { // this operand is allowed to be on the stack in some cases BasicType opr_type = opr->type_register(); if (opr_type == T_FLOAT || opr_type == T_DOUBLE) { - if (IA32_ONLY( (UseSSE == 1 && opr_type == T_FLOAT) || UseSSE >= 2 ) NOT_IA32( true )) { - // SSE float instruction (T_DOUBLE only supported with SSE2) - switch (op->code()) { - case lir_cmp: - case lir_add: - case lir_sub: - case lir_mul: - case lir_div: - { - assert(op->as_Op2() != nullptr, "must be LIR_Op2"); - LIR_Op2* op2 = (LIR_Op2*)op; - if (op2->in_opr1() != op2->in_opr2() && op2->in_opr2() == opr) { - assert((op2->result_opr()->is_register() || op->code() == lir_cmp) && op2->in_opr1()->is_register(), "cannot mark second operand as stack if others are not in register"); - return shouldHaveRegister; - } - } - default: - break; - } - } else { - // FPU stack float instruction - switch (op->code()) { - case lir_add: - case lir_sub: - case lir_mul: - case lir_div: - { - assert(op->as_Op2() != nullptr, "must be LIR_Op2"); - LIR_Op2* op2 = (LIR_Op2*)op; - if (op2->in_opr1() != op2->in_opr2() && op2->in_opr2() == opr) { - assert((op2->result_opr()->is_register() || op->code() == lir_cmp) && op2->in_opr1()->is_register(), "cannot mark second operand as stack if others are not in register"); - return shouldHaveRegister; - } + // SSE float instruction (T_DOUBLE only supported with SSE2) + switch (op->code()) { + case lir_cmp: + case lir_add: + case lir_sub: + case lir_mul: + case lir_div: + { + assert(op->as_Op2() != nullptr, "must be LIR_Op2"); + LIR_Op2* op2 = (LIR_Op2*)op; + if (op2->in_opr1() != op2->in_opr2() && op2->in_opr2() == opr) { + assert((op2->result_opr()->is_register() || op->code() == lir_cmp) && op2->in_opr1()->is_register(), "cannot mark second 
operand as stack if others are not in register"); + return shouldHaveRegister; } - default: - break; } + default: + break; } // We want to sometimes use logical operations on pointers, in particular in GC barriers. // Since 64bit logical operations do not current support operands on stack, we have to make sure @@ -1288,30 +1265,21 @@ void LinearScan::build_intervals() { // virtual fpu operands. Otherwise no allocation for fpu registers is // performed and so the temp ranges would be useless if (has_fpu_registers()) { -#ifdef X86 - if (UseSSE < 2) { -#endif // X86 - for (i = 0; i < FrameMap::nof_caller_save_fpu_regs; i++) { - LIR_Opr opr = FrameMap::caller_save_fpu_reg_at(i); - assert(opr->is_valid() && opr->is_register(), "FrameMap should not return invalid operands"); - assert(reg_numHi(opr) == -1, "missing addition of range for hi-register"); - caller_save_registers[num_caller_save_registers++] = reg_num(opr); - } -#ifdef X86 +#ifndef X86 + for (i = 0; i < FrameMap::nof_caller_save_fpu_regs; i++) { + LIR_Opr opr = FrameMap::caller_save_fpu_reg_at(i); + assert(opr->is_valid() && opr->is_register(), "FrameMap should not return invalid operands"); + assert(reg_numHi(opr) == -1, "missing addition of range for hi-register"); + caller_save_registers[num_caller_save_registers++] = reg_num(opr); } -#endif // X86 - -#ifdef X86 - if (UseSSE > 0) { - int num_caller_save_xmm_regs = FrameMap::get_num_caller_save_xmms(); - for (i = 0; i < num_caller_save_xmm_regs; i ++) { - LIR_Opr opr = FrameMap::caller_save_xmm_reg_at(i); - assert(opr->is_valid() && opr->is_register(), "FrameMap should not return invalid operands"); - assert(reg_numHi(opr) == -1, "missing addition of range for hi-register"); - caller_save_registers[num_caller_save_registers++] = reg_num(opr); - } +#else + for (i = 0; i < FrameMap::get_num_caller_save_xmms(); i ++) { + LIR_Opr opr = FrameMap::caller_save_xmm_reg_at(i); + assert(opr->is_valid() && opr->is_register(), "FrameMap should not return invalid 
operands"); + assert(reg_numHi(opr) == -1, "missing addition of range for hi-register"); + caller_save_registers[num_caller_save_registers++] = reg_num(opr); } -#endif // X86 +#endif // !X86 } assert(num_caller_save_registers <= LinearScan::nof_regs, "out of bounds"); @@ -1869,8 +1837,7 @@ void LinearScan::resolve_exception_entry(BlockBegin* block, int reg_num, MoveRes int reg = interval->assigned_reg(); int regHi = interval->assigned_regHi(); - if ((reg < nof_regs && interval->always_in_memory()) || - (use_fpu_stack_allocation() && reg >= pd_first_fpu_reg && reg <= pd_last_fpu_reg)) { + if ((reg < nof_regs && interval->always_in_memory())) { // the interval is split to get a short range that is located on the stack // in the following two cases: // * the interval started in memory (e.g. method parameter), but is currently in a register @@ -2159,37 +2126,29 @@ LIR_Opr LinearScan::calc_operand_for_interval(const Interval* interval) { #ifndef __SOFTFP__ case T_FLOAT: { #ifdef X86 - if (UseSSE >= 1) { - int last_xmm_reg = pd_last_xmm_reg; -#ifdef _LP64 - if (UseAVX < 3) { - last_xmm_reg = pd_first_xmm_reg + (pd_nof_xmm_regs_frame_map / 2) - 1; - } -#endif // LP64 - assert(assigned_reg >= pd_first_xmm_reg && assigned_reg <= last_xmm_reg, "no xmm register"); - assert(interval->assigned_regHi() == any_reg, "must not have hi register"); - return LIR_OprFact::single_xmm(assigned_reg - pd_first_xmm_reg); + int last_xmm_reg = pd_last_xmm_reg; + if (UseAVX < 3) { + last_xmm_reg = pd_first_xmm_reg + (pd_nof_xmm_regs_frame_map / 2) - 1; } -#endif // X86 - + assert(assigned_reg >= pd_first_xmm_reg && assigned_reg <= last_xmm_reg, "no xmm register"); + assert(interval->assigned_regHi() == any_reg, "must not have hi register"); + return LIR_OprFact::single_xmm(assigned_reg - pd_first_xmm_reg); +#else assert(assigned_reg >= pd_first_fpu_reg && assigned_reg <= pd_last_fpu_reg, "no fpu register"); assert(interval->assigned_regHi() == any_reg, "must not have hi register"); return 
LIR_OprFact::single_fpu(assigned_reg - pd_first_fpu_reg); +#endif // X86 } case T_DOUBLE: { #ifdef X86 - if (UseSSE >= 2) { int last_xmm_reg = pd_last_xmm_reg; -#ifdef _LP64 if (UseAVX < 3) { last_xmm_reg = pd_first_xmm_reg + (pd_nof_xmm_regs_frame_map / 2) - 1; } -#endif // LP64 assert(assigned_reg >= pd_first_xmm_reg && assigned_reg <= last_xmm_reg, "no xmm register"); assert(interval->assigned_regHi() == any_reg, "must not have hi register (double xmm values are stored in one register)"); return LIR_OprFact::double_xmm(assigned_reg - pd_first_xmm_reg); - } #endif // X86 #if defined(ARM32) @@ -2666,14 +2625,7 @@ int LinearScan::append_scope_value_for_operand(LIR_Opr opr, GrowableArrayis_single_fpu()) { -#ifdef IA32 - // the exact location of fpu stack values is only known - // during fpu stack allocation, so the stack allocator object - // must be present - assert(use_fpu_stack_allocation(), "should not have float stack values without fpu stack allocation (all floats must be SSE2)"); - assert(_fpu_stack_allocator != nullptr, "must be present"); - opr = _fpu_stack_allocator->to_fpu_stack(opr); -#elif defined(AMD64) +#if defined(AMD64) assert(false, "FPU not used on x86-64"); #endif @@ -2758,17 +2710,8 @@ int LinearScan::append_scope_value_for_operand(LIR_Opr opr, GrowableArrayis_double_xmm()) { assert(opr->fpu_regnrLo() == opr->fpu_regnrHi(), "assumed in calculation"); VMReg rname_first = opr->as_xmm_double_reg()->as_VMReg(); -# ifdef _LP64 first = new LocationValue(Location::new_reg_loc(Location::dbl, rname_first)); second = _int_0_scope_value; -# else - first = new LocationValue(Location::new_reg_loc(Location::normal, rname_first)); - // %%% This is probably a waste but we'll keep things as they were for now - if (true) { - VMReg rname_second = rname_first->next(); - second = new LocationValue(Location::new_reg_loc(Location::normal, rname_second)); - } -# endif #endif } else if (opr->is_double_fpu()) { @@ -2779,16 +2722,6 @@ int 
LinearScan::append_scope_value_for_operand(LIR_Opr opr, GrowableArrayto_fpu_stack(opr); - - assert(opr->fpu_regnrLo() == opr->fpu_regnrHi(), "assumed in calculation (only fpu_regnrLo is used)"); -#endif #ifdef AMD64 assert(false, "FPU not used on x86-64"); #endif @@ -3022,15 +2955,9 @@ void LinearScan::assign_reg_num(LIR_OpList* instructions, IntervalWalker* iw) { compute_oop_map(iw, visitor, op); // compute debug information - if (!use_fpu_stack_allocation()) { - // compute debug information if fpu stack allocation is not needed. - // when fpu stack allocation is needed, the debug information can not - // be computed here because the exact location of fpu operands is not known - // -> debug information is created inside the fpu stack allocator - int n = visitor.info_count(); - for (int k = 0; k < n; k++) { - compute_debug_info(visitor.info_at(k), op_id); - } + int n = visitor.info_count(); + for (int k = 0; k < n; k++) { + compute_debug_info(visitor.info_at(k), op_id); } } @@ -3127,14 +3054,6 @@ void LinearScan::do_linear_scan() { NOT_PRODUCT(print_lir(2, "LIR after assignment of register numbers:")); NOT_PRODUCT(LinearScanStatistic::compute(this, _stat_after_asign)); - { TIME_LINEAR_SCAN(timer_allocate_fpu_stack); - - if (use_fpu_stack_allocation()) { - allocate_fpu_stack(); // Only has effect on Intel - NOT_PRODUCT(print_lir(2, "LIR after FPU stack allocation:")); - } - } - #ifndef RISCV // Disable these optimizations on riscv temporarily, because it does not // work when the comparison operands are bound to branches or cmoves. 
@@ -3234,11 +3153,9 @@ LIR_Opr LinearScan::get_operand(int reg_num) { #ifdef X86 int last_xmm_reg = pd_last_xmm_reg; -#ifdef _LP64 if (UseAVX < 3) { last_xmm_reg = pd_first_xmm_reg + (pd_nof_xmm_regs_frame_map / 2) - 1; } -#endif #endif if (reg_num >= pd_first_cpu_reg && reg_num <= pd_last_cpu_reg) { opr = LIR_OprFact::single_cpu(reg_num); @@ -6010,19 +5927,6 @@ bool EdgeMoveOptimizer::operations_different(LIR_Op* op1, LIR_Op* op2) { return false; } - } else if (op1->code() == lir_fxch && op2->code() == lir_fxch) { - assert(op1->as_Op1() != nullptr, "fxch must be LIR_Op1"); - assert(op2->as_Op1() != nullptr, "fxch must be LIR_Op1"); - LIR_Op1* fxch1 = (LIR_Op1*)op1; - LIR_Op1* fxch2 = (LIR_Op1*)op2; - if (fxch1->in_opr()->as_jint() == fxch2->in_opr()->as_jint()) { - // equal FPU stack operations can be optimized - return false; - } - - } else if (op1->code() == lir_fpop_raw && op2->code() == lir_fpop_raw) { - // equal FPU stack operations can be optimized - return false; } // no optimization possible @@ -6542,7 +6446,6 @@ const char* LinearScanStatistic::counter_name(int counter_idx) { case counter_throw: return "throw"; case counter_unwind: return "unwind"; case counter_typecheck: return "type+null-checks"; - case counter_fpu_stack: return "fpu-stack"; case counter_misc_inst: return "other instructions"; case counter_other_inst: return "misc. 
instructions"; @@ -6764,15 +6667,10 @@ void LinearScanStatistic::collect(LinearScan* allocator) { case lir_checkcast: case lir_store_check: inc_counter(counter_typecheck); break; - case lir_fpop_raw: - case lir_fxch: - case lir_fld: inc_counter(counter_fpu_stack); break; - case lir_nop: case lir_push: case lir_pop: case lir_convert: - case lir_roundfp: case lir_cmove: inc_counter(counter_misc_inst); break; default: inc_counter(counter_other_inst); break; @@ -6821,7 +6719,6 @@ const char* LinearScanTimers::timer_name(int idx) { case timer_sort_intervals_after: return "Sort Intervals After"; case timer_eliminate_spill_moves: return "Spill optimization"; case timer_assign_reg_num: return "Assign Reg Num"; - case timer_allocate_fpu_stack: return "Allocate FPU Stack"; case timer_optimize_lir: return "Optimize LIR"; default: ShouldNotReachHere(); return ""; } diff --git a/src/hotspot/share/c1/c1_LinearScan.hpp b/src/hotspot/share/c1/c1_LinearScan.hpp index 4e8adef3c7d79..8ffa1101d0ce2 100644 --- a/src/hotspot/share/c1/c1_LinearScan.hpp +++ b/src/hotspot/share/c1/c1_LinearScan.hpp @@ -25,7 +25,6 @@ #ifndef SHARE_C1_C1_LINEARSCAN_HPP #define SHARE_C1_C1_LINEARSCAN_HPP -#include "c1/c1_FpuStackSim.hpp" #include "c1/c1_FrameMap.hpp" #include "c1/c1_IR.hpp" #include "c1/c1_Instruction.hpp" @@ -35,7 +34,6 @@ #include "utilities/align.hpp" #include "utilities/macros.hpp" -class FpuStackAllocator; class IRScopeDebugInfo; class Interval; class IntervalWalker; @@ -107,7 +105,6 @@ class LinearScan : public CompilationResourceObj { friend class Interval; friend class IntervalWalker; friend class LinearScanWalker; - friend class FpuStackAllocator; friend class MoveResolver; friend class LinearScanStatistic; friend class LinearScanTimers; @@ -177,15 +174,6 @@ class LinearScan : public CompilationResourceObj { int num_loops() const { return ir()->num_loops(); } bool is_interval_in_loop(int interval, int loop) const { return _interval_in_loop.at(interval, loop); } - // handling of fpu 
stack allocation (platform dependent, needed for debug information generation) -#ifdef IA32 - FpuStackAllocator* _fpu_stack_allocator; - bool use_fpu_stack_allocation() const { return UseSSE < 2 && has_fpu_registers(); } -#else - bool use_fpu_stack_allocation() const { return false; } -#endif - - // access to interval list int interval_count() const { return _intervals.length(); } Interval* interval_at(int reg_num) const { return _intervals.at(reg_num); } @@ -358,11 +346,6 @@ class LinearScan : public CompilationResourceObj { void assign_reg_num(); - // Phase 8: fpu stack allocation - // (Used only on x86 when fpu operands are present) - void allocate_fpu_stack(); - - // helper functions for printing state #ifndef PRODUCT static void print_bitmap(BitMap& bitmap); @@ -900,7 +883,6 @@ class LinearScanStatistic : public StackObj { counter_throw, counter_unwind, counter_typecheck, - counter_fpu_stack, counter_misc_inst, counter_other_inst, blank_line_2, @@ -953,7 +935,6 @@ class LinearScanTimers : public StackObj { timer_sort_intervals_after, timer_eliminate_spill_moves, timer_assign_reg_num, - timer_allocate_fpu_stack, timer_optimize_lir, number_of_timers diff --git a/src/hotspot/share/c1/c1_Optimizer.cpp b/src/hotspot/share/c1/c1_Optimizer.cpp index d33e4d28bd0dd..3b188f537c661 100644 --- a/src/hotspot/share/c1/c1_Optimizer.cpp +++ b/src/hotspot/share/c1/c1_Optimizer.cpp @@ -578,7 +578,6 @@ class NullCheckVisitor: public InstructionVisitor { void do_Base (Base* x); void do_OsrEntry (OsrEntry* x); void do_ExceptionObject(ExceptionObject* x); - void do_RoundFP (RoundFP* x); void do_UnsafeGet (UnsafeGet* x); void do_UnsafePut (UnsafePut* x); void do_UnsafeGetAndSet(UnsafeGetAndSet* x); @@ -763,7 +762,6 @@ void NullCheckVisitor::do_Throw (Throw* x) { nce()->clear_las void NullCheckVisitor::do_Base (Base* x) {} void NullCheckVisitor::do_OsrEntry (OsrEntry* x) {} void NullCheckVisitor::do_ExceptionObject(ExceptionObject* x) { nce()->handle_ExceptionObject(x); } -void 
NullCheckVisitor::do_RoundFP (RoundFP* x) {} void NullCheckVisitor::do_UnsafeGet (UnsafeGet* x) {} void NullCheckVisitor::do_UnsafePut (UnsafePut* x) {} void NullCheckVisitor::do_UnsafeGetAndSet(UnsafeGetAndSet* x) {} diff --git a/src/hotspot/share/c1/c1_RangeCheckElimination.hpp b/src/hotspot/share/c1/c1_RangeCheckElimination.hpp index 371dc59714bc2..833f5dd1e99cb 100644 --- a/src/hotspot/share/c1/c1_RangeCheckElimination.hpp +++ b/src/hotspot/share/c1/c1_RangeCheckElimination.hpp @@ -154,7 +154,6 @@ class RangeCheckEliminator { void do_Base (Base* x) { /* nothing to do */ }; void do_OsrEntry (OsrEntry* x) { /* nothing to do */ }; void do_ExceptionObject(ExceptionObject* x) { /* nothing to do */ }; - void do_RoundFP (RoundFP* x) { /* nothing to do */ }; void do_UnsafePut (UnsafePut* x) { /* nothing to do */ }; void do_UnsafeGet (UnsafeGet* x) { /* nothing to do */ }; void do_UnsafeGetAndSet(UnsafeGetAndSet* x) { /* nothing to do */ }; diff --git a/src/hotspot/share/c1/c1_ValueMap.hpp b/src/hotspot/share/c1/c1_ValueMap.hpp index c36bb5559ba66..12c372f27c84e 100644 --- a/src/hotspot/share/c1/c1_ValueMap.hpp +++ b/src/hotspot/share/c1/c1_ValueMap.hpp @@ -203,7 +203,6 @@ class ValueNumberingVisitor: public InstructionVisitor { void do_Base (Base* x) { /* nothing to do */ } void do_OsrEntry (OsrEntry* x) { /* nothing to do */ } void do_ExceptionObject(ExceptionObject* x) { /* nothing to do */ } - void do_RoundFP (RoundFP* x) { /* nothing to do */ } void do_ProfileCall (ProfileCall* x) { /* nothing to do */ } void do_ProfileReturnType (ProfileReturnType* x) { /* nothing to do */ } void do_ProfileInvoke (ProfileInvoke* x) { /* nothing to do */ }; diff --git a/src/hotspot/share/c1/c1_globals.hpp b/src/hotspot/share/c1/c1_globals.hpp index 723065853559c..572afc60a4546 100644 --- a/src/hotspot/share/c1/c1_globals.hpp +++ b/src/hotspot/share/c1/c1_globals.hpp @@ -280,19 +280,10 @@ "Maximum size of a compiled method.") \ range(0, 1*M) \ \ - develop(bool, TraceFPUStack, false, 
\ - "Trace emulation of the FPU stack (intel only)") \ - \ - develop(bool, TraceFPURegisterUsage, false, \ - "Trace usage of FPU registers at start of blocks (intel only)") \ - \ develop(intx, InstructionCountCutoff, 37000, \ "If GraphBuilder adds this many instructions, bails out") \ range(0, max_jint) \ \ - develop(bool, ComputeExactFPURegisterUsage, true, \ - "Compute additional live set for fpu registers to simplify fpu stack merge (Intel only)") \ - \ product(bool, C1ProfileCalls, true, \ "Profile calls when generating code for updating MDOs") \ \ diff --git a/src/hotspot/share/compiler/compileBroker.cpp b/src/hotspot/share/compiler/compileBroker.cpp index 30956b6c793bd..8e0a4f19d21c6 100644 --- a/src/hotspot/share/compiler/compileBroker.cpp +++ b/src/hotspot/share/compiler/compileBroker.cpp @@ -1448,30 +1448,6 @@ nmethod* CompileBroker::compile_method(const methodHandle& method, int osr_bci, // do the compilation if (method->is_native()) { if (!PreferInterpreterNativeStubs || method->is_method_handle_intrinsic()) { -#if defined(IA32) && !defined(ZERO) - // The following native methods: - // - // java.lang.Float.intBitsToFloat - // java.lang.Float.floatToRawIntBits - // java.lang.Double.longBitsToDouble - // java.lang.Double.doubleToRawLongBits - // - // are called through the interpreter even if interpreter native stubs - // are not preferred (i.e., calling through adapter handlers is preferred). - // The reason is that on x86_32 signaling NaNs (sNaNs) are not preserved - // if the version of the methods from the native libraries is called. - // As the interpreter and the C2-intrinsified version of the methods preserves - // sNaNs, that would result in an inconsistent way of handling of sNaNs. 
- if ((UseSSE >= 1 && - (method->intrinsic_id() == vmIntrinsics::_intBitsToFloat || - method->intrinsic_id() == vmIntrinsics::_floatToRawIntBits)) || - (UseSSE >= 2 && - (method->intrinsic_id() == vmIntrinsics::_longBitsToDouble || - method->intrinsic_id() == vmIntrinsics::_doubleToRawLongBits))) { - return nullptr; - } -#endif // IA32 && !ZERO - // To properly handle the appendix argument for out-of-line calls we are using a small trampoline that // pops off the appendix argument and jumps to the target (see gen_special_dispatch in SharedRuntime). // diff --git a/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp b/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp index 080af1c9693ed..06175467f6247 100644 --- a/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp +++ b/src/hotspot/share/gc/shared/c2/barrierSetC2.cpp @@ -150,11 +150,6 @@ Node* BarrierSetC2::store_at_resolved(C2Access& access, C2AccessValue& val) cons C2ParseAccess& parse_access = static_cast(access); GraphKit* kit = parse_access.kit(); - if (bt == T_DOUBLE) { - Node* new_val = kit->dprecision_rounding(val.node()); - val.set_node(new_val); - } - store = kit->store_to_memory(kit->control(), access.addr().node(), val.node(), bt, mo, requires_atomic_access, unaligned, mismatched, unsafe, access.barrier_data()); diff --git a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp index fa3f9019af425..8ee82f79f262e 100644 --- a/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp +++ b/src/hotspot/share/gc/shenandoah/shenandoahArguments.cpp @@ -38,7 +38,7 @@ #include "utilities/defaultStream.hpp" void ShenandoahArguments::initialize() { -#if !(defined AARCH64 || defined AMD64 || defined IA32 || defined PPC64 || defined RISCV64) +#if !(defined AARCH64 || defined AMD64 || defined PPC64 || defined RISCV64) vm_exit_during_initialization("Shenandoah GC is not supported on this platform."); #endif diff --git 
a/src/hotspot/share/interpreter/abstractInterpreter.cpp b/src/hotspot/share/interpreter/abstractInterpreter.cpp index 616ba29c62b33..59aeebecca118 100644 --- a/src/hotspot/share/interpreter/abstractInterpreter.cpp +++ b/src/hotspot/share/interpreter/abstractInterpreter.cpp @@ -131,10 +131,6 @@ AbstractInterpreter::MethodKind AbstractInterpreter::method_kind(const methodHan // Use optimized stub code for CRC32C methods. case vmIntrinsics::_updateBytesCRC32C: return java_util_zip_CRC32C_updateBytes; case vmIntrinsics::_updateDirectByteBufferCRC32C: return java_util_zip_CRC32C_updateDirectByteBuffer; - case vmIntrinsics::_intBitsToFloat: return java_lang_Float_intBitsToFloat; - case vmIntrinsics::_floatToRawIntBits: return java_lang_Float_floatToRawIntBits; - case vmIntrinsics::_longBitsToDouble: return java_lang_Double_longBitsToDouble; - case vmIntrinsics::_doubleToRawLongBits: return java_lang_Double_doubleToRawLongBits; case vmIntrinsics::_float16ToFloat: return java_lang_Float_float16ToFloat; case vmIntrinsics::_floatToFloat16: return java_lang_Float_floatToFloat16; case vmIntrinsics::_currentThread: return java_lang_Thread_currentThread; @@ -225,14 +221,6 @@ vmIntrinsics::ID AbstractInterpreter::method_intrinsic(MethodKind kind) { : return vmIntrinsics::_updateDirectByteBufferCRC32C; case java_lang_Thread_currentThread : return vmIntrinsics::_currentThread; - case java_lang_Float_intBitsToFloat - : return vmIntrinsics::_intBitsToFloat; - case java_lang_Float_floatToRawIntBits - : return vmIntrinsics::_floatToRawIntBits; - case java_lang_Double_longBitsToDouble - : return vmIntrinsics::_longBitsToDouble; - case java_lang_Double_doubleToRawLongBits - : return vmIntrinsics::_doubleToRawLongBits; case java_lang_Float_float16ToFloat : return vmIntrinsics::_float16ToFloat; case java_lang_Float_floatToFloat16 @@ -332,10 +320,6 @@ void AbstractInterpreter::print_method_kind(MethodKind kind) { case java_util_zip_CRC32C_updateDirectByteBuffer: 
tty->print("java_util_zip_CRC32C_updateDirectByteByffer"); break; case java_lang_ref_reference_get : tty->print("java_lang_ref_reference_get"); break; case java_lang_Thread_currentThread : tty->print("java_lang_Thread_currentThread"); break; - case java_lang_Float_intBitsToFloat : tty->print("java_lang_Float_intBitsToFloat"); break; - case java_lang_Float_floatToRawIntBits : tty->print("java_lang_Float_floatToRawIntBits"); break; - case java_lang_Double_longBitsToDouble : tty->print("java_lang_Double_longBitsToDouble"); break; - case java_lang_Double_doubleToRawLongBits : tty->print("java_lang_Double_doubleToRawLongBits"); break; case java_lang_Float_float16ToFloat : tty->print("java_lang_Float_float16ToFloat"); break; case java_lang_Float_floatToFloat16 : tty->print("java_lang_Float_floatToFloat16"); break; default: diff --git a/src/hotspot/share/interpreter/abstractInterpreter.hpp b/src/hotspot/share/interpreter/abstractInterpreter.hpp index 55fb58021a0d4..cc1a66f442d4d 100644 --- a/src/hotspot/share/interpreter/abstractInterpreter.hpp +++ b/src/hotspot/share/interpreter/abstractInterpreter.hpp @@ -88,12 +88,8 @@ class AbstractInterpreter: AllStatic { java_util_zip_CRC32_updateByteBuffer, // implementation of java.util.zip.CRC32.updateByteBuffer() java_util_zip_CRC32C_updateBytes, // implementation of java.util.zip.CRC32C.updateBytes(crc, b[], off, end) java_util_zip_CRC32C_updateDirectByteBuffer, // implementation of java.util.zip.CRC32C.updateDirectByteBuffer(crc, address, off, end) - java_lang_Float_intBitsToFloat, // implementation of java.lang.Float.intBitsToFloat() - java_lang_Float_floatToRawIntBits, // implementation of java.lang.Float.floatToRawIntBits() java_lang_Float_float16ToFloat, // implementation of java.lang.Float.float16ToFloat() java_lang_Float_floatToFloat16, // implementation of java.lang.Float.floatToFloat16() - java_lang_Double_longBitsToDouble, // implementation of java.lang.Double.longBitsToDouble() - java_lang_Double_doubleToRawLongBits, 
// implementation of java.lang.Double.doubleToRawLongBits() java_lang_Thread_currentThread, // implementation of java.lang.Thread.currentThread() number_of_method_entries, invalid = -1 diff --git a/src/hotspot/share/interpreter/interpreterRuntime.cpp b/src/hotspot/share/interpreter/interpreterRuntime.cpp index 9d953d9db54fd..2b631df6f5840 100644 --- a/src/hotspot/share/interpreter/interpreterRuntime.cpp +++ b/src/hotspot/share/interpreter/interpreterRuntime.cpp @@ -1485,7 +1485,7 @@ JRT_ENTRY(void, InterpreterRuntime::prepare_native_call(JavaThread* current, Met // preparing the same method will be sure to see non-null entry & mirror. JRT_END -#if defined(IA32) || defined(AMD64) || defined(ARM) +#if defined(X86) || defined(ARM) JRT_LEAF(void, InterpreterRuntime::popframe_move_outgoing_args(JavaThread* current, void* src_address, void* dest_address)) assert(current == JavaThread::current(), "pre-condition"); if (src_address == dest_address) { diff --git a/src/hotspot/share/interpreter/interpreterRuntime.hpp b/src/hotspot/share/interpreter/interpreterRuntime.hpp index 3635433f43432..6d383fd734d6a 100644 --- a/src/hotspot/share/interpreter/interpreterRuntime.hpp +++ b/src/hotspot/share/interpreter/interpreterRuntime.hpp @@ -146,7 +146,7 @@ class InterpreterRuntime: AllStatic { Method* method, intptr_t* from, intptr_t* to); -#if defined(IA32) || defined(AMD64) || defined(ARM) +#if defined(X86) || defined(ARM) // Popframe support (only needed on x86, AMD64 and ARM) static void popframe_move_outgoing_args(JavaThread* current, void* src_address, void* dest_address); #endif diff --git a/src/hotspot/share/interpreter/templateInterpreterGenerator.cpp b/src/hotspot/share/interpreter/templateInterpreterGenerator.cpp index 91c302888431e..110bb164cb68f 100644 --- a/src/hotspot/share/interpreter/templateInterpreterGenerator.cpp +++ b/src/hotspot/share/interpreter/templateInterpreterGenerator.cpp @@ -233,11 +233,6 @@ void TemplateInterpreterGenerator::generate_all() { 
native_method_entry(java_util_zip_CRC32_updateBytes) native_method_entry(java_util_zip_CRC32_updateByteBuffer) - native_method_entry(java_lang_Float_intBitsToFloat) - native_method_entry(java_lang_Float_floatToRawIntBits) - native_method_entry(java_lang_Double_longBitsToDouble) - native_method_entry(java_lang_Double_doubleToRawLongBits) - #undef native_method_entry // Bytecodes @@ -378,7 +373,6 @@ void TemplateInterpreterGenerator::generate_and_dispatch(Template* t, TosState t if (PrintBytecodePairHistogram) histogram_bytecode_pair(t); if (TraceBytecodes) trace_bytecode(t); if (StopInterpreterAt > 0) stop_interpreter_at(); - __ verify_FPU(1, t->tos_in()); #endif // !PRODUCT int step = 0; if (!t->does_dispatch()) { @@ -489,17 +483,6 @@ address TemplateInterpreterGenerator::generate_intrinsic_entry(AbstractInterpret : entry_point = generate_Float_float16ToFloat_entry(); break; case Interpreter::java_lang_Float_floatToFloat16 : entry_point = generate_Float_floatToFloat16_entry(); break; - - // On x86_32 platforms, a special entry is generated for the following four methods. - // On other platforms the native entry is used to enter these methods. 
- case Interpreter::java_lang_Float_intBitsToFloat - : entry_point = generate_Float_intBitsToFloat_entry(); break; - case Interpreter::java_lang_Float_floatToRawIntBits - : entry_point = generate_Float_floatToRawIntBits_entry(); break; - case Interpreter::java_lang_Double_longBitsToDouble - : entry_point = generate_Double_longBitsToDouble_entry(); break; - case Interpreter::java_lang_Double_doubleToRawLongBits - : entry_point = generate_Double_doubleToRawLongBits_entry(); break; default: fatal("unexpected intrinsic method kind: %d", kind); break; diff --git a/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp b/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp index d3ddf2a830ff0..07a7ca6169dd2 100644 --- a/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp +++ b/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp @@ -99,10 +99,6 @@ class TemplateInterpreterGenerator: public AbstractInterpreterGenerator { address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind); address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind); address generate_currentThread(); - address generate_Float_intBitsToFloat_entry(); - address generate_Float_floatToRawIntBits_entry(); - address generate_Double_longBitsToDouble_entry(); - address generate_Double_doubleToRawLongBits_entry(); address generate_Float_float16ToFloat_entry(); address generate_Float_floatToFloat16_entry(); diff --git a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp index 7661b35863c1b..22a9775f8989c 100644 --- a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp +++ b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp @@ -101,7 +101,7 @@ inline R JfrBigEndian::read_unaligned(const address location) { } inline bool JfrBigEndian::platform_supports_unaligned_reads(void) { -#if defined(IA32) || defined(AMD64) || defined(PPC) || defined(S390) +#if defined(X86) || defined(PPC) || 
defined(S390) return true; #elif defined(ARM) || defined(AARCH64) || defined(RISCV) return false; diff --git a/src/hotspot/share/opto/chaitin.cpp b/src/hotspot/share/opto/chaitin.cpp index 4a200241bb7f2..b096abd322962 100644 --- a/src/hotspot/share/opto/chaitin.cpp +++ b/src/hotspot/share/opto/chaitin.cpp @@ -955,7 +955,6 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) { // ------------------- reg_pressure -------------------- // Each entry is reg_pressure_per_value,number_of_regs // RegL RegI RegFlags RegF RegD INTPRESSURE FLOATPRESSURE - // IA32 2 1 1 1 1 6 6 // SPARC 2 2 2 2 2 48 (24) 52 (26) // SPARCV9 2 2 2 2 2 48 (24) 52 (26) // AMD64 1 1 1 1 1 14 15 @@ -971,12 +970,6 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) { // Define platform specific register pressure #if defined(ARM32) lrg.set_reg_pressure(2); -#elif defined(IA32) - if( ireg == Op_RegL ) { - lrg.set_reg_pressure(2); - } else { - lrg.set_reg_pressure(1); - } #else lrg.set_reg_pressure(1); // normally one value per register #endif diff --git a/src/hotspot/share/opto/classes.hpp b/src/hotspot/share/opto/classes.hpp index 60ee3e01137b0..9b8d772ae905b 100644 --- a/src/hotspot/share/opto/classes.hpp +++ b/src/hotspot/share/opto/classes.hpp @@ -304,10 +304,8 @@ macro(ReverseI) macro(ReverseL) macro(ReverseV) macro(Root) -macro(RoundDouble) macro(RoundDoubleMode) macro(RoundDoubleModeV) -macro(RoundFloat) macro(RotateLeft) macro(RotateLeftV) macro(RotateRight) diff --git a/src/hotspot/share/opto/compile.cpp b/src/hotspot/share/opto/compile.cpp index 2da8e27d95cd4..112397fac2c7a 100644 --- a/src/hotspot/share/opto/compile.cpp +++ b/src/hotspot/share/opto/compile.cpp @@ -1008,8 +1008,6 @@ void Compile::Init(bool aliasing) { _matcher = nullptr; // filled in later _cfg = nullptr; // filled in later - IA32_ONLY( set_24_bit_selection_and_mode(true, false); ) - _node_note_array = nullptr; _default_node_notes = nullptr; DEBUG_ONLY( _modified_nodes = nullptr; ) // Used in Optimize() 
@@ -4031,17 +4029,6 @@ bool Compile::final_graph_reshaping() { } } -#ifdef IA32 - // If original bytecodes contained a mixture of floats and doubles - // check if the optimizer has made it homogeneous, item (3). - if (UseSSE == 0 && - frc.get_float_count() > 32 && - frc.get_double_count() == 0 && - (10 * frc.get_call_count() < frc.get_float_count()) ) { - set_24_bit_selection_and_mode(false, true); - } -#endif // IA32 - set_java_calls(frc.get_java_call_count()); set_inner_loops(frc.get_inner_loop_count()); diff --git a/src/hotspot/share/opto/compile.hpp b/src/hotspot/share/opto/compile.hpp index 223e703376103..40aec9f393dcd 100644 --- a/src/hotspot/share/opto/compile.hpp +++ b/src/hotspot/share/opto/compile.hpp @@ -1318,21 +1318,6 @@ class Compile : public Phase { bool needs_clinit_barrier(ciMethod* ik, ciMethod* accessing_method); bool needs_clinit_barrier(ciInstanceKlass* ik, ciMethod* accessing_method); -#ifdef IA32 - private: - bool _select_24_bit_instr; // We selected an instruction with a 24-bit result - bool _in_24_bit_fp_mode; // We are emitting instructions with 24-bit results - - // Remember if this compilation changes hardware mode to 24-bit precision. 
- void set_24_bit_selection_and_mode(bool selection, bool mode) { - _select_24_bit_instr = selection; - _in_24_bit_fp_mode = mode; - } - - public: - bool select_24_bit_instr() const { return _select_24_bit_instr; } - bool in_24_bit_fp_mode() const { return _in_24_bit_fp_mode; } -#endif // IA32 #ifdef ASSERT VerifyMeetResult* _type_verify; void set_exception_backedge() { _exception_backedge = true; } diff --git a/src/hotspot/share/opto/convertnode.cpp b/src/hotspot/share/opto/convertnode.cpp index 0a2131782a237..cf61c664a0d67 100644 --- a/src/hotspot/share/opto/convertnode.cpp +++ b/src/hotspot/share/opto/convertnode.cpp @@ -180,16 +180,6 @@ const Type* ConvD2INode::Value(PhaseGVN* phase) const { return TypeInt::make( SharedRuntime::d2i( td->getd() ) ); } -//------------------------------Ideal------------------------------------------ -// If converting to an int type, skip any rounding nodes -Node *ConvD2INode::Ideal(PhaseGVN *phase, bool can_reshape) { - if (in(1)->Opcode() == Op_RoundDouble) { - set_req(1, in(1)->in(1)); - return this; - } - return nullptr; -} - //------------------------------Identity--------------------------------------- // Int's can be converted to doubles with no loss of bits. Hence // converting an integer to a double and back to an integer is a NOP. 
@@ -216,16 +206,6 @@ Node* ConvD2LNode::Identity(PhaseGVN* phase) { return this; } -//------------------------------Ideal------------------------------------------ -// If converting to an int type, skip any rounding nodes -Node *ConvD2LNode::Ideal(PhaseGVN *phase, bool can_reshape) { - if (in(1)->Opcode() == Op_RoundDouble) { - set_req(1, in(1)->in(1)); - return this; - } - return nullptr; -} - //============================================================================= //------------------------------Value------------------------------------------ const Type* ConvF2DNode::Value(PhaseGVN* phase) const { @@ -269,12 +249,7 @@ Node* ConvF2INode::Identity(PhaseGVN* phase) { } //------------------------------Ideal------------------------------------------ -// If converting to an int type, skip any rounding nodes Node *ConvF2INode::Ideal(PhaseGVN *phase, bool can_reshape) { - if (in(1)->Opcode() == Op_RoundFloat) { - set_req(1, in(1)->in(1)); - return this; - } return nullptr; } @@ -298,12 +273,7 @@ Node* ConvF2LNode::Identity(PhaseGVN* phase) { } //------------------------------Ideal------------------------------------------ -// If converting to an int type, skip any rounding nodes Node *ConvF2LNode::Ideal(PhaseGVN *phase, bool can_reshape) { - if (in(1)->Opcode() == Op_RoundFloat) { - set_req(1, in(1)->in(1)); - return this; - } return nullptr; } @@ -833,52 +803,6 @@ Node *ConvL2INode::Ideal(PhaseGVN *phase, bool can_reshape) { return nullptr; } - - -//============================================================================= -//------------------------------Identity--------------------------------------- -// Remove redundant roundings -Node* RoundFloatNode::Identity(PhaseGVN* phase) { - assert(Matcher::strict_fp_requires_explicit_rounding, "should only generate for Intel"); - // Do not round constants - if (phase->type(in(1))->base() == Type::FloatCon) return in(1); - int op = in(1)->Opcode(); - // Redundant rounding - if( op == Op_RoundFloat ) return in(1); - 
// Already rounded - if( op == Op_Parm ) return in(1); - if( op == Op_LoadF ) return in(1); - return this; -} - -//------------------------------Value------------------------------------------ -const Type* RoundFloatNode::Value(PhaseGVN* phase) const { - return phase->type( in(1) ); -} - -//============================================================================= -//------------------------------Identity--------------------------------------- -// Remove redundant roundings. Incoming arguments are already rounded. -Node* RoundDoubleNode::Identity(PhaseGVN* phase) { - assert(Matcher::strict_fp_requires_explicit_rounding, "should only generate for Intel"); - // Do not round constants - if (phase->type(in(1))->base() == Type::DoubleCon) return in(1); - int op = in(1)->Opcode(); - // Redundant rounding - if( op == Op_RoundDouble ) return in(1); - // Already rounded - if( op == Op_Parm ) return in(1); - if( op == Op_LoadD ) return in(1); - if( op == Op_ConvF2D ) return in(1); - if( op == Op_ConvI2D ) return in(1); - return this; -} - -//------------------------------Value------------------------------------------ -const Type* RoundDoubleNode::Value(PhaseGVN* phase) const { - return phase->type( in(1) ); -} - //============================================================================= RoundDoubleModeNode* RoundDoubleModeNode::make(PhaseGVN& gvn, Node* arg, RoundDoubleModeNode::RoundingMode rmode) { ConINode* rm = gvn.intcon(rmode); diff --git a/src/hotspot/share/opto/convertnode.hpp b/src/hotspot/share/opto/convertnode.hpp index 9438176a9f996..10782fee2f6fa 100644 --- a/src/hotspot/share/opto/convertnode.hpp +++ b/src/hotspot/share/opto/convertnode.hpp @@ -79,7 +79,6 @@ class ConvD2INode : public ConvertNode { virtual const Type* in_type() const { return Type::DOUBLE; } virtual const Type* Value(PhaseGVN* phase) const; virtual Node* Identity(PhaseGVN* phase); - virtual Node* Ideal(PhaseGVN* phase, bool can_reshape); }; 
//------------------------------ConvD2LNode------------------------------------ @@ -91,7 +90,6 @@ class ConvD2LNode : public ConvertNode { virtual const Type* in_type() const { return Type::DOUBLE; } virtual const Type* Value(PhaseGVN* phase) const; virtual Node* Identity(PhaseGVN* phase); - virtual Node* Ideal(PhaseGVN* phase, bool can_reshape); }; //------------------------------ConvF2DNode------------------------------------ @@ -229,28 +227,6 @@ class RoundFNode : public Node { virtual uint ideal_reg() const { return Op_RegI; } }; -//-----------------------------RoundFloatNode---------------------------------- -class RoundFloatNode: public Node { - public: - RoundFloatNode(Node* c, Node *in1): Node(c, in1) {} - virtual int Opcode() const; - virtual const Type *bottom_type() const { return Type::FLOAT; } - virtual uint ideal_reg() const { return Op_RegF; } - virtual Node* Identity(PhaseGVN* phase); - virtual const Type* Value(PhaseGVN* phase) const; -}; - - -//-----------------------------RoundDoubleNode--------------------------------- -class RoundDoubleNode: public Node { - public: - RoundDoubleNode(Node* c, Node *in1): Node(c, in1) {} - virtual int Opcode() const; - virtual const Type *bottom_type() const { return Type::DOUBLE; } - virtual uint ideal_reg() const { return Op_RegD; } - virtual Node* Identity(PhaseGVN* phase); - virtual const Type* Value(PhaseGVN* phase) const; -}; //-----------------------------RoundDoubleModeNode----------------------------- class RoundDoubleModeNode: public Node { diff --git a/src/hotspot/share/opto/divnode.cpp b/src/hotspot/share/opto/divnode.cpp index e1b143f65f8cb..b81ac0eae48cb 100644 --- a/src/hotspot/share/opto/divnode.cpp +++ b/src/hotspot/share/opto/divnode.cpp @@ -817,15 +817,11 @@ const Type* DivDNode::Value(PhaseGVN* phase) const { if( t2 == TypeD::ONE ) return t1; - // IA32 would only execute this for non-strict FP, which is never the - // case now. -#if ! 
defined(IA32) // If divisor is a constant and not zero, divide them numbers if( t1->base() == Type::DoubleCon && t2->base() == Type::DoubleCon && t2->getd() != 0.0 ) // could be negative zero return TypeD::make( t1->getd()/t2->getd() ); -#endif // If the dividend is a constant zero // Note: if t1 and t2 are zero then result is NaN (JVMS page 213) diff --git a/src/hotspot/share/opto/doCall.cpp b/src/hotspot/share/opto/doCall.cpp index 0a5e27ed5b13f..9a55c8db75c96 100644 --- a/src/hotspot/share/opto/doCall.cpp +++ b/src/hotspot/share/opto/doCall.cpp @@ -644,9 +644,6 @@ void Parse::do_call() { orig_callee = callee = nullptr; // --------------------- - // Round double arguments before call - round_double_arguments(cg->method()); - // Feed profiling data for arguments to the type system so it can // propagate it as speculative types record_profiled_arguments_for_speculation(cg->method(), bc()); diff --git a/src/hotspot/share/opto/graphKit.cpp b/src/hotspot/share/opto/graphKit.cpp index 0cc68c426398e..2f7bf23c86075 100644 --- a/src/hotspot/share/opto/graphKit.cpp +++ b/src/hotspot/share/opto/graphKit.cpp @@ -2365,52 +2365,6 @@ void GraphKit::record_profiled_return_for_speculation() { } } -void GraphKit::round_double_arguments(ciMethod* dest_method) { - if (Matcher::strict_fp_requires_explicit_rounding) { - // (Note: TypeFunc::make has a cache that makes this fast.) 
- const TypeFunc* tf = TypeFunc::make(dest_method); - int nargs = tf->domain()->cnt() - TypeFunc::Parms; - for (int j = 0; j < nargs; j++) { - const Type *targ = tf->domain()->field_at(j + TypeFunc::Parms); - if (targ->basic_type() == T_DOUBLE) { - // If any parameters are doubles, they must be rounded before - // the call, dprecision_rounding does gvn.transform - Node *arg = argument(j); - arg = dprecision_rounding(arg); - set_argument(j, arg); - } - } - } -} - -// rounding for strict float precision conformance -Node* GraphKit::precision_rounding(Node* n) { - if (Matcher::strict_fp_requires_explicit_rounding) { -#ifdef IA32 - if (UseSSE == 0) { - return _gvn.transform(new RoundFloatNode(nullptr, n)); - } -#else - Unimplemented(); -#endif // IA32 - } - return n; -} - -// rounding for strict double precision conformance -Node* GraphKit::dprecision_rounding(Node *n) { - if (Matcher::strict_fp_requires_explicit_rounding) { -#ifdef IA32 - if (UseSSE < 2) { - return _gvn.transform(new RoundDoubleNode(nullptr, n)); - } -#else - Unimplemented(); -#endif // IA32 - } - return n; -} - //============================================================================= // Generate a fast path/slow path idiom. 
Graph looks like: // [foo] indicates that 'foo' is a parameter diff --git a/src/hotspot/share/opto/graphKit.hpp b/src/hotspot/share/opto/graphKit.hpp index b0150df04ed2b..fcf5e501bd038 100644 --- a/src/hotspot/share/opto/graphKit.hpp +++ b/src/hotspot/share/opto/graphKit.hpp @@ -752,14 +752,6 @@ class GraphKit : public Phase { void final_sync(IdealKit& ideal); public: - // Helper function to round double arguments before a call - void round_double_arguments(ciMethod* dest_method); - - // rounding for strict float precision conformance - Node* precision_rounding(Node* n); - - // rounding for strict double precision conformance - Node* dprecision_rounding(Node* n); // Helper functions for fast/slow path codes Node* opt_iff(Node* region, Node* iff); diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp index 40b19eecd9f65..99ad8492e2e99 100644 --- a/src/hotspot/share/opto/library_call.cpp +++ b/src/hotspot/share/opto/library_call.cpp @@ -1723,20 +1723,6 @@ bool LibraryCallKit::inline_string_char_access(bool is_store) { return true; } -//--------------------------round_double_node-------------------------------- -// Round a double node if necessary. 
-Node* LibraryCallKit::round_double_node(Node* n) { - if (Matcher::strict_fp_requires_explicit_rounding) { -#ifdef IA32 - if (UseSSE < 2) { - n = _gvn.transform(new RoundDoubleNode(nullptr, n)); - } -#else - Unimplemented(); -#endif // IA32 - } - return n; -} //------------------------------inline_math----------------------------------- // public static double Math.abs(double) @@ -1745,7 +1731,7 @@ Node* LibraryCallKit::round_double_node(Node* n) { // public static double Math.log10(double) // public static double Math.round(double) bool LibraryCallKit::inline_double_math(vmIntrinsics::ID id) { - Node* arg = round_double_node(argument(0)); + Node* arg = argument(0); Node* n = nullptr; switch (id) { case vmIntrinsics::_dabs: n = new AbsDNode( arg); break; @@ -1756,7 +1742,7 @@ bool LibraryCallKit::inline_double_math(vmIntrinsics::ID id) { case vmIntrinsics::_floor: n = RoundDoubleModeNode::make(_gvn, arg, RoundDoubleModeNode::rmode_floor); break; case vmIntrinsics::_rint: n = RoundDoubleModeNode::make(_gvn, arg, RoundDoubleModeNode::rmode_rint); break; case vmIntrinsics::_roundD: n = new RoundDNode(arg); break; - case vmIntrinsics::_dcopySign: n = CopySignDNode::make(_gvn, arg, round_double_node(argument(2))); break; + case vmIntrinsics::_dcopySign: n = CopySignDNode::make(_gvn, arg, argument(2)); break; case vmIntrinsics::_dsignum: n = SignumDNode::make(_gvn, arg); break; default: fatal_unexpected_iid(id); break; } @@ -1790,8 +1776,8 @@ bool LibraryCallKit::runtime_math(const TypeFunc* call_type, address funcAddr, c "must be (DD)D or (D)D type"); // Inputs - Node* a = round_double_node(argument(0)); - Node* b = (call_type == OptoRuntime::Math_DD_D_Type()) ? round_double_node(argument(2)) : nullptr; + Node* a = argument(0); + Node* b = (call_type == OptoRuntime::Math_DD_D_Type()) ? 
argument(2) : nullptr; const TypePtr* no_memory_effects = nullptr; Node* trig = make_runtime_call(RC_LEAF, call_type, funcAddr, funcName, @@ -1809,17 +1795,17 @@ bool LibraryCallKit::runtime_math(const TypeFunc* call_type, address funcAddr, c //------------------------------inline_math_pow----------------------------- bool LibraryCallKit::inline_math_pow() { - Node* exp = round_double_node(argument(2)); + Node* exp = argument(2); const TypeD* d = _gvn.type(exp)->isa_double_constant(); if (d != nullptr) { if (d->getd() == 2.0) { // Special case: pow(x, 2.0) => x * x - Node* base = round_double_node(argument(0)); + Node* base = argument(0); set_result(_gvn.transform(new MulDNode(base, base))); return true; } else if (d->getd() == 0.5 && Matcher::match_rule_supported(Op_SqrtD)) { // Special case: pow(x, 0.5) => sqrt(x) - Node* base = round_double_node(argument(0)); + Node* base = argument(0); Node* zero = _gvn.zerocon(T_DOUBLE); RegionNode* region = new RegionNode(3); @@ -8261,9 +8247,9 @@ bool LibraryCallKit::inline_fma(vmIntrinsics::ID id) { case vmIntrinsics::_fmaD: assert(callee()->signature()->size() == 6, "fma has 3 parameters of size 2 each."); // no receiver since it is static method - a = round_double_node(argument(0)); - b = round_double_node(argument(2)); - c = round_double_node(argument(4)); + a = argument(0); + b = argument(2); + c = argument(4); result = _gvn.transform(new FmaDNode(control(), a, b, c)); break; case vmIntrinsics::_fmaF: @@ -8359,8 +8345,8 @@ bool LibraryCallKit::inline_fp_min_max(vmIntrinsics::ID id) { case vmIntrinsics::_maxD_strict: case vmIntrinsics::_minD_strict: assert(callee()->signature()->size() == 4, "minD/maxD has 2 parameters of size 2 each."); - a = round_double_node(argument(0)); - b = round_double_node(argument(2)); + a = argument(0); + b = argument(2); break; default: fatal_unexpected_iid(id); diff --git a/src/hotspot/share/opto/library_call.hpp b/src/hotspot/share/opto/library_call.hpp index f629f757df2bf..28004b24ad33d 
100644 --- a/src/hotspot/share/opto/library_call.hpp +++ b/src/hotspot/share/opto/library_call.hpp @@ -198,7 +198,6 @@ class LibraryCallKit : public GraphKit { bool inline_string_getCharsU(); bool inline_string_copy(bool compress); bool inline_string_char_access(bool is_store); - Node* round_double_node(Node* n); bool runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName); bool inline_math_native(vmIntrinsics::ID id); bool inline_math(vmIntrinsics::ID id); diff --git a/src/hotspot/share/opto/machnode.hpp b/src/hotspot/share/opto/machnode.hpp index 4ac91175f78ed..ee65d57c0eb54 100644 --- a/src/hotspot/share/opto/machnode.hpp +++ b/src/hotspot/share/opto/machnode.hpp @@ -99,7 +99,7 @@ class MachOper : public ResourceObj { return ::as_FloatRegister(reg(ra_, node, idx)); } -#if defined(IA32) || defined(AMD64) +#if defined(AMD64) KRegister as_KRegister(PhaseRegAlloc *ra_, const Node *node) const { return ::as_KRegister(reg(ra_, node)); } diff --git a/src/hotspot/share/opto/mulnode.cpp b/src/hotspot/share/opto/mulnode.cpp index ad98fda025f07..40c13014ba4e0 100644 --- a/src/hotspot/share/opto/mulnode.cpp +++ b/src/hotspot/share/opto/mulnode.cpp @@ -201,14 +201,6 @@ const Type* MulNode::Value(PhaseGVN* phase) const { if( t1 == Type::BOTTOM || t2 == Type::BOTTOM ) return bottom_type(); -#if defined(IA32) - // Can't trust native compilers to properly fold strict double - // multiplication with round-to-zero on this platform. 
- if (op == Op_MulD) { - return TypeD::DOUBLE; - } -#endif - return mul_ring(t1,t2); // Local flavor of type multiplication } diff --git a/src/hotspot/share/opto/parse2.cpp b/src/hotspot/share/opto/parse2.cpp index 73cf923480822..a5cf3333a7ee6 100644 --- a/src/hotspot/share/opto/parse2.cpp +++ b/src/hotspot/share/opto/parse2.cpp @@ -2080,19 +2080,19 @@ void Parse::do_one_bytecode() { // double stores case Bytecodes::_dstore_0: - set_pair_local( 0, dprecision_rounding(pop_pair()) ); + set_pair_local( 0, pop_pair() ); break; case Bytecodes::_dstore_1: - set_pair_local( 1, dprecision_rounding(pop_pair()) ); + set_pair_local( 1, pop_pair() ); break; case Bytecodes::_dstore_2: - set_pair_local( 2, dprecision_rounding(pop_pair()) ); + set_pair_local( 2, pop_pair() ); break; case Bytecodes::_dstore_3: - set_pair_local( 3, dprecision_rounding(pop_pair()) ); + set_pair_local( 3, pop_pair() ); break; case Bytecodes::_dstore: - set_pair_local( iter().get_index(), dprecision_rounding(pop_pair()) ); + set_pair_local( iter().get_index(), pop_pair() ); break; case Bytecodes::_pop: dec_sp(1); break; @@ -2274,32 +2274,28 @@ void Parse::do_one_bytecode() { b = pop(); a = pop(); c = _gvn.transform( new SubFNode(a,b) ); - d = precision_rounding(c); - push( d ); + push( c ); break; case Bytecodes::_fadd: b = pop(); a = pop(); c = _gvn.transform( new AddFNode(a,b) ); - d = precision_rounding(c); - push( d ); + push( c ); break; case Bytecodes::_fmul: b = pop(); a = pop(); c = _gvn.transform( new MulFNode(a,b) ); - d = precision_rounding(c); - push( d ); + push( c ); break; case Bytecodes::_fdiv: b = pop(); a = pop(); c = _gvn.transform( new DivFNode(nullptr,a,b) ); - d = precision_rounding(c); - push( d ); + push( c ); break; case Bytecodes::_frem: @@ -2308,8 +2304,7 @@ void Parse::do_one_bytecode() { b = pop(); a = pop(); c = _gvn.transform( new ModFNode(nullptr,a,b) ); - d = precision_rounding(c); - push( d ); + push( c ); } else { // Generate a call. 
@@ -2366,11 +2361,6 @@ void Parse::do_one_bytecode() { if (Matcher::convL2FSupported()) { a = pop_pair(); b = _gvn.transform( new ConvL2FNode(a)); - // For x86_32.ad, FILD doesn't restrict precision to 24 or 53 bits. - // Rather than storing the result into an FP register then pushing - // out to memory to round, the machine instruction that implements - // ConvL2D is responsible for rounding. - // c = precision_rounding(b); push(b); } else { l2f(); @@ -2380,8 +2370,6 @@ void Parse::do_one_bytecode() { case Bytecodes::_l2d: a = pop_pair(); b = _gvn.transform( new ConvL2DNode(a)); - // For x86_32.ad, rounding is always necessary (see _l2f above). - // c = dprecision_rounding(b); push_pair(b); break; @@ -2401,32 +2389,28 @@ void Parse::do_one_bytecode() { b = pop_pair(); a = pop_pair(); c = _gvn.transform( new SubDNode(a,b) ); - d = dprecision_rounding(c); - push_pair( d ); + push_pair( c ); break; case Bytecodes::_dadd: b = pop_pair(); a = pop_pair(); c = _gvn.transform( new AddDNode(a,b) ); - d = dprecision_rounding(c); - push_pair( d ); + push_pair( c ); break; case Bytecodes::_dmul: b = pop_pair(); a = pop_pair(); c = _gvn.transform( new MulDNode(a,b) ); - d = dprecision_rounding(c); - push_pair( d ); + push_pair( c ); break; case Bytecodes::_ddiv: b = pop_pair(); a = pop_pair(); c = _gvn.transform( new DivDNode(nullptr,a,b) ); - d = dprecision_rounding(c); - push_pair( d ); + push_pair( c ); break; case Bytecodes::_dneg: @@ -2443,8 +2427,7 @@ void Parse::do_one_bytecode() { // a % b c = _gvn.transform( new ModDNode(nullptr,a,b) ); - d = dprecision_rounding(c); - push_pair( d ); + push_pair( c ); } else { // Generate a call. 
@@ -2621,7 +2604,6 @@ void Parse::do_one_bytecode() { case Bytecodes::_i2f: a = pop(); b = _gvn.transform( new ConvI2FNode(a) ) ; - c = precision_rounding(b); push (b); break; diff --git a/src/hotspot/share/runtime/flags/jvmFlagConstraintsCompiler.cpp b/src/hotspot/share/runtime/flags/jvmFlagConstraintsCompiler.cpp index ad2e755e69868..f38fe371f657b 100644 --- a/src/hotspot/share/runtime/flags/jvmFlagConstraintsCompiler.cpp +++ b/src/hotspot/share/runtime/flags/jvmFlagConstraintsCompiler.cpp @@ -348,9 +348,7 @@ JVMFlag::Error InteriorEntryAlignmentConstraintFunc(intx value, bool verbose) { } int minimum_alignment = 16; -#if defined(X86) && !defined(AMD64) - minimum_alignment = 4; -#elif defined(S390) +#if defined(S390) minimum_alignment = 2; #endif diff --git a/src/hotspot/share/runtime/globals.hpp b/src/hotspot/share/runtime/globals.hpp index 11c13fe3c9e19..e4857a2ef0d74 100644 --- a/src/hotspot/share/runtime/globals.hpp +++ b/src/hotspot/share/runtime/globals.hpp @@ -1158,9 +1158,6 @@ const int ObjectAlignmentInBytes = 8; develop(bool, VerifyJNIFields, trueInDebug, \ "Verify jfieldIDs for instance fields") \ \ - develop(bool, VerifyFPU, false, \ - "Verify FPU state (check for NaN's, etc.)") \ - \ develop(bool, VerifyActivationFrameSize, false, \ "Verify that activation frame didn't become smaller than its " \ "minimal size") \ diff --git a/test/hotspot/gtest/x86/test_assemblerx86.cpp b/test/hotspot/gtest/x86/test_assemblerx86.cpp index 85cb0d13f63e3..f43cc52221c7a 100644 --- a/test/hotspot/gtest/x86/test_assemblerx86.cpp +++ b/test/hotspot/gtest/x86/test_assemblerx86.cpp @@ -31,7 +31,9 @@ #include "asm/assembler.hpp" #include "asm/assembler.inline.hpp" +#include "asm/codeBuffer.hpp" #include "asm/macroAssembler.hpp" +#include "code/codeCache.hpp" #include "memory/resourceArea.hpp" #include "unittest.hpp"