diff --git a/.github/workflows/submit.yml b/.github/workflows/submit.yml
index 382c4e20872..ebe72381738 100644
--- a/.github/workflows/submit.yml
+++ b/.github/workflows/submit.yml
@@ -9,7 +9,7 @@ on:
platforms:
description: "Platform(s) to execute on"
required: true
- default: "Linux x64, Windows x64, macOS x64"
+ default: "Linux x64, Linux x86, Windows x64, macOS x64"
jobs:
prerequisites:
@@ -18,6 +18,7 @@ jobs:
outputs:
should_run: ${{ steps.check_submit.outputs.should_run }}
bundle_id: ${{ steps.check_bundle_id.outputs.bundle_id }}
+ platform_linux_x86: ${{ steps.check_platforms.outputs.platform_linux_x86 }}
platform_linux_x64: ${{ steps.check_platforms.outputs.platform_linux_x64 }}
platform_windows_x64: ${{ steps.check_platforms.outputs.platform_windows_x64 }}
platform_macos_x64: ${{ steps.check_platforms.outputs.platform_macos_x64 }}
@@ -32,11 +33,13 @@ jobs:
id: check_platforms
run: |
echo "::set-output name=platform_linux_x64::${{ contains(github.event.inputs.platforms, 'linux x64') || (github.event.inputs.platforms == '' && (secrets.JDK_SUBMIT_PLATFORMS == '' || contains(secrets.JDK_SUBMIT_PLATFORMS, 'linux x64'))) }}"
+ echo "::set-output name=platform_linux_x86::${{ contains(github.event.inputs.platforms, 'linux x86') || (github.event.inputs.platforms == '' && (secrets.JDK_SUBMIT_PLATFORMS == '' || contains(secrets.JDK_SUBMIT_PLATFORMS, 'linux x86'))) }}"
echo "::set-output name=platform_windows_x64::${{ contains(github.event.inputs.platforms, 'windows x64') || (github.event.inputs.platforms == '' && (secrets.JDK_SUBMIT_PLATFORMS == '' || contains(secrets.JDK_SUBMIT_PLATFORMS, 'windows x64'))) }}"
echo "::set-output name=platform_macos_x64::${{ contains(github.event.inputs.platforms, 'macos x64') || (github.event.inputs.platforms == '' && (secrets.JDK_SUBMIT_PLATFORMS == '' || contains(secrets.JDK_SUBMIT_PLATFORMS, 'macos x64'))) }}"
if: steps.check_submit.outputs.should_run != 'false'
- name: Determine unique bundle identifier
+ id: check_bundle_id
run: echo "::set-output name=bundle_id::${GITHUB_ACTOR}_${GITHUB_SHA:0:8}"
if: steps.check_submit.outputs.should_run != 'false'
@@ -113,7 +116,7 @@ jobs:
flags: --enable-debug
artifact: -debug
- flavor: build hotspot no-pch
- flags: --disable-precompiled-headers
+ flags: --enable-debug --disable-precompiled-headers
build-target: hotspot
- flavor: build hotspot zero
flags: --enable-debug --disable-precompiled-headers --with-jvm-variants=zero
@@ -348,12 +351,326 @@ jobs:
if: always()
run: echo "logsuffix=`echo ${{ matrix.test }} | sed -e 's!/!_!'g -e 's! !_!'g`" >> $GITHUB_ENV
- - name: Persist test logs
+ - name: Package test results
+ if: always()
+ working-directory: build/run-test-prebuilt/test-results/
+ run: >
+ zip -r9
+ "$HOME/linux-x64${{ matrix.artifact }}_testresults_${{ env.logsuffix }}.zip"
+ .
+ continue-on-error: true
+
+ - name: Package test support
+ if: always()
+ working-directory: build/run-test-prebuilt/test-support/
+ run: >
+ zip -r9
+ "$HOME/linux-x64${{ matrix.artifact }}_testsupport_${{ env.logsuffix }}.zip"
+ .
+ -i *.jtr
+ -i */hs_err*.log
+ -i */replay*.log
+ continue-on-error: true
+
+ - name: Persist test results
+ if: always()
+ uses: actions/upload-artifact@v2
+ with:
+ path: ~/linux-x64${{ matrix.artifact }}_testresults_${{ env.logsuffix }}.zip
+ continue-on-error: true
+
+ - name: Persist test outputs
+ if: always()
+ uses: actions/upload-artifact@v2
+ with:
+ path: ~/linux-x64${{ matrix.artifact }}_testsupport_${{ env.logsuffix }}.zip
+ continue-on-error: true
+
+ linux_x86_build:
+ name: Linux x86
+ runs-on: "ubuntu-latest"
+ needs: prerequisites
+ if: needs.prerequisites.outputs.should_run != 'false' && needs.prerequisites.outputs.platform_linux_x86 != 'false'
+
+ strategy:
+ fail-fast: false
+ matrix:
+ flavor:
+ - build release
+ - build debug
+ include:
+ - flavor: build debug
+ flags: --enable-debug
+ artifact: -debug
+
+ # Reduced 32-bit build uses the same boot JDK as 64-bit build
+ env:
+ JDK_VERSION: "${{ fromJson(needs.prerequisites.outputs.dependencies).DEFAULT_VERSION_FEATURE }}"
+ BOOT_JDK_VERSION: "${{ fromJson(needs.prerequisites.outputs.dependencies).BOOT_JDK_VERSION }}"
+ BOOT_JDK_FILENAME: "${{ fromJson(needs.prerequisites.outputs.dependencies).LINUX_X64_BOOT_JDK_FILENAME }}"
+ BOOT_JDK_URL: "${{ fromJson(needs.prerequisites.outputs.dependencies).LINUX_X64_BOOT_JDK_URL }}"
+ BOOT_JDK_SHA256: "${{ fromJson(needs.prerequisites.outputs.dependencies).LINUX_X64_BOOT_JDK_SHA256 }}"
+
+ steps:
+ - name: Checkout the source
+ uses: actions/checkout@v2
+ with:
+ path: jdk
+
+ - name: Restore boot JDK from cache
+ id: bootjdk
+ uses: actions/cache@v2
+ with:
+ path: ~/bootjdk/${{ env.BOOT_JDK_VERSION }}
+ key: bootjdk-${{ runner.os }}-${{ env.BOOT_JDK_VERSION }}-${{ env.BOOT_JDK_SHA256 }}-v1
+
+ - name: Download boot JDK
+ run: |
+ mkdir -p "${HOME}/bootjdk/${BOOT_JDK_VERSION}"
+ wget -O "${HOME}/bootjdk/${BOOT_JDK_FILENAME}" "${BOOT_JDK_URL}"
+ echo "${BOOT_JDK_SHA256} ${HOME}/bootjdk/${BOOT_JDK_FILENAME}" | sha256sum -c >/dev/null -
+ tar -xf "${HOME}/bootjdk/${BOOT_JDK_FILENAME}" -C "${HOME}/bootjdk/${BOOT_JDK_VERSION}"
+ mv "${HOME}/bootjdk/${BOOT_JDK_VERSION}/"*/* "${HOME}/bootjdk/${BOOT_JDK_VERSION}/"
+ if: steps.bootjdk.outputs.cache-hit != 'true'
+
+ - name: Restore jtreg artifact
+ id: jtreg_restore
+ uses: actions/download-artifact@v2
+ with:
+ name: transient_jtreg_${{ needs.prerequisites.outputs.bundle_id }}
+ path: ~/jtreg/
+ continue-on-error: true
+
+ - name: Restore jtreg artifact (retry)
+ uses: actions/download-artifact@v2
+ with:
+ name: transient_jtreg_${{ needs.prerequisites.outputs.bundle_id }}
+ path: ~/jtreg/
+ if: steps.jtreg_restore.outcome == 'failure'
+
+ - name: Checkout gtest sources
+ uses: actions/checkout@v2
+ with:
+ repository: "google/googletest"
+ ref: "release-${{ fromJson(needs.prerequisites.outputs.dependencies).GTEST_VERSION }}"
+ path: gtest
+
+ # Roll in the multilib environment and its dependencies.
+ # Some multilib libraries do not have proper inter-dependencies, so we have to
+ # install their dependencies manually.
+ - name: Install dependencies
+ run: |
+ sudo dpkg --add-architecture i386
+ sudo apt-get update
+ sudo apt-get install gcc-multilib g++-multilib libfreetype6-dev:i386 libxrandr-dev:i386 libxtst-dev:i386 libtiff-dev:i386 libcupsimage2-dev:i386 libcups2-dev:i386 libasound2-dev:i386
+
+ - name: Configure
+ run: >
+ bash configure
+ --with-conf-name=linux-x86
+ --with-target-bits=32
+ ${{ matrix.flags }}
+ --with-version-opt=${GITHUB_ACTOR}-${GITHUB_SHA}
+ --with-version-build=0
+ --with-boot-jdk=${HOME}/bootjdk/${BOOT_JDK_VERSION}
+ --with-jtreg=${HOME}/jtreg
+ --with-gtest=${GITHUB_WORKSPACE}/gtest
+ --with-default-make-target="product-bundles test-bundles"
+ --with-zlib=system
+ --enable-jtreg-failure-handler
+ working-directory: jdk
+
+ - name: Build
+ run: make CONF_NAME=linux-x86 ${{ matrix.build-target }}
+ working-directory: jdk
+
+ - name: Persist test bundles
+ uses: actions/upload-artifact@v2
+ with:
+ name: transient_jdk-linux-x86${{ matrix.artifact }}_${{ needs.prerequisites.outputs.bundle_id }}
+ path: |
+ jdk/build/linux-x86/bundles/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin${{ matrix.artifact }}.tar.gz
+ jdk/build/linux-x86/bundles/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin-tests${{ matrix.artifact }}.tar.gz
+ if: matrix.build-target == false
+
+ linux_x86_test:
+ name: Linux x86
+ runs-on: "ubuntu-latest"
+ needs:
+ - prerequisites
+ - linux_x86_build
+
+ strategy:
+ fail-fast: false
+ matrix:
+ test:
+ - jdk/tier1 part 1
+ - jdk/tier1 part 2
+ - jdk/tier1 part 3
+ - langtools/tier1
+ - hs/tier1 common
+ - hs/tier1 compiler
+ - hs/tier1 gc
+ - hs/tier1 runtime
+ - hs/tier1 serviceability
+ include:
+ - test: jdk/tier1 part 1
+ suites: test/jdk/:tier1_part1
+ - test: jdk/tier1 part 2
+ suites: test/jdk/:tier1_part2
+ - test: jdk/tier1 part 3
+ suites: test/jdk/:tier1_part3
+ - test: langtools/tier1
+ suites: test/langtools/:tier1
+ - test: hs/tier1 common
+ suites: test/hotspot/jtreg/:tier1_common
+ artifact: -debug
+ - test: hs/tier1 compiler
+ suites: test/hotspot/jtreg/:tier1_compiler
+ artifact: -debug
+ - test: hs/tier1 gc
+ suites: test/hotspot/jtreg/:tier1_gc
+ artifact: -debug
+ - test: hs/tier1 runtime
+ suites: test/hotspot/jtreg/:tier1_runtime
+ artifact: -debug
+ - test: hs/tier1 serviceability
+ suites: test/hotspot/jtreg/:tier1_serviceability
+ artifact: -debug
+
+ # Reduced 32-bit build uses the same boot JDK as 64-bit build
+ env:
+ JDK_VERSION: "${{ fromJson(needs.prerequisites.outputs.dependencies).DEFAULT_VERSION_FEATURE }}"
+ BOOT_JDK_VERSION: "${{ fromJson(needs.prerequisites.outputs.dependencies).BOOT_JDK_VERSION }}"
+ BOOT_JDK_FILENAME: "${{ fromJson(needs.prerequisites.outputs.dependencies).LINUX_X64_BOOT_JDK_FILENAME }}"
+ BOOT_JDK_URL: "${{ fromJson(needs.prerequisites.outputs.dependencies).LINUX_X64_BOOT_JDK_URL }}"
+ BOOT_JDK_SHA256: "${{ fromJson(needs.prerequisites.outputs.dependencies).LINUX_X64_BOOT_JDK_SHA256 }}"
+
+ steps:
+ - name: Checkout the source
+ uses: actions/checkout@v2
+
+ - name: Restore boot JDK from cache
+ id: bootjdk
+ uses: actions/cache@v2
+ with:
+ path: ~/bootjdk/${{ env.BOOT_JDK_VERSION }}
+ key: bootjdk-${{ runner.os }}-${{ env.BOOT_JDK_VERSION }}-${{ env.BOOT_JDK_SHA256 }}-v1
+
+ - name: Download boot JDK
+ run: |
+ mkdir -p "${HOME}/bootjdk/${BOOT_JDK_VERSION}"
+ wget -O "${HOME}/bootjdk/${BOOT_JDK_FILENAME}" "${BOOT_JDK_URL}"
+ echo "${BOOT_JDK_SHA256} ${HOME}/bootjdk/${BOOT_JDK_FILENAME}" | sha256sum -c >/dev/null -
+ tar -xf "${HOME}/bootjdk/${BOOT_JDK_FILENAME}" -C "${HOME}/bootjdk/${BOOT_JDK_VERSION}"
+ mv "${HOME}/bootjdk/${BOOT_JDK_VERSION}/"*/* "${HOME}/bootjdk/${BOOT_JDK_VERSION}/"
+ if: steps.bootjdk.outputs.cache-hit != 'true'
+
+ - name: Restore jtreg artifact
+ id: jtreg_restore
+ uses: actions/download-artifact@v2
+ with:
+ name: transient_jtreg_${{ needs.prerequisites.outputs.bundle_id }}
+ path: ~/jtreg/
+ continue-on-error: true
+
+ - name: Restore jtreg artifact (retry)
+ uses: actions/download-artifact@v2
+ with:
+ name: transient_jtreg_${{ needs.prerequisites.outputs.bundle_id }}
+ path: ~/jtreg/
+ if: steps.jtreg_restore.outcome == 'failure'
+
+ - name: Restore build artifacts
+ id: build_restore
+ uses: actions/download-artifact@v2
+ with:
+ name: transient_jdk-linux-x86${{ matrix.artifact }}_${{ needs.prerequisites.outputs.bundle_id }}
+ path: ~/jdk-linux-x86${{ matrix.artifact }}
+ continue-on-error: true
+
+ - name: Restore build artifacts (retry)
+ uses: actions/download-artifact@v2
+ with:
+ name: transient_jdk-linux-x86${{ matrix.artifact }}_${{ needs.prerequisites.outputs.bundle_id }}
+ path: ~/jdk-linux-x86${{ matrix.artifact }}
+ if: steps.build_restore.outcome == 'failure'
+
+ - name: Unpack jdk
+ run: |
+ mkdir -p "${HOME}/jdk-linux-x86${{ matrix.artifact }}/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin${{ matrix.artifact }}"
+ tar -xf "${HOME}/jdk-linux-x86${{ matrix.artifact }}/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin${{ matrix.artifact }}.tar.gz" -C "${HOME}/jdk-linux-x86${{ matrix.artifact }}/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin${{ matrix.artifact }}"
+
+ - name: Unpack tests
+ run: |
+ mkdir -p "${HOME}/jdk-linux-x86${{ matrix.artifact }}/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin-tests${{ matrix.artifact }}"
+ tar -xf "${HOME}/jdk-linux-x86${{ matrix.artifact }}/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin-tests${{ matrix.artifact }}.tar.gz" -C "${HOME}/jdk-linux-x86${{ matrix.artifact }}/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin-tests${{ matrix.artifact }}"
+
+ - name: Find root of jdk image dir
+ run: |
+ imageroot=`find ${HOME}/jdk-linux-x86${{ matrix.artifact }}/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin${{ matrix.artifact }} -name release -type f`
+ echo "imageroot=`dirname ${imageroot}`" >> $GITHUB_ENV
+
+ - name: Run tests
+ run: >
+ JDK_IMAGE_DIR=${{ env.imageroot }}
+ TEST_IMAGE_DIR=${HOME}/jdk-linux-x86${{ matrix.artifact }}/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin-tests${{ matrix.artifact }}
+ BOOT_JDK=${HOME}/bootjdk/${BOOT_JDK_VERSION}
+ JT_HOME=${HOME}/jtreg
+ make test-prebuilt
+ CONF_NAME=run-test-prebuilt
+ LOG_CMDLINES=true
+ JTREG_VERBOSE=fail,error,time
+ TEST="${{ matrix.suites }}"
+ TEST_OPTS_JAVA_OPTIONS=
+ JTREG_KEYWORDS="!headful"
+ JTREG="JAVA_OPTIONS=-XX:-CreateCoredumpOnCrash"
+
+ - name: Check that all tests executed successfully
+ if: always()
+ run: >
+ if ! grep --include=test-summary.txt -lqr build/*/test-results -e "TEST SUCCESS" ; then
+ cat build/*/test-results/*/text/newfailures.txt ;
+ exit 1 ;
+ fi
+
+ - name: Create suitable test log artifact name
+ if: always()
+ run: echo "logsuffix=`echo ${{ matrix.test }} | sed -e 's!/!_!'g -e 's! !_!'g`" >> $GITHUB_ENV
+
+ - name: Package test results
+ if: always()
+ working-directory: build/run-test-prebuilt/test-results/
+ run: >
+ zip -r9
+ "$HOME/linux-x86${{ matrix.artifact }}_testresults_${{ env.logsuffix }}.zip"
+ .
+ continue-on-error: true
+
+ - name: Package test support
+ if: always()
+ working-directory: build/run-test-prebuilt/test-support/
+ run: >
+ zip -r9
+ "$HOME/linux-x86${{ matrix.artifact }}_testsupport_${{ env.logsuffix }}.zip"
+ .
+ -i *.jtr
+ -i */hs_err*.log
+ -i */replay*.log
+ continue-on-error: true
+
+ - name: Persist test results
if: always()
uses: actions/upload-artifact@v2
with:
- name: linux-x64${{ matrix.artifact }}_testlogs_${{ env.logsuffix }}
- path: build/*/test-results
+ path: ~/linux-x86${{ matrix.artifact }}_testresults_${{ env.logsuffix }}.zip
+ continue-on-error: true
+
+ - name: Persist test outputs
+ if: always()
+ uses: actions/upload-artifact@v2
+ with:
+ path: ~/linux-x86${{ matrix.artifact }}_testsupport_${{ env.logsuffix }}.zip
continue-on-error: true
windows_x64_build:
@@ -635,12 +952,41 @@ jobs:
if: always()
run: echo ("logsuffix=" + ("${{ matrix.test }}" -replace "/", "_" -replace " ", "_")) | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8
- - name: Persist test logs
+ - name: Package test results
+ if: always()
+ working-directory: build/run-test-prebuilt/test-results/
+ run: >
+ $env:Path = "$HOME\cygwin\cygwin64\bin;$env:Path" ;
+ zip -r9
+ "$HOME/windows-x64${{ matrix.artifact }}_testresults_${{ env.logsuffix }}.zip"
+ .
+ continue-on-error: true
+
+ - name: Package test support
+ if: always()
+ working-directory: build/run-test-prebuilt/test-support/
+ run: >
+ $env:Path = "$HOME\cygwin\cygwin64\bin;$env:Path" ;
+ zip -r9
+ "$HOME/windows-x64${{ matrix.artifact }}_testsupport_${{ env.logsuffix }}.zip"
+ .
+ -i *.jtr
+ -i */hs_err*.log
+ -i */replay*.log
+ continue-on-error: true
+
+ - name: Persist test results
if: always()
uses: actions/upload-artifact@v2
with:
- name: windows-x64${{ matrix.artifact }}_testlogs_${{ env.logsuffix }}
- path: build/*/test-results
+ path: ~/windows-x64${{ matrix.artifact }}_testresults_${{ env.logsuffix }}.zip
+ continue-on-error: true
+
+ - name: Persist test outputs
+ if: always()
+ uses: actions/upload-artifact@v2
+ with:
+ path: ~/windows-x64${{ matrix.artifact }}_testsupport_${{ env.logsuffix }}.zip
continue-on-error: true
macos_x64_build:
@@ -890,12 +1236,39 @@ jobs:
if: always()
run: echo "logsuffix=`echo ${{ matrix.test }} | sed -e 's!/!_!'g -e 's! !_!'g`" >> $GITHUB_ENV
- - name: Persist test logs
+ - name: Package test results
+ if: always()
+ working-directory: build/run-test-prebuilt/test-results/
+ run: >
+ zip -r9
+ "$HOME/macos-x64${{ matrix.artifact }}_testresults_${{ env.logsuffix }}.zip"
+ .
+ continue-on-error: true
+
+ - name: Package test support
+ if: always()
+ working-directory: build/run-test-prebuilt/test-support/
+ run: >
+ zip -r9
+ "$HOME/macos-x64${{ matrix.artifact }}_testsupport_${{ env.logsuffix }}.zip"
+ .
+ -i *.jtr
+ -i */hs_err*.log
+ -i */replay*.log
+ continue-on-error: true
+
+ - name: Persist test results
+ if: always()
+ uses: actions/upload-artifact@v2
+ with:
+ path: ~/macos-x64${{ matrix.artifact }}_testresults_${{ env.logsuffix }}.zip
+ continue-on-error: true
+
+ - name: Persist test outputs
if: always()
uses: actions/upload-artifact@v2
with:
- name: macos-x64${{ matrix.artifact }}_testlogs_${{ env.logsuffix }}
- path: build/*/test-results
+ path: ~/macos-x64${{ matrix.artifact }}_testsupport_${{ env.logsuffix }}.zip
continue-on-error: true
artifacts:
@@ -904,7 +1277,9 @@ jobs:
if: always()
continue-on-error: true
needs:
+ - prerequisites
- linux_x64_test
+ - linux_x86_test
- windows_x64_test
- macos_x64_test
diff --git a/.gitignore b/.gitignore
index c34d27c8470..cf21c8919cd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
/dist/
/.idea/
/.vscode/
+/nbproject/
nbproject/private/
/webrev
/.src-rev
@@ -14,3 +15,4 @@ test/nashorn/lib
NashornProfile.txt
**/JTreport/**
**/JTwork/**
+/src/utils/LogCompilation/target/
diff --git a/.hgignore b/.hgignore
deleted file mode 100644
index 312ce62a641..00000000000
--- a/.hgignore
+++ /dev/null
@@ -1,18 +0,0 @@
-^build/
-^dist/
-^.idea/
-^.vscode/
-nbproject/private/
-^webrev
-^.src-rev$
-^.jib/
-(^|/)\.DS_Store
-(^|/)\.metadata/
-(^|/)\.recommenders/
-test/nashorn/script/external
-test/nashorn/lib
-NashornProfile.txt
-(^|/)JTreport/
-(^|/)JTwork/
-(^|/)\.git/
-^src/utils/hsdis/build/
\ No newline at end of file
diff --git a/doc/building.html b/doc/building.html
index 5f615f9d4ef..318a24aa840 100644
--- a/doc/building.html
+++ b/doc/building.html
@@ -78,6 +78,7 @@
Building the JDK
Native Libraries
Creating And Using Sysroots With qemu-debootstrap
Building for ARM/aarch64
+Building for musl
Verifying the Build
Build Performance
@@ -224,6 +225,8 @@ Linux
sudo apt-get install build-essential
For rpm-based distributions (Fedora, Red Hat, etc), try this:
sudo yum groupinstall "Development Tools"
+For Alpine Linux, aside from basic tooling, install the GNU versions of some programs:
+sudo apk add build-base bash grep zip
AIX
Please consult the AIX section of the Supported Build Platforms OpenJDK Build Wiki page for details about which versions of AIX are supported.
@@ -313,6 +316,7 @@ FreeType
- To install on an apt-based Linux, try running
sudo apt-get install libfreetype6-dev
.
- To install on an rpm-based Linux, try running
sudo yum install freetype-devel
.
+- To install on Alpine Linux, try running
sudo apk add freetype-dev
.
Use --with-freetype-include=<path>
and --with-freetype-lib=<path>
if configure
does not automatically locate the platform FreeType files.
CUPS
@@ -320,6 +324,7 @@ CUPS
- To install on an apt-based Linux, try running
sudo apt-get install libcups2-dev
.
- To install on an rpm-based Linux, try running
sudo yum install cups-devel
.
+- To install on Alpine Linux, try running
sudo apk add cups-dev
.
Use --with-cups=<path>
if configure
does not properly locate your CUPS files.
X11
@@ -327,6 +332,7 @@ X11
- To install on an apt-based Linux, try running
sudo apt-get install libx11-dev libxext-dev libxrender-dev libxrandr-dev libxtst-dev libxt-dev
.
- To install on an rpm-based Linux, try running
sudo yum install libXtst-devel libXt-devel libXrender-devel libXrandr-devel libXi-devel
.
+- To install on Alpine Linux, try running
sudo apk add libx11-dev libxext-dev libxrender-dev libxrandr-dev libxtst-dev libxt-dev
.
Use --with-x=<path>
if configure
does not properly locate your X11 files.
ALSA
@@ -334,6 +340,7 @@ ALSA
- To install on an apt-based Linux, try running
sudo apt-get install libasound2-dev
.
- To install on an rpm-based Linux, try running
sudo yum install alsa-lib-devel
.
+- To install on Alpine Linux, try running
sudo apk add alsa-lib-dev
.
Use --with-alsa=<path>
if configure
does not properly locate your ALSA files.
libffi
@@ -341,6 +348,7 @@ libffi
- To install on an apt-based Linux, try running
sudo apt-get install libffi-dev
.
- To install on an rpm-based Linux, try running
sudo yum install libffi-devel
.
+- To install on Alpine Linux, try running
sudo apk add libffi-dev
.
Use --with-libffi=<path>
if configure
does not properly locate your libffi files.
@@ -349,6 +357,7 @@ Autoconf
- To install on an apt-based Linux, try running
sudo apt-get install autoconf
.
- To install on an rpm-based Linux, try running
sudo yum install autoconf
.
+- To install on Alpine Linux, try running
sudo apk add autoconf
.
- To install on macOS, try running
brew install autoconf
.
- To install on Windows, try running
<path to Cygwin setup>/setup-x86_64 -q -P autoconf
.
@@ -620,21 +629,30 @@ Creating And Using Sys
Fortunately, you can create sysroots for foreign architectures with tools provided by your OS. On Debian/Ubuntu systems, one could use qemu-debootstrap
to create the target system chroot, which would have the native libraries and headers specific to that target system. After that, we can use the cross-compiler on the build system, pointing into chroot to get the build dependencies right. This allows building for foreign architectures with native compilation speed.
For example, cross-compiling to AArch64 from x86_64 could be done like this:
-- Install cross-compiler on the build system:
-
-apt install g++-aarch64-linux-gnu gcc-aarch64-linux-gnu
-
-- Create chroot on the build system, configuring it for target system:
-
-sudo qemu-debootstrap --arch=arm64 --verbose \
- --include=fakeroot,build-essential,libx11-dev,libxext-dev,libxrender-dev,libxrandr-dev,libxtst-dev,libxt-dev,libcups2-dev,libfontconfig1-dev,libasound2-dev,libfreetype6-dev,libpng12-dev \
- --resolve-deps jessie /chroots/arm64 http://httpredir.debian.org/debian/
-
-- Configure and build with newly created chroot as sysroot/toolchain-path:
-
-CC=aarch64-linux-gnu-gcc CXX=aarch64-linux-gnu-g++ sh ./configure --openjdk-target=aarch64-linux-gnu --with-sysroot=/chroots/arm64/ --with-toolchain-path=/chroots/arm64/
+Install cross-compiler on the build system:
+apt install g++-aarch64-linux-gnu gcc-aarch64-linux-gnu
+Create chroot on the build system, configuring it for target system:
+sudo qemu-debootstrap \
+ --arch=arm64 \
+ --verbose \
+ --include=fakeroot,symlinks,build-essential,libx11-dev,libxext-dev,libxrender-dev,libxrandr-dev,libxtst-dev,libxt-dev,libcups2-dev,libfontconfig1-dev,libasound2-dev,libfreetype6-dev,libpng-dev \
+ --resolve-deps \
+ buster \
+ ~/sysroot-arm64 \
+ http://httpredir.debian.org/debian/
+Make sure the symlinks inside the newly created chroot point to proper locations:
+sudo chroot ~/sysroot-arm64 symlinks -cr .
+Configure and build with newly created chroot as sysroot/toolchain-path:
+CC=aarch64-linux-gnu-gcc CXX=aarch64-linux-gnu-g++ sh ./configure \
+ --openjdk-target=aarch64-linux-gnu \
+ --with-sysroot=~/sysroot-arm64 \
+ --with-toolchain-path=~/sysroot-arm64 \
+ --with-freetype-lib=~/sysroot-arm64/usr/lib/aarch64-linux-gnu/ \
+ --with-freetype-include=~/sysroot-arm64/usr/include/freetype2/ \
+ --x-libraries=~/sysroot-arm64/usr/lib/aarch64-linux-gnu/
make images
-ls build/linux-aarch64-normal-server-release/
+ls build/linux-aarch64-server-release/
+
The build does not create new files in that chroot, so it can be reused for multiple builds without additional cleanup.
Architectures that are known to successfully cross-compile like this are:
@@ -688,6 +706,15 @@ Creating And Using Sys
Additional architectures might be supported by Debian/Ubuntu Ports.
Building for ARM/aarch64
A common cross-compilation target is the ARM CPU. When building for ARM, it is useful to set the ABI profile. A number of pre-defined ABI profiles are available using --with-abi-profile
: arm-vfp-sflt, arm-vfp-hflt, arm-sflt, armv5-vfp-sflt, armv6-vfp-hflt. Note that soft-float ABIs are no longer properly supported by the JDK.
+Building for musl
+Just like it's possible to cross-compile for a different CPU, it's possible to cross-compile for musl libc on a glibc-based build system. A devkit suitable for most target CPU architectures can be obtained from musl.cc. After installing the required packages in the sysroot, configure the build with --openjdk-target
:
+sh ./configure --with-jvm-variants=server \
+--with-boot-jdk=$BOOT_JDK \
+--with-build-jdk=$BUILD_JDK \
+--openjdk-target=x86_64-unknown-linux-musl \
+--with-devkit=$DEVKIT \
+--with-sysroot=$SYSROOT
+and run make
normally.
Verifying the Build
The build will end up in a directory named like build/linux-arm-normal-server-release
.
Inside this build output directory, the images/jdk
will contain the newly built JDK, for your target system.
diff --git a/doc/building.md b/doc/building.md
index 47fa445998d..e0ac5c7b6c7 100644
--- a/doc/building.md
+++ b/doc/building.md
@@ -273,6 +273,13 @@ For rpm-based distributions (Fedora, Red Hat, etc), try this:
sudo yum groupinstall "Development Tools"
```
+For Alpine Linux, aside from basic tooling, install the GNU versions of some
+programs:
+
+```
+sudo apk add build-base bash grep zip
+```
+
### AIX
Please consult the AIX section of the [Supported Build Platforms](
@@ -431,6 +438,7 @@ rather than bundling the JDK's own copy.
libfreetype6-dev`.
* To install on an rpm-based Linux, try running `sudo yum install
freetype-devel`.
+ * To install on Alpine Linux, try running `sudo apk add freetype-dev`.
Use `--with-freetype-include=` and `--with-freetype-lib=`
if `configure` does not automatically locate the platform FreeType files.
@@ -445,6 +453,7 @@ your operating system.
libcups2-dev`.
* To install on an rpm-based Linux, try running `sudo yum install
cups-devel`.
+ * To install on Alpine Linux, try running `sudo apk add cups-dev`.
Use `--with-cups=` if `configure` does not properly locate your CUPS
files.
@@ -458,6 +467,8 @@ Linux.
libx11-dev libxext-dev libxrender-dev libxrandr-dev libxtst-dev libxt-dev`.
* To install on an rpm-based Linux, try running `sudo yum install
libXtst-devel libXt-devel libXrender-devel libXrandr-devel libXi-devel`.
+ * To install on Alpine Linux, try running `sudo apk add libx11-dev
+ libxext-dev libxrender-dev libxrandr-dev libxtst-dev libxt-dev`.
Use `--with-x=` if `configure` does not properly locate your X11 files.
@@ -470,6 +481,7 @@ required on Linux. At least version 0.9.1 of ALSA is required.
libasound2-dev`.
* To install on an rpm-based Linux, try running `sudo yum install
alsa-lib-devel`.
+ * To install on Alpine Linux, try running `sudo apk add alsa-lib-dev`.
Use `--with-alsa=` if `configure` does not properly locate your ALSA
files.
@@ -484,6 +496,7 @@ Hotspot.
libffi-dev`.
* To install on an rpm-based Linux, try running `sudo yum install
libffi-devel`.
+ * To install on Alpine Linux, try running `sudo apk add libffi-dev`.
Use `--with-libffi=` if `configure` does not properly locate your libffi
files.
@@ -499,6 +512,7 @@ platforms. At least version 2.69 is required.
autoconf`.
* To install on an rpm-based Linux, try running `sudo yum install
autoconf`.
+ * To install on Alpine Linux, try running `sudo apk add autoconf`.
* To install on macOS, try running `brew install autoconf`.
* To install on Windows, try running `/setup-x86_64 -q
-P autoconf`.
@@ -1072,23 +1086,39 @@ for foreign architectures with native compilation speed.
For example, cross-compiling to AArch64 from x86_64 could be done like this:
* Install cross-compiler on the *build* system:
-```
-apt install g++-aarch64-linux-gnu gcc-aarch64-linux-gnu
-```
+ ```
+ apt install g++-aarch64-linux-gnu gcc-aarch64-linux-gnu
+ ```
* Create chroot on the *build* system, configuring it for *target* system:
-```
-sudo qemu-debootstrap --arch=arm64 --verbose \
- --include=fakeroot,build-essential,libx11-dev,libxext-dev,libxrender-dev,libxrandr-dev,libxtst-dev,libxt-dev,libcups2-dev,libfontconfig1-dev,libasound2-dev,libfreetype6-dev,libpng12-dev \
- --resolve-deps jessie /chroots/arm64 http://httpredir.debian.org/debian/
-```
+ ```
+ sudo qemu-debootstrap \
+ --arch=arm64 \
+ --verbose \
+ --include=fakeroot,symlinks,build-essential,libx11-dev,libxext-dev,libxrender-dev,libxrandr-dev,libxtst-dev,libxt-dev,libcups2-dev,libfontconfig1-dev,libasound2-dev,libfreetype6-dev,libpng-dev \
+ --resolve-deps \
+ buster \
+ ~/sysroot-arm64 \
+ http://httpredir.debian.org/debian/
+ ```
+
+ * Make sure the symlinks inside the newly created chroot point to proper locations:
+ ```
+ sudo chroot ~/sysroot-arm64 symlinks -cr .
+ ```
* Configure and build with newly created chroot as sysroot/toolchain-path:
-```
-CC=aarch64-linux-gnu-gcc CXX=aarch64-linux-gnu-g++ sh ./configure --openjdk-target=aarch64-linux-gnu --with-sysroot=/chroots/arm64/ --with-toolchain-path=/chroots/arm64/
-make images
-ls build/linux-aarch64-normal-server-release/
-```
+ ```
+ CC=aarch64-linux-gnu-gcc CXX=aarch64-linux-gnu-g++ sh ./configure \
+ --openjdk-target=aarch64-linux-gnu \
+ --with-sysroot=~/sysroot-arm64 \
+ --with-toolchain-path=~/sysroot-arm64 \
+ --with-freetype-lib=~/sysroot-arm64/usr/lib/aarch64-linux-gnu/ \
+ --with-freetype-include=~/sysroot-arm64/usr/include/freetype2/ \
+ --x-libraries=~/sysroot-arm64/usr/lib/aarch64-linux-gnu/
+ make images
+ ls build/linux-aarch64-server-release/
+ ```
The build does not create new files in that chroot, so it can be reused for multiple builds
without additional cleanup.
@@ -1113,6 +1143,25 @@ available using `--with-abi-profile`: arm-vfp-sflt, arm-vfp-hflt, arm-sflt,
armv5-vfp-sflt, armv6-vfp-hflt. Note that soft-float ABIs are no longer
properly supported by the JDK.
+### Building for musl
+
+Just like it's possible to cross-compile for a different CPU, it's possible to
+cross-compile for musl libc on a glibc-based *build* system.
+A devkit suitable for most target CPU architectures can be obtained from
+[musl.cc](https://musl.cc). After installing the required packages in the
+sysroot, configure the build with `--openjdk-target`:
+
+```
+sh ./configure --with-jvm-variants=server \
+--with-boot-jdk=$BOOT_JDK \
+--with-build-jdk=$BUILD_JDK \
+--openjdk-target=x86_64-unknown-linux-musl \
+--with-devkit=$DEVKIT \
+--with-sysroot=$SYSROOT
+```
+
+and run `make` normally.
+
### Verifying the Build
The build will end up in a directory named like
diff --git a/make/Bundles.gmk b/make/Bundles.gmk
index b7c8ddbfbe7..b52b5720772 100644
--- a/make/Bundles.gmk
+++ b/make/Bundles.gmk
@@ -410,17 +410,43 @@ endif
################################################################################
-ifneq ($(filter docs-bundles, $(MAKECMDGOALS)), )
- DOCS_BUNDLE_FILES := $(call FindFiles, $(DOCS_IMAGE_DIR))
+ifneq ($(filter docs-jdk-bundles, $(MAKECMDGOALS)), )
+ DOCS_JDK_BUNDLE_FILES := $(call FindFiles, $(DOCS_JDK_IMAGE_DIR))
- $(eval $(call SetupBundleFile, BUILD_DOCS_BUNDLE, \
- BUNDLE_NAME := $(DOCS_BUNDLE_NAME), \
- FILES := $(DOCS_BUNDLE_FILES), \
- BASE_DIRS := $(DOCS_IMAGE_DIR), \
+ $(eval $(call SetupBundleFile, BUILD_DOCS_JDK_BUNDLE, \
+ BUNDLE_NAME := $(DOCS_JDK_BUNDLE_NAME), \
+ FILES := $(DOCS_JDK_BUNDLE_FILES), \
+ BASE_DIRS := $(DOCS_JDK_IMAGE_DIR), \
SUBDIR := docs, \
))
- DOCS_TARGETS += $(BUILD_DOCS_BUNDLE)
+ DOCS_JDK_TARGETS += $(BUILD_DOCS_JDK_BUNDLE)
+endif
+
+ifneq ($(filter docs-javase-bundles, $(MAKECMDGOALS)), )
+ DOCS_JAVASE_BUNDLE_FILES := $(call FindFiles, $(DOCS_JAVASE_IMAGE_DIR))
+
+ $(eval $(call SetupBundleFile, BUILD_DOCS_JAVASE_BUNDLE, \
+ BUNDLE_NAME := $(DOCS_JAVASE_BUNDLE_NAME), \
+ FILES := $(DOCS_JAVASE_BUNDLE_FILES), \
+ BASE_DIRS := $(DOCS_JAVASE_IMAGE_DIR), \
+ SUBDIR := docs-javase, \
+ ))
+
+ DOCS_JAVASE_TARGETS += $(BUILD_DOCS_JAVASE_BUNDLE)
+endif
+
+ifneq ($(filter docs-reference-bundles, $(MAKECMDGOALS)), )
+ DOCS_REFERENCE_BUNDLE_FILES := $(call FindFiles, $(DOCS_REFERENCE_IMAGE_DIR))
+
+ $(eval $(call SetupBundleFile, BUILD_DOCS_REFERENCE_BUNDLE, \
+ BUNDLE_NAME := $(DOCS_REFERENCE_BUNDLE_NAME), \
+ FILES := $(DOCS_REFERENCE_BUNDLE_FILES), \
+ BASE_DIRS := $(DOCS_REFERENCE_IMAGE_DIR), \
+ SUBDIR := docs-reference, \
+ ))
+
+ DOCS_REFERENCE_TARGETS += $(BUILD_DOCS_REFERENCE_BUNDLE)
endif
################################################################################
@@ -469,9 +495,12 @@ $(eval $(call IncludeCustomExtension, Bundles.gmk))
product-bundles: $(PRODUCT_TARGETS)
legacy-bundles: $(LEGACY_TARGETS)
test-bundles: $(TEST_TARGETS)
-docs-bundles: $(DOCS_TARGETS)
+docs-jdk-bundles: $(DOCS_JDK_TARGETS)
+docs-javase-bundles: $(DOCS_JAVASE_TARGETS)
+docs-reference-bundles: $(DOCS_REFERENCE_TARGETS)
static-libs-bundles: $(STATIC_LIBS_TARGETS)
jcov-bundles: $(JCOV_TARGETS)
-.PHONY: all default product-bundles test-bundles docs-bundles \
+.PHONY: all default product-bundles test-bundles \
+ docs-jdk-bundles docs-javase-bundles docs-reference-bundles \
static-libs-bundles jcov-bundles
diff --git a/make/CompileJavaModules.gmk b/make/CompileJavaModules.gmk
index c4d25c90122..e8997e0da83 100644
--- a/make/CompileJavaModules.gmk
+++ b/make/CompileJavaModules.gmk
@@ -184,10 +184,6 @@ ifeq ($(call isTargetOs, windows), true)
java.desktop_EXCLUDES += com/sun/java/swing/plaf/gtk
endif
-ifdef BUILD_HEADLESS_ONLY
- java.desktop_EXCLUDES += sun/applet
-endif
-
ifeq ($(call isTargetOs, windows macosx), false)
java.desktop_EXCLUDE_FILES += sun/awt/AWTCharset.java
endif
@@ -389,11 +385,11 @@ endif
################################################################################
-jdk.incubator.jpackage_COPY += .gif .png .txt .spec .script .prerm .preinst \
+jdk.jpackage_COPY += .gif .png .txt .spec .script .prerm .preinst \
.postrm .postinst .list .sh .desktop .copyright .control .plist .template \
.icns .scpt .wxs .wxl .wxi .ico .bmp .tiff
-jdk.incubator.jpackage_CLEAN += .properties
+jdk.jpackage_CLEAN += .properties
################################################################################
@@ -546,6 +542,10 @@ jdk.jfr_DISABLED_WARNINGS += exports
jdk.jfr_COPY := .xsd .xml .dtd
jdk.jfr_JAVAC_FLAGS := -XDstringConcat=inline
+################################################################################
+
+jdk.incubator.vector_DOCLINT += -Xdoclint:all/protected
+
################################################################################
# If this is an imported module that has prebuilt classes, only compile
# module-info.java.
diff --git a/make/CompileModuleTools.gmk b/make/CompileModuleTools.gmk
index c6322e5b36e..18cd42f0612 100644
--- a/make/CompileModuleTools.gmk
+++ b/make/CompileModuleTools.gmk
@@ -33,8 +33,20 @@ include JavaCompilation.gmk
TOOLS_CLASSES_DIR := $(BUILDTOOLS_OUTPUTDIR)/tools_jigsaw_classes
+# When using an external BUILDJDK, make it possible to shortcut building of
+# these tools using the BUILD_JAVAC instead of having to build the complete
+# exploded image first.
+ifeq ($(EXTERNAL_BUILDJDK), true)
+ COMPILER := buildjdk
+ TARGET_RELEASE := $(TARGET_RELEASE_NEWJDK)
+else
+ COMPILER := interim
+ TARGET_RELEASE := $(TARGET_RELEASE_NEWJDK_UPGRADED)
+endif
+
$(eval $(call SetupJavaCompilation, BUILD_JIGSAW_TOOLS, \
- TARGET_RELEASE := $(TARGET_RELEASE_NEWJDK_UPGRADED), \
+ TARGET_RELEASE := $(TARGET_RELEASE), \
+ COMPILER := $(COMPILER), \
SRC := $(TOPDIR)/make/jdk/src/classes, \
INCLUDES := build/tools/deps \
build/tools/docs \
diff --git a/make/CompileToolsJdk.gmk b/make/CompileToolsJdk.gmk
index a671f934998..2f09476aa67 100644
--- a/make/CompileToolsJdk.gmk
+++ b/make/CompileToolsJdk.gmk
@@ -56,7 +56,8 @@ $(eval $(call SetupJavaCompilation, BUILD_TOOLS_JDK, \
DISABLED_WARNINGS := options, \
JAVAC_FLAGS := \
--add-exports java.desktop/sun.awt=ALL-UNNAMED \
- --add-exports java.base/sun.text=ALL-UNNAMED, \
+ --add-exports java.base/sun.text=ALL-UNNAMED \
+ --add-exports java.base/sun.security.util=ALL-UNNAMED, \
))
TARGETS += $(BUILD_TOOLS_JDK)
diff --git a/make/Docs.gmk b/make/Docs.gmk
index 19e962b79af..1f7a0caf819 100644
--- a/make/Docs.gmk
+++ b/make/Docs.gmk
@@ -458,7 +458,7 @@ $(eval $(call SetupApiDocsGeneration, JAVASE_API, \
MODULES := $(JAVASE_MODULES), \
SHORT_NAME := $(JAVASE_SHORT_NAME), \
LONG_NAME := $(JAVASE_LONG_NAME), \
- TARGET_DIR := $(IMAGES_OUTPUTDIR)/javase-docs/api, \
+ TARGET_DIR := $(DOCS_JAVASE_IMAGE_DIR)/api, \
))
# Targets generated are returned in JAVASE_API_JAVADOC_TARGETS and
@@ -476,7 +476,7 @@ $(eval $(call SetupApiDocsGeneration, REFERENCE_API, \
MODULES := $(JAVASE_MODULES), \
SHORT_NAME := $(JAVASE_SHORT_NAME), \
LONG_NAME := $(JAVASE_LONG_NAME), \
- TARGET_DIR := $(IMAGES_OUTPUTDIR)/reference-docs/api, \
+ TARGET_DIR := $(DOCS_REFERENCE_IMAGE_DIR)/api, \
JAVADOC_CMD := $(JAVADOC), \
OPTIONS := $(REFERENCE_OPTIONS), \
TAGS := $(REFERENCE_TAGS), \
diff --git a/make/Main.gmk b/make/Main.gmk
index 493b795d35a..cdb4be67c56 100644
--- a/make/Main.gmk
+++ b/make/Main.gmk
@@ -90,7 +90,6 @@ $(eval $(call SetupTarget, buildtools-jdk, \
$(eval $(call SetupTarget, buildtools-modules, \
MAKEFILE := CompileModuleTools, \
- DEPS := exploded-image-base, \
))
$(eval $(call SetupTarget, buildtools-hotspot, \
@@ -339,7 +338,7 @@ $(eval $(call SetupTarget, test-image-demos-jdk, \
$(eval $(call SetupTarget, generate-summary, \
MAKEFILE := GenerateModuleSummary, \
- DEPS := jmods buildtools-modules, \
+ DEPS := jmods buildtools-modules runnable-buildjdk, \
))
################################################################################
@@ -469,7 +468,7 @@ $(eval $(call SetupTarget, docs-jdk-api-javadoc, \
$(eval $(call SetupTarget, docs-jdk-api-modulegraph, \
MAKEFILE := Docs, \
TARGET := docs-jdk-api-modulegraph, \
- DEPS := exploded-image buildtools-modules, \
+ DEPS := buildtools-modules runnable-buildjdk, \
))
$(eval $(call SetupTarget, docs-javase-api-javadoc, \
@@ -480,7 +479,7 @@ $(eval $(call SetupTarget, docs-javase-api-javadoc, \
$(eval $(call SetupTarget, docs-javase-api-modulegraph, \
MAKEFILE := Docs, \
TARGET := docs-javase-api-modulegraph, \
- DEPS := exploded-image buildtools-modules, \
+ DEPS := buildtools-modules runnable-buildjdk, \
))
$(eval $(call SetupTarget, docs-reference-api-javadoc, \
@@ -491,7 +490,7 @@ $(eval $(call SetupTarget, docs-reference-api-javadoc, \
$(eval $(call SetupTarget, docs-reference-api-modulegraph, \
MAKEFILE := Docs, \
TARGET := docs-reference-api-modulegraph, \
- DEPS := exploded-image buildtools-modules, \
+ DEPS := buildtools-modules runnable-buildjdk, \
))
# The gensrc steps for jdk.jdi create html spec files.
@@ -749,12 +748,24 @@ $(eval $(call SetupTarget, test-bundles, \
DEPS := test-image, \
))
-$(eval $(call SetupTarget, docs-bundles, \
+$(eval $(call SetupTarget, docs-jdk-bundles, \
MAKEFILE := Bundles, \
- TARGET := docs-bundles, \
+ TARGET := docs-jdk-bundles, \
DEPS := docs-image, \
))
+$(eval $(call SetupTarget, docs-javase-bundles, \
+ MAKEFILE := Bundles, \
+ TARGET := docs-javase-bundles, \
+ DEPS := docs-javase-image, \
+))
+
+$(eval $(call SetupTarget, docs-reference-bundles, \
+ MAKEFILE := Bundles, \
+ TARGET := docs-reference-bundles, \
+ DEPS := docs-reference-image, \
+))
+
$(eval $(call SetupTarget, static-libs-bundles, \
MAKEFILE := Bundles, \
TARGET := static-libs-bundles, \
@@ -945,10 +956,13 @@ else
$(JMOD_TARGETS) $(INTERIM_JMOD_TARGETS): java.base-libs java.base-copy \
java.base-gendata jdk.jlink-launchers java
endif
- else
- # The normal non cross compilation case uses needs to wait for the full
+ else ifeq ($(EXTERNAL_BUILDJDK), false)
+ # The normal non cross compilation use case needs to wait for the full
# exploded-image to avoid a race with the optimize target.
$(JMOD_TARGETS) $(INTERIM_JMOD_TARGETS): exploded-image
+ # The buildtools-modules are used for the exploded-image-optimize target,
+ # but can be built either using the exploded-image or an external BUILDJDK.
+ buildtools-modules: exploded-image-base
endif
# All modules include the main license files from java.base.
@@ -1069,6 +1083,18 @@ ifneq ($(COMPILE_TYPE), cross)
exploded-image: exploded-image-optimize
endif
+# The runnable-buildjdk target guarantees that the buildjdk is done
+# building and ready to be used. The exact set of dependencies it needs
+# depends on what kind of buildjdk is used for the current configuration.
+runnable-buildjdk:
+ifeq ($(CREATE_BUILDJDK), true)
+ ifneq ($(CREATING_BUILDJDK), true)
+ runnable-buildjdk: create-buildjdk
+ endif
+else ifeq ($(EXTERNAL_BUILDJDK), false)
+ runnable-buildjdk: exploded-image
+endif
+
create-buildjdk: create-buildjdk-interim-image
docs-jdk-api: docs-jdk-api-javadoc
@@ -1122,8 +1148,16 @@ ifeq ($(call isTargetOs, macosx), true)
legacy-images: mac-legacy-jre-bundle
endif
-# This target builds the documentation image
-docs-image: docs-jdk
+# These targets build the various documentation images
+docs-jdk-image: docs-jdk
+docs-javase-image: docs-javase
+docs-reference-image: docs-reference
+# The docs-jdk-image is what most users expect to be built
+docs-image: docs-jdk-image
+all-docs-images: docs-jdk-image docs-javase-image docs-reference-image
+
+docs-bundles: docs-jdk-bundles
+all-docs-bundles: docs-jdk-bundles docs-javase-bundles docs-reference-bundles
# This target builds the test image
test-image: prepare-test-image test-image-jdk-jtreg-native \
@@ -1156,7 +1190,7 @@ endif
################################################################################
# all-images builds all our deliverables as images.
-all-images: product-images test-image docs-image
+all-images: product-images test-image all-docs-images
# all-bundles packages all our deliverables as tar.gz bundles.
all-bundles: product-bundles test-bundles docs-bundles static-libs-bundles
@@ -1164,10 +1198,11 @@ all-bundles: product-bundles test-bundles docs-bundles static-libs-bundles
ALL_TARGETS += buildtools hotspot hotspot-libs hotspot-gensrc gensrc gendata \
copy java libs static-libs launchers jmods \
jdk.jdwp.agent-gensrc $(ALL_MODULES) demos \
- exploded-image-base exploded-image \
+ exploded-image-base exploded-image runnable-buildjdk \
create-buildjdk docs-jdk-api docs-javase-api docs-reference-api docs-jdk \
docs-javase docs-reference docs-javadoc mac-bundles product-images legacy-images \
- docs-image test-image all-images \
+ docs-image docs-jdk-image docs-javase-image docs-reference-image all-docs-images \
+ docs-bundles all-docs-bundles test-image all-images \
all-bundles
################################################################################
diff --git a/make/ReleaseFile.gmk b/make/ReleaseFile.gmk
index 14ebc9c32ae..0424e2fb623 100644
--- a/make/ReleaseFile.gmk
+++ b/make/ReleaseFile.gmk
@@ -53,6 +53,7 @@ define create-info-file
$(call info-file-item, "JAVA_VERSION_DATE", "$(VERSION_DATE)")
$(call info-file-item, "OS_NAME", "$(RELEASE_FILE_OS_NAME)")
$(call info-file-item, "OS_ARCH", "$(RELEASE_FILE_OS_ARCH)")
+ $(call info-file-item, "LIBC", "$(RELEASE_FILE_LIBC)")
endef
# Param 1 - The file containing the MODULES list
diff --git a/make/ToolsJdk.gmk b/make/ToolsJdk.gmk
index 296411559a9..45a0cc8c64e 100644
--- a/make/ToolsJdk.gmk
+++ b/make/ToolsJdk.gmk
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -68,6 +68,7 @@ TOOL_TZDB = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
build.tools.tzdb.TzdbZoneRulesCompiler
TOOL_BLACKLISTED_CERTS = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
+ --add-exports java.base/sun.security.util=ALL-UNNAMED \
build.tools.blacklistedcertsconverter.BlacklistedCertsConverter
TOOL_MAKEJAVASECURITY = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
diff --git a/make/autoconf/build-aux/config.guess b/make/autoconf/build-aux/config.guess
index b650b5109d0..14f21a25e8f 100644
--- a/make/autoconf/build-aux/config.guess
+++ b/make/autoconf/build-aux/config.guess
@@ -30,6 +30,17 @@
DIR=`dirname $0`
OUT=`. $DIR/autoconf-config.guess`
+# Detect C library.
+# Use '-gnu' suffix on systems that use glibc.
+# Use '-musl' suffix on systems that use the musl libc.
+echo $OUT | grep -- -linux- > /dev/null 2> /dev/null
+if test $? = 0; then
+ libc_vendor=`ldd --version 2>&1 | sed -n '1s/.*\(musl\).*/\1/p'`
+ if [ x"${libc_vendor}" = x"musl" ]; then
+ OUT=`echo $OUT | sed 's/-gnu/-musl/'`
+ fi
+fi
+
# Test and fix cygwin on x86_64
echo $OUT | grep 86-pc-cygwin > /dev/null 2> /dev/null
if test $? != 0; then
diff --git a/make/autoconf/build-aux/config.sub b/make/autoconf/build-aux/config.sub
index a36e6690728..d0dd001abdf 100644
--- a/make/autoconf/build-aux/config.sub
+++ b/make/autoconf/build-aux/config.sub
@@ -29,6 +29,11 @@
DIR=`dirname $0`
+if echo $* | grep linux-musl >/dev/null ; then
+ echo $*
+ exit
+fi
+
# Allow wsl
if echo $* | grep x86_64-pc-wsl >/dev/null ; then
echo $*
diff --git a/make/autoconf/buildjdk-spec.gmk.in b/make/autoconf/buildjdk-spec.gmk.in
index 7134e34bcee..524f35f417c 100644
--- a/make/autoconf/buildjdk-spec.gmk.in
+++ b/make/autoconf/buildjdk-spec.gmk.in
@@ -54,11 +54,13 @@ IMAGES_OUTPUTDIR := $(patsubst $(OUTPUTDIR)%,$(BUILDJDK_OUTPUTDIR)%,$(IMAGES_OUT
OPENJDK_BUILD_CPU_LEGACY := @OPENJDK_BUILD_CPU_LEGACY@
OPENJDK_BUILD_CPU_LEGACY_LIB := @OPENJDK_BUILD_CPU_LEGACY_LIB@
+OPENJDK_BUILD_LIBC := @OPENJDK_BUILD_LIBC@
OPENJDK_TARGET_CPU := @OPENJDK_BUILD_CPU@
OPENJDK_TARGET_CPU_ARCH := @OPENJDK_BUILD_CPU_ARCH@
OPENJDK_TARGET_CPU_BITS := @OPENJDK_BUILD_CPU_BITS@
OPENJDK_TARGET_CPU_ENDIAN := @OPENJDK_BUILD_CPU_ENDIAN@
OPENJDK_TARGET_CPU_LEGACY := @OPENJDK_BUILD_CPU_LEGACY@
+OPENJDK_TARGET_LIBC := @OPENJDK_BUILD_LIBC@
OPENJDK_TARGET_OS_INCLUDE_SUBDIR := @OPENJDK_BUILD_OS_INCLUDE_SUBDIR@
HOTSPOT_TARGET_OS := @HOTSPOT_BUILD_OS@
@@ -66,6 +68,7 @@ HOTSPOT_TARGET_OS_TYPE := @HOTSPOT_BUILD_OS_TYPE@
HOTSPOT_TARGET_CPU := @HOTSPOT_BUILD_CPU@
HOTSPOT_TARGET_CPU_ARCH := @HOTSPOT_BUILD_CPU_ARCH@
HOTSPOT_TARGET_CPU_DEFINE := @HOTSPOT_BUILD_CPU_DEFINE@
+HOTSPOT_TARGET_LIBC := @HOTSPOT_BUILD_LIBC@
CFLAGS_JDKLIB := @OPENJDK_BUILD_CFLAGS_JDKLIB@
CXXFLAGS_JDKLIB := @OPENJDK_BUILD_CXXFLAGS_JDKLIB@
diff --git a/make/autoconf/flags-cflags.m4 b/make/autoconf/flags-cflags.m4
index 588df7f0011..d4738ad6837 100644
--- a/make/autoconf/flags-cflags.m4
+++ b/make/autoconf/flags-cflags.m4
@@ -231,8 +231,14 @@ AC_DEFUN([FLAGS_SETUP_OPTIMIZATION],
# -D_FORTIFY_SOURCE=2 hardening option needs optimization (at least -O1) enabled
# set for lower O-levels -U_FORTIFY_SOURCE to overwrite previous settings
if test "x$OPENJDK_TARGET_OS" = xlinux -a "x$DEBUG_LEVEL" = "xfastdebug"; then
- ENABLE_FORTIFY_CFLAGS="-D_FORTIFY_SOURCE=2"
DISABLE_FORTIFY_CFLAGS="-U_FORTIFY_SOURCE"
+ # ASan doesn't work well with _FORTIFY_SOURCE
+ # See https://github.com/google/sanitizers/wiki/AddressSanitizer#faq
+ if test "x$ASAN_ENABLED" = xyes; then
+ ENABLE_FORTIFY_CFLAGS="${DISABLE_FORTIFY_CFLAGS}"
+ else
+ ENABLE_FORTIFY_CFLAGS="-D_FORTIFY_SOURCE=2"
+ fi
C_O_FLAG_HIGHEST_JVM="${C_O_FLAG_HIGHEST_JVM} ${ENABLE_FORTIFY_CFLAGS}"
C_O_FLAG_HIGHEST="${C_O_FLAG_HIGHEST} ${ENABLE_FORTIFY_CFLAGS}"
C_O_FLAG_HI="${C_O_FLAG_HI} ${ENABLE_FORTIFY_CFLAGS}"
@@ -558,6 +564,11 @@ AC_DEFUN([FLAGS_SETUP_CFLAGS_HELPER],
fi
fi
+ OS_CFLAGS="$OS_CFLAGS -DLIBC=$OPENJDK_TARGET_LIBC"
+ if test "x$OPENJDK_TARGET_LIBC" = xmusl; then
+ OS_CFLAGS="$OS_CFLAGS -DMUSL_LIBC"
+ fi
+
# Where does this really belong??
if test "x$TOOLCHAIN_TYPE" = xgcc || test "x$TOOLCHAIN_TYPE" = xclang; then
PICFLAG="-fPIC"
@@ -652,16 +663,10 @@ AC_DEFUN([FLAGS_SETUP_CFLAGS_CPU_DEP],
$1_DEFINES_CPU_JDK="${$1_DEFINES_CPU_JDK} -DARCH='\"$FLAGS_CPU_LEGACY\"' \
-D$FLAGS_CPU_LEGACY"
- if test "x$FLAGS_CPU_BITS" = x64; then
- # -D_LP64=1 is only set on linux and mac. Setting on windows causes diff in
- # unpack200.exe.
- if test "x$FLAGS_OS" = xlinux || test "x$FLAGS_OS" = xmacosx; then
- $1_DEFINES_CPU_JDK="${$1_DEFINES_CPU_JDK} -D_LP64=1"
- fi
- if test "x$FLAGS_OS" != xaix; then
- # xlc on AIX defines _LP64=1 by default and issues a warning if we redefine it.
- $1_DEFINES_CPU_JVM="${$1_DEFINES_CPU_JVM} -D_LP64=1"
- fi
+ if test "x$FLAGS_CPU_BITS" = x64 && test "x$FLAGS_OS" != xaix; then
+ # xlc on AIX defines _LP64=1 by default and issues a warning if we redefine it.
+ $1_DEFINES_CPU_JDK="${$1_DEFINES_CPU_JDK} -D_LP64=1"
+ $1_DEFINES_CPU_JVM="${$1_DEFINES_CPU_JVM} -D_LP64=1"
fi
# toolchain dependend, per-cpu
diff --git a/make/autoconf/jdk-options.m4 b/make/autoconf/jdk-options.m4
index 8f58db17d4a..a112a78d624 100644
--- a/make/autoconf/jdk-options.m4
+++ b/make/autoconf/jdk-options.m4
@@ -423,7 +423,10 @@ AC_DEFUN_ONCE([JDKOPT_SETUP_ADDRESS_SANITIZER],
fi
],
IF_ENABLED: [
- ASAN_CFLAGS="-fsanitize=address -fno-omit-frame-pointer"
+ # ASan is simply incompatible with gcc -Wstringop-truncation. See
+ # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85650
+ # Suppressing the warning is harmless for clang as well.
+ ASAN_CFLAGS="-fsanitize=address -Wno-stringop-truncation -fno-omit-frame-pointer"
ASAN_LDFLAGS="-fsanitize=address"
JVM_CFLAGS="$JVM_CFLAGS $ASAN_CFLAGS"
JVM_LDFLAGS="$JVM_LDFLAGS $ASAN_LDFLAGS"
diff --git a/make/autoconf/jvm-features.m4 b/make/autoconf/jvm-features.m4
index 04ca7b4e909..5ad791795a7 100644
--- a/make/autoconf/jvm-features.m4
+++ b/make/autoconf/jvm-features.m4
@@ -306,7 +306,7 @@ AC_DEFUN_ONCE([JVM_FEATURES_CHECK_GRAAL],
# Graal is only available where JVMCI is available since it requires JVMCI.
if test "x$OPENJDK_TARGET_CPU" = "xx86_64"; then
AC_MSG_RESULT([yes])
- elif test "x$OPENJDK_TARGET_OS-$OPENJDK_TARGET_CPU" = "xlinux-aarch64"; then
+ elif test "x$OPENJDK_TARGET_CPU" = "xaarch64"; then
AC_MSG_RESULT([yes])
else
AC_MSG_RESULT([no, $OPENJDK_TARGET_CPU])
@@ -340,7 +340,7 @@ AC_DEFUN_ONCE([JVM_FEATURES_CHECK_JVMCI],
AC_MSG_CHECKING([if platform is supported by JVMCI])
if test "x$OPENJDK_TARGET_CPU" = "xx86_64"; then
AC_MSG_RESULT([yes])
- elif test "x$OPENJDK_TARGET_OS-$OPENJDK_TARGET_CPU" = "xlinux-aarch64"; then
+ elif test "x$OPENJDK_TARGET_CPU" = "xaarch64"; then
AC_MSG_RESULT([yes])
else
AC_MSG_RESULT([no, $OPENJDK_TARGET_CPU])
diff --git a/make/autoconf/libraries.m4 b/make/autoconf/libraries.m4
index 5120918aed2..e6aafe01550 100644
--- a/make/autoconf/libraries.m4
+++ b/make/autoconf/libraries.m4
@@ -43,9 +43,11 @@ AC_DEFUN_ONCE([LIB_DETERMINE_DEPENDENCIES],
if test "x$OPENJDK_TARGET_OS" = xwindows || test "x$OPENJDK_TARGET_OS" = xmacosx; then
# No X11 support on windows or macosx
NEEDS_LIB_X11=false
+ elif test "x$ENABLE_HEADLESS_ONLY" = xtrue; then
+ # No X11 support needed when building headless only
+ NEEDS_LIB_X11=false
else
- # All other instances need X11, even if building headless only, libawt still
- # needs X11 headers.
+ # All other instances need X11
NEEDS_LIB_X11=true
fi
diff --git a/make/autoconf/platform.m4 b/make/autoconf/platform.m4
index c0f2446dbd7..2f39d2b0ca7 100644
--- a/make/autoconf/platform.m4
+++ b/make/autoconf/platform.m4
@@ -220,6 +220,24 @@ AC_DEFUN([PLATFORM_EXTRACT_VARS_FROM_OS],
esac
])
+# Support macro for PLATFORM_EXTRACT_TARGET_AND_BUILD.
+# Extracts the C library name (musl, gnu or default) from the autoconf style
+# OS name, into VAR_LIBC.
+AC_DEFUN([PLATFORM_EXTRACT_VARS_FROM_LIBC],
+[
+ case "$1" in
+ *linux*-musl)
+ VAR_LIBC=musl
+ ;;
+ *linux*-gnu)
+ VAR_LIBC=gnu
+ ;;
+ *)
+ VAR_LIBC=default
+ ;;
+ esac
+])
+
# Expects $host_os $host_cpu $build_os and $build_cpu
# and $with_target_bits to have been setup!
#
@@ -237,9 +255,10 @@ AC_DEFUN([PLATFORM_EXTRACT_TARGET_AND_BUILD],
AC_SUBST(OPENJDK_TARGET_AUTOCONF_NAME)
AC_SUBST(OPENJDK_BUILD_AUTOCONF_NAME)
- # Convert the autoconf OS/CPU value to our own data, into the VAR_OS/CPU variables.
+ # Convert the autoconf OS/CPU value to our own data, into the VAR_OS/CPU/LIBC variables.
PLATFORM_EXTRACT_VARS_FROM_OS($build_os)
PLATFORM_EXTRACT_VARS_FROM_CPU($build_cpu)
+ PLATFORM_EXTRACT_VARS_FROM_LIBC($build_os)
# ..and setup our own variables. (Do this explicitly to facilitate searching)
OPENJDK_BUILD_OS="$VAR_OS"
if test "x$VAR_OS_TYPE" != x; then
@@ -256,6 +275,7 @@ AC_DEFUN([PLATFORM_EXTRACT_TARGET_AND_BUILD],
OPENJDK_BUILD_CPU_ARCH="$VAR_CPU_ARCH"
OPENJDK_BUILD_CPU_BITS="$VAR_CPU_BITS"
OPENJDK_BUILD_CPU_ENDIAN="$VAR_CPU_ENDIAN"
+ OPENJDK_BUILD_LIBC="$VAR_LIBC"
AC_SUBST(OPENJDK_BUILD_OS)
AC_SUBST(OPENJDK_BUILD_OS_TYPE)
AC_SUBST(OPENJDK_BUILD_OS_ENV)
@@ -263,13 +283,20 @@ AC_DEFUN([PLATFORM_EXTRACT_TARGET_AND_BUILD],
AC_SUBST(OPENJDK_BUILD_CPU_ARCH)
AC_SUBST(OPENJDK_BUILD_CPU_BITS)
AC_SUBST(OPENJDK_BUILD_CPU_ENDIAN)
+ AC_SUBST(OPENJDK_BUILD_LIBC)
AC_MSG_CHECKING([openjdk-build os-cpu])
AC_MSG_RESULT([$OPENJDK_BUILD_OS-$OPENJDK_BUILD_CPU])
- # Convert the autoconf OS/CPU value to our own data, into the VAR_OS/CPU variables.
+ if test "x$OPENJDK_BUILD_OS" = "xlinux"; then
+ AC_MSG_CHECKING([openjdk-build C library])
+ AC_MSG_RESULT([$OPENJDK_BUILD_LIBC])
+ fi
+
+ # Convert the autoconf OS/CPU value to our own data, into the VAR_OS/CPU/LIBC variables.
PLATFORM_EXTRACT_VARS_FROM_OS($host_os)
PLATFORM_EXTRACT_VARS_FROM_CPU($host_cpu)
+ PLATFORM_EXTRACT_VARS_FROM_LIBC($host_os)
# ... and setup our own variables. (Do this explicitly to facilitate searching)
OPENJDK_TARGET_OS="$VAR_OS"
if test "x$VAR_OS_TYPE" != x; then
@@ -287,6 +314,7 @@ AC_DEFUN([PLATFORM_EXTRACT_TARGET_AND_BUILD],
OPENJDK_TARGET_CPU_BITS="$VAR_CPU_BITS"
OPENJDK_TARGET_CPU_ENDIAN="$VAR_CPU_ENDIAN"
OPENJDK_TARGET_OS_UPPERCASE=`$ECHO $OPENJDK_TARGET_OS | $TR 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'`
+ OPENJDK_TARGET_LIBC="$VAR_LIBC"
AC_SUBST(OPENJDK_TARGET_OS)
AC_SUBST(OPENJDK_TARGET_OS_TYPE)
@@ -296,9 +324,15 @@ AC_DEFUN([PLATFORM_EXTRACT_TARGET_AND_BUILD],
AC_SUBST(OPENJDK_TARGET_CPU_ARCH)
AC_SUBST(OPENJDK_TARGET_CPU_BITS)
AC_SUBST(OPENJDK_TARGET_CPU_ENDIAN)
+ AC_SUBST(OPENJDK_TARGET_LIBC)
AC_MSG_CHECKING([openjdk-target os-cpu])
AC_MSG_RESULT([$OPENJDK_TARGET_OS-$OPENJDK_TARGET_CPU])
+
+ if test "x$OPENJDK_TARGET_OS" = "xlinux"; then
+ AC_MSG_CHECKING([openjdk-target C library])
+ AC_MSG_RESULT([$OPENJDK_TARGET_LIBC])
+ fi
])
# Check if a reduced build (32-bit on 64-bit platforms) is requested, and modify behaviour
@@ -420,7 +454,13 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER],
else
OPENJDK_$1_CPU_BUNDLE="$OPENJDK_$1_CPU"
fi
- OPENJDK_$1_BUNDLE_PLATFORM="${OPENJDK_$1_OS_BUNDLE}-${OPENJDK_$1_CPU_BUNDLE}"
+
+ OPENJDK_$1_LIBC_BUNDLE=""
+ if test "x$OPENJDK_$1_LIBC" = "xmusl"; then
+ OPENJDK_$1_LIBC_BUNDLE="-$OPENJDK_$1_LIBC"
+ fi
+
+ OPENJDK_$1_BUNDLE_PLATFORM="${OPENJDK_$1_OS_BUNDLE}-${OPENJDK_$1_CPU_BUNDLE}${OPENJDK_$1_LIBC_BUNDLE}"
AC_SUBST(OPENJDK_$1_BUNDLE_PLATFORM)
if test "x$COMPILE_TYPE" = "xcross"; then
@@ -493,6 +533,9 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER],
fi
AC_SUBST(HOTSPOT_$1_CPU_DEFINE)
+ HOTSPOT_$1_LIBC=$OPENJDK_$1_LIBC
+ AC_SUBST(HOTSPOT_$1_LIBC)
+
# For historical reasons, the OS include directories have odd names.
OPENJDK_$1_OS_INCLUDE_SUBDIR="$OPENJDK_TARGET_OS"
if test "x$OPENJDK_TARGET_OS" = "xwindows"; then
@@ -518,9 +561,11 @@ AC_DEFUN([PLATFORM_SET_RELEASE_FILE_OS_VALUES],
RELEASE_FILE_OS_NAME="AIX"
fi
RELEASE_FILE_OS_ARCH=${OPENJDK_TARGET_CPU}
+ RELEASE_FILE_LIBC=${OPENJDK_TARGET_LIBC}
AC_SUBST(RELEASE_FILE_OS_NAME)
AC_SUBST(RELEASE_FILE_OS_ARCH)
+ AC_SUBST(RELEASE_FILE_LIBC)
])
AC_DEFUN([PLATFORM_SET_MODULE_TARGET_OS_VALUES],
diff --git a/make/autoconf/spec.gmk.in b/make/autoconf/spec.gmk.in
index 14d7a18a0e8..63dc9a5767d 100644
--- a/make/autoconf/spec.gmk.in
+++ b/make/autoconf/spec.gmk.in
@@ -80,6 +80,8 @@ OPENJDK_TARGET_CPU_ARCH:=@OPENJDK_TARGET_CPU_ARCH@
OPENJDK_TARGET_CPU_BITS:=@OPENJDK_TARGET_CPU_BITS@
OPENJDK_TARGET_CPU_ENDIAN:=@OPENJDK_TARGET_CPU_ENDIAN@
+OPENJDK_TARGET_LIBC:=@OPENJDK_TARGET_LIBC@
+
COMPILE_TYPE:=@COMPILE_TYPE@
# Legacy support
@@ -95,6 +97,8 @@ HOTSPOT_TARGET_CPU := @HOTSPOT_TARGET_CPU@
HOTSPOT_TARGET_CPU_ARCH := @HOTSPOT_TARGET_CPU_ARCH@
HOTSPOT_TARGET_CPU_DEFINE := @HOTSPOT_TARGET_CPU_DEFINE@
+HOTSPOT_TARGET_LIBC := @HOTSPOT_TARGET_LIBC@
+
OPENJDK_TARGET_BUNDLE_PLATFORM:=@OPENJDK_TARGET_BUNDLE_PLATFORM@
JDK_ARCH_ABI_PROP_NAME := @JDK_ARCH_ABI_PROP_NAME@
@@ -109,6 +113,8 @@ OPENJDK_BUILD_CPU_ARCH:=@OPENJDK_BUILD_CPU_ARCH@
OPENJDK_BUILD_CPU_BITS:=@OPENJDK_BUILD_CPU_BITS@
OPENJDK_BUILD_CPU_ENDIAN:=@OPENJDK_BUILD_CPU_ENDIAN@
+OPENJDK_BUILD_LIBC:=@OPENJDK_BUILD_LIBC@
+
OPENJDK_BUILD_OS_INCLUDE_SUBDIR:=@OPENJDK_TARGET_OS_INCLUDE_SUBDIR@
# Target platform value in ModuleTarget class file attribute.
@@ -117,6 +123,7 @@ OPENJDK_MODULE_TARGET_PLATFORM:=@OPENJDK_MODULE_TARGET_PLATFORM@
# OS_* properties in release file
RELEASE_FILE_OS_NAME:=@RELEASE_FILE_OS_NAME@
RELEASE_FILE_OS_ARCH:=@RELEASE_FILE_OS_ARCH@
+RELEASE_FILE_LIBC:=@RELEASE_FILE_LIBC@
SOURCE_DATE := @SOURCE_DATE@
ENABLE_REPRODUCIBLE_BUILD := @ENABLE_REPRODUCIBLE_BUILD@
@@ -637,6 +644,7 @@ JARSIGNER=@FIXPATH@ $(JARSIGNER_CMD)
BUILD_JAVA_FLAGS := @BOOTCYCLE_JVM_ARGS_BIG@
BUILD_JAVA=@FIXPATH@ $(BUILD_JDK)/bin/java $(BUILD_JAVA_FLAGS)
+BUILD_JAVAC=@FIXPATH@ $(BUILD_JDK)/bin/javac
BUILD_JAR=@FIXPATH@ $(BUILD_JDK)/bin/jar
# Interim langtools modules and arguments
@@ -751,7 +759,6 @@ TAR_SUPPORTS_TRANSFORM:=@TAR_SUPPORTS_TRANSFORM@
# Build setup
ENABLE_AOT:=@ENABLE_AOT@
-ENABLE_INTREE_EC:=@ENABLE_INTREE_EC@
USE_EXTERNAL_LIBJPEG:=@USE_EXTERNAL_LIBJPEG@
USE_EXTERNAL_LIBGIF:=@USE_EXTERNAL_LIBGIF@
USE_EXTERNAL_LIBZ:=@USE_EXTERNAL_LIBZ@
@@ -869,10 +876,14 @@ INTERIM_JMODS_DIR := $(SUPPORT_OUTPUTDIR)/interim-jmods
INTERIM_IMAGE_DIR := $(SUPPORT_OUTPUTDIR)/interim-image
# Docs image
-DOCS_IMAGE_SUBDIR := docs
-DOCS_IMAGE_DIR = $(IMAGES_OUTPUTDIR)/$(DOCS_IMAGE_SUBDIR)
+DOCS_JDK_IMAGE_SUBDIR := docs
+DOCS_JDK_IMAGE_DIR = $(IMAGES_OUTPUTDIR)/$(DOCS_JDK_IMAGE_SUBDIR)
+DOCS_JAVASE_IMAGE_SUBDIR := docs-javase
+DOCS_JAVASE_IMAGE_DIR = $(IMAGES_OUTPUTDIR)/$(DOCS_JAVASE_IMAGE_SUBDIR)
+DOCS_REFERENCE_IMAGE_SUBDIR := docs-reference
+DOCS_REFERENCE_IMAGE_DIR = $(IMAGES_OUTPUTDIR)/$(DOCS_REFERENCE_IMAGE_SUBDIR)
# Output docs directly into image
-DOCS_OUTPUTDIR := $(DOCS_IMAGE_DIR)
+DOCS_OUTPUTDIR := $(DOCS_JDK_IMAGE_DIR)
# Static libs image
STATIC_LIBS_IMAGE_SUBDIR := static-libs
@@ -915,7 +926,9 @@ JRE_BUNDLE_NAME := jre-$(BASE_NAME)_bin$(DEBUG_PART).$(JDK_BUNDLE_EXTENSION)
JDK_SYMBOLS_BUNDLE_NAME := jdk-$(BASE_NAME)_bin$(DEBUG_PART)-symbols.tar.gz
TEST_DEMOS_BUNDLE_NAME := jdk-$(BASE_NAME)_bin-tests-demos$(DEBUG_PART).tar.gz
TEST_BUNDLE_NAME := jdk-$(BASE_NAME)_bin-tests$(DEBUG_PART).tar.gz
-DOCS_BUNDLE_NAME := jdk-$(BASE_NAME)_doc-api-spec$(DEBUG_PART).tar.gz
+DOCS_JDK_BUNDLE_NAME := jdk-$(BASE_NAME)_doc-api-spec$(DEBUG_PART).tar.gz
+DOCS_JAVASE_BUNDLE_NAME := javase-$(BASE_NAME)_doc-api-spec$(DEBUG_PART).tar.gz
+DOCS_REFERENCE_BUNDLE_NAME := jdk-reference-$(BASE_NAME)_doc-api-spec$(DEBUG_PART).tar.gz
STATIC_LIBS_BUNDLE_NAME := jdk-$(BASE_NAME)_bin-static-libs$(DEBUG_PART).tar.gz
JCOV_BUNDLE_NAME := jdk-jcov-$(BASE_NAME)_bin$(DEBUG_PART).$(JDK_BUNDLE_EXTENSION)
@@ -924,7 +937,9 @@ JRE_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(JRE_BUNDLE_NAME)
JDK_SYMBOLS_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(JDK_SYMBOLS_BUNDLE_NAME)
TEST_DEMOS_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(TEST_DEMOS_BUNDLE_NAME)
TEST_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(TEST_BUNDLE_NAME)
-DOCS_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(DOCS_BUNDLE_NAME)
+DOCS_JDK_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(DOCS_JDK_BUNDLE_NAME)
+DOCS_JAVASE_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(DOCS_JAVASE_BUNDLE_NAME)
+DOCS_REFERENCE_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(DOCS_REFERENCE_BUNDLE_NAME)
JCOV_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(JCOV_BUNDLE_NAME)
# This macro is called to allow inclusion of closed source counterparts.
diff --git a/make/common/JavaCompilation.gmk b/make/common/JavaCompilation.gmk
index e0b1fb003d4..bedb971115b 100644
--- a/make/common/JavaCompilation.gmk
+++ b/make/common/JavaCompilation.gmk
@@ -202,6 +202,13 @@ define SetupJavaCompilationBody
# If unspecified, default to the new jdk we're building
$1_TARGET_RELEASE := $$(TARGET_RELEASE_BOOTJDK)
endif
+ else ifeq ($$($1_COMPILER), buildjdk)
+ $1_JAVAC_CMD := $$(BUILD_JAVAC)
+
+ ifeq ($$($1_TARGET_RELEASE), )
+ # If unspecified, default to the new jdk we're building
+ $1_TARGET_RELEASE := $$(TARGET_RELEASE_NEWJDK)
+ endif
else ifeq ($$($1_COMPILER), interim)
# Use java server if it is enabled, and the user does not want a specialized
# class path.
@@ -304,9 +311,11 @@ define SetupJavaCompilationBody
ifneq ($$($1_KEEP_DUPS), true)
# Remove duplicate source files by keeping the first found of each duplicate.
# This allows for automatic overrides with custom or platform specific versions
- # source files.
+ # of source files. Need to call DoubleDollar as we have java classes with '$'
+ # in their names.
$1_SRCS := $$(strip $$(foreach s, $$($1_SRCS), \
- $$(eval relative_src := $$(call remove-prefixes, $$($1_SRC), $$(s))) \
+ $$(eval relative_src := $$(call remove-prefixes, $$($1_SRC), \
+ $$(call DoubleDollar, $$(s)))) \
$$(if $$($1_$$(relative_src)), \
, \
$$(eval $1_$$(relative_src) := 1) $$(s))))
diff --git a/make/common/JdkNativeCompilation.gmk b/make/common/JdkNativeCompilation.gmk
index 21134966dc0..6a963ac2c49 100644
--- a/make/common/JdkNativeCompilation.gmk
+++ b/make/common/JdkNativeCompilation.gmk
@@ -77,8 +77,10 @@ ifeq ($(STATIC_LIBS), true)
FindStaticLib =
endif
+# Returns the module specific java header dir if it exists.
+# Param 1 - module name
GetJavaHeaderDir = \
- $(wildcard $(SUPPORT_OUTPUTDIR)/headers/$(strip $1))
+ $(if $(strip $1),$(wildcard $(SUPPORT_OUTPUTDIR)/headers/$(strip $1)))
# Process a dir description such as "java.base:headers" into a set of proper absolute paths.
ProcessDir = \
@@ -123,15 +125,27 @@ JDK_RCFLAGS=$(RCFLAGS) \
SetupJdkLibrary = $(NamedParamsMacroTemplate)
define SetupJdkLibraryBody
ifeq ($$($1_OUTPUT_DIR), )
- $1_OUTPUT_DIR := $$(call FindLibDirForModule, $$(MODULE))
+ ifneq ($$(MODULE), )
+ $1_OUTPUT_DIR := $$(call FindLibDirForModule, $$(MODULE))
+ else
+ $$(error Must specify OUTPUT_DIR in a MODULE free context)
+ endif
endif
ifeq ($$($1_OBJECT_DIR), )
- $1_OBJECT_DIR := $$(SUPPORT_OUTPUTDIR)/native/$$(MODULE)/lib$$($1_NAME)
+ ifneq ($$(MODULE), )
+ $1_OBJECT_DIR := $$(SUPPORT_OUTPUTDIR)/native/$$(MODULE)/lib$$($1_NAME)
+ else
+ $$(error Must specify OBJECT_DIR in a MODULE free context)
+ endif
endif
ifeq ($$($1_SRC), )
- $1_SRC := $$(call FindSrcDirsForLib, $$(MODULE), $$($1_NAME))
+ ifneq ($$(MODULE), )
+ $1_SRC := $$(call FindSrcDirsForLib, $$(MODULE), $$($1_NAME))
+ else
+ $$(error Must specify SRC in a MODULE free context)
+ endif
else
$1_SRC := $$(foreach dir, $$($1_SRC), $$(call ProcessDir, $$(dir)))
endif
@@ -165,7 +179,8 @@ define SetupJdkLibraryBody
ifneq ($$($1_HEADERS_FROM_SRC), false)
$1_SRC_HEADER_FLAGS := $$(addprefix -I, $$(wildcard $$($1_SRC)))
endif
- # Always add the java header dir
+
+ # Add the module specific java header dir
$1_SRC_HEADER_FLAGS += $$(addprefix -I, $$(call GetJavaHeaderDir, $$(MODULE)))
ifneq ($$($1_EXTRA_HEADER_DIRS), )
@@ -203,11 +218,19 @@ define SetupJdkExecutableBody
$1_TYPE := EXECUTABLE
ifeq ($$($1_OUTPUT_DIR), )
- $1_OUTPUT_DIR := $$(call FindExecutableDirForModule, $$(MODULE))
+ ifneq ($$(MODULE), )
+ $1_OUTPUT_DIR := $$(call FindExecutableDirForModule, $$(MODULE))
+ else
+ $$(error Must specify OUTPUT_DIR in a MODULE free context)
+ endif
endif
ifeq ($$($1_OBJECT_DIR), )
- $1_OBJECT_DIR := $$(SUPPORT_OUTPUTDIR)/native/$$(MODULE)/$$($1_NAME)
+ ifneq ($$(MODULE), )
+ $1_OBJECT_DIR := $$(SUPPORT_OUTPUTDIR)/native/$$(MODULE)/$$($1_NAME)
+ else
+ $$(error Must specify OBJECT_DIR in a MODULE free context)
+ endif
endif
ifeq ($$($1_VERSIONINFO_RESOURCE), )
diff --git a/make/common/Modules.gmk b/make/common/Modules.gmk
index 72e19840501..10aacff4726 100644
--- a/make/common/Modules.gmk
+++ b/make/common/Modules.gmk
@@ -59,6 +59,7 @@ BOOT_MODULES += \
java.security.sasl \
java.xml \
jdk.incubator.foreign \
+ jdk.incubator.vector \
jdk.internal.vm.ci \
jdk.jfr \
jdk.management \
@@ -124,7 +125,7 @@ endif
JRE_TOOL_MODULES += \
jdk.jdwp.agent \
- jdk.incubator.jpackage \
+ jdk.jpackage \
#
################################################################################
@@ -144,7 +145,8 @@ DOCS_MODULES += \
jdk.editpad \
jdk.hotspot.agent \
jdk.httpserver \
- jdk.incubator.jpackage \
+ jdk.jpackage \
+ jdk.incubator.vector \
jdk.jartool \
jdk.javadoc \
jdk.jcmd \
@@ -226,7 +228,7 @@ endif
# jpackage is only on windows, macosx, and linux
ifeq ($(call isTargetOs, windows macosx linux), false)
- MODULES_FILTER += jdk.incubator.jpackage
+ MODULES_FILTER += jdk.jpackage
endif
################################################################################
diff --git a/make/conf/jib-profiles.js b/make/conf/jib-profiles.js
index e70d6617745..36460fee4b4 100644
--- a/make/conf/jib-profiles.js
+++ b/make/conf/jib-profiles.js
@@ -251,6 +251,8 @@ var getJibProfilesCommon = function (input, data) {
configure_args: concat("--enable-jtreg-failure-handler",
"--with-exclude-translations=de,es,fr,it,ko,pt_BR,sv,ca,tr,cs,sk,ja_JP_A,ja_JP_HA,ja_JP_HI,ja_JP_I,zh_TW,zh_HK",
"--disable-manpages",
+ "--disable-jvm-feature-aot",
+ "--disable-jvm-feature-graal",
"--disable-jvm-feature-shenandoahgc",
versionArgs(input, common))
};
@@ -404,12 +406,11 @@ var getJibProfilesProfiles = function (input, common, data) {
"linux-x64": {
target_os: "linux",
target_cpu: "x64",
- dependencies: ["devkit", "gtest", "graphviz", "pandoc", "graalunit_lib"],
+ dependencies: ["devkit", "gtest", "graphviz", "pandoc"],
configure_args: concat(common.configure_args_64bit,
- "--enable-full-docs", "--with-zlib=system",
+ "--with-zlib=system", "--disable-dtrace",
(isWsl(input) ? [ "--host=x86_64-unknown-linux-gnu",
"--build=x86_64-unknown-linux-gnu" ] : [])),
- default_make_targets: ["docs-bundles"],
},
"linux-x86": {
@@ -424,7 +425,7 @@ var getJibProfilesProfiles = function (input, common, data) {
"macosx-x64": {
target_os: "macosx",
target_cpu: "x64",
- dependencies: ["devkit", "gtest", "pandoc", "graalunit_lib"],
+ dependencies: ["devkit", "gtest", "pandoc"],
configure_args: concat(common.configure_args_64bit, "--with-zlib=system",
"--with-macosx-version-max=10.9.0",
// Use system SetFile instead of the one in the devkit as the
@@ -435,7 +436,7 @@ var getJibProfilesProfiles = function (input, common, data) {
"windows-x64": {
target_os: "windows",
target_cpu: "x64",
- dependencies: ["devkit", "gtest", "pandoc", "graalunit_lib"],
+ dependencies: ["devkit", "gtest", "pandoc"],
configure_args: concat(common.configure_args_64bit),
},
@@ -455,8 +456,6 @@ var getJibProfilesProfiles = function (input, common, data) {
configure_args: [
"--openjdk-target=aarch64-linux-gnu",
"--disable-jvm-feature-jvmci",
- "--disable-jvm-feature-graal",
- "--disable-jvm-feature-aot",
],
},
@@ -680,20 +679,47 @@ var getJibProfilesProfiles = function (input, common, data) {
common.debug_profile_artifacts(artifactData[name]));
});
- profilesArtifacts = {
- "linux-x64": {
+ buildJdkDep = input.build_os + "-" + input.build_cpu + ".jdk";
+ docsProfiles = {
+ "docs": {
+ target_os: input.build_os,
+ target_cpu: input.build_cpu,
+ dependencies: [
+ "boot_jdk", "devkit", "graphviz", "pandoc", buildJdkDep,
+ ],
+ configure_args: concat(
+ "--enable-full-docs",
+ versionArgs(input, common),
+ "--with-build-jdk=" + input.get(buildJdkDep, "home_path")
+ + (input.build_os == "macosx" ? "/Contents/Home" : "")
+ ),
+ default_make_targets: ["all-docs-bundles"],
artifacts: {
doc_api_spec: {
- local: "bundles/\\(jdk.*doc-api-spec.tar.gz\\)",
+ local: "bundles/\\(jdk-" + data.version + ".*doc-api-spec.tar.gz\\)",
remote: [
"bundles/common/jdk-" + data.version + "_doc-api-spec.tar.gz",
"bundles/common/\\1"
],
},
+ javase_doc_api_spec: {
+ local: "bundles/\\(javase-" + data.version + ".*doc-api-spec.tar.gz\\)",
+ remote: [
+ "bundles/common/javase-" + data.version + "_doc-api-spec.tar.gz",
+ "bundles/common/\\1"
+ ],
+ },
+ reference_doc_api_spec: {
+ local: "bundles/\\(jdk-reference-" + data.version + ".*doc-api-spec.tar.gz\\)",
+ remote: [
+ "bundles/common/jdk-reference-" + data.version + "_doc-api-spec.tar.gz",
+ "bundles/common/\\1"
+ ],
+ },
}
}
};
- profiles = concatObjects(profiles, profilesArtifacts);
+ profiles = concatObjects(profiles, docsProfiles);
// Generate open only profiles for all the main and debug profiles.
// Rewrite artifact remote paths by adding "openjdk/GPL".
@@ -960,7 +986,7 @@ var getJibProfilesDependencies = function (input, common) {
var devkit_platform_revisions = {
linux_x64: "gcc10.2.0-OL6.4+1.0",
- macosx_x64: "Xcode11.3.1-MacOSX10.15+1.0",
+ macosx_x64: "Xcode11.3.1-MacOSX10.15+1.1",
windows_x64: "VS2019-16.7.2+1.0",
linux_aarch64: "gcc10.2.0-OL7.6+1.0",
linux_arm: "gcc8.2.0-Fedora27+1.0",
@@ -1126,15 +1152,6 @@ var getJibProfilesDependencies = function (input, common) {
configure_args: "",
},
- graalunit_lib: {
- organization: common.organization,
- ext: "zip",
- revision: "619_Apr_12_2018",
- module: "graalunit-lib",
- configure_args: "--with-graalunit-lib=" + input.get("graalunit_lib", "install_path"),
- environment_name: "GRAALUNIT_LIB"
- },
-
gtest: {
organization: common.organization,
ext: "tar.gz",
diff --git a/make/data/blacklistedcertsconverter/blacklisted.certs.pem b/make/data/blacklistedcertsconverter/blacklisted.certs.pem
index 191e94e12a5..688becbc493 100644
--- a/make/data/blacklistedcertsconverter/blacklisted.certs.pem
+++ b/make/data/blacklistedcertsconverter/blacklisted.certs.pem
@@ -1,8 +1,7 @@
#! java BlacklistedCertsConverter SHA-256
-# The line above must be the first line of the blacklisted.certs.pem
-# file inside src/share/lib/security/. It will be ignored if added in
-# src/closed/share/lib/security/blacklisted.certs.pem.
+# The line above must be the first line of this file. Do not
+# remove it.
// Subject: CN=Digisign Server ID (Enrich),
// OU=457608-K,
diff --git a/make/data/tzdata/VERSION b/make/data/tzdata/VERSION
index e96a6d78497..94ba7462f2e 100644
--- a/make/data/tzdata/VERSION
+++ b/make/data/tzdata/VERSION
@@ -21,4 +21,4 @@
# or visit www.oracle.com if you need additional information or have any
# questions.
#
-tzdata2020a
+tzdata2020d
diff --git a/make/data/tzdata/africa b/make/data/tzdata/africa
index 7947bc55b00..e1c3d8929e6 100644
--- a/make/data/tzdata/africa
+++ b/make/data/tzdata/africa
@@ -87,7 +87,7 @@
# Corrections are welcome.
# Algeria
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Algeria 1916 only - Jun 14 23:00s 1:00 S
Rule Algeria 1916 1919 - Oct Sun>=1 23:00s 0 -
Rule Algeria 1917 only - Mar 24 23:00s 1:00 S
@@ -110,10 +110,9 @@ Rule Algeria 1978 only - Mar 24 1:00 1:00 S
Rule Algeria 1978 only - Sep 22 3:00 0 -
Rule Algeria 1980 only - Apr 25 0:00 1:00 S
Rule Algeria 1980 only - Oct 31 2:00 0 -
-# Shanks & Pottenger give 0:09:20 for Paris Mean Time; go with Howse's
-# more precise 0:09:21.
+# See Europe/Paris for PMT-related transitions.
# Zone NAME STDOFF RULES FORMAT [UNTIL]
-Zone Africa/Algiers 0:12:12 - LMT 1891 Mar 15 0:01
+Zone Africa/Algiers 0:12:12 - LMT 1891 Mar 16
0:09:21 - PMT 1911 Mar 11 # Paris Mean Time
0:00 Algeria WE%sT 1940 Feb 25 2:00
1:00 Algeria CE%sT 1946 Oct 7
@@ -199,7 +198,7 @@ Link Africa/Abidjan Atlantic/St_Helena # St Helena
# Egypt was mean noon at the Great Pyramid, 2:04:30.5, but apparently this
# did not apply to Cairo, Alexandria, or Port Said.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Egypt 1940 only - Jul 15 0:00 1:00 S
Rule Egypt 1940 only - Oct 1 0:00 0 -
Rule Egypt 1941 only - Apr 15 0:00 1:00 S
@@ -434,7 +433,7 @@ Zone Africa/Cairo 2:05:09 - LMT 1900 Oct
# now Ghana observed different DST regimes in different years. For
# lack of better info, use Shanks except treat the minus sign as a
# typo, and assume DST started in 1920 not 1936.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Ghana 1920 1942 - Sep 1 0:00 0:20 -
Rule Ghana 1920 1942 - Dec 31 0:00 0 -
# Zone NAME STDOFF RULES FORMAT [UNTIL]
@@ -524,7 +523,7 @@ Zone Africa/Monrovia -0:43:08 - LMT 1882
# From Paul Eggert (2013-10-25):
# For now, assume they're reverting to the pre-2012 rules of permanent UT +02.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Libya 1951 only - Oct 14 2:00 1:00 S
Rule Libya 1952 only - Jan 1 0:00 0 -
Rule Libya 1953 only - Oct 9 2:00 1:00 S
@@ -647,7 +646,7 @@ Zone Africa/Tripoli 0:52:44 - LMT 1920
# "The trial ended on March 29, 2009, when the clocks moved back by one hour
# at 2am (or 02:00) local time..."
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Mauritius 1982 only - Oct 10 0:00 1:00 -
Rule Mauritius 1983 only - Mar 21 0:00 0 -
Rule Mauritius 2008 only - Oct lastSun 2:00 1:00 -
@@ -898,17 +897,30 @@ Zone Indian/Mauritius 3:50:00 - LMT 1907 # Port Louis
# https://maroc-diplomatique.net/maroc-le-retour-a-lheure-gmt-est-prevu-dimanche-prochain/
# http://aujourdhui.ma/actualite/gmt1-retour-a-lheure-normale-dimanche-prochain-1
#
-# From Paul Eggert (2020-04-14):
+# From Milamber (2020-05-31):
+# In Morocco (where I live), the end of Ramadan (Arabic month) is followed by
+# the Eid al-Fitr, and concretely it's 1 or 2 days off for the people (with
+# traditional visiting of family, big lunches/dinners, etc.). So for this
+# year the astronomical calculations don't include the following 2 days off in
+# the calc. These 2 days fall on a Sunday/Monday, so it's not acceptable to
+# people to have a time shift during these 2 days off. Perhaps you can modify
+# the (predicted) rules for next years: if the end of Ramadan is a (probable)
+# Friday or Saturday (and so the 2 days off are on a weekend), the next time
+# shift will be the next weekend.
+#
+# From Paul Eggert (2020-05-31):
# For now, guess that in the future Morocco will fall back at 03:00
# the last Sunday before Ramadan, and spring forward at 02:00 the
-# first Sunday after the day after Ramadan. To implement this,
-# transition dates for 2021 through 2087 were determined by running
-# the following program under GNU Emacs 26.3.
-# (let ((islamic-year 1442))
+# first Sunday after two days after Ramadan. To implement this,
+# transition dates and times for 2019 through 2087 were determined by
+# running the following program under GNU Emacs 26.3. (This algorithm
+# also produces the correct transition dates for 2016 through 2018,
+# though the times differ due to Morocco's time zone change in 2018.)
+# (let ((islamic-year 1440))
# (require 'cal-islam)
# (while (< islamic-year 1511)
# (let ((a (calendar-islamic-to-absolute (list 9 1 islamic-year)))
-# (b (1+ (calendar-islamic-to-absolute (list 10 1 islamic-year))))
+# (b (+ 2 (calendar-islamic-to-absolute (list 10 1 islamic-year))))
# (sunday 0))
# (while (/= sunday (mod (setq a (1- a)) 7)))
# (while (/= sunday (mod b 7))
@@ -923,7 +935,7 @@ Zone Indian/Mauritius 3:50:00 - LMT 1907 # Port Louis
# (car (cdr (cdr b))) (calendar-month-name (car b) t) (car (cdr b)))))
# (setq islamic-year (+ 1 islamic-year))))
-# RULE NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Morocco 1939 only - Sep 12 0:00 1:00 -
Rule Morocco 1939 only - Nov 19 0:00 0 -
Rule Morocco 1940 only - Feb 25 0:00 1:00 -
@@ -974,7 +986,7 @@ Rule Morocco 2021 only - May 16 2:00 0 -
Rule Morocco 2022 only - Mar 27 3:00 -1:00 -
Rule Morocco 2022 only - May 8 2:00 0 -
Rule Morocco 2023 only - Mar 19 3:00 -1:00 -
-Rule Morocco 2023 only - Apr 23 2:00 0 -
+Rule Morocco 2023 only - Apr 30 2:00 0 -
Rule Morocco 2024 only - Mar 10 3:00 -1:00 -
Rule Morocco 2024 only - Apr 14 2:00 0 -
Rule Morocco 2025 only - Feb 23 3:00 -1:00 -
@@ -990,7 +1002,7 @@ Rule Morocco 2029 only - Feb 18 2:00 0 -
Rule Morocco 2029 only - Dec 30 3:00 -1:00 -
Rule Morocco 2030 only - Feb 10 2:00 0 -
Rule Morocco 2030 only - Dec 22 3:00 -1:00 -
-Rule Morocco 2031 only - Jan 26 2:00 0 -
+Rule Morocco 2031 only - Feb 2 2:00 0 -
Rule Morocco 2031 only - Dec 14 3:00 -1:00 -
Rule Morocco 2032 only - Jan 18 2:00 0 -
Rule Morocco 2032 only - Nov 28 3:00 -1:00 -
@@ -1006,7 +1018,7 @@ Rule Morocco 2036 only - Nov 23 2:00 0 -
Rule Morocco 2037 only - Oct 4 3:00 -1:00 -
Rule Morocco 2037 only - Nov 15 2:00 0 -
Rule Morocco 2038 only - Sep 26 3:00 -1:00 -
-Rule Morocco 2038 only - Oct 31 2:00 0 -
+Rule Morocco 2038 only - Nov 7 2:00 0 -
Rule Morocco 2039 only - Sep 18 3:00 -1:00 -
Rule Morocco 2039 only - Oct 23 2:00 0 -
Rule Morocco 2040 only - Sep 2 3:00 -1:00 -
@@ -1022,7 +1034,7 @@ Rule Morocco 2044 only - Aug 28 2:00 0 -
Rule Morocco 2045 only - Jul 9 3:00 -1:00 -
Rule Morocco 2045 only - Aug 20 2:00 0 -
Rule Morocco 2046 only - Jul 1 3:00 -1:00 -
-Rule Morocco 2046 only - Aug 5 2:00 0 -
+Rule Morocco 2046 only - Aug 12 2:00 0 -
Rule Morocco 2047 only - Jun 23 3:00 -1:00 -
Rule Morocco 2047 only - Jul 28 2:00 0 -
Rule Morocco 2048 only - Jun 7 3:00 -1:00 -
@@ -1038,7 +1050,7 @@ Rule Morocco 2052 only - Jun 2 2:00 0 -
Rule Morocco 2053 only - Apr 13 3:00 -1:00 -
Rule Morocco 2053 only - May 25 2:00 0 -
Rule Morocco 2054 only - Apr 5 3:00 -1:00 -
-Rule Morocco 2054 only - May 10 2:00 0 -
+Rule Morocco 2054 only - May 17 2:00 0 -
Rule Morocco 2055 only - Mar 28 3:00 -1:00 -
Rule Morocco 2055 only - May 2 2:00 0 -
Rule Morocco 2056 only - Mar 12 3:00 -1:00 -
@@ -1054,7 +1066,7 @@ Rule Morocco 2060 only - Mar 7 2:00 0 -
Rule Morocco 2061 only - Jan 16 3:00 -1:00 -
Rule Morocco 2061 only - Feb 27 2:00 0 -
Rule Morocco 2062 only - Jan 8 3:00 -1:00 -
-Rule Morocco 2062 only - Feb 12 2:00 0 -
+Rule Morocco 2062 only - Feb 19 2:00 0 -
Rule Morocco 2062 only - Dec 31 3:00 -1:00 -
Rule Morocco 2063 only - Feb 4 2:00 0 -
Rule Morocco 2063 only - Dec 16 3:00 -1:00 -
@@ -1070,7 +1082,7 @@ Rule Morocco 2067 only - Dec 11 2:00 0 -
Rule Morocco 2068 only - Oct 21 3:00 -1:00 -
Rule Morocco 2068 only - Dec 2 2:00 0 -
Rule Morocco 2069 only - Oct 13 3:00 -1:00 -
-Rule Morocco 2069 only - Nov 17 2:00 0 -
+Rule Morocco 2069 only - Nov 24 2:00 0 -
Rule Morocco 2070 only - Oct 5 3:00 -1:00 -
Rule Morocco 2070 only - Nov 9 2:00 0 -
Rule Morocco 2071 only - Sep 20 3:00 -1:00 -
@@ -1086,7 +1098,7 @@ Rule Morocco 2075 only - Sep 15 2:00 0 -
Rule Morocco 2076 only - Jul 26 3:00 -1:00 -
Rule Morocco 2076 only - Sep 6 2:00 0 -
Rule Morocco 2077 only - Jul 18 3:00 -1:00 -
-Rule Morocco 2077 only - Aug 22 2:00 0 -
+Rule Morocco 2077 only - Aug 29 2:00 0 -
Rule Morocco 2078 only - Jul 10 3:00 -1:00 -
Rule Morocco 2078 only - Aug 14 2:00 0 -
Rule Morocco 2079 only - Jun 25 3:00 -1:00 -
@@ -1096,13 +1108,13 @@ Rule Morocco 2080 only - Jul 21 2:00 0 -
Rule Morocco 2081 only - Jun 1 3:00 -1:00 -
Rule Morocco 2081 only - Jul 13 2:00 0 -
Rule Morocco 2082 only - May 24 3:00 -1:00 -
-Rule Morocco 2082 only - Jun 28 2:00 0 -
+Rule Morocco 2082 only - Jul 5 2:00 0 -
Rule Morocco 2083 only - May 16 3:00 -1:00 -
Rule Morocco 2083 only - Jun 20 2:00 0 -
Rule Morocco 2084 only - Apr 30 3:00 -1:00 -
Rule Morocco 2084 only - Jun 11 2:00 0 -
Rule Morocco 2085 only - Apr 22 3:00 -1:00 -
-Rule Morocco 2085 only - May 27 2:00 0 -
+Rule Morocco 2085 only - Jun 3 2:00 0 -
Rule Morocco 2086 only - Apr 14 3:00 -1:00 -
Rule Morocco 2086 only - May 19 2:00 0 -
Rule Morocco 2087 only - Mar 30 3:00 -1:00 -
@@ -1203,7 +1215,7 @@ Link Africa/Maputo Africa/Lusaka # Zambia
# Use plain "WAT" and "CAT" for the time zone abbreviations, to be compatible
# with Namibia's neighbors.
-# RULE NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
# Vanguard section, for zic and other parsers that support negative DST.
Rule Namibia 1994 only - Mar 21 0:00 -1:00 WAT
Rule Namibia 1994 2017 - Sep Sun>=1 2:00 0 CAT
@@ -1326,7 +1338,7 @@ Zone Indian/Mahe 3:41:48 - LMT 1906 Jun # Victoria
# See Africa/Nairobi.
# South Africa
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule SA 1942 1943 - Sep Sun>=15 2:00 1:00 -
Rule SA 1943 1944 - Mar Sun>=15 2:00 0 -
# Zone NAME STDOFF RULES FORMAT [UNTIL]
@@ -1359,7 +1371,7 @@ Link Africa/Johannesburg Africa/Mbabane # Eswatini
# Abdalla of NTC, archived at:
# https://mm.icann.org/pipermail/tz/2017-October/025333.html
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Sudan 1970 only - May 1 0:00 1:00 S
Rule Sudan 1970 1985 - Oct 15 0:00 0 -
Rule Sudan 1971 only - Apr 30 0:00 1:00 S
@@ -1447,7 +1459,7 @@ Zone Africa/Juba 2:06:28 - LMT 1931
# http://www.almadenahnews.com/newss/news.php?c=118&id=38036
# http://www.worldtimezone.com/dst_news/dst_news_tunis02.html
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Tunisia 1939 only - Apr 15 23:00s 1:00 S
Rule Tunisia 1939 only - Nov 18 23:00s 0 -
Rule Tunisia 1940 only - Feb 25 23:00s 1:00 S
@@ -1474,9 +1486,7 @@ Rule Tunisia 2005 only - Sep 30 1:00s 0 -
Rule Tunisia 2006 2008 - Mar lastSun 2:00s 1:00 S
Rule Tunisia 2006 2008 - Oct lastSun 2:00s 0 -
-# Shanks & Pottenger give 0:09:20 for Paris Mean Time; go with Howse's
-# more precise 0:09:21.
-# Shanks & Pottenger say the 1911 switch was on Mar 9; go with Howse's Mar 11.
+# See Europe/Paris for PMT-related transitions.
# Zone NAME STDOFF RULES FORMAT [UNTIL]
Zone Africa/Tunis 0:40:44 - LMT 1881 May 12
0:09:21 - PMT 1911 Mar 11 # Paris Mean Time
diff --git a/make/data/tzdata/antarctica b/make/data/tzdata/antarctica
index 6239f837e9f..509fadc29a9 100644
--- a/make/data/tzdata/antarctica
+++ b/make/data/tzdata/antarctica
@@ -93,15 +93,30 @@
# Australian Antarctica Division informed us that Casey changed time
# zone to UTC+11 in "the morning of 22nd October 2016".
+# From Steffen Thorsen (2020-10-02, as corrected):
+# Based on information we have received from the Australian Antarctic
+# Division, Casey station and Macquarie Island station will move to Tasmanian
+# daylight savings time on Sunday 4 October. This will take effect from 0001
+# hrs on Sunday 4 October 2020 and will mean Casey and Macquarie Island will
+# be on the same time zone as Hobart. Some past dates too for this 3 hour
+# time change back and forth between UTC+8 and UTC+11 for Casey:
+# - 2018 Oct 7 4:00 - 2019 Mar 17 3:00 - 2019 Oct 4 3:00 - 2020 Mar 8 3:00
+# and now - 2020 Oct 4 0:01
+
# Zone NAME STDOFF RULES FORMAT [UNTIL]
-Zone Antarctica/Casey 0 - -00 1969
- 8:00 - +08 2009 Oct 18 2:00
+Zone Antarctica/Casey 0 - -00 1969
+ 8:00 - +08 2009 Oct 18 2:00
11:00 - +11 2010 Mar 5 2:00
- 8:00 - +08 2011 Oct 28 2:00
+ 8:00 - +08 2011 Oct 28 2:00
11:00 - +11 2012 Feb 21 17:00u
- 8:00 - +08 2016 Oct 22
+ 8:00 - +08 2016 Oct 22
11:00 - +11 2018 Mar 11 4:00
- 8:00 - +08
+ 8:00 - +08 2018 Oct 7 4:00
+ 11:00 - +11 2019 Mar 17 3:00
+ 8:00 - +08 2019 Oct 4 3:00
+ 11:00 - +11 2020 Mar 8 3:00
+ 8:00 - +08 2020 Oct 4 0:01
+ 11:00 - +11
Zone Antarctica/Davis 0 - -00 1957 Jan 13
7:00 - +07 1964 Nov
0 - -00 1969 Feb
@@ -247,7 +262,7 @@ Zone Antarctica/Syowa 0 - -00 1957 Jan 29
# suggested by Bengt-Inge Larsson comment them out for now, and approximate
# with only UTC and CEST. Uncomment them when 2014b is more prevalent.
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
#Rule Troll 2005 max - Mar 1 1:00u 1:00 +01
Rule Troll 2005 max - Mar lastSun 1:00u 2:00 +02
#Rule Troll 2005 max - Oct lastSun 1:00u 1:00 +01
diff --git a/make/data/tzdata/asia b/make/data/tzdata/asia
index 0700aa46b41..acca6554fa2 100644
--- a/make/data/tzdata/asia
+++ b/make/data/tzdata/asia
@@ -93,7 +93,7 @@
###############################################################################
# These rules are stolen from the 'europe' file.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule EUAsia 1981 max - Mar lastSun 1:00u 1:00 S
Rule EUAsia 1979 1995 - Sep lastSun 1:00u 0 -
Rule EUAsia 1996 max - Oct lastSun 1:00u 0 -
@@ -137,7 +137,7 @@ Zone Asia/Kabul 4:36:48 - LMT 1890
# or
# (brief)
# http://www.worldtimezone.com/dst_news/dst_news_armenia03.html
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Armenia 2011 only - Mar lastSun 2:00s 1:00 -
Rule Armenia 2011 only - Oct lastSun 2:00s 0 -
# Zone NAME STDOFF RULES FORMAT [UNTIL]
@@ -163,7 +163,7 @@ Zone Asia/Yerevan 2:58:00 - LMT 1924 May 2
# http://vestnikkavkaza.net/news/Azerbaijani-Cabinet-of-Ministers-cancels-daylight-saving-time.html
# http://en.apa.az/xeber_azerbaijan_abolishes_daylight_savings_ti_240862.html
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Azer 1997 2015 - Mar lastSun 4:00 1:00 -
Rule Azer 1997 2015 - Oct lastSun 5:00 0 -
# Zone NAME STDOFF RULES FORMAT [UNTIL]
@@ -250,7 +250,7 @@ Zone Asia/Baku 3:19:24 - LMT 1924 May 2
# http://www.thedailystar.net/newDesign/latest_news.php?nid=22817
# http://www.worldtimezone.com/dst_news/dst_news_bangladesh06.html
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Dhaka 2009 only - Jun 19 23:00 1:00 -
Rule Dhaka 2009 only - Dec 31 24:00 0 -
@@ -326,7 +326,7 @@ Zone Asia/Yangon 6:24:47 - LMT 1880 # or Rangoon
# generally esteemed a success, it was announced early in 1920 that it would
# not be repeated."
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Shang 1919 only - Apr 12 24:00 1:00 D
Rule Shang 1919 only - Sep 30 24:00 0 S
@@ -422,7 +422,7 @@ Rule Shang 1919 only - Sep 30 24:00 0 S
# the Yangtze river delta area during that period of time although the scope
# of such use will need to be investigated to determine.
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Shang 1940 only - Jun 1 0:00 1:00 D
Rule Shang 1940 only - Oct 12 24:00 0 S
Rule Shang 1941 only - Mar 15 0:00 1:00 D
@@ -485,7 +485,7 @@ Rule Shang 1948 1949 - Sep 30 24:00 0 S #plan
# to begin on 17 April.
# http://data.people.com.cn/pic/101p/1988/04/1988041201.jpg
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule PRC 1986 only - May 4 2:00 1:00 D
Rule PRC 1986 1991 - Sep Sun>=11 2:00 0 S
Rule PRC 1987 1991 - Apr Sun>=11 2:00 1:00 D
@@ -869,7 +869,7 @@ Zone Asia/Urumqi 5:50:20 - LMT 1928
# or dates for the 1942 and 1945 transitions.
# The Japanese occupation of Hong Kong began 1941-12-25.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule HK 1946 only - Apr 21 0:00 1:00 S
Rule HK 1946 only - Dec 1 3:30s 0 -
Rule HK 1947 only - Apr 13 3:30s 1:00 S
@@ -996,7 +996,7 @@ Zone Asia/Hong_Kong 7:36:42 - LMT 1904 Oct 30 0:36:42
# until 1945-09-21 at 01:00, overriding Shanks & Pottenger.
# Likewise, use Yu-Cheng Chuang's data for DST in Taiwan.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Taiwan 1946 only - May 15 0:00 1:00 D
Rule Taiwan 1946 only - Oct 1 0:00 0 S
Rule Taiwan 1947 only - Apr 15 0:00 1:00 D
@@ -1122,7 +1122,7 @@ Zone Asia/Taipei 8:06:00 - LMT 1896 Jan 1
# The 1904 decree says that Macau changed from the meridian of
# Fortaleza do Monte, presumably the basis for the 7:34:10 for LMT.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Macau 1942 1943 - Apr 30 23:00 1:00 -
Rule Macau 1942 only - Nov 17 23:00 0 -
Rule Macau 1943 only - Sep 30 23:00 0 S
@@ -1180,7 +1180,7 @@ Zone Asia/Macau 7:34:10 - LMT 1904 Oct 30
# Cyprus to remain united in time. Cyprus Mail 2017-10-17.
# https://cyprus-mail.com/2017/10/17/cyprus-remain-united-time/
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Cyprus 1975 only - Apr 13 0:00 1:00 S
Rule Cyprus 1975 only - Oct 12 0:00 0 -
Rule Cyprus 1976 only - May 15 0:00 1:00 S
@@ -1557,7 +1557,7 @@ Zone Asia/Jayapura 9:22:48 - LMT 1932 Nov
# be changed back to its previous state on the 24 hours of the
# thirtieth day of Shahrivar.
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Iran 1978 1980 - Mar 20 24:00 1:00 -
Rule Iran 1978 only - Oct 20 24:00 0 -
Rule Iran 1979 only - Sep 18 24:00 0 -
@@ -1699,7 +1699,7 @@ Zone Asia/Tehran 3:25:44 - LMT 1916
# We have published a short article in English about the change:
# https://www.timeanddate.com/news/time/iraq-dumps-daylight-saving.html
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Iraq 1982 only - May 1 0:00 1:00 -
Rule Iraq 1982 1984 - Oct 1 0:00 0 -
Rule Iraq 1983 only - Mar 31 0:00 1:00 -
@@ -1722,6 +1722,10 @@ Zone Asia/Baghdad 2:57:40 - LMT 1890
# Israel
+# For more info about the motivation for DST in Israel, see:
+# Barak Y. Israel's Daylight Saving Time controversy. Israel Affairs.
+# 2020-08-11. https://doi.org/10.1080/13537121.2020.1806564
+
# From Ephraim Silverberg (2001-01-11):
#
# I coined "IST/IDT" circa 1988. Until then there were three
@@ -1743,7 +1747,7 @@ Zone Asia/Baghdad 2:57:40 - LMT 1890
# family is from India).
# From Shanks & Pottenger:
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Zion 1940 only - Jun 1 0:00 1:00 D
Rule Zion 1942 1944 - Nov 1 0:00 0 S
Rule Zion 1943 only - Apr 1 2:00 1:00 D
@@ -1835,7 +1839,7 @@ Rule Zion 1988 only - Sep 4 0:00 0 S
# (except in 2002) is three nights before Yom Kippur [Day of Atonement]
# (the eve of the 7th of Tishrei in the lunar Hebrew calendar).
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Zion 1989 only - Apr 30 0:00 1:00 D
Rule Zion 1989 only - Sep 3 0:00 0 S
Rule Zion 1990 only - Mar 25 0:00 1:00 D
@@ -1851,7 +1855,7 @@ Rule Zion 1993 only - Sep 5 0:00 0 S
# Ministry of Interior, Jerusalem, Israel. The spokeswoman can be reached by
# calling the office directly at 972-2-6701447 or 972-2-6701448.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Zion 1994 only - Apr 1 0:00 1:00 D
Rule Zion 1994 only - Aug 28 0:00 0 S
Rule Zion 1995 only - Mar 31 0:00 1:00 D
@@ -1871,7 +1875,7 @@ Rule Zion 1995 only - Sep 3 0:00 0 S
#
# where YYYY is the relevant year.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Zion 1996 only - Mar 15 0:00 1:00 D
Rule Zion 1996 only - Sep 16 0:00 0 S
Rule Zion 1997 only - Mar 21 0:00 1:00 D
@@ -1894,7 +1898,7 @@ Rule Zion 1999 only - Sep 3 2:00 0 S
#
# ftp://ftp.cs.huji.ac.il/pub/tz/announcements/2000-2004.ps.gz
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Zion 2000 only - Apr 14 2:00 1:00 D
Rule Zion 2000 only - Oct 6 1:00 0 S
Rule Zion 2001 only - Apr 9 1:00 1:00 D
@@ -1916,7 +1920,7 @@ Rule Zion 2004 only - Sep 22 1:00 0 S
#
# ftp://ftp.cs.huji.ac.il/pub/tz/announcements/2005+beyond.ps
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Zion 2005 2012 - Apr Fri<=1 2:00 1:00 D
Rule Zion 2005 only - Oct 9 2:00 0 S
Rule Zion 2006 only - Oct 1 2:00 0 S
@@ -1936,7 +1940,7 @@ Rule Zion 2012 only - Sep 23 2:00 0 S
# As of 2013, DST starts at 02:00 on the Friday before the last Sunday
# in March. DST ends at 02:00 on the last Sunday of October.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Zion 2013 max - Mar Fri>=23 2:00 1:00 D
Rule Zion 2013 max - Oct lastSun 2:00 0 S
@@ -2036,7 +2040,7 @@ Zone Asia/Jerusalem 2:20:54 - LMT 1880
# do in any POSIX or C platform. The "25:00" assumes zic from 2007 or later,
# which should be safe now.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Japan 1948 only - May Sat>=1 24:00 1:00 D
Rule Japan 1948 1951 - Sep Sat>=8 25:00 0 S
Rule Japan 1949 only - Apr Sat>=1 24:00 1:00 D
@@ -2113,7 +2117,7 @@ Zone Asia/Tokyo 9:18:59 - LMT 1887 Dec 31 15:00u
# From Paul Eggert (2013-12-11):
# As Steffen suggested, consider the past 21-month experiment to be DST.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Jordan 1973 only - Jun 6 0:00 1:00 S
Rule Jordan 1973 1975 - Oct 1 0:00 0 -
Rule Jordan 1974 1977 - May 1 0:00 1:00 S
@@ -2439,7 +2443,7 @@ Zone Asia/Oral 3:25:24 - LMT 1924 May 2 # or Ural'sk
# Our government cancels daylight saving time 6th of August 2005.
# From 2005-08-12 our GMT-offset is +6, w/o any daylight saving.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Kyrgyz 1992 1996 - Apr Sun>=7 0:00s 1:00 -
Rule Kyrgyz 1992 1996 - Sep lastSun 0:00 0 -
Rule Kyrgyz 1997 2005 - Mar lastSun 2:30 1:00 -
@@ -2495,7 +2499,7 @@ Zone Asia/Bishkek 4:58:24 - LMT 1924 May 2
# follow and continued to use GMT+9:00 for interoperability.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule ROK 1948 only - Jun 1 0:00 1:00 D
Rule ROK 1948 only - Sep 12 24:00 0 S
Rule ROK 1949 only - Apr 3 0:00 1:00 D
@@ -2583,7 +2587,7 @@ Zone Asia/Pyongyang 8:23:00 - LMT 1908 Apr 1
# Lebanon
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Lebanon 1920 only - Mar 28 0:00 1:00 S
Rule Lebanon 1920 only - Oct 25 0:00 0 -
Rule Lebanon 1921 only - Apr 3 0:00 1:00 S
@@ -2613,7 +2617,7 @@ Zone Asia/Beirut 2:22:00 - LMT 1880
2:00 Lebanon EE%sT
# Malaysia
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule NBorneo 1935 1941 - Sep 14 0:00 0:20 -
Rule NBorneo 1935 1941 - Dec 14 0:00 0 -
#
@@ -2758,7 +2762,7 @@ Zone Indian/Maldives 4:54:00 - LMT 1880 # Malé
# September daylight saving time ends. Source:
# http://zasag.mn/news/view/8969
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Mongol 1983 1984 - Apr 1 0:00 1:00 -
Rule Mongol 1983 only - Oct 1 0:00 0 -
# Shanks & Pottenger and IATA SSIM say 1990s switches occurred at 00:00,
@@ -2946,7 +2950,7 @@ Zone Asia/Kathmandu 5:41:16 - LMT 1920
# "People laud PM's announcement to end DST"
# http://www.app.com.pk/en_/index.php?option=com_content&task=view&id=99374&Itemid=2
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Pakistan 2002 only - Apr Sun>=2 0:00 1:00 S
Rule Pakistan 2002 only - Oct Sun>=2 0:00 0 -
Rule Pakistan 2008 only - Jun 1 0:00 1:00 S
@@ -3240,15 +3244,42 @@ Zone Asia/Karachi 4:28:12 - LMT 1907
# From Sharef Mustafa (2019-10-18):
# Palestine summer time will end on midnight Oct 26th 2019 ...
-# http://www.palestinecabinet.gov.ps/website/ar/ViewDetails?ID=43948
#
-# From Paul Eggert (2019-04-10):
-# For now, guess spring-ahead transitions are March's last Friday at 00:00.
+# From Steffen Thorsen (2020-10-20):
+# Some sources such as these say, and display on clocks, that DST ended at
+# midnight last year...
+# https://www.amad.ps/ar/post/320006
+#
+# From Tim Parenti (2020-10-20):
+# The report of the Palestinian Cabinet meeting of 2019-10-14 confirms
+# a decision on (translated): "The start of the winter time in Palestine, by
+# delaying the clock by sixty minutes, starting from midnight on Friday /
+# Saturday corresponding to 26/10/2019."
+# http://www.palestinecabinet.gov.ps/portal/meeting/details/43948
+
+# From Sharef Mustafa (2020-10-20):
+# As per the Palestinian cabinet announcement yesterday, the daylight saving
+# shall [end] on Oct 24th 2020 at 01:00AM by delaying the clock by 60 minutes.
+# http://www.palestinecabinet.gov.ps/portal/Meeting/Details/51584
+
+# From Tim Parenti (2020-10-20):
+# Predict future fall transitions at 01:00 on the Saturday preceding October's
+# last Sunday (i.e., Sat>=24). This is consistent with our predictions since
+# 2016, although the time of the change differed slightly in 2019.
+
+# From Pierre Cashon (2020-10-20):
+# The summer time this year started on March 28 at 00:00.
+# https://wafa.ps/ar_page.aspx?id=GveQNZa872839351758aGveQNZ
+# http://www.palestinecabinet.gov.ps/portal/meeting/details/50284
+# The winter time in 2015 started on October 23 at 01:00.
+# https://wafa.ps/ar_page.aspx?id=CgpCdYa670694628582aCgpCdY
+# http://www.palestinecabinet.gov.ps/portal/meeting/details/27583
#
-# From Tim Parenti (2016-10-19):
-# Predict fall transitions on October's last Saturday at 01:00 from now on.
+# From Paul Eggert (2019-04-10):
+# For now, guess spring-ahead transitions are at 00:00 on the Saturday
+# preceding March's last Sunday (i.e., Sat>=24).
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule EgyptAsia 1957 only - May 10 0:00 1:00 S
Rule EgyptAsia 1957 1958 - Oct 1 0:00 0 -
Rule EgyptAsia 1958 only - May 1 0:00 1:00 S
@@ -3262,10 +3293,10 @@ Rule Palestine 2004 only - Oct 1 1:00 0 -
Rule Palestine 2005 only - Oct 4 2:00 0 -
Rule Palestine 2006 2007 - Apr 1 0:00 1:00 S
Rule Palestine 2006 only - Sep 22 0:00 0 -
-Rule Palestine 2007 only - Sep Thu>=8 2:00 0 -
+Rule Palestine 2007 only - Sep 13 2:00 0 -
Rule Palestine 2008 2009 - Mar lastFri 0:00 1:00 S
Rule Palestine 2008 only - Sep 1 0:00 0 -
-Rule Palestine 2009 only - Sep Fri>=1 1:00 0 -
+Rule Palestine 2009 only - Sep 4 1:00 0 -
Rule Palestine 2010 only - Mar 26 0:00 1:00 S
Rule Palestine 2010 only - Aug 11 0:00 0 -
Rule Palestine 2011 only - Apr 1 0:01 1:00 S
@@ -3274,12 +3305,16 @@ Rule Palestine 2011 only - Aug 30 0:00 1:00 S
Rule Palestine 2011 only - Sep 30 0:00 0 -
Rule Palestine 2012 2014 - Mar lastThu 24:00 1:00 S
Rule Palestine 2012 only - Sep 21 1:00 0 -
-Rule Palestine 2013 only - Sep Fri>=21 0:00 0 -
-Rule Palestine 2014 2015 - Oct Fri>=21 0:00 0 -
-Rule Palestine 2015 only - Mar lastFri 24:00 1:00 S
+Rule Palestine 2013 only - Sep 27 0:00 0 -
+Rule Palestine 2014 only - Oct 24 0:00 0 -
+Rule Palestine 2015 only - Mar 28 0:00 1:00 S
+Rule Palestine 2015 only - Oct 23 1:00 0 -
Rule Palestine 2016 2018 - Mar Sat>=24 1:00 1:00 S
-Rule Palestine 2016 max - Oct lastSat 1:00 0 -
-Rule Palestine 2019 max - Mar lastFri 0:00 1:00 S
+Rule Palestine 2016 2018 - Oct Sat>=24 1:00 0 -
+Rule Palestine 2019 only - Mar 29 0:00 1:00 S
+Rule Palestine 2019 only - Oct Sat>=24 0:00 0 -
+Rule Palestine 2020 max - Mar Sat>=24 0:00 1:00 S
+Rule Palestine 2020 max - Oct Sat>=24 1:00 0 -
# Zone NAME STDOFF RULES FORMAT [UNTIL]
Zone Asia/Gaza 2:17:52 - LMT 1900 Oct
@@ -3348,7 +3383,7 @@ Zone Asia/Hebron 2:20:23 - LMT 1900 Oct
# influence of the sources. There is no current abbreviation for DST,
# so use "PDT", the usual American style.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Phil 1936 only - Nov 1 0:00 1:00 D
Rule Phil 1937 only - Feb 1 0:00 0 S
Rule Phil 1954 only - Apr 12 0:00 1:00 D
@@ -3496,7 +3531,7 @@ Zone Asia/Colombo 5:19:24 - LMT 1880
5:30 - +0530
# Syria
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Syria 1920 1923 - Apr Sun>=15 2:00 1:00 S
Rule Syria 1920 1923 - Oct Sun>=1 2:00 0 -
Rule Syria 1962 only - Apr 29 2:00 1:00 S
diff --git a/make/data/tzdata/australasia b/make/data/tzdata/australasia
index e66d5ca4d79..1f0fd47959f 100644
--- a/make/data/tzdata/australasia
+++ b/make/data/tzdata/australasia
@@ -36,7 +36,7 @@
# Please see the notes below for the controversy about "EST" versus "AEST" etc.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Aus 1917 only - Jan 1 0:01 1:00 D
Rule Aus 1917 only - Mar 25 2:00 0 S
Rule Aus 1942 only - Jan 1 2:00 1:00 D
@@ -55,7 +55,7 @@ Zone Australia/Darwin 8:43:20 - LMT 1895 Feb
9:30 Aus AC%sT
# Western Australia
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule AW 1974 only - Oct lastSun 2:00s 1:00 D
Rule AW 1975 only - Mar Sun>=1 2:00s 0 S
Rule AW 1983 only - Oct lastSun 2:00s 1:00 D
@@ -93,7 +93,7 @@ Zone Australia/Eucla 8:35:28 - LMT 1895 Dec
# applies to all of the Whitsundays.
# http://www.australia.gov.au/about-australia/australian-story/austn-islands
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule AQ 1971 only - Oct lastSun 2:00s 1:00 D
Rule AQ 1972 only - Feb lastSun 2:00s 0 S
Rule AQ 1989 1991 - Oct lastSun 2:00s 1:00 D
@@ -109,7 +109,7 @@ Zone Australia/Lindeman 9:55:56 - LMT 1895
10:00 Holiday AE%sT
# South Australia
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule AS 1971 1985 - Oct lastSun 2:00s 1:00 D
Rule AS 1986 only - Oct 19 2:00s 1:00 D
Rule AS 1987 2007 - Oct lastSun 2:00s 1:00 D
@@ -137,7 +137,7 @@ Zone Australia/Adelaide 9:14:20 - LMT 1895 Feb
# http://www.bom.gov.au/climate/averages/tables/dst_times.shtml
# says King Island didn't observe DST from WWII until late 1971.
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule AT 1967 only - Oct Sun>=1 2:00s 1:00 D
Rule AT 1968 only - Mar lastSun 2:00s 0 S
Rule AT 1968 1985 - Oct lastSun 2:00s 1:00 D
@@ -170,7 +170,7 @@ Zone Australia/Currie 9:35:28 - LMT 1895 Sep
10:00 AT AE%sT
# Victoria
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule AV 1971 1985 - Oct lastSun 2:00s 1:00 D
Rule AV 1972 only - Feb lastSun 2:00s 0 S
Rule AV 1973 1985 - Mar Sun>=1 2:00s 0 S
@@ -191,7 +191,7 @@ Zone Australia/Melbourne 9:39:52 - LMT 1895 Feb
10:00 AV AE%sT
# New South Wales
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule AN 1971 1985 - Oct lastSun 2:00s 1:00 D
Rule AN 1972 only - Feb 27 2:00s 0 S
Rule AN 1973 1981 - Mar Sun>=1 2:00s 0 S
@@ -220,7 +220,7 @@ Zone Australia/Broken_Hill 9:25:48 - LMT 1895 Feb
9:30 AS AC%sT
# Lord Howe Island
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule LH 1981 1984 - Oct lastSun 2:00 1:00 -
Rule LH 1982 1985 - Mar Sun>=1 2:00 0 -
Rule LH 1985 only - Oct lastSun 2:00 0:30 -
@@ -275,8 +275,9 @@ Zone Antarctica/Macquarie 0 - -00 1899 Nov
10:00 Aus AE%sT 1919 Apr 1 0:00s
0 - -00 1948 Mar 25
10:00 Aus AE%sT 1967
- 10:00 AT AE%sT 2010 Apr 4 3:00
- 11:00 - +11
+ 10:00 AT AE%sT 2010
+ 10:00 1:00 AEDT 2011
+ 10:00 AT AE%sT
# Christmas
# Zone NAME STDOFF RULES FORMAT [UNTIL]
@@ -403,7 +404,20 @@ Zone Indian/Cocos 6:27:40 - LMT 1900
# From Michael Deckers (2019-08-06):
# https://www.laws.gov.fj/LawsAsMade/downloadfile/848
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# From Raymond Kumar (2020-10-08):
+# [DST in Fiji] is from December 20th 2020, till 17th January 2021.
+# From Alan Mintz (2020-10-08):
+# https://www.laws.gov.fj/LawsAsMade/GetFile/1071
+# From Tim Parenti (2020-10-08):
+# https://www.fijivillage.com/news/Daylight-saving-from-Dec-20th-this-year-to-Jan-17th-2021-8rf4x5/
+# "Minister for Employment, Parveen Bala says they had never thought of
+# stopping daylight saving. He says it was just to decide on when it should
+# start and end. Bala says it is a short period..."
+# Since the end date is still in line with our ongoing predictions, assume for
+# now that the later-than-usual start date is a one-time departure from the
+# recent second Sunday in November pattern.
+
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Fiji 1998 1999 - Nov Sun>=1 2:00 1:00 -
Rule Fiji 1999 2000 - Feb lastSun 3:00 0 -
Rule Fiji 2009 only - Nov 29 2:00 1:00 -
@@ -414,7 +428,9 @@ Rule Fiji 2012 2013 - Jan Sun>=18 3:00 0 -
Rule Fiji 2014 only - Jan Sun>=18 2:00 0 -
Rule Fiji 2014 2018 - Nov Sun>=1 2:00 1:00 -
Rule Fiji 2015 max - Jan Sun>=12 3:00 0 -
-Rule Fiji 2019 max - Nov Sun>=8 2:00 1:00 -
+Rule Fiji 2019 only - Nov Sun>=8 2:00 1:00 -
+Rule Fiji 2020 only - Dec 20 2:00 1:00 -
+Rule Fiji 2021 max - Nov Sun>=8 2:00 1:00 -
# Zone NAME STDOFF RULES FORMAT [UNTIL]
Zone Pacific/Fiji 11:55:44 - LMT 1915 Oct 26 # Suva
12:00 Fiji +12/+13
@@ -432,7 +448,7 @@ Zone Pacific/Tahiti -9:58:16 - LMT 1912 Oct # Papeete
# Guam
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
# http://guamlegislature.com/Public_Laws_5th/PL05-025.pdf
# http://documents.guam.gov/wp-content/uploads/E.O.-59-7-Guam-Daylight-Savings-Time-May-6-1959.pdf
Rule Guam 1959 only - Jun 27 2:00 1:00 D
@@ -543,7 +559,7 @@ Zone Pacific/Nauru 11:07:40 - LMT 1921 Jan 15 # Uaobe
12:00 - +12
# New Caledonia
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule NC 1977 1978 - Dec Sun>=1 0:00 1:00 -
Rule NC 1978 1979 - Feb 27 0:00 0 -
Rule NC 1996 only - Dec 1 2:00s 1:00 -
@@ -558,7 +574,7 @@ Zone Pacific/Noumea 11:05:48 - LMT 1912 Jan 13 # Nouméa
# New Zealand
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule NZ 1927 only - Nov 6 2:00 1:00 S
Rule NZ 1928 only - Mar 4 2:00 0 M
Rule NZ 1928 1933 - Oct Sun>=8 2:00 0:30 S
@@ -610,7 +626,7 @@ Link Pacific/Auckland Antarctica/McMurdo
# Cook Is
# From Shanks & Pottenger:
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Cook 1978 only - Nov 12 0:00 0:30 -
Rule Cook 1979 1991 - Mar Sun>=1 0:00 0 -
Rule Cook 1979 1990 - Oct lastSun 0:00 0:30 -
@@ -755,7 +771,7 @@ Link Pacific/Pago_Pago Pacific/Midway # in US minor outlying islands
# That web page currently lists transitions for 2012/3 and 2013/4.
# Assume the pattern instituted in 2012 will continue indefinitely.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule WS 2010 only - Sep lastSun 0:00 1 -
Rule WS 2011 only - Apr Sat>=1 4:00 0 -
Rule WS 2011 only - Sep lastSat 3:00 1 -
@@ -799,7 +815,7 @@ Zone Pacific/Fakaofo -11:24:56 - LMT 1901
13:00 - +13
# Tonga
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Tonga 1999 only - Oct 7 2:00s 1:00 -
Rule Tonga 2000 only - Mar 19 2:00s 0 -
Rule Tonga 2000 2001 - Nov Sun>=1 2:00 1:00 -
@@ -880,7 +896,7 @@ Zone Pacific/Wake 11:06:28 - LMT 1901
# Vanuatu
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Vanuatu 1983 only - Sep 25 0:00 1:00 -
Rule Vanuatu 1984 1991 - Mar Sun>=23 0:00 0 -
Rule Vanuatu 1984 only - Oct 23 0:00 1:00 -
diff --git a/make/data/tzdata/europe b/make/data/tzdata/europe
index 8fed2cf5e98..adb260624dc 100644
--- a/make/data/tzdata/europe
+++ b/make/data/tzdata/europe
@@ -411,7 +411,7 @@
# http://www.irishstatutebook.ie/eli/1926/sro/919/made/en/print
# http://www.irishstatutebook.ie/eli/1947/sro/71/made/en/print
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
# Summer Time Act, 1916
Rule GB-Eire 1916 only - May 21 2:00s 1:00 BST
Rule GB-Eire 1916 only - Oct 1 2:00s 0 GMT
@@ -552,7 +552,7 @@ Link Europe/London Europe/Isle_of_Man
# The following is like GB-Eire and EU, except with standard time in
# summer and negative daylight saving time in winter. It is for when
# negative SAVE values are used.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Eire 1971 only - Oct 31 2:00u -1:00 -
Rule Eire 1972 1980 - Mar Sun>=16 2:00u 0 -
Rule Eire 1972 1980 - Oct Sun>=23 2:00u -1:00 -
@@ -589,7 +589,7 @@ Zone Europe/Dublin -0:25:00 - LMT 1880 Aug 2
# predecessor organization, the European Communities.
# For brevity they are called "EU rules" elsewhere in this file.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule EU 1977 1980 - Apr Sun>=1 1:00u 1:00 S
Rule EU 1977 only - Sep lastSun 1:00u 0 -
Rule EU 1978 only - Oct 1 1:00u 0 -
@@ -629,13 +629,13 @@ Rule C-Eur 1944 only - Oct 2 2:00s 0 -
# corrected in version 2008d). The circumstantial evidence is simply the
# tz database itself, as seen below:
#
-# Zone Europe/Paris 0:09:21 - LMT 1891 Mar 15 0:01
+# Zone Europe/Paris ...
# 0:00 France WE%sT 1945 Sep 16 3:00
#
-# Zone Europe/Monaco 0:29:32 - LMT 1891 Mar 15
+# Zone Europe/Monaco ...
# 0:00 France WE%sT 1945 Sep 16 3:00
#
-# Zone Europe/Belgrade 1:22:00 - LMT 1884
+# Zone Europe/Belgrade ...
# 1:00 1:00 CEST 1945 Sep 16 2:00s
#
# Rule France 1945 only - Sep 16 3:00 0 -
@@ -681,7 +681,7 @@ Rule E-Eur 1996 max - Oct lastSun 0:00 0 -
#
# The 1917-1921 decree URLs are from Alexander Belopolsky (2016-08-23).
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Russia 1917 only - Jul 1 23:00 1:00 MST # Moscow Summer Time
#
# Decree No. 142 (1917-12-22) http://istmat.info/node/28137
@@ -795,7 +795,7 @@ Zone EET 2:00 EU EE%sT
# Albania
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Albania 1940 only - Jun 16 0:00 1:00 S
Rule Albania 1942 only - Nov 2 3:00 0 -
Rule Albania 1943 only - Mar 29 2:00 1:00 S
@@ -849,7 +849,7 @@ Zone Europe/Andorra 0:06:04 - LMT 1901
# In 1946 the end of DST was on Monday, 7 October 1946, at 3:00 am.
# Shanks had this right. Source: Die Weltpresse, 5. Oktober 1946, page 5.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Austria 1920 only - Apr 5 2:00s 1:00 S
Rule Austria 1920 only - Sep 13 2:00s 0 -
Rule Austria 1946 only - Apr 14 2:00s 1:00 S
@@ -936,7 +936,7 @@ Zone Europe/Minsk 1:50:16 - LMT 1880
# The 1918 rules are listed for completeness; they apply to unoccupied Belgium.
# Assume Brussels switched to WET in 1918 when the armistice took effect.
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Belgium 1918 only - Mar 9 0:00s 1:00 S
Rule Belgium 1918 1919 - Oct Sat>=1 23:00s 0 -
Rule Belgium 1919 only - Mar 1 23:00s 1:00 S
@@ -996,7 +996,7 @@ Zone Europe/Brussels 0:17:30 - LMT 1880
# EET -> EETDST is in 03:00 Local time in last Sunday of March ...
# EETDST -> EET is in 04:00 Local time in last Sunday of October
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Bulg 1979 only - Mar 31 23:00 1:00 S
Rule Bulg 1979 only - Oct 1 1:00 0 -
Rule Bulg 1980 1982 - Apr Sat>=1 23:00 1:00 S
@@ -1028,7 +1028,7 @@ Zone Europe/Sofia 1:33:16 - LMT 1880
# We know of no English-language name for historical Czech winter time;
# abbreviate it as "GMT", as it happened to be GMT.
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Czech 1945 only - Apr Mon>=1 2:00s 1:00 S
Rule Czech 1945 only - Oct 1 2:00s 0 -
Rule Czech 1946 only - May 6 2:00s 1:00 S
@@ -1052,17 +1052,16 @@ Zone Europe/Prague 0:57:44 - LMT 1850
# Denmark, Faroe Islands, and Greenland
# From Jesper Nørgaard Welen (2005-04-26):
-# http://www.hum.aau.dk/~poe/tid/tine/DanskTid.htm says that the law
-# [introducing standard time] was in effect from 1894-01-01....
-# The page http://www.retsinfo.dk/_GETDOCI_/ACCN/A18930008330-REGL
+# the law [introducing standard time] was in effect from 1894-01-01....
+# The page https://www.retsinformation.dk/eli/lta/1893/83
# confirms this, and states that the law was put forth 1893-03-29.
#
# The EU [actually, EEC and Euratom] treaty with effect from 1973:
-# http://www.retsinfo.dk/_GETDOCI_/ACCN/A19722110030-REGL
+# https://www.retsinformation.dk/eli/lta/1972/21100
#
# This provoked a new law from 1974 to make possible summer time changes
# in subsequent decrees with the law
-# http://www.retsinfo.dk/_GETDOCI_/ACCN/A19740022330-REGL
+# https://www.retsinformation.dk/eli/lta/1974/223
#
# It seems however that no decree was set forward until 1980. I have
# not found any decree, but in another related law, the effecting DST
@@ -1074,7 +1073,7 @@ Zone Europe/Prague 0:57:44 - LMT 1850
# The law is about the management of the extra hour, concerning
# working hours reported and effect on obligatory-rest rules (which
# was suspended on that night):
-# http://www.retsinfo.dk/_GETDOCI_/ACCN/C19801120554-REGL
+# https://web.archive.org/web/20140104053304/https://www.retsinformation.dk/Forms/R0710.aspx?id=60267
# From Jesper Nørgaard Welen (2005-06-11):
# The Herning Folkeblad (1980-09-26) reported that the night between
@@ -1084,7 +1083,7 @@ Zone Europe/Prague 0:57:44 - LMT 1850
# Hence the "02:00" of the 1980 law refers to standard time, not
# wall-clock time, and so the EU rules were in effect in 1980.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Denmark 1916 only - May 14 23:00 1:00 S
Rule Denmark 1916 only - Sep 30 23:00 0 -
Rule Denmark 1940 only - May 15 0:00 1:00 S
@@ -1186,7 +1185,7 @@ Zone Atlantic/Faroe -0:27:04 - LMT 1908 Jan 11 # Tórshavn
# http://naalakkersuisut.gl/~/media/Nanoq/Files/Attached%20Files/Engelske-tekster/Legislation/Executive%20Order%20National%20Park.rtf
# It is their only National Park.
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Thule 1991 1992 - Mar lastSun 2:00 1:00 D
Rule Thule 1991 1992 - Sep lastSun 2:00 0 S
Rule Thule 1993 2006 - Apr Sun>=1 2:00 1:00 D
@@ -1317,7 +1316,7 @@ Zone Europe/Tallinn 1:39:00 - LMT 1880
# From Paul Eggert (2014-06-14):
# Go with Oja over Shanks.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Finland 1942 only - Apr 2 24:00 1:00 S
Rule Finland 1942 only - Oct 4 1:00 0 -
Rule Finland 1981 1982 - Mar lastSun 2:00 1:00 S
@@ -1349,10 +1348,58 @@ Link Europe/Helsinki Europe/Mariehamn
# Françoise Gauquelin, Problèmes de l'heure résolus en astrologie,
# Guy Trédaniel, Paris 1987
+# From Michael Deckers (2020-06-11):
+# the law of 1891
+# was published on 1891-03-15, so it could only take force on 1891-03-16.
+
+# From Michael Deckers (2020-06-10):
+# Le Gaulois, 1911-03-11, page 1/6, online at
+# https://www.retronews.fr/societe/echo-de-presse/2018/01/29/1911-change-lheure-de-paris
+# ... [ Instantly, all pressure driven clock dials halted... Nine minutes and
+# twenty-one seconds later the hands resumed their circular motion. ]
+# There are also precise reports about how the change was prepared in train
+# stations: all the publicly visible clocks stopped at midnight railway time
+# (or were covered), only the chief of service had a watch, labeled
+# "Heure ancienne", that he kept running until it reached 00:04:21, when
+# he announced "Heure nouvelle". See the "Le Petit Journal 1911-03-11".
+# https://gallica.bnf.fr/ark:/12148/bpt6k6192911/f1.item.zoom
+#
+# From Michael Deckers (2020-06-12):
+# That "all French clocks stopped" for 00:09:21 is a misreading of French
+# newspapers; this sort of adjustment applies only to certain
+# remote-controlled clocks ("pendules pneumatiques", of which there existed
+# perhaps a dozen in Paris, and which simply could not be set back remotely),
+# but not to all the clocks in all French towns and villages. For instance,
+# the following story in the "Courrier de Saône-et-Loire" 1911-03-11, page 2:
+# only works if legal time was stepped back (was not monotone): ...
+# [One can observe that children who had been born at midnight less 5
+# minutes and who had died at midnight of the old time, would turn out to
+# be dead before being born, time having been set back and having
+# suppressed 9 minutes and 25 seconds of their existence, that is, more
+# than they could spend.]
+#
+# From Paul Eggert (2020-06-12):
+# French time in railway stations was legally five minutes behind civil time,
+# which explains why railway "old time" ran to 00:04:21 instead of to 00:09:21.
+# The law's text (which Michael Deckers noted is available online)
+# says only that
+# at 1911-03-11 00:00 legal time was that of Paris mean time delayed by
+# nine minutes and twenty-one seconds, and does not say how the
+# transition from Paris mean time was to occur.
+#
+# tzdb has no way to represent stopped clocks. As the railway practice
+# was to keep a watch running on "old time" to decide when to restart
+# the other clocks, this could be modeled as a transition for "old time" at
+# 00:09:21. However, since the law was ambiguous and clocks outside railway
+# stations were probably done haphazardly with the popular impression being
+# that the transition was done at 00:00 "old time", simply leave the time
+# blank; this causes zic to default to 00:00 "old time" which is good enough.
+# Do something similar for the 1891-03-16 transition. There are similar
+# problems in Algiers, Monaco and Tunis.
#
# Shank & Pottenger seem to use '24:00' ambiguously; resolve it with Whitman.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule France 1916 only - Jun 14 23:00s 1:00 S
Rule France 1916 1919 - Oct Sun>=1 23:00s 0 -
Rule France 1917 only - Mar 24 23:00s 1:00 S
@@ -1412,13 +1459,11 @@ Rule France 1945 only - Sep 16 3:00 0 -
# go with Excoffier's 28/3/76 0hUT and 25/9/76 23hUT.
Rule France 1976 only - Mar 28 1:00 1:00 S
Rule France 1976 only - Sep 26 1:00 0 -
-# Shanks & Pottenger give 0:09:20 for Paris Mean Time, and Whitman 0:09:05,
-# but Howse quotes the actual French legislation as saying 0:09:21.
-# Go with Howse. Howse writes that the time in France was officially based
+# Howse writes that the time in France was officially based
# on PMT-0:09:21 until 1978-08-09, when the time base finally switched to UTC.
# Zone NAME STDOFF RULES FORMAT [UNTIL]
-Zone Europe/Paris 0:09:21 - LMT 1891 Mar 15 0:01
- 0:09:21 - PMT 1911 Mar 11 0:01 # Paris MT
+Zone Europe/Paris 0:09:21 - LMT 1891 Mar 16
+ 0:09:21 - PMT 1911 Mar 11 # Paris Mean Time
# Shanks & Pottenger give 1940 Jun 14 0:00; go with Excoffier and Le Corre.
0:00 France WE%sT 1940 Jun 14 23:00
# Le Corre says Paris stuck with occupied-France time after the liberation;
@@ -1447,7 +1492,7 @@ Zone Europe/Paris 0:09:21 - LMT 1891 Mar 15 0:01
# this was equivalent to UT +03, not +04.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Germany 1946 only - Apr 14 2:00s 1:00 S
Rule Germany 1946 only - Oct 7 2:00s 0 -
Rule Germany 1947 1949 - Oct Sun>=1 2:00s 0 -
@@ -1499,7 +1544,7 @@ Zone Europe/Gibraltar -0:21:24 - LMT 1880 Aug 2 0:00s
1:00 EU CE%sT
# Greece
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
# Whitman gives 1932 Jul 5 - Nov 1; go with Shanks & Pottenger.
Rule Greece 1932 only - Jul 7 0:00 1:00 S
Rule Greece 1932 only - Sep 1 0:00 0 -
@@ -1534,38 +1579,73 @@ Zone Europe/Athens 1:34:52 - LMT 1895 Sep 14
2:00 EU EE%sT
# Hungary
-# From Paul Eggert (2014-07-15):
-# Dates for 1916-1945 are taken from:
-# Oross A. Jelen a múlt jövője: a nyári időszámítás Magyarországon 1916-1945.
-# National Archives of Hungary (2012-10-29).
-# http://mnl.gov.hu/a_het_dokumentuma/a_nyari_idoszamitas_magyarorszagon_19161945.html
-# This source does not always give times, which are taken from Shanks
-# & Pottenger (which disagree about the dates).
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
-Rule Hungary 1918 only - Apr 1 3:00 1:00 S
-Rule Hungary 1918 only - Sep 16 3:00 0 -
-Rule Hungary 1919 only - Apr 15 3:00 1:00 S
-Rule Hungary 1919 only - Nov 24 3:00 0 -
+
+# From Michael Deckers (2020-06-09):
+# an Austrian encyclopedia of railroads of 1913, online at
+# http://www.zeno.org/Roell-1912/A/Eisenbahnzeit
+# says that the switch [to CET] happened on 1890-11-01.
+
+# From Géza Nyáry (2020-06-07):
+# Data for 1918-1983 are based on the archive database of Library Hungaricana.
+# The dates are collected from original, scanned governmental orders,
+# bulletins, instructions and public press.
+# [See URLs below.]
+
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
+# https://library.hungaricana.hu/hu/view/OGYK_RT_1918/?pg=238
+# https://library.hungaricana.hu/hu/view/OGYK_RT_1919/?pg=808
+# https://library.hungaricana.hu/hu/view/OGYK_RT_1920/?pg=201
+Rule Hungary 1918 1919 - Apr 15 2:00 1:00 S
+Rule Hungary 1918 1920 - Sep Mon>=15 3:00 0 -
+Rule Hungary 1920 only - Apr 5 2:00 1:00 S
+# https://library.hungaricana.hu/hu/view/OGYK_RT_1945/?pg=882
Rule Hungary 1945 only - May 1 23:00 1:00 S
-Rule Hungary 1945 only - Nov 1 0:00 0 -
+Rule Hungary 1945 only - Nov 1 1:00 0 -
+# https://library.hungaricana.hu/hu/view/Delmagyarorszag_1946_03/?pg=49
Rule Hungary 1946 only - Mar 31 2:00s 1:00 S
-Rule Hungary 1946 1949 - Oct Sun>=1 2:00s 0 -
+# https://library.hungaricana.hu/hu/view/Delmagyarorszag_1946_09/?pg=54
+Rule Hungary 1946 only - Oct 7 2:00 0 -
+# https://library.hungaricana.hu/hu/view/KulfBelfHirek_1947_04_1__001-123/?pg=90
+# https://library.hungaricana.hu/hu/view/DunantuliNaplo_1947_09/?pg=128
+# https://library.hungaricana.hu/hu/view/KulfBelfHirek_1948_03_3__001-123/?pg=304
+# https://library.hungaricana.hu/hu/view/Zala_1948_09/?pg=64
+# https://library.hungaricana.hu/hu/view/SatoraljaujhelyiLeveltar_ZempleniNepujsag_1948/?pg=53
+# https://library.hungaricana.hu/hu/view/SatoraljaujhelyiLeveltar_ZempleniNepujsag_1948/?pg=160
+# https://library.hungaricana.hu/hu/view/UjSzo_1949_01-04/?pg=102
+# https://library.hungaricana.hu/hu/view/KeletMagyarorszag_1949_03/?pg=96
+# https://library.hungaricana.hu/hu/view/Delmagyarorszag_1949_09/?pg=94
Rule Hungary 1947 1949 - Apr Sun>=4 2:00s 1:00 S
-Rule Hungary 1950 only - Apr 17 2:00s 1:00 S
-Rule Hungary 1950 only - Oct 23 2:00s 0 -
-Rule Hungary 1954 1955 - May 23 0:00 1:00 S
-Rule Hungary 1954 1955 - Oct 3 0:00 0 -
-Rule Hungary 1956 only - Jun Sun>=1 0:00 1:00 S
-Rule Hungary 1956 only - Sep lastSun 0:00 0 -
-Rule Hungary 1957 only - Jun Sun>=1 1:00 1:00 S
-Rule Hungary 1957 only - Sep lastSun 3:00 0 -
-Rule Hungary 1980 only - Apr 6 1:00 1:00 S
+Rule Hungary 1947 1949 - Oct Sun>=1 2:00s 0 -
+# https://library.hungaricana.hu/hu/view/DTT_KOZL_TanacsokKozlonye_1954/?pg=513
+Rule Hungary 1954 only - May 23 0:00 1:00 S
+Rule Hungary 1954 only - Oct 3 0:00 0 -
+# https://library.hungaricana.hu/hu/view/DTT_KOZL_TanacsokKozlonye_1955/?pg=398
+Rule Hungary 1955 only - May 22 2:00 1:00 S
+Rule Hungary 1955 only - Oct 2 3:00 0 -
+# https://library.hungaricana.hu/hu/view/HevesMegyeiNepujsag_1956_06/?pg=0
+# https://library.hungaricana.hu/hu/view/EszakMagyarorszag_1956_06/?pg=6
+# https://library.hungaricana.hu/hu/view/SzolnokMegyeiNeplap_1957_04/?pg=120
+# https://library.hungaricana.hu/hu/view/PestMegyeiHirlap_1957_09/?pg=143
+Rule Hungary 1956 1957 - Jun Sun>=1 2:00 1:00 S
+Rule Hungary 1956 1957 - Sep lastSun 3:00 0 -
+# https://library.hungaricana.hu/hu/view/DTT_KOZL_TanacsokKozlonye_1980/?pg=189
+Rule Hungary 1980 only - Apr 6 0:00 1:00 S
+Rule Hungary 1980 only - Sep 28 1:00 0 -
+# https://library.hungaricana.hu/hu/view/DTT_KOZL_TanacsokKozlonye_1980/?pg=1227
+# https://library.hungaricana.hu/hu/view/Delmagyarorszag_1981_01/?pg=79
+# https://library.hungaricana.hu/hu/view/DTT_KOZL_TanacsokKozlonye_1982/?pg=115
+# https://library.hungaricana.hu/hu/view/DTT_KOZL_TanacsokKozlonye_1983/?pg=85
+Rule Hungary 1981 1983 - Mar lastSun 0:00 1:00 S
+Rule Hungary 1981 1983 - Sep lastSun 1:00 0 -
+#
# Zone NAME STDOFF RULES FORMAT [UNTIL]
-Zone Europe/Budapest 1:16:20 - LMT 1890 Oct
+Zone Europe/Budapest 1:16:20 - LMT 1890 Nov 1
1:00 C-Eur CE%sT 1918
- 1:00 Hungary CE%sT 1941 Apr 8
+# https://library.hungaricana.hu/hu/view/OGYK_RT_1941/?pg=1204
+# https://library.hungaricana.hu/hu/view/OGYK_RT_1942/?pg=3955
+ 1:00 Hungary CE%sT 1941 Apr 7 23:00
1:00 C-Eur CE%sT 1945
- 1:00 Hungary CE%sT 1980 Sep 28 2:00s
+ 1:00 Hungary CE%sT 1984
1:00 EU CE%sT
# Iceland
@@ -1601,7 +1681,7 @@ Zone Europe/Budapest 1:16:20 - LMT 1890 Oct
# The information below is taken from the 1988 Almanak; see
# http://www.almanak.hi.is/klukkan.html
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Iceland 1917 1919 - Feb 19 23:00 1:00 -
Rule Iceland 1917 only - Oct 21 1:00 0 -
Rule Iceland 1918 1919 - Nov 16 1:00 0 -
@@ -1693,7 +1773,7 @@ Zone Atlantic/Reykjavik -1:28 - LMT 1908
# to 1944-06-04; although Rome was an open city during this period, it
# was effectively controlled by Germany.
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Italy 1916 only - Jun 3 24:00 1:00 S
Rule Italy 1916 1917 - Sep 30 24:00 0 -
Rule Italy 1917 only - Mar 31 24:00 1:00 S
@@ -1803,7 +1883,7 @@ Link Europe/Rome Europe/San_Marino
# urged Lithuania and Estonia to adopt a similar time policy, but it
# appears that they will not do so....
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Latvia 1989 1996 - Mar lastSun 2:00s 1:00 S
Rule Latvia 1989 1996 - Sep lastSun 2:00s 0 -
@@ -1896,7 +1976,7 @@ Zone Europe/Vilnius 1:41:16 - LMT 1880
# Luxembourg
# Whitman disagrees with most of these dates in minor ways;
# go with Shanks & Pottenger.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Lux 1916 only - May 14 23:00 1:00 S
Rule Lux 1916 only - Oct 1 1:00 0 -
Rule Lux 1917 only - Apr 28 23:00 1:00 S
@@ -1937,7 +2017,7 @@ Zone Europe/Luxembourg 0:24:36 - LMT 1904 Jun
# From Paul Eggert (2016-10-21):
# Assume 1900-1972 was like Rome, overriding Shanks.
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Malta 1973 only - Mar 31 0:00s 1:00 S
Rule Malta 1973 only - Sep 29 0:00s 0 -
Rule Malta 1974 only - Apr 21 0:00s 1:00 S
@@ -2010,7 +2090,7 @@ Zone Europe/Malta 0:58:04 - LMT 1893 Nov 2 0:00s # Valletta
# says the 2014-03-30 spring-forward transition was at 02:00 local time.
# Guess that since 1997 Moldova has switched one hour before the EU.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Moldova 1997 max - Mar lastSun 2:00 1:00 S
Rule Moldova 1997 max - Oct lastSun 3:00 0 -
@@ -2028,11 +2108,24 @@ Zone Europe/Chisinau 1:55:20 - LMT 1880
2:00 Moldova EE%sT
# Monaco
-# Shanks & Pottenger give 0:09:20 for Paris Mean Time; go with Howse's
-# more precise 0:09:21.
+#
+# From Michael Deckers (2020-06-12):
+# In the "Journal de Monaco" of 1892-05-24, online at
+# https://journaldemonaco.gouv.mc/var/jdm/storage/original/application/b1c67c12c5af11b41ea888fb048e4fe8.pdf
+# we read: ...
+# [By virtue of a Sovereign Ordinance of May 13 of the current [year],
+# legal time in the Principality will be set, from the date of June 1,
+# 1892 onwards, to the meridian of Paris, as in France.]
+# In the "Journal de Monaco" of 1911-03-28, online at
+# https://journaldemonaco.gouv.mc/var/jdm/storage/original/application/de74ffb7db53d4f599059fe8f0ed482a.pdf
+# we read an ordinance of 1911-03-16: ...
+# [Legal time in the Principality will be set, from the date of promulgation
+# of the present ordinance, to legal time in France.... Consequently, legal
+# time will be retarded by 9 minutes and 21 seconds.]
+#
# Zone NAME STDOFF RULES FORMAT [UNTIL]
-Zone Europe/Monaco 0:29:32 - LMT 1891 Mar 15
- 0:09:21 - PMT 1911 Mar 11 # Paris Mean Time
+Zone Europe/Monaco 0:29:32 - LMT 1892 Jun 1
+ 0:09:21 - PMT 1911 Mar 29 # Paris Mean Time
0:00 France WE%sT 1945 Sep 16 3:00
1:00 France CE%sT 1977
1:00 EU CE%sT
@@ -2080,7 +2173,7 @@ Zone Europe/Monaco 0:29:32 - LMT 1891 Mar 15
# The data entries before 1945 are taken from
# https://www.staff.science.uu.nl/~gent0113/wettijd/wettijd.htm
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Neth 1916 only - May 1 0:00 1:00 NST # Netherlands Summer Time
Rule Neth 1916 only - Oct 1 0:00 0 AMT # Amsterdam Mean Time
Rule Neth 1917 only - Apr 16 2:00s 1:00 NST
@@ -2117,7 +2210,7 @@ Zone Europe/Amsterdam 0:19:32 - LMT 1835
# Norway
# http://met.no/met/met_lex/q_u/sommertid.html (2004-01) agrees with Shanks &
# Pottenger.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Norway 1916 only - May 22 1:00 1:00 S
Rule Norway 1916 only - Sep 30 0:00 0 -
Rule Norway 1945 only - Apr 2 2:00s 1:00 S
@@ -2186,7 +2279,7 @@ Link Europe/Oslo Arctic/Longyearbyen
# The 1919 dates and times can be found in Tygodnik Urzędowy nr 1 (1919-03-20),
# pp 1-2.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Poland 1918 1919 - Sep 16 2:00s 0 -
Rule Poland 1919 only - Apr 15 2:00s 1:00 S
Rule Poland 1944 only - Apr 3 2:00s 1:00 S
@@ -2257,7 +2350,7 @@ Zone Europe/Warsaw 1:24:00 - LMT 1880
# Guess that the Azores changed to EU rules in 1992 (since that's when Portugal
# harmonized with EU rules), and that they stayed +0:00 that winter.
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
# DSH writes that despite Decree 1,469 (1915), the change to the clocks was not
# done every year, depending on what Spain did, because of railroad schedules.
# Go with Shanks & Pottenger.
@@ -2370,7 +2463,7 @@ Zone Atlantic/Madeira -1:07:36 - LMT 1884 # Funchal
# assume that Romania and Moldova switched to EU rules in 1997,
# the same year as Bulgaria.
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Romania 1932 only - May 21 0:00s 1:00 S
Rule Romania 1932 1939 - Oct Sun>=1 0:00s 0 -
Rule Romania 1933 1939 - Apr Sun>=2 0:00s 1:00 S
@@ -3468,14 +3561,14 @@ Link Europe/Prague Europe/Bratislava
# fallback transition from the next day's 00:59... to 00:00.
# From Michael Deckers (2016-12-15):
-# The Royal Decree of 1900-06-26 quoted by Planesas, online at
+# The Royal Decree of 1900-07-26 quoted by Planesas, online at
# https://www.boe.es/datos/pdfs/BOE//1900/209/A00383-00384.pdf
# says in its article 5 (my translation):
# These dispositions will enter into force beginning with the
# instant at which, according to the time indicated in article 1,
# the 1st day of January of 1901 will begin.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Spain 1918 only - Apr 15 23:00 1:00 S
Rule Spain 1918 1919 - Oct 6 24:00s 0 -
Rule Spain 1919 only - Apr 6 23:00 1:00 S
@@ -3612,7 +3705,7 @@ Zone Europe/Stockholm 1:12:12 - LMT 1879 Jan 1
# By the end of the 18th century clocks and watches became commonplace
# and their performance improved enormously. Communities began to keep
# mean time in preference to apparent time - Geneva from 1780 ....
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
# From Whitman (who writes "Midnight?"):
# Rule Swiss 1940 only - Nov 2 0:00 1:00 S
# Rule Swiss 1940 only - Dec 31 0:00 0 -
@@ -3699,7 +3792,7 @@ Zone Europe/Stockholm 1:12:12 - LMT 1879 Jan 1
# 1853-07-16, though it probably occurred at some other date in Zurich, and
# legal civil time probably changed at still some other transition date.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Swiss 1941 1942 - May Mon>=1 1:00 1:00 S
Rule Swiss 1941 1942 - Oct Mon>=1 2:00 0 -
# Zone NAME STDOFF RULES FORMAT [UNTIL]
@@ -3848,7 +3941,7 @@ Zone Europe/Zurich 0:34:08 - LMT 1853 Jul 16 # See above comment.
# Although Google Translate misfires on that source, it looks like
# Turkey reversed last month's decision, and so will stay at +03.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Turkey 1916 only - May 1 0:00 1:00 S
Rule Turkey 1916 only - Oct 1 0:00 0 -
Rule Turkey 1920 only - Mar 28 0:00 1:00 S
@@ -4006,7 +4099,7 @@ Zone Europe/Kiev 2:02:04 - LMT 1880
2:00 1:00 EEST 1991 Sep 29 3:00
2:00 E-Eur EE%sT 1995
2:00 EU EE%sT
-# Ruthenia used CET 1990/1991.
+# Transcarpathia used CET 1990/1991.
# "Uzhhorod" is the transliteration of the Rusyn/Ukrainian pronunciation, but
# "Uzhgorod" is more common in English.
Zone Europe/Uzhgorod 1:29:12 - LMT 1890 Oct
diff --git a/make/data/tzdata/leapseconds b/make/data/tzdata/leapseconds
index fe8e170ed26..e00b297baed 100644
--- a/make/data/tzdata/leapseconds
+++ b/make/data/tzdata/leapseconds
@@ -91,11 +91,11 @@ Leap 2016 Dec 31 23:59:60 + S
# Any additional leap seconds will come after this.
# This Expires line is commented out for now,
# so that pre-2020a zic implementations do not reject this file.
-#Expires 2020 Dec 28 00:00:00
+#Expires 2021 Jun 28 00:00:00
# POSIX timestamps for the data in this file:
#updated 1467936000 (2016-07-08 00:00:00 UTC)
-#expires 1609113600 (2020-12-28 00:00:00 UTC)
+#expires 1624838400 (2021-06-28 00:00:00 UTC)
-# Updated through IERS Bulletin C59
-# File expires on: 28 December 2020
+# Updated through IERS Bulletin C60
+# File expires on: 28 June 2021
diff --git a/make/data/tzdata/northamerica b/make/data/tzdata/northamerica
index 60c7addef09..9a70e313c78 100644
--- a/make/data/tzdata/northamerica
+++ b/make/data/tzdata/northamerica
@@ -193,7 +193,7 @@
# U.S. government action. So even though the "US" rules have changed
# in the latest release, other countries won't be affected.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule US 1918 1919 - Mar lastSun 2:00 1:00 D
Rule US 1918 1919 - Oct lastSun 2:00 0 S
Rule US 1942 only - Feb 9 2:00 1:00 W # War
@@ -370,7 +370,7 @@ Zone PST8PDT -8:00 US P%sT
# Eastern time (i.e., -4:56:01.6) just before the 1883 switch. Round to the
# nearest second.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
+# Rule NAME FROM TO - IN ON AT SAVE LETTER
Rule NYC 1920 only - Mar lastSun 2:00 1:00 D
Rule NYC 1920 only - Oct lastSun 2:00 0 S
Rule NYC 1921 1966 - Apr lastSun 2:00 1:00 D
@@ -454,7 +454,7 @@ Zone America/New_York -4:56:02 - LMT 1883 Nov 18 12:03:58
# The Tennessean 2007-05-11, republished 2015-04-06.
# https://www.tennessean.com/story/insider/extras/2015/04/06/archives-seigenthaler-for-100-years-the-tennessean-had-it-covered/25348545/
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
+# Rule NAME FROM TO - IN ON AT SAVE LETTER
Rule Chicago 1920 only - Jun 13 2:00 1:00 D
Rule Chicago 1920 1921 - Oct lastSun 2:00 0 S
Rule Chicago 1921 only - Mar lastSun 2:00 1:00 D
@@ -523,7 +523,7 @@ Zone America/North_Dakota/Beulah -6:47:07 - LMT 1883 Nov 18 12:12:53
# El Paso Times. 2018-10-24 06:40 -06.
# https://www.elpasotimes.com/story/news/local/el-paso/2018/10/24/el-pasoans-were-time-rebels-fought-stay-mountain-zone/1744509002/
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
+# Rule NAME FROM TO - IN ON AT SAVE LETTER
Rule Denver 1920 1921 - Mar lastSun 2:00 1:00 D
Rule Denver 1920 only - Oct lastSun 2:00 0 S
Rule Denver 1921 only - May 22 2:00 0 S
@@ -576,7 +576,7 @@ Zone America/Denver -6:59:56 - LMT 1883 Nov 18 12:00:04
# https://repository.uchastings.edu/cgi/viewcontent.cgi?article=1501&context=ca_ballot_props
# https://repository.uchastings.edu/cgi/viewcontent.cgi?article=1636&context=ca_ballot_props
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
+# Rule NAME FROM TO - IN ON AT SAVE LETTER
Rule CA 1948 only - Mar 14 2:01 1:00 D
Rule CA 1949 only - Jan 1 2:00 0 S
Rule CA 1950 1966 - Apr lastSun 1:00 1:00 D
@@ -934,7 +934,7 @@ Zone America/Boise -7:44:49 - LMT 1883 Nov 18 12:15:11
# going to switch from Central to Eastern Time on March 11, 2007....
# http://www.indystar.com/apps/pbcs.dll/article?AID=/20070207/LOCAL190108/702070524/0/LOCAL
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
+# Rule NAME FROM TO - IN ON AT SAVE LETTER
Rule Indianapolis 1941 only - Jun 22 2:00 1:00 D
Rule Indianapolis 1941 1954 - Sep lastSun 2:00 0 S
Rule Indianapolis 1946 1954 - Apr lastSun 2:00 1:00 D
@@ -953,7 +953,7 @@ Zone America/Indiana/Indianapolis -5:44:38 - LMT 1883 Nov 18 12:15:22
#
# Eastern Crawford County, Indiana, left its clocks alone in 1974,
# as well as from 1976 through 2005.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
+# Rule NAME FROM TO - IN ON AT SAVE LETTER
Rule Marengo 1951 only - Apr lastSun 2:00 1:00 D
Rule Marengo 1951 only - Sep lastSun 2:00 0 S
Rule Marengo 1954 1960 - Apr lastSun 2:00 1:00 D
@@ -972,7 +972,7 @@ Zone America/Indiana/Marengo -5:45:23 - LMT 1883 Nov 18 12:14:37
# Daviess, Dubois, Knox, and Martin Counties, Indiana,
# switched from eastern to central time in April 2006, then switched back
# in November 2007.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
+# Rule NAME FROM TO - IN ON AT SAVE LETTER
Rule Vincennes 1946 only - Apr lastSun 2:00 1:00 D
Rule Vincennes 1946 only - Sep lastSun 2:00 0 S
Rule Vincennes 1953 1954 - Apr lastSun 2:00 1:00 D
@@ -997,7 +997,7 @@ Zone America/Indiana/Vincennes -5:50:07 - LMT 1883 Nov 18 12:09:53
# The Indianapolis News, Friday 27 October 1967 states that Perry County
# returned to CST. It went again to EST on 27 April 1969, as documented by the
# Indianapolis star of Saturday 26 April.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
+# Rule NAME FROM TO - IN ON AT SAVE LETTER
Rule Perry 1955 only - May 1 0:00 1:00 D
Rule Perry 1955 1960 - Sep lastSun 2:00 0 S
Rule Perry 1956 1963 - Apr lastSun 2:00 1:00 D
@@ -1014,7 +1014,7 @@ Zone America/Indiana/Tell_City -5:47:03 - LMT 1883 Nov 18 12:12:57
#
# Pike County, Indiana moved from central to eastern time in 1977,
# then switched back in 2006, then switched back again in 2007.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
+# Rule NAME FROM TO - IN ON AT SAVE LETTER
Rule Pike 1955 only - May 1 0:00 1:00 D
Rule Pike 1955 1960 - Sep lastSun 2:00 0 S
Rule Pike 1956 1964 - Apr lastSun 2:00 1:00 D
@@ -1035,7 +1035,7 @@ Zone America/Indiana/Petersburg -5:49:07 - LMT 1883 Nov 18 12:10:53
# An article on page A3 of the Sunday, 1991-10-27 Washington Post
# notes that Starke County switched from Central time to Eastern time as of
# 1991-10-27.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
+# Rule NAME FROM TO - IN ON AT SAVE LETTER
Rule Starke 1947 1961 - Apr lastSun 2:00 1:00 D
Rule Starke 1947 1954 - Sep lastSun 2:00 0 S
Rule Starke 1955 1956 - Oct lastSun 2:00 0 S
@@ -1052,7 +1052,7 @@ Zone America/Indiana/Knox -5:46:30 - LMT 1883 Nov 18 12:13:30
#
# Pulaski County, Indiana, switched from eastern to central time in
# April 2006 and then switched back in March 2007.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
+# Rule NAME FROM TO - IN ON AT SAVE LETTER
Rule Pulaski 1946 1960 - Apr lastSun 2:00 1:00 D
Rule Pulaski 1946 1954 - Sep lastSun 2:00 0 S
Rule Pulaski 1955 1956 - Oct lastSun 2:00 0 S
@@ -1094,7 +1094,7 @@ Zone America/Indiana/Vevay -5:40:16 - LMT 1883 Nov 18 12:19:44
#
# Part of Kentucky left its clocks alone in 1974.
# This also includes Clark, Floyd, and Harrison counties in Indiana.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
+# Rule NAME FROM TO - IN ON AT SAVE LETTER
Rule Louisville 1921 only - May 1 2:00 1:00 D
Rule Louisville 1921 only - Sep 1 2:00 0 S
Rule Louisville 1941 only - Apr lastSun 2:00 1:00 D
@@ -1208,7 +1208,7 @@ Zone America/Kentucky/Monticello -5:39:24 - LMT 1883 Nov 18 12:20:36
# election Michigan voters narrowly repealed DST, effective 1969.
#
# Most of Michigan observed DST from 1973 on, but was a bit late in 1975.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
+# Rule NAME FROM TO - IN ON AT SAVE LETTER
Rule Detroit 1948 only - Apr lastSun 2:00 1:00 D
Rule Detroit 1948 only - Sep lastSun 2:00 0 S
# Zone NAME STDOFF RULES FORMAT [UNTIL]
@@ -1225,7 +1225,7 @@ Zone America/Detroit -5:32:11 - LMT 1905
#
# Dickinson, Gogebic, Iron, and Menominee Counties, Michigan,
# switched from EST to CST/CDT in 1973.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
+# Rule NAME FROM TO - IN ON AT SAVE LETTER
Rule Menominee 1946 only - Apr lastSun 2:00 1:00 D
Rule Menominee 1946 only - Sep lastSun 2:00 0 S
Rule Menominee 1966 only - Apr lastSun 2:00 1:00 D
@@ -1395,7 +1395,7 @@ Zone America/Menominee -5:50:27 - LMT 1885 Sep 18 12:00
# Oct 31, to Oct 27, 1918 (and Sunday is a more likely transition day
# than Thursday) in all Canadian rulesets.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Canada 1918 only - Apr 14 2:00 1:00 D
Rule Canada 1918 only - Oct 27 2:00 0 S
Rule Canada 1942 only - Feb 9 2:00 1:00 W # War
@@ -1418,7 +1418,7 @@ Rule Canada 2007 max - Nov Sun>=1 2:00 0 S
# that follows the rules is the southeast corner, including Port Hope
# Simpson and Mary's Harbour, but excluding, say, Black Tickle.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule StJohns 1917 only - Apr 8 2:00 1:00 D
Rule StJohns 1917 only - Sep 17 2:00 0 S
# Whitman gives 1919 Apr 5 and 1920 Apr 5; go with Shanks & Pottenger.
@@ -1520,7 +1520,7 @@ Zone America/Goose_Bay -4:01:40 - LMT 1884 # Happy Valley-Goose Bay
# bill say that it is "accommodating the customs and practices" of those
# regions, which suggests that they have always been in-line with Halifax.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Halifax 1916 only - Apr 1 0:00 1:00 D
Rule Halifax 1916 only - Oct 1 0:00 0 S
Rule Halifax 1920 only - May 9 0:00 1:00 D
@@ -1586,7 +1586,7 @@ Zone America/Glace_Bay -3:59:48 - LMT 1902 Jun 15
# clear that this was the case since at least 1993.
# For now, assume it started in 1993.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Moncton 1933 1935 - Jun Sun>=8 1:00 1:00 D
Rule Moncton 1933 1935 - Sep Sun>=8 1:00 0 S
Rule Moncton 1936 1938 - Jun Sun>=1 1:00 1:00 D
@@ -1795,7 +1795,7 @@ Zone America/Blanc-Sablon -3:48:28 - LMT 1884
# With some exceptions, the use of daylight saving may be said to be limited
# to those cities and towns lying between Quebec city and Windsor, Ont.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Toronto 1919 only - Mar 30 23:30 1:00 D
Rule Toronto 1919 only - Oct 26 0:00 0 S
Rule Toronto 1920 only - May 2 2:00 1:00 D
@@ -1893,7 +1893,7 @@ Zone America/Atikokan -6:06:28 - LMT 1895
# starting 1966. Since 02:00s is clearly correct for 1967 on, assume
# it was also 02:00s in 1966.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Winn 1916 only - Apr 23 0:00 1:00 D
Rule Winn 1916 only - Sep 17 0:00 0 S
Rule Winn 1918 only - Apr 14 2:00 1:00 D
@@ -1984,7 +1984,7 @@ Zone America/Winnipeg -6:28:36 - LMT 1887 Jul 16
# long and rather painful to read.
# http://www.qp.gov.sk.ca/documents/English/Statutes/Statutes/T14.pdf
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Regina 1918 only - Apr 14 2:00 1:00 D
Rule Regina 1918 only - Oct 27 2:00 0 S
Rule Regina 1930 1934 - May Sun>=1 0:00 1:00 D
@@ -2034,7 +2034,7 @@ Zone America/Swift_Current -7:11:20 - LMT 1905 Sep
# Boyer JP. Forcing Choice: The Risky Reward of Referendums. Dundum. 2017.
# ISBN 978-1459739123.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Edm 1918 1919 - Apr Sun>=8 2:00 1:00 D
Rule Edm 1918 only - Oct 27 2:00 0 S
Rule Edm 1919 only - May 27 2:00 0 S
@@ -2143,7 +2143,7 @@ Zone America/Edmonton -7:33:52 - LMT 1906 Sep
# https://searcharchives.vancouver.ca/daylight-saving-1918-starts-again-july-7-1941-start-d-s-sept-27-end-of-d-s-1941
# We have no further details, so omit them for now.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Vanc 1918 only - Apr 14 2:00 1:00 D
Rule Vanc 1918 only - Oct 27 2:00 0 S
Rule Vanc 1942 only - Feb 9 2:00 1:00 W # War
@@ -2472,7 +2472,19 @@ Zone America/Creston -7:46:04 - LMT 1884
# consistency with nearby Dawson Creek, Creston, and Fort Nelson.
# https://yukon.ca/en/news/yukon-end-seasonal-time-change
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# From Andrew G. Smith (2020-09-24):
+# Yukon has completed its regulatory change to be on UTC -7 year-round....
+# http://www.gov.yk.ca/legislation/regs/oic2020_125.pdf
+# What we have done is re-defined Yukon Standard Time, as we are
+# authorized to do under section 33 of our Interpretation Act:
+# http://www.gov.yk.ca/legislation/acts/interpretation_c.pdf
+#
+# From Paul Eggert (2020-09-24):
+# tzdb uses the obsolete YST abbreviation for standard time in Yukon through
+# about 1970, and uses PST for standard time in Yukon since then. Consistent
+# with that, use MST for -07, the new standard time in Yukon effective Nov. 1.
+
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule NT_YK 1918 only - Apr 14 2:00 1:00 D
Rule NT_YK 1918 only - Oct 27 2:00 0 S
Rule NT_YK 1919 only - May 25 2:00 1:00 D
@@ -2526,12 +2538,12 @@ Zone America/Inuvik 0 - -00 1953 # Inuvik founded
Zone America/Whitehorse -9:00:12 - LMT 1900 Aug 20
-9:00 NT_YK Y%sT 1967 May 28 0:00
-8:00 NT_YK P%sT 1980
- -8:00 Canada P%sT 2020 Mar 8 2:00
+ -8:00 Canada P%sT 2020 Nov 1
-7:00 - MST
Zone America/Dawson -9:17:40 - LMT 1900 Aug 20
-9:00 NT_YK Y%sT 1973 Oct 28 0:00
-8:00 NT_YK P%sT 1980
- -8:00 Canada P%sT 2020 Mar 8 2:00
+ -8:00 Canada P%sT 2020 Nov 1
-7:00 - MST
@@ -2746,7 +2758,7 @@ Zone America/Dawson -9:17:40 - LMT 1900 Aug 20
# 5- The islands, reefs and keys shall take their timezone from the
# longitude they are located at.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Mexico 1939 only - Feb 5 0:00 1:00 D
Rule Mexico 1939 only - Jun 25 0:00 0 S
Rule Mexico 1940 only - Dec 9 0:00 1:00 D
@@ -2951,7 +2963,7 @@ Zone America/Tijuana -7:48:04 - LMT 1922 Jan 1 0:11:56
# rules to sync with the U.S. starting in 2007....
# http://www.jonesbahamas.com/?c=45&a=10412
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Bahamas 1964 1975 - Oct lastSun 2:00 0 S
Rule Bahamas 1964 1975 - Apr lastSun 2:00 1:00 D
# Zone NAME STDOFF RULES FORMAT [UNTIL]
@@ -2963,7 +2975,7 @@ Zone America/Nassau -5:09:30 - LMT 1912 Mar 2
# For 1899 Milne gives -3:58:29.2; round that.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Barb 1977 only - Jun 12 2:00 1:00 D
Rule Barb 1977 1978 - Oct Sun>=1 2:00 0 S
Rule Barb 1978 1980 - Apr Sun>=15 2:00 1:00 D
@@ -2976,7 +2988,7 @@ Zone America/Barbados -3:58:29 - LMT 1924 # Bridgetown
# Belize
# Whitman entirely disagrees with Shanks; go with Shanks & Pottenger.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Belize 1918 1942 - Oct Sun>=2 0:00 0:30 -0530
Rule Belize 1919 1943 - Feb Sun>=9 0:00 0 CST
Rule Belize 1973 only - Dec 5 0:00 1:00 CDT
@@ -3013,7 +3025,7 @@ Zone Atlantic/Bermuda -4:19:18 - LMT 1930 Jan 1 2:00 # Hamilton
# Milne gives -5:36:13.3 as San José mean time; round to nearest.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule CR 1979 1980 - Feb lastSun 0:00 1:00 D
Rule CR 1979 1980 - Jun Sun>=1 0:00 0 S
Rule CR 1991 1992 - Jan Sat>=15 0:00 1:00 D
@@ -3187,7 +3199,7 @@ Zone America/Costa_Rica -5:36:13 - LMT 1890 # San José
# From Paul Eggert (2012-11-03):
# For now, assume the future rule is first Sunday in November.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Cuba 1928 only - Jun 10 0:00 1:00 D
Rule Cuba 1928 only - Oct 10 0:00 0 S
Rule Cuba 1940 1942 - Jun Sun>=1 0:00 1:00 D
@@ -3256,7 +3268,7 @@ Zone America/Havana -5:29:28 - LMT 1890
# decided to revert.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule DR 1966 only - Oct 30 0:00 1:00 EDT
Rule DR 1967 only - Feb 28 0:00 0 EST
Rule DR 1969 1973 - Oct lastSun 0:00 0:30 -0430
@@ -3273,7 +3285,7 @@ Zone America/Santo_Domingo -4:39:36 - LMT 1890
# El Salvador
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Salv 1987 1988 - May Sun>=1 0:00 1:00 D
Rule Salv 1987 1988 - Sep lastSun 0:00 0 S
# There are too many San Salvadors elsewhere, so use America/El_Salvador
@@ -3302,7 +3314,7 @@ Zone America/El_Salvador -5:56:48 - LMT 1921 # San Salvador
# (2006-04-19), says DST ends at 24:00. See
# http://www.sieca.org.gt/Sitio_publico/Energeticos/Doc/Medidas/Cambio_Horario_Nac_190406.pdf
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Guat 1973 only - Nov 25 0:00 1:00 D
Rule Guat 1974 only - Feb 24 0:00 0 S
Rule Guat 1983 only - May 21 0:00 1:00 D
@@ -3383,7 +3395,7 @@ Zone America/Guatemala -6:02:04 - LMT 1918 Oct 5
# I have not been able to find a more authoritative source:
# https://www.haitilibre.com/en/news-20319-haiti-notices-time-change-in-haiti.html
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Haiti 1983 only - May 8 0:00 1:00 D
Rule Haiti 1984 1987 - Apr lastSun 0:00 1:00 D
Rule Haiti 1983 1987 - Oct lastSun 0:00 0 S
@@ -3431,7 +3443,7 @@ Zone America/Port-au-Prince -4:49:20 - LMT 1890
# http://www.laprensahn.com/pais_nota.php?id04962=7386
# So it seems that Honduras will not enter DST this year....
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Hond 1987 1988 - May Sun>=1 0:00 1:00 D
Rule Hond 1987 1988 - Sep lastSun 0:00 0 S
Rule Hond 2006 only - May Sun>=1 0:00 1:00 D
@@ -3522,7 +3534,7 @@ Zone America/Martinique -4:04:20 - LMT 1890 # Fort-de-France
# The natural sun time is restored in all the national territory, in that the
# time is returned one hour at 01:00 am of October 1 of 2006.
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Nic 1979 1980 - Mar Sun>=16 0:00 1:00 D
Rule Nic 1979 1980 - Jun Mon>=23 0:00 0 S
Rule Nic 2005 only - Apr 10 0:00 1:00 D
diff --git a/make/data/tzdata/pacificnew b/make/data/tzdata/pacificnew
deleted file mode 100644
index f19a876372c..00000000000
--- a/make/data/tzdata/pacificnew
+++ /dev/null
@@ -1,52 +0,0 @@
-#
-# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-#
-# This code is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License version 2 only, as
-# published by the Free Software Foundation. Oracle designates this
-# particular file as subject to the "Classpath" exception as provided
-# by Oracle in the LICENSE file that accompanied this code.
-#
-# This code is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-# version 2 for more details (a copy is included in the LICENSE file that
-# accompanied this code).
-#
-# You should have received a copy of the GNU General Public License version
-# 2 along with this work; if not, write to the Free Software Foundation,
-# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-# or visit www.oracle.com if you need additional information or have any
-# questions.
-#
-# tzdb data for proposed US election time (this file is obsolete)
-
-# This file is in the public domain, so clarified as of
-# 2009-05-17 by Arthur David Olson.
-
-# From Arthur David Olson (1989-04-05):
-# On 1989-04-05, the U. S. House of Representatives passed (238-154) a bill
-# establishing "Pacific Presidential Election Time"; it was not acted on
-# by the Senate or signed into law by the President.
-# You might want to change the "PE" (Presidential Election) below to
-# "Q" (Quadrennial) to maintain three-character zone abbreviations.
-# If you're really conservative, you might want to change it to "D".
-# Avoid "L" (Leap Year), which won't be true in 2100.
-
-# If Presidential Election Time is ever established, replace "XXXX" below
-# with the year the law takes effect and uncomment the "##" lines.
-
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
-## Rule Twilite XXXX max - Apr Sun>=1 2:00 1:00 D
-## Rule Twilite XXXX max uspres Oct lastSun 2:00 1:00 PE
-## Rule Twilite XXXX max uspres Nov Sun>=7 2:00 0 S
-## Rule Twilite XXXX max nonpres Oct lastSun 2:00 0 S
-
-# Zone NAME STDOFF RULES/SAVE FORMAT [UNTIL]
-## Zone America/Los_Angeles-PET -8:00 US P%sT XXXX
-## -8:00 Twilite P%sT
-
-# For now...
-Link America/Los_Angeles US/Pacific-New ##
diff --git a/make/data/tzdata/southamerica b/make/data/tzdata/southamerica
index 51795f7621b..566dabfadb4 100644
--- a/make/data/tzdata/southamerica
+++ b/make/data/tzdata/southamerica
@@ -71,7 +71,7 @@
# I am sending modifications to the Argentine time zone table...
# AR was chosen because they are the ISO letters that represent Argentina.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Arg 1930 only - Dec 1 0:00 1:00 -
Rule Arg 1931 only - Apr 1 0:00 0 -
Rule Arg 1931 only - Oct 15 0:00 1:00 -
@@ -792,7 +792,7 @@ Zone America/La_Paz -4:32:36 - LMT 1890
# From Paul Eggert (2013-10-17):
# For now, assume western Amazonas will change as well.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
# Decree 20,466 (1931-10-01)
# Decree 21,896 (1932-01-10)
Rule Brazil 1931 only - Oct 3 11:00 1:00 -
@@ -1281,7 +1281,7 @@ Zone America/Rio_Branco -4:31:12 - LMT 1914
# For now, assume that they will not revert,
# since they have extended the expiration date once already.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Chile 1927 1931 - Sep 1 0:00 1:00 -
Rule Chile 1928 1932 - Apr 1 0:00 0 -
Rule Chile 1968 only - Nov 3 4:00u 1:00 -
@@ -1381,7 +1381,7 @@ Zone Antarctica/Palmer 0 - -00 1965
# Milne gives 4:56:16.4 for Bogotá time in 1899; round to nearest. He writes,
# "A variation of fifteen minutes in the public clocks of Bogota is not rare."
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule CO 1992 only - May 3 0:00 1:00 -
Rule CO 1993 only - Apr 4 0:00 0 -
# Zone NAME STDOFF RULES FORMAT [UNTIL]
@@ -1441,7 +1441,7 @@ Link America/Curacao America/Kralendijk # Caribbean Netherlands
# (Not one step back), the clocks went back in 1993 and the experiment was not
# repeated. For now, assume transitions were at 00:00 local time country-wide.
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Ecuador 1992 only - Nov 28 0:00 1:00 -
Rule Ecuador 1993 only - Feb 5 0:00 0 -
#
@@ -1535,7 +1535,7 @@ Zone Pacific/Galapagos -5:58:24 - LMT 1931 # Puerto Baquerizo Moreno
# For now we will assume permanent -03 for the Falklands
# until advised differently (to apply for 2012 and beyond, after the 2011
# experiment was apparently successful.)
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Falk 1937 1938 - Sep lastSun 0:00 1:00 -
Rule Falk 1938 1942 - Mar Sun>=19 0:00 0 -
Rule Falk 1939 only - Oct 1 0:00 1:00 -
@@ -1581,7 +1581,7 @@ Zone America/Guyana -3:52:40 - LMT 1915 Mar # Georgetown
# No time of the day is established for the adjustment, so people normally
# adjust their clocks at 0 hour of the given dates.
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Para 1975 1988 - Oct 1 0:00 1:00 -
Rule Para 1975 1978 - Mar 1 0:00 0 -
Rule Para 1979 1991 - Apr 1 0:00 0 -
@@ -1674,7 +1674,7 @@ Zone America/Asuncion -3:50:40 - LMT 1890
# From Paul Eggert (2006-03-22):
# Shanks & Pottenger don't have this transition. Assume 1986 was like 1987.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Peru 1938 only - Jan 1 0:00 1:00 -
Rule Peru 1938 only - Apr 1 0:00 0 -
Rule Peru 1938 1939 - Sep lastSun 0:00 1:00 -
@@ -1770,7 +1770,7 @@ Link America/Port_of_Spain America/Tortola # Virgin Islands (UK)
# https://www.impo.com.uy/diariooficial/1926/03/10/2
# https://www.impo.com.uy/diariooficial/1926/03/18/2
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Uruguay 1923 1925 - Oct 1 0:00 0:30 -
Rule Uruguay 1924 1926 - Apr 1 0:00 0 -
# From Tim Parenti (2018-02-15):
diff --git a/make/data/tzdata/systemv b/make/data/tzdata/systemv
deleted file mode 100644
index 9525ec47171..00000000000
--- a/make/data/tzdata/systemv
+++ /dev/null
@@ -1,62 +0,0 @@
-#
-# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-#
-# This code is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License version 2 only, as
-# published by the Free Software Foundation. Oracle designates this
-# particular file as subject to the "Classpath" exception as provided
-# by Oracle in the LICENSE file that accompanied this code.
-#
-# This code is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-# version 2 for more details (a copy is included in the LICENSE file that
-# accompanied this code).
-#
-# You should have received a copy of the GNU General Public License version
-# 2 along with this work; if not, write to the Free Software Foundation,
-# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-# or visit www.oracle.com if you need additional information or have any
-# questions.
-#
-# tzdb data for System V rules (this file is obsolete)
-
-# This file is in the public domain, so clarified as of
-# 2009-05-17 by Arthur David Olson.
-
-# Old rules, should the need arise.
-# No attempt is made to handle Newfoundland, since it cannot be expressed
-# using the System V "TZ" scheme (half-hour offset), or anything outside
-# North America (no support for non-standard DST start/end dates), nor
-# the changes in the DST rules in the US after 1976 (which occurred after
-# the old rules were written).
-#
-# If you need the old rules, uncomment ## lines.
-# Compile this *without* leap second correction for true conformance.
-
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
-Rule SystemV min 1973 - Apr lastSun 2:00 1:00 D
-Rule SystemV min 1973 - Oct lastSun 2:00 0 S
-Rule SystemV 1974 only - Jan 6 2:00 1:00 D
-Rule SystemV 1974 only - Nov lastSun 2:00 0 S
-Rule SystemV 1975 only - Feb 23 2:00 1:00 D
-Rule SystemV 1975 only - Oct lastSun 2:00 0 S
-Rule SystemV 1976 max - Apr lastSun 2:00 1:00 D
-Rule SystemV 1976 max - Oct lastSun 2:00 0 S
-
-# Zone NAME STDOFF RULES/SAVE FORMAT [UNTIL]
-## Zone SystemV/AST4ADT -4:00 SystemV A%sT
-## Zone SystemV/EST5EDT -5:00 SystemV E%sT
-## Zone SystemV/CST6CDT -6:00 SystemV C%sT
-## Zone SystemV/MST7MDT -7:00 SystemV M%sT
-## Zone SystemV/PST8PDT -8:00 SystemV P%sT
-## Zone SystemV/YST9YDT -9:00 SystemV Y%sT
-## Zone SystemV/AST4 -4:00 - AST
-## Zone SystemV/EST5 -5:00 - EST
-## Zone SystemV/CST6 -6:00 - CST
-## Zone SystemV/MST7 -7:00 - MST
-## Zone SystemV/PST8 -8:00 - PST
-## Zone SystemV/YST9 -9:00 - YST
-## Zone SystemV/HST10 -10:00 - HST
diff --git a/make/devkit/createJMHBundle.sh b/make/devkit/createJMHBundle.sh
index b56950c41ec..b460ee75311 100644
--- a/make/devkit/createJMHBundle.sh
+++ b/make/devkit/createJMHBundle.sh
@@ -26,7 +26,7 @@
# Create a bundle in the build directory, containing what's needed to
# build and run JMH microbenchmarks from the OpenJDK build.
-JMH_VERSION=1.21
+JMH_VERSION=1.26
COMMONS_MATH3_VERSION=3.2
JOPT_SIMPLE_VERSION=4.6
diff --git a/make/devkit/createMacosxDevkit.sh b/make/devkit/createMacosxDevkit.sh
index 2a7dfe2037b..cd105823366 100644
--- a/make/devkit/createMacosxDevkit.sh
+++ b/make/devkit/createMacosxDevkit.sh
@@ -91,7 +91,6 @@ EXCLUDE_DIRS=" \
Platforms/AppleTVSimulator.platform \
Platforms/iPhoneSimulator.platform \
Platforms/WatchSimulator.platform \
- Contents/SharedFrameworks/LLDB.framework \
Contents/SharedFrameworks/ModelIO.framework \
Contents/SharedFrameworks/XCSUI.framework \
Contents/SharedFrameworks/SceneKit.framework \
diff --git a/make/devkit/createWindowsDevkit2017.sh b/make/devkit/createWindowsDevkit2017.sh
index 91227259bdf..42c13251293 100644
--- a/make/devkit/createWindowsDevkit2017.sh
+++ b/make/devkit/createWindowsDevkit2017.sh
@@ -138,8 +138,8 @@ cp -r "$VS_INSTALL_DIR/$REDIST_SUBDIR/x86" $DEVKIT_ROOT/VC/redist/
cp $DEVKIT_ROOT/VC/redist/x86/$MSVCR_DLL $DEVKIT_ROOT/VC/bin/x86
cp $DEVKIT_ROOT/VC/redist/x86/$MSVCP_DLL $DEVKIT_ROOT/VC/bin/x86
cp $DEVKIT_ROOT/VC/redist/x64/$MSVCR_DLL $DEVKIT_ROOT/VC/bin/x64
-cp $DEVKIT_ROOT/VC/redist/x64/$MSVCR_DLL $DEVKIT_ROOT/VC/bin/x64
-cp $DEVKIT_ROOT/VC/redist/arm64/$MSVCP_DLL $DEVKIT_ROOT/VC/bin/arm64
+cp $DEVKIT_ROOT/VC/redist/x64/$MSVCP_DLL $DEVKIT_ROOT/VC/bin/x64
+cp $DEVKIT_ROOT/VC/redist/arm64/$MSVCR_DLL $DEVKIT_ROOT/VC/bin/arm64
cp $DEVKIT_ROOT/VC/redist/arm64/$MSVCP_DLL $DEVKIT_ROOT/VC/bin/arm64
################################################################################
diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk
index 733658d5d8b..fb7d48f1e27 100644
--- a/make/hotspot/gensrc/GensrcAdlc.gmk
+++ b/make/hotspot/gensrc/GensrcAdlc.gmk
@@ -138,6 +138,7 @@ ifeq ($(call check-jvm-feature, compiler2), true)
ifeq ($(HOTSPOT_TARGET_CPU_ARCH), aarch64)
AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \
+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_neon.ad \
$d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_sve.ad \
)))
endif
diff --git a/make/hotspot/gensrc/GensrcJvmti.gmk b/make/hotspot/gensrc/GensrcJvmti.gmk
index 312c8bc737a..b31a6f52292 100644
--- a/make/hotspot/gensrc/GensrcJvmti.gmk
+++ b/make/hotspot/gensrc/GensrcJvmti.gmk
@@ -106,17 +106,6 @@ $(eval $(call SetupJvmtiGeneration, jvmti.h, jvmtiH.xsl, \
$(eval $(call SetupJvmtiGeneration, jvmti.html, jvmti.xsl, \
-PARAM majorversion $(VERSION_FEATURE)))
-JVMTI_BC_SRCDIR := $(TOPDIR)/src/hotspot/share/interpreter/zero
-
-ifeq ($(call check-jvm-feature, zero), true)
- $(eval $(call SetupXslTransform, bytecodeInterpreterWithChecks.cpp, \
- XML_FILE := $(JVMTI_BC_SRCDIR)/bytecodeInterpreterWithChecks.xml, \
- XSL_FILE := $(JVMTI_BC_SRCDIR)/bytecodeInterpreterWithChecks.xsl, \
- OUTPUT_DIR := $(JVMTI_OUTPUTDIR), \
- DEPS := $(JVMTI_BC_SRCDIR)/bytecodeInterpreter.cpp, \
- ))
-endif
-
################################################################################
# Copy jvmti.h to include dir
diff --git a/make/hotspot/lib/CompileJvm.gmk b/make/hotspot/lib/CompileJvm.gmk
index 441c09a3853..65edd047571 100644
--- a/make/hotspot/lib/CompileJvm.gmk
+++ b/make/hotspot/lib/CompileJvm.gmk
@@ -91,11 +91,11 @@ DISABLED_WARNINGS_clang := tautological-compare \
undefined-var-template sometimes-uninitialized unknown-pragmas \
delete-non-virtual-dtor missing-braces char-subscripts \
ignored-qualifiers missing-field-initializers mismatched-tags \
- shift-negative-value
+ shift-negative-value misleading-indentation
DISABLED_WARNINGS_xlc := tautological-compare shift-negative-value
-DISABLED_WARNINGS_microsoft := 4100 4127 4201 4244 4291 4351 \
+DISABLED_WARNINGS_microsoft := 4100 4127 4146 4201 4244 4291 4351 \
4511 4512 4514 4624 4996
################################################################################
diff --git a/make/hotspot/lib/JvmFeatures.gmk b/make/hotspot/lib/JvmFeatures.gmk
index 3647806e1d7..d96d006c5fc 100644
--- a/make/hotspot/lib/JvmFeatures.gmk
+++ b/make/hotspot/lib/JvmFeatures.gmk
@@ -126,6 +126,7 @@ ifneq ($(call check-jvm-feature, cds), true)
dynamicArchive.cpp \
filemap.cpp \
heapShared.cpp \
+ lambdaFormInvokers.cpp \
metaspaceShared.cpp \
metaspaceShared_$(HOTSPOT_TARGET_CPU).cpp \
metaspaceShared_$(HOTSPOT_TARGET_CPU_ARCH).cpp \
diff --git a/make/hotspot/symbols/symbols-aix b/make/hotspot/symbols/symbols-aix
index 0efd2dba97f..92703573a5f 100644
--- a/make/hotspot/symbols/symbols-aix
+++ b/make/hotspot/symbols/symbols-aix
@@ -21,7 +21,7 @@
# questions.
#
-JVM_handle_linux_signal
+JVM_handle_aix_signal
numa_error
numa_warn
sysThreadAvailableStackWithSlack
diff --git a/make/hotspot/symbols/symbols-unix b/make/hotspot/symbols/symbols-unix
index 97aa40b970b..1781d84ab94 100644
--- a/make/hotspot/symbols/symbols-unix
+++ b/make/hotspot/symbols/symbols-unix
@@ -143,14 +143,15 @@ JVM_InternString
JVM_Interrupt
JVM_InvokeMethod
JVM_IsArrayClass
-JVM_IsDynamicDumpingEnabled
-JVM_IsSharingEnabled
+JVM_IsCDSDumpingEnabled
JVM_IsConstructorIx
+JVM_IsDumpingClassList
JVM_IsHiddenClass
JVM_IsInterface
JVM_IsPrimitiveClass
JVM_IsRecord
JVM_IsSameClassPackage
+JVM_IsSharingEnabled
JVM_IsSupportedJNIVersion
JVM_IsThreadAlive
JVM_IsVMGeneratedMethodIx
@@ -158,6 +159,7 @@ JVM_LatestUserDefinedLoader
JVM_LoadLibrary
JVM_LookupDefineClass
JVM_LookupLambdaProxyClassFromArchive
+JVM_LogLambdaFormInvoker
JVM_MaxMemory
JVM_MaxObjectInspectionAge
JVM_MonitorNotify
@@ -169,11 +171,13 @@ JVM_NativePath
JVM_NewArray
JVM_NewInstanceFromConstructor
JVM_NewMultiArray
+JVM_PhantomReferenceRefersTo
JVM_RaiseSignal
JVM_RawMonitorCreate
JVM_RawMonitorDestroy
JVM_RawMonitorEnter
JVM_RawMonitorExit
+JVM_ReferenceRefersTo
JVM_RegisterLambdaProxyClassForArchiving
JVM_RegisterSignal
JVM_ReleaseUTF
diff --git a/make/jdk/src/classes/build/tools/blacklistedcertsconverter/BlacklistedCertsConverter.java b/make/jdk/src/classes/build/tools/blacklistedcertsconverter/BlacklistedCertsConverter.java
index 54c60eb43d1..653a1db10dd 100644
--- a/make/jdk/src/classes/build/tools/blacklistedcertsconverter/BlacklistedCertsConverter.java
+++ b/make/jdk/src/classes/build/tools/blacklistedcertsconverter/BlacklistedCertsConverter.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -25,14 +25,24 @@
package build.tools.blacklistedcertsconverter;
+import java.io.IOException;
+import java.math.BigInteger;
import java.security.MessageDigest;
+import java.security.PublicKey;
import java.security.cert.Certificate;
import java.security.cert.CertificateFactory;
import java.security.cert.X509Certificate;
+import java.security.interfaces.ECPublicKey;
+import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collection;
+import java.util.List;
import java.util.Set;
import java.util.TreeSet;
+import sun.security.util.DerInputStream;
+import sun.security.util.DerOutputStream;
+import sun.security.util.DerValue;
/**
* Converts blacklisted.certs.pem from System.in to blacklisted.certs in
@@ -75,8 +85,8 @@ public static void main(String[] args) throws Exception {
// Output sorted so that it's easy to locate an entry.
Set fingerprints = new TreeSet<>();
for (Certificate cert: certs) {
- fingerprints.add(
- getCertificateFingerPrint(mdAlg, (X509Certificate)cert));
+ fingerprints.addAll(
+ getCertificateFingerPrints(mdAlg, (X509Certificate)cert));
}
for (String s: fingerprints) {
@@ -97,17 +107,90 @@ private static void byte2hex(byte b, StringBuffer buf) {
}
/**
- * Gets the requested finger print of the certificate.
+ * Computes the possible fingerprints of the certificate.
+ *
+ * One fingerprint is produced per encoding returned by
+ * altEncodings(cert): the encoding is hashed with the mdAlg digest and
+ * every digest byte is rendered through byte2hex.
+ *
+ * @param mdAlg digest algorithm name handed to MessageDigest.getInstance
+ * @param cert certificate whose encodings are hashed
+ * @return the fingerprints, in the order altEncodings yields the encodings
*/
- private static String getCertificateFingerPrint(
+ private static List<String> getCertificateFingerPrints(
String mdAlg, X509Certificate cert) throws Exception {
- byte[] encCertInfo = cert.getEncoded();
- MessageDigest md = MessageDigest.getInstance(mdAlg);
- byte[] digest = md.digest(encCertInfo);
- StringBuffer buf = new StringBuffer();
- for (int i = 0; i < digest.length; i++) {
- byte2hex(digest[i], buf);
+ List<String> fingerprints = new ArrayList<>();
+ for (byte[] encoding : altEncodings(cert)) {
+ MessageDigest md = MessageDigest.getInstance(mdAlg);
+ byte[] digest = md.digest(encoding);
+ StringBuffer buf = new StringBuffer();
+ for (int i = 0; i < digest.length; i++) {
+ byte2hex(digest[i], buf);
+ }
+ fingerprints.add(buf.toString());
+ }
+ return fingerprints;
+ }
+
+ /**
+ * Builds the encodings of a certificate that are fingerprinted.
+ *
+ * The certificate is split into its three top-level values
+ * (tbsCertificate, signatureAlgorithm, signature). Every combination of
+ * {signatureAlgorithm, altAlgId(signatureAlgorithm)} with
+ * {signature, altSig(order, signature)} is re-assembled into a SEQUENCE;
+ * the altSig variant is only added when the public key is an ECPublicKey.
+ *
+ * @param c certificate to re-encode
+ * @return two encodings for non-EC keys, four for EC keys; the first one
+ * combines the unmodified algorithm identifier and signature
+ */
+ private static List<byte[]> altEncodings(X509Certificate c)
+ throws Exception {
+ List<byte[]> result = new ArrayList<>();
+
+ DerValue d = new DerValue(c.getEncoded());
+ DerValue[] seq = new DerValue[3];
+ // tbsCertificate
+ seq[0] = d.data.getDerValue();
+ // signatureAlgorithm
+ seq[1] = d.data.getDerValue();
+ // signature
+ seq[2] = d.data.getDerValue();
+
+ List<DerValue> algIds = Arrays.asList(seq[1], altAlgId(seq[1]));
+
+ List<DerValue> sigs;
+ PublicKey p = c.getPublicKey();
+ if (p instanceof ECPublicKey) {
+ ECPublicKey ep = (ECPublicKey) p;
+ // the group order is the modulus used to negate s in altSig
+ BigInteger mod = ep.getParams().getOrder();
+ sigs = Arrays.asList(seq[2], altSig(mod, seq[2]));
+ } else {
+ sigs = Arrays.asList(seq[2]);
+ }
+
+ for (DerValue algId : algIds) {
+ for (DerValue sig : sigs) {
+ DerOutputStream tmp = new DerOutputStream();
+ tmp.putDerValue(seq[0]);
+ tmp.putDerValue(algId);
+ tmp.putDerValue(sig);
+ DerOutputStream tmp2 = new DerOutputStream();
+ tmp2.write(DerValue.tag_Sequence, tmp);
+ result.add(tmp2.toByteArray());
+ }
+ }
+ return result;
+ }
+
+ /**
+ * Produces the alternative form of a signature value.
+ *
+ * The signature bit string is parsed as a SEQUENCE of two integers
+ * (r, s); s is replaced by (-s mod mod) and the pair is re-encoded as a
+ * bit string wrapping SEQUENCE { r, s2 }.
+ *
+ * @param mod modulus for the negation (the EC group order at the call
+ * site in altEncodings)
+ * @param sig the signature BIT STRING taken from the certificate
+ * @return a new bit-string DerValue carrying (r, -s mod mod)
+ * @throws IOException if the signature cannot be decoded or re-encoded
+ */
+ private static DerValue altSig(BigInteger mod, DerValue sig)
+ throws IOException {
+ byte[] sigBits = sig.getBitString();
+ DerInputStream in =
+ new DerInputStream(sigBits, 0, sigBits.length, false);
+ DerValue[] values = in.getSequence(2);
+ BigInteger r = values[0].getBigInteger();
+ BigInteger s = values[1].getBigInteger();
+ BigInteger s2 = s.negate().mod(mod);
+ DerOutputStream out = new DerOutputStream();
+ out.putInteger(r);
+ out.putInteger(s2);
+ DerOutputStream tmp = new DerOutputStream();
+ tmp.putBitString(new DerValue(DerValue.tag_Sequence,
+ out.toByteArray()).toByteArray());
+ return new DerValue(tmp.toByteArray());
+ }
+
+ /**
+ * Produces the alternative encoding of an algorithm identifier.
+ *
+ * The OID is copied. If nothing follows the OID in the input, a NULL is
+ * appended as parameters; if anything follows, it is left out of the
+ * result.
+ *
+ * @param algId the signatureAlgorithm value taken from the certificate
+ * @return a SEQUENCE holding the OID with the parameters toggled
+ * @throws IOException if the identifier cannot be decoded or re-encoded
+ */
+ private static DerValue altAlgId(DerValue algId) throws IOException {
+ DerInputStream in = algId.toDerInputStream();
+ DerOutputStream bytes = new DerOutputStream();
+ bytes.putOID(in.getOID());
+ // encode parameters as NULL if not present or omit if NULL
+ // NOTE(review): any parameters that are present are omitted, not only
+ // NULL ones; confirm non-NULL parameters cannot occur here.
+ if (in.available() == 0) {
+ bytes.putNull();
}
- return buf.toString();
+ DerOutputStream tmp = new DerOutputStream();
+ tmp.write(DerValue.tag_Sequence, bytes);
+ return new DerValue(tmp.toByteArray());
}
}
diff --git a/make/jdk/src/classes/build/tools/spp/Spp.java b/make/jdk/src/classes/build/tools/spp/Spp.java
index 6921c65667b..2a0cb57bc39 100644
--- a/make/jdk/src/classes/build/tools/spp/Spp.java
+++ b/make/jdk/src/classes/build/tools/spp/Spp.java
@@ -106,7 +106,7 @@ public static void main(String args[]) throws Exception {
static final String LNSEP = System.getProperty("line.separator");
static final String KEY = "([a-zA-Z0-9]+)";
static final String VAR = "([a-zA-Z0-9_\\-]+)";
- static final String TEXT = "([a-zA-Z0-9&;,.<>/#() \\?\\[\\]\\$]+)"; // $ -- hack embedded $var$
+ static final String TEXT = "([\\p{Print}&&[^{#:}]]+)";
static final int GN_NOT = 1;
static final int GN_KEY = 2;
@@ -140,6 +140,10 @@ void append(StringBuffer buf, String ln,
}
}
}
+ if (repl == null) {
+ System.err.println("Error: undefined variable in line " + ln);
+ System.exit(-1);
+ }
vardef.appendReplacement(buf, repl);
}
vardef.appendTail(buf);
diff --git a/make/modules/java.base/Copy.gmk b/make/modules/java.base/Copy.gmk
index 9071f4e6e37..040b7588ba1 100644
--- a/make/modules/java.base/Copy.gmk
+++ b/make/modules/java.base/Copy.gmk
@@ -182,12 +182,16 @@ endif
################################################################################
-$(eval $(call SetupCopyFiles, COPY_NET_PROPERTIES, \
- FILES := $(TOPDIR)/src/java.base/share/conf/net.properties, \
- DEST := $(CONF_DST_DIR), \
-))
+NET_PROPERTIES_SRCS := $(TOPDIR)/src/java.base/share/conf/net.properties \
+ $(TOPDIR)/src/java.base/$(OPENJDK_TARGET_OS_TYPE)/conf/net.properties
+
+NET_PROPERTIES_DST := $(CONF_DST_DIR)/net.properties
+
+$(NET_PROPERTIES_DST): $(NET_PROPERTIES_SRCS)
+ $(call MakeTargetDir)
+ $(CAT) $(NET_PROPERTIES_SRCS) > $@
-TARGETS += $(COPY_NET_PROPERTIES)
+TARGETS += $(NET_PROPERTIES_DST)
ifeq ($(call isTargetOs, linux), true)
$(eval $(call SetupCopyFiles, COPY_SDP_CONF, \
diff --git a/make/modules/java.base/gendata/GendataTZDB.gmk b/make/modules/java.base/gendata/GendataTZDB.gmk
index 54e6582d81d..1352178694f 100644
--- a/make/modules/java.base/gendata/GendataTZDB.gmk
+++ b/make/modules/java.base/gendata/GendataTZDB.gmk
@@ -29,7 +29,7 @@ GENDATA_TZDB :=
# Time zone data file creation
#
TZDATA_DIR := $(TOPDIR)/make/data/tzdata
-TZDATA_TZFILE := africa antarctica asia australasia europe northamerica pacificnew southamerica backward etcetera gmt jdk11_backward
+TZDATA_TZFILE := africa antarctica asia australasia europe northamerica southamerica backward etcetera gmt jdk11_backward
TZDATA_TZFILES := $(addprefix $(TZDATA_DIR)/,$(TZDATA_TZFILE))
GENDATA_TZDB_DAT := $(SUPPORT_OUTPUTDIR)/modules_libs/$(MODULE)/tzdb.dat
diff --git a/make/modules/java.base/lib/CoreLibraries.gmk b/make/modules/java.base/lib/CoreLibraries.gmk
index f2b94fe717e..1d5fede2aa8 100644
--- a/make/modules/java.base/lib/CoreLibraries.gmk
+++ b/make/modules/java.base/lib/CoreLibraries.gmk
@@ -49,7 +49,7 @@ $(eval $(call SetupNativeCompilation, BUILD_LIBFDLIBM, \
CFLAGS_windows_debug := -DLOGGING, \
CFLAGS_aix := -qfloat=nomaf, \
DISABLED_WARNINGS_gcc := sign-compare misleading-indentation array-bounds, \
- DISABLED_WARNINGS_clang := sign-compare, \
+ DISABLED_WARNINGS_clang := sign-compare misleading-indentation, \
DISABLED_WARNINGS_microsoft := 4146 4244 4018, \
ARFLAGS := $(ARFLAGS), \
OBJECT_DIR := $(SUPPORT_OUTPUTDIR)/native/$(MODULE)/libfdlibm, \
diff --git a/make/modules/java.desktop/lib/Awt2dLibraries.gmk b/make/modules/java.desktop/lib/Awt2dLibraries.gmk
index 7fbd1049f89..3203378d00a 100644
--- a/make/modules/java.desktop/lib/Awt2dLibraries.gmk
+++ b/make/modules/java.desktop/lib/Awt2dLibraries.gmk
@@ -435,7 +435,6 @@ endif
ifeq ($(USE_EXTERNAL_HARFBUZZ), true)
LIBHARFBUZZ_LIBS := $(HARFBUZZ_LIBS)
else
- HARFBUZZ_CFLAGS := -DHAVE_OT -DHAVE_FALLBACK -DHAVE_UCDN -DHAVE_ROUND
# This is better than adding EXPORT_ALL_SYMBOLS
ifneq ($(filter $(TOOLCHAIN_TYPE), gcc clang), )
@@ -493,7 +492,7 @@ else
maybe-uninitialized class-memaccess, \
DISABLED_WARNINGS_clang := unused-value incompatible-pointer-types \
tautological-constant-out-of-range-compare int-to-pointer-cast \
- undef missing-field-initializers, \
+ undef missing-field-initializers range-loop-analysis, \
DISABLED_WARNINGS_microsoft := 4267 4244 4090 4146 4334 4819 4101 4068 4805 4138, \
LDFLAGS := $(LDFLAGS_JDKLIB) \
$(call SET_SHARED_LIBRARY_ORIGIN), \
diff --git a/make/modules/jdk.javadoc/Gendata.gmk b/make/modules/jdk.javadoc/Gendata.gmk
index 5b4485808c7..0ee146a1e21 100644
--- a/make/modules/jdk.javadoc/Gendata.gmk
+++ b/make/modules/jdk.javadoc/Gendata.gmk
@@ -54,7 +54,7 @@ $(eval $(call SetupJavaCompilation, COMPILE_CREATE_SYMBOLS, \
SRC := $(TOPDIR)/make/langtools/src/classes \
$(TOPDIR)/src/jdk.jdeps/share/classes, \
INCLUDES := build/tools/symbolgenerator com/sun/tools/classfile, \
- BIN := $(BUILDTOOLS_OUTPUTDIR)/create_symbols, \
+ BIN := $(BUILDTOOLS_OUTPUTDIR)/create_symbols_javadoc, \
DISABLED_WARNINGS := options, \
JAVAC_FLAGS := \
$(INTERIM_LANGTOOLS_ARGS) \
@@ -71,7 +71,7 @@ $(SUPPORT_OUTPUTDIR)/javadoc-symbols/symbols: \
$(ECHO) Creating javadoc element list
$(JAVA_SMALL) $(INTERIM_LANGTOOLS_ARGS) \
$(COMPILECREATESYMBOLS_ADD_EXPORTS) \
- -classpath $(BUILDTOOLS_OUTPUTDIR)/create_symbols \
+ -classpath $(BUILDTOOLS_OUTPUTDIR)/create_symbols_javadoc \
build.tools.symbolgenerator.CreateSymbols \
build-javadoc-data \
$(CT_DATA_DESCRIPTION) \
@@ -79,7 +79,7 @@ $(SUPPORT_OUTPUTDIR)/javadoc-symbols/symbols: \
11
$(JAVA_SMALL) $(INTERIM_LANGTOOLS_ARGS) \
$(COMPILECREATESYMBOLS_ADD_EXPORTS) \
- -classpath $(BUILDTOOLS_OUTPUTDIR)/create_symbols \
+ -classpath $(BUILDTOOLS_OUTPUTDIR)/create_symbols_javadoc \
build.tools.symbolgenerator.JavadocElementList \
$(JDK_OUTPUTDIR)/modules/jdk.javadoc/jdk/javadoc/internal/doclets/toolkit/resources/releases/element-list-$(JDK_SOURCE_TARGET_VERSION).txt \
$(JAVADOC_MODULESOURCEPATH) \
diff --git a/make/modules/jdk.incubator.jpackage/Gensrc.gmk b/make/modules/jdk.jpackage/Gensrc.gmk
similarity index 93%
rename from make/modules/jdk.incubator.jpackage/Gensrc.gmk
rename to make/modules/jdk.jpackage/Gensrc.gmk
index 5948a80f120..6f3e8b08119 100644
--- a/make/modules/jdk.incubator.jpackage/Gensrc.gmk
+++ b/make/modules/jdk.jpackage/Gensrc.gmk
@@ -31,7 +31,7 @@ include GensrcCommonJdk.gmk
ifeq ($(call isTargetOs, macosx), true)
ENTITLEMENTS_SRC_FILE := $(TOPDIR)/make/data/macosxsigning/java.plist
ENTITLEMENTS_TARGET_FILE := \
- $(SUPPORT_OUTPUTDIR)/gensrc/$(MODULE)/jdk/incubator/jpackage/internal/resources/entitlements.plist
+ $(SUPPORT_OUTPUTDIR)/gensrc/$(MODULE)/jdk/jpackage/internal/resources/entitlements.plist
$(ENTITLEMENTS_TARGET_FILE): $(ENTITLEMENTS_SRC_FILE)
$(call install-file)
diff --git a/make/modules/jdk.incubator.jpackage/Launcher.gmk b/make/modules/jdk.jpackage/Launcher.gmk
similarity index 95%
rename from make/modules/jdk.incubator.jpackage/Launcher.gmk
rename to make/modules/jdk.jpackage/Launcher.gmk
index 7a25dae733c..8d553d5c107 100644
--- a/make/modules/jdk.incubator.jpackage/Launcher.gmk
+++ b/make/modules/jdk.jpackage/Launcher.gmk
@@ -26,5 +26,5 @@
include LauncherCommon.gmk
$(eval $(call SetupBuildLauncher, jpackage, \
- MAIN_CLASS := jdk.incubator.jpackage.main.Main, \
+ MAIN_CLASS := jdk.jpackage.main.Main, \
))
diff --git a/make/modules/jdk.incubator.jpackage/Lib.gmk b/make/modules/jdk.jpackage/Lib.gmk
similarity index 91%
rename from make/modules/jdk.incubator.jpackage/Lib.gmk
rename to make/modules/jdk.jpackage/Lib.gmk
index 7ffef99afe4..7dfb70be5a6 100644
--- a/make/modules/jdk.incubator.jpackage/Lib.gmk
+++ b/make/modules/jdk.jpackage/Lib.gmk
@@ -29,8 +29,8 @@ include LibCommon.gmk
JPACKAGE_APPLAUNCHER_SRC := \
- $(call FindSrcDirsForComponent, jdk.incubator.jpackage, applauncher) \
- $(call FindSrcDirsForComponent, jdk.incubator.jpackage, common)
+ $(call FindSrcDirsForComponent, jdk.jpackage, applauncher) \
+ $(call FindSrcDirsForComponent, jdk.jpackage, common)
ifeq ($(call isTargetOs, windows), true)
@@ -42,7 +42,7 @@ else
endif
-JPACKAGE_OUTPUT_DIR := $(JDK_OUTPUTDIR)/modules/$(MODULE)/jdk/incubator/jpackage/internal/resources
+JPACKAGE_OUTPUT_DIR := $(JDK_OUTPUTDIR)/modules/$(MODULE)/jdk/jpackage/internal/resources
JPACKAGE_CXXFLAGS_windows := -EHsc -DUNICODE -D_UNICODE
# Output app launcher executable in resources dir, and symbols in the object dir
@@ -73,7 +73,7 @@ ifeq ($(call isTargetOs, windows), true)
$(eval $(call SetupJdkLibrary, BUILD_LIB_JPACKAGE, \
NAME := jpackage, \
OPTIMIZATION := LOW, \
- EXTRA_SRC := jdk.incubator.jpackage:common, \
+ EXTRA_SRC := jdk.jpackage:common, \
CXXFLAGS := $(CXXFLAGS_JDKLIB) $(JPACKAGE_CXXFLAGS_windows), \
LDFLAGS := $(LDFLAGS_JDKLIB) $(LDFLAGS_CXX_JDK) \
$(call SET_SHARED_LIBRARY_ORIGIN), \
@@ -99,8 +99,8 @@ ifeq ($(call isTargetOs, windows), true)
TARGETS += $(BUILD_LIB_WIXHELPER)
JPACKAGE_MSIWRAPPER_SRC := \
- $(call FindSrcDirsForComponent, jdk.incubator.jpackage, msiwrapper) \
- $(call FindSrcDirsForComponent, jdk.incubator.jpackage, common)
+ $(call FindSrcDirsForComponent, jdk.jpackage, msiwrapper) \
+ $(call FindSrcDirsForComponent, jdk.jpackage, common)
# Build exe installer wrapper for msi installer
$(eval $(call SetupJdkExecutable, BUILD_JPACKAGE_MSIWRAPPER, \
diff --git a/make/scripts/compare.sh b/make/scripts/compare.sh
index 25630199a21..5d0e846e755 100644
--- a/make/scripts/compare.sh
+++ b/make/scripts/compare.sh
@@ -696,7 +696,7 @@ compare_bin_file() {
# pdb files.
PDB_DIRS="$(ls -d \
{$OTHER,$THIS}/support/modules_{cmds,libs}/{*,*/*} \
- {$OTHER,$THIS}/support/native/jdk.incubator.jpackage/* \
+ {$OTHER,$THIS}/support/native/jdk.jpackage/* \
)"
export _NT_SYMBOL_PATH="$(echo $PDB_DIRS | tr ' ' ';')"
fi
diff --git a/make/test/BuildMicrobenchmark.gmk b/make/test/BuildMicrobenchmark.gmk
index 3bbbea47b8e..55e5026eb38 100644
--- a/make/test/BuildMicrobenchmark.gmk
+++ b/make/test/BuildMicrobenchmark.gmk
@@ -90,11 +90,10 @@ $(eval $(call SetupJavaCompilation, BUILD_JDK_MICROBENCHMARK, \
TARGET_RELEASE := $(TARGET_RELEASE_NEWJDK_UPGRADED), \
SMALL_JAVA := false, \
CLASSPATH := $(MICROBENCHMARK_CLASSPATH), \
- DISABLED_WARNINGS := processing rawtypes cast serial preview, \
+ DISABLED_WARNINGS := processing rawtypes cast serial, \
SRC := $(MICROBENCHMARK_SRC), \
BIN := $(MICROBENCHMARK_CLASSES), \
JAVA_FLAGS := --add-modules jdk.unsupported --limit-modules java.management, \
- JAVAC_FLAGS := --enable-preview, \
))
$(BUILD_JDK_MICROBENCHMARK): $(JMH_COMPILE_JARS)
diff --git a/src/demo/share/java2d/J2DBench/src/j2dbench/tests/cmm/CMMTests.java b/src/demo/share/java2d/J2DBench/src/j2dbench/tests/cmm/CMMTests.java
index 210970f6469..3c0f936358c 100644
--- a/src/demo/share/java2d/J2DBench/src/j2dbench/tests/cmm/CMMTests.java
+++ b/src/demo/share/java2d/J2DBench/src/j2dbench/tests/cmm/CMMTests.java
@@ -73,14 +73,16 @@ public static void init() {
ColorSpace.CS_sRGB,
ColorSpace.CS_GRAY,
ColorSpace.CS_LINEAR_RGB,
- ColorSpace.CS_CIEXYZ
+ ColorSpace.CS_CIEXYZ,
+ ColorSpace.CS_PYCC
};
String[] csNames = new String[]{
"CS_sRGB",
"CS_GRAY",
"CS_LINEAR_RGB",
- "CS_CIEXYZ"
+ "CS_CIEXYZ",
+ "CS_PYCC"
};
csList = new Option.IntList(cmmOptRoot,
diff --git a/src/demo/share/jfc/Notepad/Notepad.java b/src/demo/share/jfc/Notepad/Notepad.java
index 3ebe3f07d22..cb4552f94cf 100644
--- a/src/demo/share/jfc/Notepad/Notepad.java
+++ b/src/demo/share/jfc/Notepad/Notepad.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -60,7 +60,7 @@
* @author Timothy Prinzing
*/
@SuppressWarnings("serial")
-class Notepad extends JPanel {
+public class Notepad extends JPanel {
protected static Properties properties;
private static ResourceBundle resources;
diff --git a/src/hotspot/cpu/aarch64/aarch64-asmtest.py b/src/hotspot/cpu/aarch64/aarch64-asmtest.py
index 104104b09a9..615fe5e045f 100644
--- a/src/hotspot/cpu/aarch64/aarch64-asmtest.py
+++ b/src/hotspot/cpu/aarch64/aarch64-asmtest.py
@@ -1,4 +1,7 @@
+import os
import random
+import subprocess
+import sys
AARCH64_AS = "as"
AARCH64_OBJDUMP = "objdump"
@@ -129,6 +132,8 @@ class OperandFactory:
_modes = {'x' : GeneralRegister,
'w' : GeneralRegister,
+ 'b' : FloatRegister,
+ 'h' : FloatRegister,
's' : FloatRegister,
'd' : FloatRegister,
'z' : FloatZero,
@@ -198,16 +203,16 @@ def __init__(self, name, mode):
self.isFloat = (mode == 'd') | (mode == 's')
if self.isFloat:
self.isWord = mode != 'd'
- self.asmRegPrefix = ["d", "s"][self.isWord]
+ self.asmRegPrefix = ["d", "s"][self.isWord]
else:
self.isWord = mode != 'x'
self.asmRegPrefix = ["x", "w"][self.isWord]
-
+
def name(self):
return self._name + (self.mode if self.mode != 'x' else '')
-
+
def aname(self):
- return (self._name+mode if (mode == 'b' or mode == 'h')
+ return (self._name+self.mode if (self.mode == 'b' or self.mode == 'h')  # 'mode' is not a local here; read the instance attribute as name() does
else self._name)
class ThreeRegInstruction(Instruction):
@@ -220,17 +225,17 @@ def generate(self):
def cstr(self):
return (super(ThreeRegInstruction, self).cstr()
- + ('%s, %s, %s'
+ + ('%s, %s, %s'
% (self.reg[0],
self.reg[1], self.reg[2])))
-
+
def astr(self):
prefix = self.asmRegPrefix
return (super(ThreeRegInstruction, self).astr()
- + ('%s, %s, %s'
+ + ('%s, %s, %s'
% (self.reg[0].astr(prefix),
self.reg[1].astr(prefix), self.reg[2].astr(prefix))))
-
+
class FourRegInstruction(ThreeRegInstruction):
def generate(self):
@@ -241,12 +246,12 @@ def generate(self):
def cstr(self):
return (super(FourRegInstruction, self).cstr()
+ (', %s' % self.reg[3]))
-
+
def astr(self):
prefix = self.asmRegPrefix
return (super(FourRegInstruction, self).astr()
+ (', %s' % self.reg[3].astr(prefix)))
-
+
class TwoRegInstruction(Instruction):
def generate(self):
@@ -261,17 +266,17 @@ def cstr(self):
def astr(self):
prefix = self.asmRegPrefix
return (super(TwoRegInstruction, self).astr()
- + ('%s, %s'
+ + ('%s, %s'
% (self.reg[0].astr(prefix),
self.reg[1].astr(prefix))))
-
+
class TwoRegImmedInstruction(TwoRegInstruction):
def generate(self):
super(TwoRegImmedInstruction, self).generate()
- self.immed = random.randint(0, 1<<11 -1)
+ self.immed = random.randint(0, (1<<11)-1)  # '-' binds tighter than '<<': unparenthesized this was 1<<10
return self
-
+
def cstr(self):
return (super(TwoRegImmedInstruction, self).cstr()
+ ', %su' % self.immed)
@@ -301,9 +306,9 @@ def generate(self):
self.kind = ShiftKind().generate()
self.distance = random.randint(0, (1<<5)-1 if self.isWord else (1<<6)-1)
return self
-
+
def cstr(self):
- return ('%s, Assembler::%s, %s);'
+ return ('%s, Assembler::%s, %s);'
% (ThreeRegInstruction.cstr(self),
self.kind.cstr(), self.distance))
@@ -314,9 +319,9 @@ def astr(self):
self.distance))
class AddSubCarryOp(ThreeRegInstruction):
-
+
def cstr(self):
- return ('%s);'
+ return ('%s);'
% (ThreeRegInstruction.cstr(self)))
class AddSubExtendedOp(ThreeRegInstruction):
@@ -332,76 +337,75 @@ def generate(self):
def cstr(self):
return (super(AddSubExtendedOp, self).cstr()
- + (", ext::" + AddSubExtendedOp.optNames[self.option]
+ + (", ext::" + AddSubExtendedOp.optNames[self.option]
+ ", " + str(self.amount) + ");"))
-
+
def astr(self):
return (super(AddSubExtendedOp, self).astr()
- + (", " + AddSubExtendedOp.optNames[self.option]
+ + (", " + AddSubExtendedOp.optNames[self.option]
+ " #" + str(self.amount)))
class AddSubImmOp(TwoRegImmedInstruction):
def cstr(self):
return super(AddSubImmOp, self).cstr() + ");"
-
+
class LogicalImmOp(AddSubImmOp):
# These tables are legal immediate logical operands
immediates32 \
- = [0x1, 0x3f, 0x1f0, 0x7e0,
- 0x1c00, 0x3ff0, 0x8000, 0x1e000,
- 0x3e000, 0x78000, 0xe0000, 0x100000,
- 0x1fffe0, 0x3fe000, 0x780000, 0x7ffff8,
- 0xff8000, 0x1800180, 0x1fffc00, 0x3c003c0,
- 0x3ffff00, 0x7c00000, 0x7fffe00, 0xf000f00,
- 0xfffe000, 0x18181818, 0x1ffc0000, 0x1ffffffe,
- 0x3f003f00, 0x3fffe000, 0x60006000, 0x7f807f80,
- 0x7ffffc00, 0x800001ff, 0x803fffff, 0x9f9f9f9f,
- 0xc0000fff, 0xc0c0c0c0, 0xe0000000, 0xe003e003,
- 0xe3ffffff, 0xf0000fff, 0xf0f0f0f0, 0xf80000ff,
- 0xf83ff83f, 0xfc00007f, 0xfc1fffff, 0xfe0001ff,
- 0xfe3fffff, 0xff003fff, 0xff800003, 0xff87ff87,
- 0xffc00fff, 0xffe0000f, 0xffefffef, 0xfff1fff1,
- 0xfff83fff, 0xfffc0fff, 0xfffe0fff, 0xffff3fff,
- 0xffffc007, 0xffffe1ff, 0xfffff80f, 0xfffffe07,
+ = [0x1, 0x3f, 0x1f0, 0x7e0,
+ 0x1c00, 0x3ff0, 0x8000, 0x1e000,
+ 0x3e000, 0x78000, 0xe0000, 0x100000,
+ 0x1fffe0, 0x3fe000, 0x780000, 0x7ffff8,
+ 0xff8000, 0x1800180, 0x1fffc00, 0x3c003c0,
+ 0x3ffff00, 0x7c00000, 0x7fffe00, 0xf000f00,
+ 0xfffe000, 0x18181818, 0x1ffc0000, 0x1ffffffe,
+ 0x3f003f00, 0x3fffe000, 0x60006000, 0x7f807f80,
+ 0x7ffffc00, 0x800001ff, 0x803fffff, 0x9f9f9f9f,
+ 0xc0000fff, 0xc0c0c0c0, 0xe0000000, 0xe003e003,
+ 0xe3ffffff, 0xf0000fff, 0xf0f0f0f0, 0xf80000ff,
+ 0xf83ff83f, 0xfc00007f, 0xfc1fffff, 0xfe0001ff,
+ 0xfe3fffff, 0xff003fff, 0xff800003, 0xff87ff87,
+ 0xffc00fff, 0xffe0000f, 0xffefffef, 0xfff1fff1,
+ 0xfff83fff, 0xfffc0fff, 0xfffe0fff, 0xffff3fff,
+ 0xffffc007, 0xffffe1ff, 0xfffff80f, 0xfffffe07,
0xffffffbf, 0xfffffffd]
immediates \
- = [0x1, 0x1f80, 0x3fff0, 0x3ffffc,
- 0x3fe0000, 0x1ffc0000, 0xf8000000, 0x3ffffc000,
- 0xffffffe00, 0x3ffffff800, 0xffffc00000, 0x3f000000000,
- 0x7fffffff800, 0x1fe000001fe0, 0x3ffffff80000, 0xc00000000000,
- 0x1ffc000000000, 0x3ffff0003ffff, 0x7ffffffe00000, 0xfffffffffc000,
- 0x1ffffffffffc00, 0x3fffffffffff00, 0x7ffffffffffc00, 0xffffffffff8000,
- 0x1ffffffff800000, 0x3fffffc03fffffc, 0x7fffc0000000000, 0xff80ff80ff80ff8,
- 0x1c00000000000000, 0x1fffffffffff0000, 0x3fffff803fffff80, 0x7fc000007fc00000,
- 0x8000000000000000, 0x803fffff803fffff, 0xc000007fc000007f, 0xe00000000000ffff,
- 0xe3ffffffffffffff, 0xf007f007f007f007, 0xf80003ffffffffff, 0xfc000003fc000003,
- 0xfe000000007fffff, 0xff00000000007fff, 0xff800000000003ff, 0xffc00000000000ff,
- 0xffe00000000003ff, 0xfff0000000003fff, 0xfff80000001fffff, 0xfffc0000fffc0000,
- 0xfffe003fffffffff, 0xffff3fffffffffff, 0xffffc0000007ffff, 0xffffe01fffffe01f,
- 0xfffff800000007ff, 0xfffffc0fffffffff, 0xffffff00003fffff, 0xffffffc0000007ff,
- 0xfffffff0000001ff, 0xfffffffc00003fff, 0xffffffff07ffffff, 0xffffffffe003ffff,
+ = [0x1, 0x1f80, 0x3fff0, 0x3ffffc,
+ 0x3fe0000, 0x1ffc0000, 0xf8000000, 0x3ffffc000,
+ 0xffffffe00, 0x3ffffff800, 0xffffc00000, 0x3f000000000,
+ 0x7fffffff800, 0x1fe000001fe0, 0x3ffffff80000, 0xc00000000000,
+ 0x1ffc000000000, 0x3ffff0003ffff, 0x7ffffffe00000, 0xfffffffffc000,
+ 0x1ffffffffffc00, 0x3fffffffffff00, 0x7ffffffffffc00, 0xffffffffff8000,
+ 0x1ffffffff800000, 0x3fffffc03fffffc, 0x7fffc0000000000, 0xff80ff80ff80ff8,
+ 0x1c00000000000000, 0x1fffffffffff0000, 0x3fffff803fffff80, 0x7fc000007fc00000,
+ 0x8000000000000000, 0x803fffff803fffff, 0xc000007fc000007f, 0xe00000000000ffff,
+ 0xe3ffffffffffffff, 0xf007f007f007f007, 0xf80003ffffffffff, 0xfc000003fc000003,
+ 0xfe000000007fffff, 0xff00000000007fff, 0xff800000000003ff, 0xffc00000000000ff,
+ 0xffe00000000003ff, 0xfff0000000003fff, 0xfff80000001fffff, 0xfffc0000fffc0000,
+ 0xfffe003fffffffff, 0xffff3fffffffffff, 0xffffc0000007ffff, 0xffffe01fffffe01f,
+ 0xfffff800000007ff, 0xfffffc0fffffffff, 0xffffff00003fffff, 0xffffffc0000007ff,
+ 0xfffffff0000001ff, 0xfffffffc00003fff, 0xffffffff07ffffff, 0xffffffffe003ffff,
0xfffffffffc01ffff, 0xffffffffffc00003, 0xfffffffffffc000f, 0xffffffffffffe07f]
def generate(self):
AddSubImmOp.generate(self)
self.immed = \
self.immediates32[random.randint(0, len(self.immediates32)-1)] \
- if self.isWord \
- else \
- self.immediates[random.randint(0, len(self.immediates)-1)]
-
+ if self.isWord else \
+ self.immediates[random.randint(0, len(self.immediates)-1)]
+
return self
-
+
def astr(self):
return (super(TwoRegImmedInstruction, self).astr()
+ ', #0x%x' % self.immed)
def cstr(self):
return super(AddSubImmOp, self).cstr() + "ll);"
-
+
class MultiOp():
def multipleForms(self):
@@ -422,9 +426,9 @@ def astr(self):
return Instruction.astr(self) + "%s"
class RegAndAbsOp(MultiOp, Instruction):
-
+
def multipleForms(self):
- if self.name() == "adrp":
+ if self.name() == "adrp":
# We can only test one form of adrp because anything other
# than "adrp ." requires relocs in the assembler output
return 1
@@ -434,11 +438,11 @@ def generate(self):
Instruction.generate(self)
self.reg = GeneralRegister().generate()
return self
-
+
def cstr(self):
if self.name() == "adrp":
return "__ _adrp(" + "%s, %s);" % (self.reg, "%s")
- return (super(RegAndAbsOp, self).cstr()
+ return (super(RegAndAbsOp, self).cstr()
+ "%s, %s);" % (self.reg, "%s"))
def astr(self):
@@ -446,14 +450,14 @@ def astr(self):
+ self.reg.astr(self.asmRegPrefix) + ", %s")
class RegImmAbsOp(RegAndAbsOp):
-
+
def cstr(self):
return (Instruction.cstr(self)
+ "%s, %s, %s);" % (self.reg, self.immed, "%s"))
def astr(self):
return (Instruction.astr(self)
- + ("%s, #%s, %s"
+ + ("%s, #%s, %s"
% (self.reg.astr(self.asmRegPrefix), self.immed, "%s")))
def generate(self):
@@ -462,7 +466,7 @@ def generate(self):
return self
class MoveWideImmOp(RegImmAbsOp):
-
+
def multipleForms(self):
return 0
@@ -472,8 +476,8 @@ def cstr(self):
def astr(self):
return (Instruction.astr(self)
- + ("%s, #%s, lsl %s"
- % (self.reg.astr(self.asmRegPrefix),
+ + ("%s, #%s, lsl %s"
+ % (self.reg.astr(self.asmRegPrefix),
self.immed, self.shift)))
def generate(self):
@@ -486,7 +490,7 @@ def generate(self):
return self
class BitfieldOp(TwoRegInstruction):
-
+
def cstr(self):
return (Instruction.cstr(self)
+ ("%s, %s, %s, %s);"
@@ -513,16 +517,16 @@ def generate(self):
def cstr(self):
return (ThreeRegInstruction.cstr(self)
+ (", %s);" % self.lsb))
-
+
def astr(self):
return (ThreeRegInstruction.astr(self)
+ (", #%s" % self.lsb))
-
+
class CondBranchOp(MultiOp, Instruction):
def cstr(self):
return "__ br(Assembler::" + self.name() + ", %s);"
-
+
def astr(self):
return "b." + self.name() + "\t%s"
@@ -530,10 +534,10 @@ class ImmOp(Instruction):
def cstr(self):
return "%s%s);" % (Instruction.cstr(self), self.immed)
-
+
def astr(self):
return Instruction.astr(self) + "#" + str(self.immed)
-
+
def generate(self):
- self.immed = random.randint(0, 1<<16 -1)
+ self.immed = random.randint(0, (1<<16)-1)  # '-' binds tighter than '<<': unparenthesized this was 1<<15
return self
@@ -542,6 +546,8 @@ class Op(Instruction):
def cstr(self):
return Instruction.cstr(self) + ");"
+ def astr(self):
+ return self.aname()  # assembler text is only what aname() yields; no operands are appended
class SystemOp(Instruction):
@@ -573,11 +579,11 @@ def generate(self):
return self
def cstr(self):
- return (super(ConditionalCompareOp, self).cstr() + ", "
+ return (super(ConditionalCompareOp, self).cstr() + ", "
+ "Assembler::" + conditionCodes[self.cond] + ");")
def astr(self):
- return (super(ConditionalCompareOp, self).astr() +
+ return (super(ConditionalCompareOp, self).astr() +
", " + conditionCodes[self.cond])
class ConditionalCompareImmedOp(Instruction):
@@ -596,33 +602,33 @@ def cstr(self):
+ "Assembler::" + conditionCodes[self.cond] + ");")
def astr(self):
- return (Instruction.astr(self)
- + self.reg.astr(self.asmRegPrefix)
+ return (Instruction.astr(self)
+ + self.reg.astr(self.asmRegPrefix)
+ ", #" + str(self.immed)
+ ", #" + str(self.immed2)
+ ", " + conditionCodes[self.cond])
class TwoRegOp(TwoRegInstruction):
-
+
def cstr(self):
return TwoRegInstruction.cstr(self) + ");"
class ThreeRegOp(ThreeRegInstruction):
-
+
def cstr(self):
return ThreeRegInstruction.cstr(self) + ");"
class FourRegMulOp(FourRegInstruction):
-
+
def cstr(self):
return FourRegInstruction.cstr(self) + ");"
def astr(self):
isMaddsub = self.name().startswith("madd") | self.name().startswith("msub")
midPrefix = self.asmRegPrefix if isMaddsub else "w"
- return (Instruction.astr(self)
- + self.reg[0].astr(self.asmRegPrefix)
- + ", " + self.reg[1].astr(midPrefix)
+ return (Instruction.astr(self)
+ + self.reg[0].astr(self.asmRegPrefix)
+ + ", " + self.reg[1].astr(midPrefix)
+ ", " + self.reg[2].astr(midPrefix)
+ ", " + self.reg[3].astr(self.asmRegPrefix))
@@ -638,8 +644,8 @@ def cstr(self):
+ "Assembler::" + conditionCodes[self.cond] + ");")
def astr(self):
- return (ThreeRegInstruction.astr(self)
- + ", " + conditionCodes[self.cond])
+ return (ThreeRegInstruction.astr(self)
+ + ", " + conditionCodes[self.cond])
class LoadStoreExclusiveOp(InstructionWithModes):
@@ -651,7 +657,7 @@ def astr(self):
result = self.aname() + '\t'
regs = list(self.regs)
index = regs.pop() # The last reg is the index register
- prefix = ('x' if (self.mode == 'x')
+ prefix = ('x' if (self.mode == 'x')
& ((self.name().startswith("ld"))
| (self.name().startswith("stlr"))) # Ewww :-(
else 'w')
@@ -698,17 +704,17 @@ def aname(self):
return self._name
class Address(object):
-
+
base_plus_unscaled_offset, pre, post, base_plus_reg, \
base_plus_scaled_offset, pcrel, post_reg, base_only = range(8)
- kinds = ["base_plus_unscaled_offset", "pre", "post", "base_plus_reg",
+ kinds = ["base_plus_unscaled_offset", "pre", "post", "base_plus_reg",
"base_plus_scaled_offset", "pcrel", "post_reg", "base_only"]
extend_kinds = ["uxtw", "lsl", "sxtw", "sxtx"]
@classmethod
def kindToStr(cls, i):
return cls.kinds[i]
-
+
def generate(self, kind, shift_distance):
self.kind = kind
self.base = GeneralRegister().generate()
@@ -738,7 +744,7 @@ def __str__(self):
Address.pcrel: "",
Address.base_plus_reg: "Address(%s, %s, Address::%s(%s))" \
% (self.base, self.index, self.extend_kind, self.shift_distance),
- Address.base_plus_scaled_offset:
+ Address.base_plus_scaled_offset:
"Address(%s, %s)" % (self.base, self.offset) } [self.kind]
if (self.kind == Address.pcrel):
result = ["__ pc()", "back", "forth"][self.offset]
@@ -758,7 +764,7 @@ def astr(self, prefix):
Address.base_only: "[%s]" % (self.base.astr(prefix)),
Address.pcrel: "",
Address.base_plus_reg: "[%s, %s, %s #%s]" \
- % (self.base.astr(prefix), self.index.astr(extend_prefix),
+ % (self.base.astr(prefix), self.index.astr(extend_prefix),
self.extend_kind, self.shift_distance),
Address.base_plus_scaled_offset: \
"[%s, %s]" \
@@ -767,7 +773,7 @@ def astr(self, prefix):
if (self.kind == Address.pcrel):
result = [".", "back", "forth"][self.offset]
return result
-
+
class LoadStoreOp(InstructionWithModes):
def __init__(self, args):
@@ -822,14 +828,14 @@ def aname(self):
class LoadStorePairOp(InstructionWithModes):
numRegs = 2
-
+
def __init__(self, args):
name, self.asmname, self.kind, mode = args
InstructionWithModes.__init__(self, name, mode)
self.offset = random.randint(-1<<4, 1<<4-1) << 4
-
+
def generate(self):
- self.reg = [OperandFactory.create(self.mode).generate()
+ self.reg = [OperandFactory.create(self.mode).generate()
for i in range(self.numRegs)]
self.base = OperandFactory.create('x').generate()
kindStr = Address.kindToStr(self.kind);
@@ -846,8 +852,8 @@ def astr(self):
address = ["[%s, #%s]", "[%s, #%s]!", "[%s], #%s"][self.kind]
address = address % (self.base.astr('x'), self.offset)
result = "%s\t%s, %s, %s" \
- % (self.asmname,
- self.reg[0].astr(self.asmRegPrefix),
+ % (self.asmname,
+ self.reg[0].astr(self.asmRegPrefix),
self.reg[1].astr(self.asmRegPrefix), address)
return result
@@ -875,7 +881,7 @@ def __init__(self, args):
Instruction.__init__(self, name)
def generate(self):
- self.reg = [OperandFactory.create(self.modes[i]).generate()
+ self.reg = [OperandFactory.create(self.modes[i]).generate()
for i in range(self.numRegs)]
return self
@@ -884,7 +890,7 @@ def cstr(self):
return (formatStr
% tuple([Instruction.cstr(self)] +
[str(self.reg[i]) for i in range(self.numRegs)])) # Yowza
-
+
def astr(self):
formatStr = "%s%s" + ''.join([", %s" for i in range(1, self.numRegs)])
return (formatStr
@@ -985,7 +991,7 @@ def astr(self):
moreReg +
[str(self.reg[2]) + self._width.astr()])
-class LdStSIMDOp(Instruction):
+class LdStNEONOp(Instruction):
def __init__(self, args):
self._name, self.regnum, self.arrangement, self.addresskind = args
@@ -1004,7 +1010,7 @@ def generate(self):
return self
def cstr(self):
- buf = super(LdStSIMDOp, self).cstr() + str(self._firstSIMDreg)
+ buf = super(LdStNEONOp, self).cstr() + str(self._firstSIMDreg)
current = self._firstSIMDreg
for cnt in range(1, self.regnum):
buf = '%s, %s' % (buf, current.nextReg())
@@ -1022,6 +1028,57 @@ def astr(self):
def aname(self):
return self._name
+class NEONReduceInstruction(Instruction):  # test op with one destination and one source SIMD register
+ def __init__(self, args):
+ self._name, self.insname, self.arrangement = args  # args is unpacked as (_name, insname, arrangement)
+
+ def generate(self):
+ current = FloatRegister().generate()
+ self.dstSIMDreg = current
+ self.srcSIMDreg = current.nextReg()  # source is the register nextReg() returns for the destination
+ return self
+
+ def cstr(self):
+ buf = Instruction.cstr(self) + str(self.dstSIMDreg)
+ buf = '%s, __ T%s, %s);' % (buf, self.arrangement, self.srcSIMDreg)  # appends ", __ T<arrangement>, <src>);"
+ return buf
+
+ def astr(self):
+ buf = '%s\t%s' % (self.insname, self.dstSIMDreg.astr(self.arrangement[-1].lower()))  # dst.astr() receives the arrangement's last character, lower-cased (e.g. "8B" -> "b")
+ buf = '%s, %s.%s' % (buf, self.srcSIMDreg, self.arrangement)  # source is written as "<src>.<arrangement>"
+ return buf
+
+ def aname(self):  # note: astr() above prints insname, while aname() reports _name
+ return self._name
+
+class CommonNEONInstruction(Instruction):  # shared base for NEON test ops; reads self.numRegs, which TwoRegNEONOp / ThreeRegNEONOp define
+ def __init__(self, args):
+ self._name, self.insname, self.arrangement = args  # args is unpacked as (_name, insname, arrangement)
+
+ def generate(self):
+ self._firstSIMDreg = FloatRegister().generate()  # only the first register is stored; the rest are derived via nextReg()
+ return self
+
+ def cstr(self):
+ buf = Instruction.cstr(self) + str(self._firstSIMDreg)
+ buf = '%s, __ T%s' % (buf, self.arrangement)  # the arrangement follows the first register
+ current = self._firstSIMDreg
+ for cnt in range(1, self.numRegs):  # append the remaining numRegs - 1 registers
+ buf = '%s, %s' % (buf, current.nextReg())
+ current = current.nextReg()
+ return '%s);' % (buf)
+
+ def astr(self):
+ buf = '%s\t%s.%s' % (self.insname, self._firstSIMDreg, self.arrangement)  # every operand is written as "<reg>.<arrangement>"
+ current = self._firstSIMDreg
+ for cnt in range(1, self.numRegs):  # same register walk as cstr()
+ buf = '%s, %s.%s' % (buf, current.nextReg(), self.arrangement)
+ current = current.nextReg()
+ return buf
+
+ def aname(self):  # note: astr() above prints insname, while aname() reports _name
+ return self._name
+
class SHA512SIMDOp(Instruction):
def generate(self):
@@ -1053,6 +1110,44 @@ def astr(self):
+ ('\t%s, %s, %s.2D' % (self.reg[0].astr("q"),
self.reg[1].astr("q"), self.reg[2].astr("v"))))
+class SHA3SIMDOp(Instruction):  # test ops for the names eor3, bcax, rax1 and xar
+
+ def generate(self):
+ if ((self._name == 'eor3') or (self._name == 'bcax')):  # eor3/bcax take four registers
+ self.reg = [FloatRegister().generate(), FloatRegister().generate(),
+ FloatRegister().generate(), FloatRegister().generate()]
+ else:  # every other name takes three registers
+ self.reg = [FloatRegister().generate(), FloatRegister().generate(),
+ FloatRegister().generate()]
+ if (self._name == 'xar'):
+ self.imm6 = random.randint(0, 63)  # xar also carries an immediate in 0..63
+ return self
+
+ def cstr(self):
+ if ((self._name == 'eor3') or (self._name == 'bcax')):  # four registers, T16B arrangement
+ return (super(SHA3SIMDOp, self).cstr()
+ + ('%s, __ T16B, %s, %s, %s);' % (self.reg[0], self.reg[1], self.reg[2], self.reg[3])))
+ elif (self._name == 'rax1'):  # three registers, T2D arrangement
+ return (super(SHA3SIMDOp, self).cstr()
+ + ('%s, __ T2D, %s, %s);' % (self.reg[0], self.reg[1], self.reg[2])))
+ else:  # remaining case (xar): three registers plus imm6
+ return (super(SHA3SIMDOp, self).cstr()
+ + ('%s, __ T2D, %s, %s, %s);' % (self.reg[0], self.reg[1], self.reg[2], self.imm6)))
+
+ def astr(self):  # each branch formats its operands first, then appends them to the base astr()
+ if ((self._name == 'eor3') or (self._name == 'bcax')):
+ return (super(SHA3SIMDOp, self).astr()
+ + ('\t%s.16B, %s.16B, %s.16B, %s.16B' % (self.reg[0].astr("v"), self.reg[1].astr("v"),
+ self.reg[2].astr("v"), self.reg[3].astr("v"))))
+ elif (self._name == 'rax1'):
+ return (super(SHA3SIMDOp, self).astr()
+ + ('\t%s.2D, %s.2D, %s.2D' % (self.reg[0].astr("v"), self.reg[1].astr("v"),
+ self.reg[2].astr("v"))))
+ else:
+ return (super(SHA3SIMDOp, self).astr()
+ + ('\t%s.2D, %s.2D, %s.2D, #%s' % (self.reg[0].astr("v"), self.reg[1].astr("v"),
+ self.reg[2].astr("v"), self.imm6)))
+
class LSEOp(Instruction):
def __init__(self, args):
self._name, self.asmname, self.size, self.suffix = args
@@ -1097,6 +1192,12 @@ def aname(self):
def cname(self):
return self._cname
+class TwoRegNEONOp(CommonNEONInstruction):  # CommonNEONInstruction with two register operands
+ numRegs = 2  # read by the inherited cstr()/astr() loops
+
+class ThreeRegNEONOp(TwoRegNEONOp):  # same as TwoRegNEONOp, but with three register operands
+ numRegs = 3  # overrides the inherited value of 2
+
class SpecialCases(Instruction):
def __init__(self, data):
self._name = data[0]
@@ -1129,6 +1230,7 @@ def generate(kind, names):
outfile = open("aarch64ops.s", "w")
+# Use a fixed seed so that regenerating the assembler test code produces minimal changes
random.seed(0)
print "// BEGIN Generated code -- do not edit"
@@ -1139,18 +1241,18 @@ def generate(kind, names):
outfile.write("back:\n")
-generate (ArithOp,
+generate (ArithOp,
[ "add", "sub", "adds", "subs",
"addw", "subw", "addsw", "subsw",
"and", "orr", "eor", "ands",
- "andw", "orrw", "eorw", "andsw",
- "bic", "orn", "eon", "bics",
+ "andw", "orrw", "eorw", "andsw",
+ "bic", "orn", "eon", "bics",
"bicw", "ornw", "eonw", "bicsw" ])
-generate (AddSubImmOp,
+generate (AddSubImmOp,
[ "addw", "addsw", "subw", "subsw",
"add", "adds", "sub", "subs"])
-generate (LogicalImmOp,
+generate (LogicalImmOp,
[ "andw", "orrw", "eorw", "andsw",
"and", "orr", "eor", "ands"])
@@ -1191,26 +1293,26 @@ def generate(kind, names):
["stxp", mode, 4], ["stlxp", mode, 4]])
for kind in range(6):
- print "\n// " + Address.kindToStr(kind),
+ sys.stdout.write("\n// " + Address.kindToStr(kind))
if kind != Address.pcrel:
- generate (LoadStoreOp,
- [["str", "str", kind, "x"], ["str", "str", kind, "w"],
+ generate (LoadStoreOp,
+ [["str", "str", kind, "x"], ["str", "str", kind, "w"],
["str", "strb", kind, "b"], ["str", "strh", kind, "h"],
- ["ldr", "ldr", kind, "x"], ["ldr", "ldr", kind, "w"],
+ ["ldr", "ldr", kind, "x"], ["ldr", "ldr", kind, "w"],
["ldr", "ldrb", kind, "b"], ["ldr", "ldrh", kind, "h"],
- ["ldrsb", "ldrsb", kind, "x"], ["ldrsh", "ldrsh", kind, "x"],
+ ["ldrsb", "ldrsb", kind, "x"], ["ldrsh", "ldrsh", kind, "x"],
["ldrsh", "ldrsh", kind, "w"], ["ldrsw", "ldrsw", kind, "x"],
- ["ldr", "ldr", kind, "d"], ["ldr", "ldr", kind, "s"],
- ["str", "str", kind, "d"], ["str", "str", kind, "s"],
+ ["ldr", "ldr", kind, "d"], ["ldr", "ldr", kind, "s"],
+ ["str", "str", kind, "d"], ["str", "str", kind, "s"],
])
else:
- generate (LoadStoreOp,
+ generate (LoadStoreOp,
[["ldr", "ldr", kind, "x"], ["ldr", "ldr", kind, "w"]])
-
+
for kind in (Address.base_plus_unscaled_offset, Address.pcrel, Address.base_plus_reg, \
Address.base_plus_scaled_offset):
- generate (LoadStoreOp,
+ generate (LoadStoreOp,
[["prfm", "prfm\tPLDL1KEEP,", kind, "x"]])
generate(AddSubCarryOp, ["adcw", "adcsw", "sbcw", "sbcsw", "adc", "adcs", "sbc", "sbcs"])
@@ -1219,32 +1321,32 @@ def generate(kind, names):
generate(ConditionalCompareOp, ["ccmnw", "ccmpw", "ccmn", "ccmp"])
generate(ConditionalCompareImmedOp, ["ccmnw", "ccmpw", "ccmn", "ccmp"])
-generate(ConditionalSelectOp,
+generate(ConditionalSelectOp,
["cselw", "csincw", "csinvw", "csnegw", "csel", "csinc", "csinv", "csneg"])
-generate(TwoRegOp,
- ["rbitw", "rev16w", "revw", "clzw", "clsw", "rbit",
+generate(TwoRegOp,
+ ["rbitw", "rev16w", "revw", "clzw", "clsw", "rbit",
"rev16", "rev32", "rev", "clz", "cls"])
-generate(ThreeRegOp,
- ["udivw", "sdivw", "lslvw", "lsrvw", "asrvw", "rorvw", "udiv", "sdiv",
+generate(ThreeRegOp,
+ ["udivw", "sdivw", "lslvw", "lsrvw", "asrvw", "rorvw", "udiv", "sdiv",
"lslv", "lsrv", "asrv", "rorv", "umulh", "smulh"])
-generate(FourRegMulOp,
+generate(FourRegMulOp,
["maddw", "msubw", "madd", "msub", "smaddl", "smsubl", "umaddl", "umsubl"])
-generate(ThreeRegFloatOp,
- [["fmuls", "sss"], ["fdivs", "sss"], ["fadds", "sss"], ["fsubs", "sss"],
+generate(ThreeRegFloatOp,
+ [["fmuls", "sss"], ["fdivs", "sss"], ["fadds", "sss"], ["fsubs", "sss"],
["fmuls", "sss"],
- ["fmuld", "ddd"], ["fdivd", "ddd"], ["faddd", "ddd"], ["fsubd", "ddd"],
+ ["fmuld", "ddd"], ["fdivd", "ddd"], ["faddd", "ddd"], ["fsubd", "ddd"],
["fmuld", "ddd"]])
-generate(FourRegFloatOp,
- [["fmadds", "ssss"], ["fmsubs", "ssss"], ["fnmadds", "ssss"], ["fnmadds", "ssss"],
+generate(FourRegFloatOp,
+ [["fmadds", "ssss"], ["fmsubs", "ssss"], ["fnmadds", "ssss"], ["fnmadds", "ssss"],
["fmaddd", "dddd"], ["fmsubd", "dddd"], ["fnmaddd", "dddd"], ["fnmaddd", "dddd"],])
-generate(TwoRegFloatOp,
- [["fmovs", "ss"], ["fabss", "ss"], ["fnegs", "ss"], ["fsqrts", "ss"],
+generate(TwoRegFloatOp,
+ [["fmovs", "ss"], ["fabss", "ss"], ["fnegs", "ss"], ["fsqrts", "ss"],
["fcvts", "ds"],
- ["fmovd", "dd"], ["fabsd", "dd"], ["fnegd", "dd"], ["fsqrtd", "dd"],
+ ["fmovd", "dd"], ["fabsd", "dd"], ["fnegd", "dd"], ["fsqrtd", "dd"],
["fcvtd", "sd"],
])
@@ -1255,18 +1357,18 @@ def generate(kind, names):
["fmovs", "fmov", "ws"], ["fmovd", "fmov", "xd"],
["fmovs", "fmov", "sw"], ["fmovd", "fmov", "dx"]])
-generate(TwoRegFloatOp, [["fcmps", "ss"], ["fcmpd", "dd"],
+generate(TwoRegFloatOp, [["fcmps", "ss"], ["fcmpd", "dd"],
["fcmps", "sz"], ["fcmpd", "dz"]])
for kind in range(3):
generate(LoadStorePairOp, [["stp", "stp", kind, "w"], ["ldp", "ldp", kind, "w"],
- ["ldpsw", "ldpsw", kind, "x"],
+ ["ldpsw", "ldpsw", kind, "x"],
["stp", "stp", kind, "x"], ["ldp", "ldp", kind, "x"]
])
generate(LoadStorePairOp, [["stnp", "stnp", 0, "w"], ["ldnp", "ldnp", 0, "w"],
["stnp", "stnp", 0, "x"], ["ldnp", "ldnp", 0, "x"]])
-generate(LdStSIMDOp, [["ld1", 1, "8B", Address.base_only],
+generate(LdStNEONOp, [["ld1", 1, "8B", Address.base_only],
["ld1", 2, "16B", Address.post],
["ld1", 3, "1D", Address.post_reg],
["ld1", 4, "8H", Address.post],
@@ -1290,7 +1392,92 @@ def generate(kind, names):
["ld4r", 4, "2S", Address.post_reg],
])
-generate(SHA512SIMDOp, ["sha512h", "sha512h2", "sha512su0", "sha512su1"])
+generate(NEONReduceInstruction,
+ [["addv", "addv", "8B"], ["addv", "addv", "16B"],
+ ["addv", "addv", "4H"], ["addv", "addv", "8H"],
+ ["addv", "addv", "4S"],
+ ["smaxv", "smaxv", "8B"], ["smaxv", "smaxv", "16B"],
+ ["smaxv", "smaxv", "4H"], ["smaxv", "smaxv", "8H"],
+ ["smaxv", "smaxv", "4S"], ["fmaxv", "fmaxv", "4S"],
+ ["sminv", "sminv", "8B"], ["sminv", "sminv", "16B"],
+ ["sminv", "sminv", "4H"], ["sminv", "sminv", "8H"],
+ ["sminv", "sminv", "4S"], ["fminv", "fminv", "4S"],
+ ])
+
+generate(TwoRegNEONOp,
+ [["absr", "abs", "8B"], ["absr", "abs", "16B"],
+ ["absr", "abs", "4H"], ["absr", "abs", "8H"],
+ ["absr", "abs", "2S"], ["absr", "abs", "4S"],
+ ["absr", "abs", "2D"],
+ ["fabs", "fabs", "2S"], ["fabs", "fabs", "4S"],
+ ["fabs", "fabs", "2D"],
+ ["fneg", "fneg", "2S"], ["fneg", "fneg", "4S"],
+ ["fneg", "fneg", "2D"],
+ ["fsqrt", "fsqrt", "2S"], ["fsqrt", "fsqrt", "4S"],
+ ["fsqrt", "fsqrt", "2D"],
+ ["notr", "not", "8B"], ["notr", "not", "16B"],
+ ])
+
+generate(ThreeRegNEONOp,
+ [["andr", "and", "8B"], ["andr", "and", "16B"],
+ ["orr", "orr", "8B"], ["orr", "orr", "16B"],
+ ["eor", "eor", "8B"], ["eor", "eor", "16B"],
+ ["addv", "add", "8B"], ["addv", "add", "16B"],
+ ["addv", "add", "4H"], ["addv", "add", "8H"],
+ ["addv", "add", "2S"], ["addv", "add", "4S"],
+ ["addv", "add", "2D"],
+ ["fadd", "fadd", "2S"], ["fadd", "fadd", "4S"],
+ ["fadd", "fadd", "2D"],
+ ["subv", "sub", "8B"], ["subv", "sub", "16B"],
+ ["subv", "sub", "4H"], ["subv", "sub", "8H"],
+ ["subv", "sub", "2S"], ["subv", "sub", "4S"],
+ ["subv", "sub", "2D"],
+ ["fsub", "fsub", "2S"], ["fsub", "fsub", "4S"],
+ ["fsub", "fsub", "2D"],
+ ["mulv", "mul", "8B"], ["mulv", "mul", "16B"],
+ ["mulv", "mul", "4H"], ["mulv", "mul", "8H"],
+ ["mulv", "mul", "2S"], ["mulv", "mul", "4S"],
+ ["fmul", "fmul", "2S"], ["fmul", "fmul", "4S"],
+ ["fmul", "fmul", "2D"],
+ ["mlav", "mla", "4H"], ["mlav", "mla", "8H"],
+ ["mlav", "mla", "2S"], ["mlav", "mla", "4S"],
+ ["fmla", "fmla", "2S"], ["fmla", "fmla", "4S"],
+ ["fmla", "fmla", "2D"],
+ ["mlsv", "mls", "4H"], ["mlsv", "mls", "8H"],
+ ["mlsv", "mls", "2S"], ["mlsv", "mls", "4S"],
+ ["fmls", "fmls", "2S"], ["fmls", "fmls", "4S"],
+ ["fmls", "fmls", "2D"],
+ ["fdiv", "fdiv", "2S"], ["fdiv", "fdiv", "4S"],
+ ["fdiv", "fdiv", "2D"],
+ ["maxv", "smax", "8B"], ["maxv", "smax", "16B"],
+ ["maxv", "smax", "4H"], ["maxv", "smax", "8H"],
+ ["maxv", "smax", "2S"], ["maxv", "smax", "4S"],
+ ["fmax", "fmax", "2S"], ["fmax", "fmax", "4S"],
+ ["fmax", "fmax", "2D"],
+ ["minv", "smin", "8B"], ["minv", "smin", "16B"],
+ ["minv", "smin", "4H"], ["minv", "smin", "8H"],
+ ["minv", "smin", "2S"], ["minv", "smin", "4S"],
+ ["fmin", "fmin", "2S"], ["fmin", "fmin", "4S"],
+ ["fmin", "fmin", "2D"],
+ ["cmeq", "cmeq", "8B"], ["cmeq", "cmeq", "16B"],
+ ["cmeq", "cmeq", "4H"], ["cmeq", "cmeq", "8H"],
+ ["cmeq", "cmeq", "2S"], ["cmeq", "cmeq", "4S"],
+ ["cmeq", "cmeq", "2D"],
+ ["fcmeq", "fcmeq", "2S"], ["fcmeq", "fcmeq", "4S"],
+ ["fcmeq", "fcmeq", "2D"],
+ ["cmgt", "cmgt", "8B"], ["cmgt", "cmgt", "16B"],
+ ["cmgt", "cmgt", "4H"], ["cmgt", "cmgt", "8H"],
+ ["cmgt", "cmgt", "2S"], ["cmgt", "cmgt", "4S"],
+ ["cmgt", "cmgt", "2D"],
+ ["fcmgt", "fcmgt", "2S"], ["fcmgt", "fcmgt", "4S"],
+ ["fcmgt", "fcmgt", "2D"],
+ ["cmge", "cmge", "8B"], ["cmge", "cmge", "16B"],
+ ["cmge", "cmge", "4H"], ["cmge", "cmge", "8H"],
+ ["cmge", "cmge", "2S"], ["cmge", "cmge", "4S"],
+ ["cmge", "cmge", "2D"],
+ ["fcmge", "fcmge", "2S"], ["fcmge", "fcmge", "4S"],
+ ["fcmge", "fcmge", "2D"],
+ ])
generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);", "ccmn\txzr, xzr, #3, LE"],
["ccmnw", "__ ccmnw(zr, zr, 5u, Assembler::EQ);", "ccmn\twzr, wzr, #5, EQ"],
@@ -1344,9 +1531,9 @@ def generate(kind, names):
])
print "\n// FloatImmediateOp"
-for float in ("2.0", "2.125", "4.0", "4.25", "8.0", "8.5", "16.0", "17.0", "0.125",
- "0.1328125", "0.25", "0.265625", "0.5", "0.53125", "1.0", "1.0625",
- "-2.0", "-2.125", "-4.0", "-4.25", "-8.0", "-8.5", "-16.0", "-17.0",
+for float in ("2.0", "2.125", "4.0", "4.25", "8.0", "8.5", "16.0", "17.0", "0.125",
+ "0.1328125", "0.25", "0.265625", "0.5", "0.53125", "1.0", "1.0625",
+ "-2.0", "-2.125", "-4.0", "-4.25", "-8.0", "-8.5", "-16.0", "-17.0",
"-0.125", "-0.1328125", "-0.25", "-0.265625", "-0.5", "-0.53125", "-1.0", "-1.0625"):
astr = "fmov d0, #" + float
cstr = "__ fmovd(v0, " + float + ");"
@@ -1366,6 +1553,11 @@ def generate(kind, names):
["ldumin", "ldumin", size, suffix],
["ldumax", "ldumax", size, suffix]]);
+# ARMv8.2A
+generate(SHA3SIMDOp, ["bcax", "eor3", "rax1", "xar"])
+
+generate(SHA512SIMDOp, ["sha512h", "sha512h2", "sha512su0", "sha512su1"])
+
generate(SVEVectorOp, [["add", "ZZZ"],
["sub", "ZZZ"],
["fadd", "ZZZ"],
@@ -1414,16 +1606,11 @@ def generate(kind, names):
outfile.close()
-import subprocess
-import sys
-
-# compile for sve with 8.1 and sha2 because of lse atomics and sha512 crypto extension.
-subprocess.check_call([AARCH64_AS, "-march=armv8.1-a+sha2+sve", "aarch64ops.s", "-o", "aarch64ops.o"])
+# Assemble for ARMv8.2-A with the SHA3 and SVE extensions enabled, as required by the SHA3 crypto instructions.
+subprocess.check_call([AARCH64_AS, "-march=armv8.2-a+sha3+sve", "aarch64ops.s", "-o", "aarch64ops.o"])
print
-print "/*",
-sys.stdout.flush()
-subprocess.check_call([AARCH64_OBJDUMP, "-d", "aarch64ops.o"])
+print "/*"
print "*/"
subprocess.check_call([AARCH64_OBJCOPY, "-O", "binary", "-j", ".text", "aarch64ops.o", "aarch64ops.bin"])
@@ -1444,4 +1631,7 @@ def generate(kind, names):
print "\n };"
print "// END Generated code -- do not edit"
+infile.close()
+for f in ["aarch64ops.s", "aarch64ops.o", "aarch64ops.bin"]:
+ os.remove(f)
diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad
index ede4040491e..ff82cd08cc1 100644
--- a/src/hotspot/cpu/aarch64/aarch64.ad
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
@@ -614,9 +614,7 @@ alloc_class chunk3(RFLAGS);
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
-// 2) reg_class compiler_method_reg ( /* as def'd in frame section */ )
-// 2) reg_class interpreter_method_reg ( /* as def'd in frame section */ )
-// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
+// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for all 32 bit general purpose registers
@@ -1755,7 +1753,9 @@ int MachCallDynamicJavaNode::ret_addr_offset()
int MachCallRuntimeNode::ret_addr_offset() {
// for generated stubs the call will be
- // far_call(addr)
+ // bl(addr)
+ // or with far branches
+ // bl(trampoline_stub)
// for real runtime callouts it will be six instructions
// see aarch64_enc_java_to_runtime
// adr(rscratch2, retaddr)
@@ -1764,7 +1764,7 @@ int MachCallRuntimeNode::ret_addr_offset() {
// blr(rscratch1)
CodeBlob *cb = CodeCache::find_blob(_entry_point);
if (cb) {
- return MacroAssembler::far_branch_size();
+ return 1 * NativeInstruction::instruction_size;
} else {
return 6 * NativeInstruction::instruction_size;
}
@@ -1966,9 +1966,10 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
}
if (do_polling() && C->is_method_compilation()) {
- st->print("# touch polling page\n\t");
- st->print("ldr rscratch1, [rthread],#polling_page_offset\n\t");
- st->print("ldr zr, [rscratch1]");
+ st->print("# test polling word\n\t");
+ st->print("ldr rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
+ st->print("cmp sp, rscratch1\n\t");
+ st->print("bhi #slow_path");
}
}
#endif
@@ -1985,7 +1986,13 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
}
if (do_polling() && C->is_method_compilation()) {
- __ fetch_and_read_polling_page(rscratch1, relocInfo::poll_return_type);
+ Label dummy_label;
+ Label* code_stub = &dummy_label;
+ if (!C->output()->in_scratch_emit_size()) {
+ code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
+ }
+ __ relocate(relocInfo::poll_return_type);
+ __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
}
}
@@ -2403,6 +2410,12 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
break;
case Op_MulVL:
return false;
+ case Op_VectorLoadShuffle:
+ case Op_VectorRearrange:
+ if (vlen < 4) {
+ return false;
+ }
+ break;
default:
break;
}
@@ -2414,6 +2427,10 @@ const bool Matcher::has_predicated_vectors(void) {
return UseSVE > 0;
}
+bool Matcher::supports_vector_variable_shifts(void) {
+ return true;
+}
+
const int Matcher::float_pressure(int default_pressure_threshold) {
return default_pressure_threshold;
}
@@ -2459,11 +2476,18 @@ const int Matcher::min_vector_size(const BasicType bt) {
if ((UseSVE > 0) && (MaxVectorSize >= 16)) {
// Currently vector length less than SVE vector register size is not supported.
return max_size;
- } else {
- // For the moment limit the vector size to 8 bytes with NEON.
+ } else { // NEON
+ // Limit the vector size to 8 bytes
int size = 8 / type2aelembytes(bt);
+ if (bt == T_BYTE) {
+ // To support vector api shuffle/rearrange.
+ size = 4;
+ } else if (bt == T_BOOLEAN) {
+ // To support vector api load/store mask.
+ size = 2;
+ }
if (size < 2) size = 2;
- return size;
+ return MIN2(size,max_size);
}
}
@@ -2482,6 +2506,9 @@ const uint Matcher::vector_ideal_reg(int len) {
return Op_VecA;
}
switch(len) {
+ // For 16-bit/32-bit mask vector, reuse VecD.
+ case 2:
+ case 4:
case 8: return Op_VecD;
case 16: return Op_VecX;
}
@@ -2581,11 +2608,6 @@ const bool Matcher::rematerialize_float_constants = false;
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;
-// No-op on amd64
-void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
- Unimplemented();
-}
-
// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;
@@ -3124,6 +3146,12 @@ encode %{
// END Non-volatile memory access
// Vector loads and stores
+ enc_class aarch64_enc_ldrvH(vecD dst, memory mem) %{
+ FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+ loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::H,
+ $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+ %}
+
enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
FloatRegister dst_reg = as_FloatRegister($dst$$reg);
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
@@ -3142,6 +3170,12 @@ encode %{
$mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
%}
+ enc_class aarch64_enc_strvH(vecD src, memory mem) %{
+ FloatRegister src_reg = as_FloatRegister($src$$reg);
+ loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::H,
+ $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+ %}
+
enc_class aarch64_enc_strvS(vecD src, memory mem) %{
FloatRegister src_reg = as_FloatRegister($src$$reg);
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
@@ -3733,12 +3767,19 @@ encode %{
if (!_method) {
// A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
+ if (call == NULL) {
+ ciEnv::current()->record_failure("CodeCache is full");
+ return;
+ }
} else {
int method_index = resolved_method_index(cbuf);
RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
: static_call_Relocation::spec(method_index);
call = __ trampoline_call(Address(addr, rspec), &cbuf);
-
+ if (call == NULL) {
+ ciEnv::current()->record_failure("CodeCache is full");
+ return;
+ }
// Emit stub for static call
address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
if (stub == NULL) {
@@ -3746,10 +3787,8 @@ encode %{
return;
}
}
- if (call == NULL) {
- ciEnv::current()->record_failure("CodeCache is full");
- return;
- } else if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
+
+ if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
// Only non uncommon_trap calls need to reinitialize ptrue.
if (uncommon_trap_request() == 0) {
__ reinitialize_ptrue();
@@ -4051,9 +4090,6 @@ frame %{
// Inline Cache Register or Method for I2C.
inline_cache_reg(R12);
- // Method Register when calling interpreter.
- interpreter_method_reg(R12);
-
// Number of stack slots consumed by locking an object
sync_stack_slots(2);
@@ -4245,6 +4281,26 @@ operand immI_31()
interface(CONST_INTER);
%}
+operand immI_2()
+%{
+ predicate(n->get_int() == 2);
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immI_4()
+%{
+ predicate(n->get_int() == 4);
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
operand immI_8()
%{
predicate(n->get_int() == 8);
@@ -5621,16 +5677,6 @@ operand inline_cache_RegP(iRegP reg)
interface(REG_INTER);
%}
-operand interpreter_method_RegP(iRegP reg)
-%{
- constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_reg
- match(reg);
- match(iRegPNoSp);
- op_cost(0);
- format %{ %}
- interface(REG_INTER);
-%}
-
// Thread Register
operand thread_RegP(iRegP reg)
%{
@@ -11215,6 +11261,7 @@ instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
%}
// BEGIN This section of the file is automatically generated. Do not edit --------------
+// This section is generated from aarch64_ad.m4
// This pattern is automatically generated from aarch64_ad.m4
@@ -14685,7 +14732,11 @@ instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlag
format %{ "ClearArray $cnt, $base" %}
ins_encode %{
- __ zero_words($base$$Register, $cnt$$Register);
+ address tpc = __ zero_words($base$$Register, $cnt$$Register);
+ if (tpc == NULL) {
+ ciEnv::current()->record_failure("CodeCache is full");
+ return;
+ }
%}
ins_pipe(pipe_class_memory);
@@ -15963,8 +16014,8 @@ instruct CallStaticJavaDirect(method meth)
format %{ "call,static $meth \t// ==> " %}
- ins_encode( aarch64_enc_java_static_call(meth),
- aarch64_enc_call_epilog );
+ ins_encode(aarch64_enc_java_static_call(meth),
+ aarch64_enc_call_epilog);
ins_pipe(pipe_class_call);
%}
@@ -15982,8 +16033,8 @@ instruct CallDynamicJavaDirect(method meth)
format %{ "CALL,dynamic $meth \t// ==> " %}
- ins_encode( aarch64_enc_java_dynamic_call(meth),
- aarch64_enc_call_epilog );
+ ins_encode(aarch64_enc_java_dynamic_call(meth),
+ aarch64_enc_call_epilog);
ins_pipe(pipe_class_call);
%}
@@ -16369,15 +16420,16 @@ instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
ins_pipe(pipe_class_memory);
%}
-instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
+instruct string_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
iRegINoSp tmp3, rFlagsReg cr)
%{
match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U);
effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
- format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %}
+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %}
ins_encode %{
__ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
@@ -16387,6 +16439,25 @@ instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
ins_pipe(pipe_class_memory);
%}
+instruct stringL_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
+ iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
+ iRegINoSp tmp3, rFlagsReg cr)
+%{
+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L);
+ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
+ TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+
+ format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result" %}
+
+ ins_encode %{
+ __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
+ $result$$Register, $tmp1$$Register, $tmp2$$Register,
+ $tmp3$$Register);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
iRegI_R0 result, rFlagsReg cr)
%{
@@ -16429,10 +16500,14 @@ instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
format %{ "Array Equals $ary1,ary2 -> $result // KILL $tmp" %}
ins_encode %{
- __ arrays_equals($ary1$$Register, $ary2$$Register,
- $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
- $result$$Register, $tmp$$Register, 1);
- %}
+ address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register,
+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
+ $result$$Register, $tmp$$Register, 1);
+ if (tpc == NULL) {
+ ciEnv::current()->record_failure("CodeCache is full");
+ return;
+ }
+ %}
ins_pipe(pipe_class_memory);
%}
@@ -16446,9 +16521,13 @@ instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
format %{ "Array Equals $ary1,ary2 -> $result // KILL $tmp" %}
ins_encode %{
- __ arrays_equals($ary1$$Register, $ary2$$Register,
- $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
- $result$$Register, $tmp$$Register, 2);
+ address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register,
+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
+ $result$$Register, $tmp$$Register, 2);
+ if (tpc == NULL) {
+ ciEnv::current()->record_failure("CodeCache is full");
+ return;
+ }
%}
ins_pipe(pipe_class_memory);
%}
@@ -16459,7 +16538,11 @@ instruct has_negatives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg c
effect(USE_KILL ary1, USE_KILL len, KILL cr);
format %{ "has negatives byte[] $ary1,$len -> $result" %}
ins_encode %{
- __ has_negatives($ary1$$Register, $len$$Register, $result$$Register);
+ address tpc = __ has_negatives($ary1$$Register, $len$$Register, $result$$Register);
+ if (tpc == NULL) {
+ ciEnv::current()->record_failure("CodeCache is full");
+ return;
+ }
%}
ins_pipe( pipe_slow );
%}
@@ -16492,8 +16575,13 @@ instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len
format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
ins_encode %{
- __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
- $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
+ address tpc = __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
+ $tmp1$$FloatRegister, $tmp2$$FloatRegister,
+ $tmp3$$FloatRegister, $tmp4$$Register);
+ if (tpc == NULL) {
+ ciEnv::current()->record_failure("CodeCache is full");
+ return;
+ }
%}
ins_pipe(pipe_class_memory);
%}
@@ -16821,6 +16909,7 @@ instruct replicate2D(vecX dst, vRegD src)
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp, iRegINoSp tmp2)
%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
match(Set dst (AddReductionVI isrc vsrc));
ins_cost(INSN_COST);
effect(TEMP tmp, TEMP tmp2);
@@ -16840,6 +16929,7 @@ instruct reduce_add2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp,
instruct reduce_add4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iRegINoSp itmp)
%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
match(Set dst (AddReductionVI isrc vsrc));
ins_cost(INSN_COST);
effect(TEMP vtmp, TEMP itmp);
@@ -16858,6 +16948,7 @@ instruct reduce_add4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iReg
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
match(Set dst (MulReductionVI isrc vsrc));
ins_cost(INSN_COST);
effect(TEMP tmp, TEMP dst);
@@ -16877,6 +16968,7 @@ instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iRegINoSp itmp)
%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
match(Set dst (MulReductionVI isrc vsrc));
ins_cost(INSN_COST);
effect(TEMP vtmp, TEMP itmp, TEMP dst);
@@ -17958,8 +18050,7 @@ instruct vabs2F(vecD dst, vecD src)
ins_cost(INSN_COST * 3);
format %{ "fabs $dst,$src\t# vector (2S)" %}
ins_encode %{
- __ fabs(as_FloatRegister($dst$$reg), __ T2S,
- as_FloatRegister($src$$reg));
+ __ fabs(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg));
%}
ins_pipe(vunop_fp64);
%}
@@ -17971,8 +18062,7 @@ instruct vabs4F(vecX dst, vecX src)
ins_cost(INSN_COST * 3);
format %{ "fabs $dst,$src\t# vector (4S)" %}
ins_encode %{
- __ fabs(as_FloatRegister($dst$$reg), __ T4S,
- as_FloatRegister($src$$reg));
+ __ fabs(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg));
%}
ins_pipe(vunop_fp128);
%}
@@ -17984,8 +18074,7 @@ instruct vabs2D(vecX dst, vecX src)
ins_cost(INSN_COST * 3);
format %{ "fabs $dst,$src\t# vector (2D)" %}
ins_encode %{
- __ fabs(as_FloatRegister($dst$$reg), __ T2D,
- as_FloatRegister($src$$reg));
+ __ fabs(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg));
%}
ins_pipe(vunop_fp128);
%}
@@ -18126,7 +18215,8 @@ instruct vxor16B(vecX dst, vecX src1, vecX src2)
// ------------------------------ Shift ---------------------------------------
instruct vshiftcnt8B(vecD dst, iRegIorL2I cnt) %{
- predicate(n->as_Vector()->length_in_bytes() == 8);
+ predicate(n->as_Vector()->length_in_bytes() == 4 ||
+ n->as_Vector()->length_in_bytes() == 8);
match(Set dst (LShiftCntV cnt));
match(Set dst (RShiftCntV cnt));
format %{ "dup $dst, $cnt\t# shift count vector (8B)" %}
@@ -18834,6 +18924,216 @@ instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
ins_pipe(vshift128_imm);
%}
+instruct vsraa8B_imm(vecD dst, vecD src, immI shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (AddVB dst (RShiftVB src (RShiftCntV shift))));
+  ins_cost(INSN_COST);
+  format %{ "ssra $dst, $src, $shift\t# vector (8B)" %}
+  ins_encode %{
+    // Shift right and accumulate into dst (T8B); counts of 8 or more are encoded as 7.
+    const int count = (int)$shift$$constant;
+    const int sh = (count >= 8) ? 7 : count;
+    __ ssra(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg), sh);
+  %}
+  ins_pipe(vshift64_imm);
+%}
+
+instruct vsraa16B_imm(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (AddVB dst (RShiftVB src (RShiftCntV shift))));
+  ins_cost(INSN_COST);
+  format %{ "ssra $dst, $src, $shift\t# vector (16B)" %}
+  ins_encode %{
+    // Shift right and accumulate into dst (T16B); counts of 8 or more are encoded as 7.
+    const int count = (int)$shift$$constant;
+    const int sh = (count >= 8) ? 7 : count;
+    __ ssra(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($src$$reg), sh);
+  %}
+  ins_pipe(vshift128_imm);
+%}
+
+instruct vsraa4S_imm(vecD dst, vecD src, immI shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (AddVS dst (RShiftVS src (RShiftCntV shift))));
+  ins_cost(INSN_COST);
+  format %{ "ssra $dst, $src, $shift\t# vector (4H)" %}
+  ins_encode %{
+    // Shift right and accumulate into dst (T4H); counts of 16 or more are encoded as 15.
+    const int count = (int)$shift$$constant;
+    const int sh = (count >= 16) ? 15 : count;
+    __ ssra(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($src$$reg), sh);
+  %}
+  ins_pipe(vshift64_imm);
+%}
+
+instruct vsraa8S_imm(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (AddVS dst (RShiftVS src (RShiftCntV shift))));
+  ins_cost(INSN_COST);
+  format %{ "ssra $dst, $src, $shift\t# vector (8H)" %}
+  ins_encode %{
+    // Shift right and accumulate into dst (T8H); counts of 16 or more are encoded as 15.
+    const int count = (int)$shift$$constant;
+    const int sh = (count >= 16) ? 15 : count;
+    __ ssra(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), sh);
+  %}
+  ins_pipe(vshift128_imm);
+%}
+
+instruct vsraa2I_imm(vecD dst, vecD src, immI shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AddVI dst (RShiftVI src (RShiftCntV shift))));
+  ins_cost(INSN_COST);
+  format %{ "ssra $dst, $src, $shift\t# vector (2S)" %}
+  ins_encode %{
+    // Shift right and accumulate into dst (T2S); the immediate is passed through unchanged.
+    const int sh = (int)$shift$$constant;
+    __ ssra(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg), sh);
+  %}
+  ins_pipe(vshift64_imm);
+%}
+
+instruct vsraa4I_imm(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (AddVI dst (RShiftVI src (RShiftCntV shift))));
+  ins_cost(INSN_COST);
+  format %{ "ssra $dst, $src, $shift\t# vector (4S)" %}
+  ins_encode %{
+    // Shift right and accumulate into dst (T4S); the immediate is passed through unchanged.
+    const int sh = (int)$shift$$constant;
+    __ ssra(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg), sh);
+  %}
+  ins_pipe(vshift128_imm);
+%}
+
+instruct vsraa2L_imm(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AddVL dst (RShiftVL src (RShiftCntV shift))));
+  ins_cost(INSN_COST);
+  format %{ "ssra $dst, $src, $shift\t# vector (2D)" %}
+  ins_encode %{
+    // Shift right and accumulate into dst (T2D); the immediate is passed through unchanged.
+    const int sh = (int)$shift$$constant;
+    __ ssra(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg), sh);
+  %}
+  ins_pipe(vshift128_imm);
+%}
+
+instruct vsrla8B_imm(vecD dst, vecD src, immI shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (AddVB dst (URShiftVB src (RShiftCntV shift))));
+  ins_cost(INSN_COST);
+  format %{ "usra $dst, $src, $shift\t# vector (8B)" %}
+  ins_encode %{
+    // Unsigned shift right and accumulate on T8B lanes: dst += (src >>> shift).
+    // A logical shift by the lane width (8) or more is zero in every lane, so
+    // dst stays unchanged and nothing is emitted. src is a plain input of this
+    // rule (no effect() declares it killed), so it must never be written here.
+    int sh = (int)$shift$$constant;
+    if (sh < 8) {
+      __ usra(as_FloatRegister($dst$$reg), __ T8B,
+              as_FloatRegister($src$$reg), sh);
+    }
+  %}
+  ins_pipe(vshift64_imm);
+%}
+
+instruct vsrla16B_imm(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (AddVB dst (URShiftVB src (RShiftCntV shift))));
+  ins_cost(INSN_COST);
+  format %{ "usra $dst, $src, $shift\t# vector (16B)" %}
+  ins_encode %{
+    // Unsigned shift right and accumulate on T16B lanes: dst += (src >>> shift).
+    // A logical shift by the lane width (8) or more is zero in every lane, so
+    // dst stays unchanged and nothing is emitted. src is a plain input of this
+    // rule (no effect() declares it killed), so it must never be written here.
+    int sh = (int)$shift$$constant;
+    if (sh < 8) {
+      __ usra(as_FloatRegister($dst$$reg), __ T16B,
+              as_FloatRegister($src$$reg), sh);
+    }
+  %}
+  ins_pipe(vshift128_imm);
+%}
+
+instruct vsrla4S_imm(vecD dst, vecD src, immI shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (AddVS dst (URShiftVS src (RShiftCntV shift))));
+  ins_cost(INSN_COST);
+  format %{ "usra $dst, $src, $shift\t# vector (4H)" %}
+  ins_encode %{
+    // Unsigned shift right and accumulate on T4H lanes: dst += (src >>> shift).
+    // A logical shift by the lane width (16) or more is zero in every lane, so
+    // dst stays unchanged, nothing is emitted, and src is never written.
+    // usra is required: it adds into dst (AddVS), whereas ushr would overwrite dst.
+    int sh = (int)$shift$$constant;
+    if (sh < 16) {
+      __ usra(as_FloatRegister($dst$$reg), __ T4H,
+              as_FloatRegister($src$$reg), sh);
+    }
+  %}
+  ins_pipe(vshift64_imm);
+%}
+
+instruct vsrla8S_imm(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (AddVS dst (URShiftVS src (RShiftCntV shift))));
+  ins_cost(INSN_COST);
+  format %{ "usra $dst, $src, $shift\t# vector (8H)" %}
+  ins_encode %{
+    // Unsigned shift right and accumulate on T8H lanes: dst += (src >>> shift).
+    // A logical shift by the lane width (16) or more is zero in every lane, so
+    // dst stays unchanged and nothing is emitted. src is a plain input of this
+    // rule (no effect() declares it killed), so it must never be written here.
+    int sh = (int)$shift$$constant;
+    if (sh < 16) {
+      __ usra(as_FloatRegister($dst$$reg), __ T8H,
+              as_FloatRegister($src$$reg), sh);
+    }
+  %}
+  ins_pipe(vshift128_imm);
+%}
+
+instruct vsrla2I_imm(vecD dst, vecD src, immI shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AddVI dst (URShiftVI src (RShiftCntV shift))));
+  ins_cost(INSN_COST);
+  format %{ "usra $dst, $src, $shift\t# vector (2S)" %}
+  ins_encode %{
+    // Unsigned shift right and accumulate into dst (T2S); the immediate is passed through unchanged.
+    const int sh = (int)$shift$$constant;
+    __ usra(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg), sh);
+  %}
+  ins_pipe(vshift64_imm);
+%}
+
+instruct vsrla4I_imm(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (AddVI dst (URShiftVI src (RShiftCntV shift))));
+  ins_cost(INSN_COST);
+  format %{ "usra $dst, $src, $shift\t# vector (4S)" %}
+  ins_encode %{
+    // Unsigned shift right and accumulate into dst (T4S); the immediate is passed through unchanged.
+    const int sh = (int)$shift$$constant;
+    __ usra(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg), sh);
+  %}
+  ins_pipe(vshift128_imm);
+%}
+
+instruct vsrla2L_imm(vecX dst, vecX src, immI shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AddVL dst (URShiftVL src (RShiftCntV shift))));
+  ins_cost(INSN_COST);
+  format %{ "usra $dst, $src, $shift\t# vector (2D)" %}
+  ins_encode %{
+    // Unsigned shift right and accumulate into dst (T2D); the immediate is passed through unchanged.
+    const int sh = (int)$shift$$constant;
+    __ usra(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg), sh);
+  %}
+  ins_pipe(vshift128_imm);
+%}
+
instruct vmax2F(vecD dst, vecD src1, vecD src2)
%{
predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
@@ -18950,12 +19250,12 @@ instruct vpopcount4I(vecX dst, vecX src) %{
"uaddlp $dst, $dst\t# vector (8H)"
%}
ins_encode %{
- __ cnt(as_FloatRegister($dst$$reg), __ T16B,
- as_FloatRegister($src$$reg));
- __ uaddlp(as_FloatRegister($dst$$reg), __ T16B,
- as_FloatRegister($dst$$reg));
- __ uaddlp(as_FloatRegister($dst$$reg), __ T8H,
- as_FloatRegister($dst$$reg));
+ __ cnt(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src$$reg));
+ __ uaddlp(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($dst$$reg));
+ __ uaddlp(as_FloatRegister($dst$$reg), __ T8H,
+ as_FloatRegister($dst$$reg));
%}
ins_pipe(pipe_class_default);
%}
@@ -18969,12 +19269,12 @@ instruct vpopcount2I(vecD dst, vecD src) %{
"uaddlp $dst, $dst\t# vector (4H)"
%}
ins_encode %{
- __ cnt(as_FloatRegister($dst$$reg), __ T8B,
- as_FloatRegister($src$$reg));
- __ uaddlp(as_FloatRegister($dst$$reg), __ T8B,
- as_FloatRegister($dst$$reg));
- __ uaddlp(as_FloatRegister($dst$$reg), __ T4H,
- as_FloatRegister($dst$$reg));
+ __ cnt(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($src$$reg));
+ __ uaddlp(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($dst$$reg));
+ __ uaddlp(as_FloatRegister($dst$$reg), __ T4H,
+ as_FloatRegister($dst$$reg));
%}
ins_pipe(pipe_class_default);
%}
diff --git a/src/hotspot/cpu/aarch64/aarch64_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_ad.m4
index 5893f451459..ac1b6dfec65 100644
--- a/src/hotspot/cpu/aarch64/aarch64_ad.m4
+++ b/src/hotspot/cpu/aarch64/aarch64_ad.m4
@@ -1,4 +1,4 @@
-dnl Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+dnl Copyright (c) 2019, 2020, Red Hat Inc. All rights reserved.
dnl DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
dnl
dnl This code is free software; you can redistribute it and/or modify it
@@ -19,10 +19,14 @@ dnl Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
dnl or visit www.oracle.com if you need additional information or have any
dnl questions.
dnl
-dnl
-dnl Process this file with m4 aarch64_ad.m4 to generate the arithmetic
-dnl and shift patterns patterns used in aarch64.ad.
dnl
+dnl Process this file with m4 aarch64_ad.m4 to generate the following
+dnl instruction patterns used in aarch64.ad:
+dnl 1. arithmetic patterns
+dnl 2. shift patterns
+dnl
+// BEGIN This section of the file is automatically generated. Do not edit --------------
+// This section is generated from aarch64_ad.m4
dnl
define(`ORL2I', `ifelse($1,I,orL2I)')
dnl
diff --git a/src/hotspot/cpu/aarch64/aarch64_neon.ad b/src/hotspot/cpu/aarch64/aarch64_neon.ad
new file mode 100644
index 00000000000..33b1a869cc3
--- /dev/null
+++ b/src/hotspot/cpu/aarch64/aarch64_neon.ad
@@ -0,0 +1,3456 @@
+// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2020, Arm Limited. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+
+// This file is automatically generated by running "m4 aarch64_neon_ad.m4". Do not edit ----
+
+// AArch64 NEON Architecture Description File
+
+// ====================VECTOR INSTRUCTIONS==================================
+
+// ------------------------------ Load/store/reinterpret -----------------------
+
+// Load vector (16 bits): LoadVector rule for a 2-byte memory access into a vecD register
+instruct loadV2(vecD dst, memory mem)
+%{
+ predicate(n->as_LoadVector()->memory_size() == 2);  // restrict this rule to 2-byte vector loads
+ match(Set dst (LoadVector mem));
+ ins_cost(4 * INSN_COST);
+ format %{ "ldrh $dst,$mem\t# vector (16 bits)" %}
+ ins_encode( aarch64_enc_ldrvH(dst, mem) );  // encoding class is defined outside this chunk
+ ins_pipe(vload_reg_mem64);
+%}
+
+// Store Vector (16 bits): StoreVector rule for a 2-byte memory access from a vecD register
+instruct storeV2(vecD src, memory mem)
+%{
+ predicate(n->as_StoreVector()->memory_size() == 2);  // restrict this rule to 2-byte vector stores
+ match(Set mem (StoreVector mem src));
+ ins_cost(4 * INSN_COST);
+ format %{ "strh $mem,$src\t# vector (16 bits)" %}
+ ins_encode( aarch64_enc_strvH(src, mem) );  // encoding class is defined outside this chunk
+ ins_pipe(vstore_reg_mem64);
+%}
+
+instruct reinterpretD(vecD dst)
+%{
+ predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 &&
+ n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8);
+ match(Set dst (VectorReinterpret dst));  // result and input are both 8 bytes (see predicate) and share dst
+ ins_cost(0);  // nothing is emitted, so the rule is free
+ format %{ " # reinterpret $dst" %}
+ ins_encode %{
+ // empty: same-size reinterpret of dst in place, no instruction needed
+ %}
+ ins_pipe(pipe_class_empty);
+%}
+
+instruct reinterpretX(vecX dst)
+%{
+ predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 &&
+ n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16);
+ match(Set dst (VectorReinterpret dst));  // result and input are both 16 bytes (see predicate) and share dst
+ ins_cost(0);  // nothing is emitted, so the rule is free
+ format %{ " # reinterpret $dst" %}
+ ins_encode %{
+ // empty: same-size reinterpret of dst in place, no instruction needed
+ %}
+ ins_pipe(pipe_class_empty);
+%}
+
+instruct reinterpretD2X(vecX dst, vecD src)
+%{
+ predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 &&
+ n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8);
+ match(Set dst (VectorReinterpret src));  // 8-byte input reinterpreted as a 16-byte result (see predicate)
+ ins_cost(INSN_COST);
+ format %{ " # reinterpret $dst,$src" %}
+ ins_encode %{
+ // If register is the same, then move is not needed; otherwise orr of src with itself (T8B) is the move.
+ if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
+ __ orr(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ }
+ %}
+ ins_pipe(vlogical64);
+%}
+
+instruct reinterpretX2D(vecD dst, vecX src)
+%{
+ predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 &&
+ n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16);
+ match(Set dst (VectorReinterpret src));  // 16-byte input reinterpreted as an 8-byte result (see predicate)
+ ins_cost(INSN_COST);
+ format %{ " # reinterpret $dst,$src" %}
+ ins_encode %{
+ // If register is the same, then move is not needed; otherwise orr of src with itself (T8B) is the move.
+ if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
+ __ orr(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ }
+ %}
+ ins_pipe(vlogical64);
+%}
+
+// ------------------------------ Vector cast -------------------------------
+
+instruct vcvt4Bto4S(vecD dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorCastB2X src));  // byte-vector cast whose result has 4 T_SHORT lanes (see predicate)
+ format %{ "sxtl $dst, T8H, $src, T8B\t# convert 4B to 4S vector" %}
+ ins_encode %{  // one sxtl (signed extend long) from the T8B arrangement of src to T8H in dst
+ __ sxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vcvt8Bto8S(vecX dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorCastB2X src));  // byte-vector cast whose result has 8 T_SHORT lanes (see predicate)
+ format %{ "sxtl $dst, T8H, $src, T8B\t# convert 8B to 8S vector" %}
+ ins_encode %{  // one sxtl (signed extend long) from the T8B arrangement of src to T8H in dst
+ __ sxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vcvt4Sto4B(vecD dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorCastS2X src));  // short-vector cast whose result has 4 T_BYTE lanes (see predicate)
+ format %{ "xtn $dst, T8B, $src, T8H\t# convert 4S to 4B vector" %}
+ ins_encode %{  // one xtn (extract narrow) from the T8H arrangement of src to T8B in dst
+ __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg), __ T8H);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vcvt8Sto8B(vecD dst, vecX src)
+%{
+ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorCastS2X src));  // short-vector cast whose result has 8 T_BYTE lanes (see predicate)
+ format %{ "xtn $dst, T8B, $src, T8H\t# convert 8S to 8B vector" %}
+ ins_encode %{  // one xtn (extract narrow) from the T8H arrangement of src to T8B in dst
+ __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg), __ T8H);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vcvt4Sto4I(vecX dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (VectorCastS2X src));  // short-vector cast whose result has 4 T_INT lanes (see predicate)
+ format %{ "sxtl $dst, T4S, $src, T4H\t# convert 4S to 4I vector" %}
+ ins_encode %{  // one sxtl (signed extend long) from the T4H arrangement of src to T4S in dst
+ __ sxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg), __ T4H);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vcvt4Ito4S(vecD dst, vecX src)
+%{
+ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorCastI2X src));  // int-vector cast whose result has 4 T_SHORT lanes (see predicate)
+ format %{ "xtn $dst, T4H, $src, T4S\t# convert 4I to 4S vector" %}
+ ins_encode %{  // one xtn (extract narrow) from the T4S arrangement of src to T4H in dst
+ __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($src$$reg), __ T4S);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vcvt2Ito2L(vecX dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst (VectorCastI2X src));  // int-vector cast whose result has 2 T_LONG lanes (see predicate)
+ format %{ "sxtl $dst, T2D, $src, T2S\t# convert 2I to 2L vector" %}
+ ins_encode %{  // one sxtl (signed extend long) from the T2S arrangement of src to T2D in dst
+ __ sxtl(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg), __ T2S);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vcvt2Lto2I(vecD dst, vecX src)
+%{
+ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (VectorCastL2X src));  // long-vector cast whose result has 2 T_INT lanes (see predicate)
+ format %{ "xtn $dst, T2S, $src, T2D\t# convert 2L to 2I vector" %}
+ ins_encode %{  // one xtn (extract narrow) from the T2D arrangement of src to T2S in dst
+ __ xtn(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg), __ T2D);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vcvt4Bto4I(vecX dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (VectorCastB2X src));  // byte-vector cast whose result has 4 T_INT lanes (see predicate)
+ format %{ "sxtl $dst, T8H, $src, T8B\n\t"
+ "sxtl $dst, T4S, $dst, T4H\t# convert 4B to 4I vector"
+ %}
+ ins_encode %{  // two sxtl steps: src T8B to dst T8H, then dst T4H to dst T4S in place
+ __ sxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
+ __ sxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vcvt4Ito4B(vecD dst, vecX src)
+%{
+ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorCastI2X src));  // int-vector cast whose result has 4 T_BYTE lanes (see predicate)
+ format %{ "xtn $dst, T4H, $src, T4S\n\t"
+ "xtn $dst, T8B, $dst, T8H\t# convert 4I to 4B vector"
+ %}
+ ins_encode %{  // two xtn steps: src T4S to dst T4H, then dst T8H to dst T8B in place
+ __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($src$$reg), __ T4S);
+ __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg), __ T8H);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vcvt4Bto4F(vecX dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorCastB2X src));  // byte-vector cast whose result has 4 T_FLOAT lanes (see predicate)
+ format %{ "sxtl $dst, T8H, $src, T8B\n\t"
+ "sxtl $dst, T4S, $dst, T4H\n\t"
+ "scvtfv T4S, $dst, $dst\t# convert 4B to 4F vector"
+ %}
+ ins_encode %{  // two sxtl steps take the bytes to T4S lanes in dst, then scvtfv converts dst in place
+ __ sxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
+ __ sxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
+ __ scvtfv(__ T4S, as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vcvt4Sto4F(vecX dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorCastS2X src));  // short-vector cast whose result has 4 T_FLOAT lanes (see predicate)
+ format %{ "sxtl $dst, T4S, $src, T4H\n\t"
+ "scvtfv T4S, $dst, $dst\t# convert 4S to 4F vector"
+ %}
+ ins_encode %{  // sxtl takes src T4H to dst T4S, then scvtfv converts dst in place
+ __ sxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg), __ T4H);
+ __ scvtfv(__ T4S, as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vcvt2Ito2D(vecX dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+ match(Set dst (VectorCastI2X src));  // int-vector cast whose result has 2 T_DOUBLE lanes (see predicate)
+ format %{ "sxtl $dst, T2D, $src, T2S\n\t"
+ "scvtfv T2D, $dst, $dst\t# convert 2I to 2D vector"
+ %}
+ ins_encode %{  // sxtl takes src T2S to dst T2D, then scvtfv converts dst in place
+ __ sxtl(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg), __ T2S);
+ __ scvtfv(__ T2D, as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vcvt2Ito2F(vecD dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorCastI2X src));  // int-vector cast whose result has 2 T_FLOAT lanes (see predicate)
+ format %{ "scvtfv T2S, $dst, $src\t# convert 2I to 2F vector" %}
+ ins_encode %{  // same lane width on both sides, so a single scvtfv (T2S) from src to dst suffices
+ __ scvtfv(__ T2S, as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vcvt4Ito4F(vecX dst, vecX src)
+%{
+ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorCastI2X src));  // int-vector cast whose result has 4 T_FLOAT lanes (see predicate)
+ format %{ "scvtfv T4S, $dst, $src\t# convert 4I to 4F vector" %}
+ ins_encode %{  // same lane width on both sides, so a single scvtfv (T4S) from src to dst suffices
+ __ scvtfv(__ T4S, as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vcvt2Lto2D(vecX dst, vecX src)
+%{
+ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+ match(Set dst (VectorCastL2X src));  // long-vector cast whose result has 2 T_DOUBLE lanes (see predicate)
+ format %{ "scvtfv T2D, $dst, $src\t# convert 2L to 2D vector" %}
+ ins_encode %{  // same lane width on both sides, so a single scvtfv (T2D) from src to dst suffices
+ __ scvtfv(__ T2D, as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vcvt2Fto2D(vecX dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+ match(Set dst (VectorCastF2X src));  // float-vector cast whose result has 2 T_DOUBLE lanes (see predicate)
+ format %{ "fcvtl $dst, T2D, $src, T2S\t# convert 2F to 2D vector" %}
+ ins_encode %{  // one fcvtl from the T2S arrangement of src to T2D in dst
+ __ fcvtl(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg), __ T2S);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vcvt2Dto2F(vecD dst, vecX src)
+%{
+ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorCastD2X src));  // double-vector cast whose result has 2 T_FLOAT lanes (see predicate)
+ format %{ "fcvtn $dst, T2S, $src, T2D\t# convert 2D to 2F vector" %}
+ ins_encode %{  // one fcvtn from the T2D arrangement of src to T2S in dst
+ __ fcvtn(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg), __ T2D);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vcvt2Lto2F(vecD dst, vecX src)
+%{
+ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorCastL2X src));  // long-vector cast whose result has 2 T_FLOAT lanes (see predicate)
+ format %{ "scvtfv T2D, $dst, $src\n\t"
+ "fcvtn $dst, T2S, $dst, T2D\t# convert 2L to 2F vector"
+ %}
+ ins_encode %{  // scvtfv (T2D) converts src into dst first, then fcvtn narrows dst from T2D to T2S in place
+ __ scvtfv(__ T2D, as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
+ __ fcvtn(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($dst$$reg), __ T2D);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// ------------------------------ Reduction -------------------------------
+
+instruct reduce_add8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);  // vector input holds T_BYTE elements
+ match(Set dst (AddReductionVI isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);  // dst is written (smov) before isrc is read (addw); tmp is scratch
+ format %{ "addv $tmp, T8B, $vsrc\n\t"
+ "smov $dst, $tmp, B, 0\n\t"
+ "addw $dst, $dst, $isrc\n\t"
+ "sxtb $dst, $dst\t# add reduction8B"
+ %}
+ ins_encode %{
+ __ addv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($vsrc$$reg));  // add across the T8B lanes of vsrc into tmp
+ __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);  // move byte element 0 of tmp to dst
+ __ addw($dst$$Register, $dst$$Register, $isrc$$Register);  // add the scalar input
+ __ sxtb($dst$$Register, $dst$$Register);  // sign-extend the low byte of the sum
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_add16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);  // vector input holds T_BYTE elements
+ match(Set dst (AddReductionVI isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);  // dst is written (smov) before isrc is read (addw); tmp is scratch
+ format %{ "addv $tmp, T16B, $vsrc\n\t"
+ "smov $dst, $tmp, B, 0\n\t"
+ "addw $dst, $dst, $isrc\n\t"
+ "sxtb $dst, $dst\t# add reduction16B"
+ %}
+ ins_encode %{
+ __ addv(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($vsrc$$reg));  // add across the T16B lanes of vsrc into tmp
+ __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);  // move byte element 0 of tmp to dst
+ __ addw($dst$$Register, $dst$$Register, $isrc$$Register);  // add the scalar input
+ __ sxtb($dst$$Register, $dst$$Register);  // sign-extend the low byte of the sum
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_add4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);  // vector input holds T_SHORT elements
+ match(Set dst (AddReductionVI isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);  // dst is written (smov) before isrc is read (addw); tmp is scratch
+ format %{ "addv $tmp, T4H, $vsrc\n\t"
+ "smov $dst, $tmp, H, 0\n\t"
+ "addw $dst, $dst, $isrc\n\t"
+ "sxth $dst, $dst\t# add reduction4S"
+ %}
+ ins_encode %{
+ __ addv(as_FloatRegister($tmp$$reg), __ T4H, as_FloatRegister($vsrc$$reg));  // add across the T4H lanes of vsrc into tmp
+ __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ H, 0);  // move halfword element 0 of tmp to dst
+ __ addw($dst$$Register, $dst$$Register, $isrc$$Register);  // add the scalar input
+ __ sxth($dst$$Register, $dst$$Register);  // sign-extend the low halfword of the sum
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_add8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);  // vector input holds T_SHORT elements
+ match(Set dst (AddReductionVI isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);  // dst is written (smov) before isrc is read (addw); tmp is scratch
+ format %{ "addv $tmp, T8H, $vsrc\n\t"
+ "smov $dst, $tmp, H, 0\n\t"
+ "addw $dst, $dst, $isrc\n\t"
+ "sxth $dst, $dst\t# add reduction8S"
+ %}
+ ins_encode %{
+ __ addv(as_FloatRegister($tmp$$reg), __ T8H, as_FloatRegister($vsrc$$reg));  // add across the T8H lanes of vsrc into tmp
+ __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ H, 0);  // move halfword element 0 of tmp to dst
+ __ addw($dst$$Register, $dst$$Register, $isrc$$Register);  // add the scalar input
+ __ sxth($dst$$Register, $dst$$Register);  // sign-extend the low halfword of the sum
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_add2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, vecX tmp)
+%{
+ match(Set dst (AddReductionVL isrc vsrc));  // no predicate: this is the only AddReductionVL rule visible in this chunk
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);  // dst is written (umov) before isrc is read (add); tmp is scratch
+ format %{ "addpd $tmp, $vsrc\n\t"
+ "umov $dst, $tmp, D, 0\n\t"
+ "add $dst, $isrc, $dst\t# add reduction2L"
+ %}
+ ins_encode %{
+ __ addpd(as_FloatRegister($tmp$$reg), as_FloatRegister($vsrc$$reg));  // pairwise add of the two D elements of vsrc into tmp
+ __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ D, 0);  // move D element 0 of tmp to dst
+ __ add($dst$$Register, $isrc$$Register, $dst$$Register);  // add the scalar input
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_mul8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD vtmp1, vecD vtmp2, iRegINoSp itmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);  // vector input holds T_BYTE elements
+ match(Set dst (MulReductionVI isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp);  // dst is written before the last reads of the inputs
+ format %{ "ins $vtmp1, S, $vsrc, 0, 1\n\t"
+ "mulv $vtmp1, T8B, $vtmp1, $vsrc\n\t"
+ "ins $vtmp2, H, $vtmp1, 0, 1\n\t"
+ "mulv $vtmp2, T8B, $vtmp2, $vtmp1\n\t"
+ "umov $itmp, $vtmp2, B, 0\n\t"
+ "mulw $dst, $itmp, $isrc\n\t"
+ "sxtb $dst, $dst\n\t"
+ "umov $itmp, $vtmp2, B, 1\n\t"
+ "mulw $dst, $itmp, $dst\n\t"
+ "sxtb $dst, $dst\t# mul reduction8B"
+ %}
+ ins_encode %{  // halve the vector twice with ins + mulv, then fold the last two byte products into isrc
+ __ ins(as_FloatRegister($vtmp1$$reg), __ S,
+ as_FloatRegister($vsrc$$reg), 0, 1);  // presumably S element 1 of vsrc to element 0 of vtmp1 - confirm operand order
+ __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B,
+ as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg));
+ __ ins(as_FloatRegister($vtmp2$$reg), __ H,
+ as_FloatRegister($vtmp1$$reg), 0, 1);  // same folding step at H granularity
+ __ mulv(as_FloatRegister($vtmp2$$reg), __ T8B,
+ as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
+ __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 0);  // byte element 0 of the partial products
+ __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register);  // multiply by the scalar input
+ __ sxtb($dst$$Register, $dst$$Register);  // sign-extend the low byte of the product
+ __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 1);  // byte element 1 of the partial products
+ __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
+ __ sxtb($dst$$Register, $dst$$Register);  // sign-extend the low byte of the final product
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_mul16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp1, vecX vtmp2, iRegINoSp itmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);  // vector input holds T_BYTE elements
+ match(Set dst (MulReductionVI isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp);  // dst is written before the last reads of the inputs
+ format %{ "ins $vtmp1, D, $vsrc, 0, 1\n\t"
+ "mulv $vtmp1, T8B, $vtmp1, $vsrc\n\t"
+ "ins $vtmp2, S, $vtmp1, 0, 1\n\t"
+ "mulv $vtmp1, T8B, $vtmp2, $vtmp1\n\t"
+ "ins $vtmp2, H, $vtmp1, 0, 1\n\t"
+ "mulv $vtmp2, T8B, $vtmp2, $vtmp1\n\t"
+ "umov $itmp, $vtmp2, B, 0\n\t"
+ "mulw $dst, $itmp, $isrc\n\t"
+ "sxtb $dst, $dst\n\t"
+ "umov $itmp, $vtmp2, B, 1\n\t"
+ "mulw $dst, $itmp, $dst\n\t"
+ "sxtb $dst, $dst\t# mul reduction16B"
+ %}
+ ins_encode %{  // halve the vector three times with ins + mulv, then fold the last two byte products into isrc
+ __ ins(as_FloatRegister($vtmp1$$reg), __ D,
+ as_FloatRegister($vsrc$$reg), 0, 1);  // presumably D element 1 of vsrc to element 0 of vtmp1 - confirm operand order
+ __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B,
+ as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg));
+ __ ins(as_FloatRegister($vtmp2$$reg), __ S,
+ as_FloatRegister($vtmp1$$reg), 0, 1);  // same folding step at S granularity
+ __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B,
+ as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
+ __ ins(as_FloatRegister($vtmp2$$reg), __ H,
+ as_FloatRegister($vtmp1$$reg), 0, 1);  // same folding step at H granularity
+ __ mulv(as_FloatRegister($vtmp2$$reg), __ T8B,
+ as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
+ __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 0);  // byte element 0 of the partial products
+ __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register);  // multiply by the scalar input
+ __ sxtb($dst$$Register, $dst$$Register);  // sign-extend the low byte of the product
+ __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 1);  // byte element 1 of the partial products
+ __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
+ __ sxtb($dst$$Register, $dst$$Register);  // sign-extend the low byte of the final product
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_mul4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD vtmp, iRegINoSp itmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);  // vector input holds T_SHORT elements
+ match(Set dst (MulReductionVI isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP vtmp, TEMP itmp);  // dst is written before the last reads of the inputs
+ format %{ "ins $vtmp, S, $vsrc, 0, 1\n\t"
+ "mulv $vtmp, T4H, $vtmp, $vsrc\n\t"
+ "umov $itmp, $vtmp, H, 0\n\t"
+ "mulw $dst, $itmp, $isrc\n\t"
+ "sxth $dst, $dst\n\t"
+ "umov $itmp, $vtmp, H, 1\n\t"
+ "mulw $dst, $itmp, $dst\n\t"
+ "sxth $dst, $dst\t# mul reduction4S"
+ %}
+ ins_encode %{  // halve the vector once with ins + mulv, then fold the last two halfword products into isrc
+ __ ins(as_FloatRegister($vtmp$$reg), __ S,
+ as_FloatRegister($vsrc$$reg), 0, 1);  // presumably S element 1 of vsrc to element 0 of vtmp - confirm operand order
+ __ mulv(as_FloatRegister($vtmp$$reg), __ T4H,
+ as_FloatRegister($vtmp$$reg), as_FloatRegister($vsrc$$reg));
+ __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ H, 0);  // halfword element 0 of the partial products
+ __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register);  // multiply by the scalar input
+ __ sxth($dst$$Register, $dst$$Register);  // sign-extend the low halfword of the product
+ __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ H, 1);  // halfword element 1 of the partial products
+ __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
+ __ sxth($dst$$Register, $dst$$Register);  // sign-extend the low halfword of the final product
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_mul8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp1, vecX vtmp2, iRegINoSp itmp)
+%{ // MulReductionVI over the short lanes of a vecX source, seeded with the scalar isrc.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);  // T_SHORT lanes only
+  match(Set dst (MulReductionVI isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp);  // two vector scratch regs, one scalar
+  format %{ "ins $vtmp1, D, $vsrc, 0, 1\n\t"
+            "mulv $vtmp1, T4H, $vtmp1, $vsrc\n\t"
+            "ins $vtmp2, S, $vtmp1, 0, 1\n\t"
+            "mulv $vtmp2, T4H, $vtmp2, $vtmp1\n\t"
+            "umov $itmp, $vtmp2, H, 0\n\t"
+            "mulw $dst, $itmp, $isrc\n\t"
+            "sxth $dst, $dst\n\t"
+            "umov $itmp, $vtmp2, H, 1\n\t"
+            "mulw $dst, $itmp, $dst\n\t"
+            "sxth $dst, $dst\t# mul reduction8S"
+  %}
+  ins_encode %{
+    // Two ins + mulv(T4H) rounds at D and S element size; H lanes 0 and 1
+    // of vtmp2 are what the scalar tail below consumes.
+    // NOTE(review): assumes ins(dst, size, src, 0, 1) copies source element 1
+    // into destination element 0 - confirm against the assembler.
+    __ ins($vtmp1$$FloatRegister, __ D, $vsrc$$FloatRegister, 0, 1);
+    __ mulv($vtmp1$$FloatRegister, __ T4H, $vtmp1$$FloatRegister, $vsrc$$FloatRegister);
+    __ ins($vtmp2$$FloatRegister, __ S, $vtmp1$$FloatRegister, 0, 1);
+    __ mulv($vtmp2$$FloatRegister, __ T4H, $vtmp2$$FloatRegister, $vtmp1$$FloatRegister);
+    __ umov(as_Register($itmp$$reg), $vtmp2$$FloatRegister, __ H, 0);                  // H lane 0
+    __ mulw(as_Register($dst$$reg), as_Register($itmp$$reg), as_Register($isrc$$reg)); // times scalar input
+    __ sxth(as_Register($dst$$reg), as_Register($dst$$reg));                           // back to signed 16 bit
+    __ umov(as_Register($itmp$$reg), $vtmp2$$FloatRegister, __ H, 1);                  // H lane 1
+    __ mulw(as_Register($dst$$reg), as_Register($itmp$$reg), as_Register($dst$$reg));  // times running product
+    __ sxth(as_Register($dst$$reg), as_Register($dst$$reg));                           // final signed 16 bit
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_mul2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp)
+%{ // MulReductionVL: dst = isrc * (D lane 0 of vsrc) * (D lane 1 of vsrc).
+  match(Set dst (MulReductionVL isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp);  // tmp carries each lane into the scalar multiplier
+  format %{ "umov $tmp, $vsrc, D, 0\n\t"
+            "mul $dst, $isrc, $tmp\n\t"
+            "umov $tmp, $vsrc, D, 1\n\t"
+            "mul $dst, $dst, $tmp\t# mul reduction2L"
+  %}
+  ins_encode %{
+    __ umov(as_Register($tmp$$reg), $vsrc$$FloatRegister, __ D, 0);                   // lane 0
+    __ mul(as_Register($dst$$reg), as_Register($isrc$$reg), as_Register($tmp$$reg));  // scalar * lane 0
+    __ umov(as_Register($tmp$$reg), $vsrc$$FloatRegister, __ D, 1);                   // lane 1
+    __ mul(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($tmp$$reg));   // * lane 1
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_max8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr)
+%{ // MaxReductionV on the byte lanes of a vecD source, combined with the scalar isrc.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);  // T_BYTE lanes only
+  match(Set dst (MaxReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp, KILL cr);  // cmpw in the encoding rewrites the flags
+  format %{ "smaxv $tmp, T8B, $vsrc\n\t"
+            "smov $dst, $tmp, B, 0\n\t"
+            "cmpw $dst, $isrc\n\t"
+            "cselw $dst, $dst, $isrc GT\t# max reduction8B"
+  %}
+  ins_encode %{
+    __ smaxv($tmp$$FloatRegister, __ T8B, $vsrc$$FloatRegister);  // signed max across the lanes
+    __ smov($dst$$Register, $tmp$$FloatRegister, __ B, 0);        // byte lane 0 of tmp into dst
+    __ cmpw($dst$$Register, $isrc$$Register);                     // vector result vs. scalar input
+    __ cselw($dst$$Register, $dst$$Register, $isrc$$Register, Assembler::GT);  // GT keeps dst, else isrc
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_max16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr)
+%{ // MaxReductionV on the byte lanes of a vecX source, combined with the scalar isrc.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);  // T_BYTE lanes only
+  match(Set dst (MaxReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp, KILL cr);  // cmpw in the encoding rewrites the flags
+  format %{ "smaxv $tmp, T16B, $vsrc\n\t"
+            "smov $dst, $tmp, B, 0\n\t"
+            "cmpw $dst, $isrc\n\t"
+            "cselw $dst, $dst, $isrc GT\t# max reduction16B"
+  %}
+  ins_encode %{
+    __ smaxv($tmp$$FloatRegister, __ T16B, $vsrc$$FloatRegister);  // signed max across the lanes
+    __ smov($dst$$Register, $tmp$$FloatRegister, __ B, 0);         // byte lane 0 of tmp into dst
+    __ cmpw($dst$$Register, $isrc$$Register);                      // vector result vs. scalar input
+    __ cselw($dst$$Register, $dst$$Register, $isrc$$Register, Assembler::GT);  // GT keeps dst, else isrc
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_max4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr)
+%{ // MaxReductionV on the short lanes of a vecD source, combined with the scalar isrc.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);  // T_SHORT lanes only
+  match(Set dst (MaxReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp, KILL cr);  // cmpw in the encoding rewrites the flags
+  format %{ "smaxv $tmp, T4H, $vsrc\n\t"
+            "smov $dst, $tmp, H, 0\n\t"
+            "cmpw $dst, $isrc\n\t"
+            "cselw $dst, $dst, $isrc GT\t# max reduction4S"
+  %}
+  ins_encode %{
+    __ smaxv($tmp$$FloatRegister, __ T4H, $vsrc$$FloatRegister);  // signed max across the lanes
+    __ smov($dst$$Register, $tmp$$FloatRegister, __ H, 0);        // H lane 0 of tmp into dst
+    __ cmpw($dst$$Register, $isrc$$Register);                     // vector result vs. scalar input
+    __ cselw($dst$$Register, $dst$$Register, $isrc$$Register, Assembler::GT);  // GT keeps dst, else isrc
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_max8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr)
+%{ // MaxReductionV on the short lanes of a vecX source, combined with the scalar isrc.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);  // T_SHORT lanes only
+  match(Set dst (MaxReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp, KILL cr);  // cmpw in the encoding rewrites the flags
+  format %{ "smaxv $tmp, T8H, $vsrc\n\t"
+            "smov $dst, $tmp, H, 0\n\t"
+            "cmpw $dst, $isrc\n\t"
+            "cselw $dst, $dst, $isrc GT\t# max reduction8S"
+  %}
+  ins_encode %{
+    __ smaxv($tmp$$FloatRegister, __ T8H, $vsrc$$FloatRegister);  // signed max across the lanes
+    __ smov($dst$$Register, $tmp$$FloatRegister, __ H, 0);        // H lane 0 of tmp into dst
+    __ cmpw($dst$$Register, $isrc$$Register);                     // vector result vs. scalar input
+    __ cselw($dst$$Register, $dst$$Register, $isrc$$Register, Assembler::GT);  // GT keeps dst, else isrc
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_max4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr)
+%{ // MaxReductionV on the int lanes of a vecX source, combined with the scalar isrc.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);  // T_INT lanes only
+  match(Set dst (MaxReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp, KILL cr);  // cmpw in the encoding rewrites the flags
+  format %{ "smaxv $tmp, T4S, $vsrc\n\t"
+            "umov $dst, $tmp, S, 0\n\t"
+            "cmpw $dst, $isrc\n\t"
+            "cselw $dst, $dst, $isrc GT\t# max reduction4I"
+  %}
+  ins_encode %{
+    __ smaxv($tmp$$FloatRegister, __ T4S, $vsrc$$FloatRegister);  // signed max across the lanes
+    __ umov($dst$$Register, $tmp$$FloatRegister, __ S, 0);        // S lane 0 of tmp into dst
+    __ cmpw($dst$$Register, $isrc$$Register);                     // vector result vs. scalar input
+    __ cselw($dst$$Register, $dst$$Register, $isrc$$Register, Assembler::GT);  // GT keeps dst, else isrc
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_min8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr)
+%{ // MinReductionV on the byte lanes of a vecD source, combined with the scalar isrc.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);  // T_BYTE lanes only
+  match(Set dst (MinReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp, KILL cr);  // cmpw in the encoding rewrites the flags
+  format %{ "sminv $tmp, T8B, $vsrc\n\t"
+            "smov $dst, $tmp, B, 0\n\t"
+            "cmpw $dst, $isrc\n\t"
+            "cselw $dst, $dst, $isrc LT\t# min reduction8B"
+  %}
+  ins_encode %{
+    __ sminv($tmp$$FloatRegister, __ T8B, $vsrc$$FloatRegister);  // signed min across the lanes
+    __ smov($dst$$Register, $tmp$$FloatRegister, __ B, 0);        // byte lane 0 of tmp into dst
+    __ cmpw($dst$$Register, $isrc$$Register);                     // vector result vs. scalar input
+    __ cselw($dst$$Register, $dst$$Register, $isrc$$Register, Assembler::LT);  // LT keeps dst, else isrc
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_min16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr)
+%{ // MinReductionV on the byte lanes of a vecX source, combined with the scalar isrc.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);  // T_BYTE lanes only
+  match(Set dst (MinReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp, KILL cr);  // cmpw in the encoding rewrites the flags
+  format %{ "sminv $tmp, T16B, $vsrc\n\t"
+            "smov $dst, $tmp, B, 0\n\t"
+            "cmpw $dst, $isrc\n\t"
+            "cselw $dst, $dst, $isrc LT\t# min reduction16B"
+  %}
+  ins_encode %{
+    __ sminv($tmp$$FloatRegister, __ T16B, $vsrc$$FloatRegister);  // signed min across the lanes
+    __ smov($dst$$Register, $tmp$$FloatRegister, __ B, 0);         // byte lane 0 of tmp into dst
+    __ cmpw($dst$$Register, $isrc$$Register);                      // vector result vs. scalar input
+    __ cselw($dst$$Register, $dst$$Register, $isrc$$Register, Assembler::LT);  // LT keeps dst, else isrc
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_min4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr)
+%{ // MinReductionV on the short lanes of a vecD source, combined with the scalar isrc.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);  // T_SHORT lanes only
+  match(Set dst (MinReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp, KILL cr);  // cmpw in the encoding rewrites the flags
+  format %{ "sminv $tmp, T4H, $vsrc\n\t"
+            "smov $dst, $tmp, H, 0\n\t"
+            "cmpw $dst, $isrc\n\t"
+            "cselw $dst, $dst, $isrc LT\t# min reduction4S"
+  %}
+  ins_encode %{
+    __ sminv($tmp$$FloatRegister, __ T4H, $vsrc$$FloatRegister);  // signed min across the lanes
+    __ smov($dst$$Register, $tmp$$FloatRegister, __ H, 0);        // H lane 0 of tmp into dst
+    __ cmpw($dst$$Register, $isrc$$Register);                     // vector result vs. scalar input
+    __ cselw($dst$$Register, $dst$$Register, $isrc$$Register, Assembler::LT);  // LT keeps dst, else isrc
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_min8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr)
+%{ // MinReductionV on the short lanes of a vecX source, combined with the scalar isrc.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);  // T_SHORT lanes only
+  match(Set dst (MinReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp, KILL cr);  // cmpw in the encoding rewrites the flags
+  format %{ "sminv $tmp, T8H, $vsrc\n\t"
+            "smov $dst, $tmp, H, 0\n\t"
+            "cmpw $dst, $isrc\n\t"
+            "cselw $dst, $dst, $isrc LT\t# min reduction8S"
+  %}
+  ins_encode %{
+    __ sminv($tmp$$FloatRegister, __ T8H, $vsrc$$FloatRegister);  // signed min across the lanes
+    __ smov($dst$$Register, $tmp$$FloatRegister, __ H, 0);        // H lane 0 of tmp into dst
+    __ cmpw($dst$$Register, $isrc$$Register);                     // vector result vs. scalar input
+    __ cselw($dst$$Register, $dst$$Register, $isrc$$Register, Assembler::LT);  // LT keeps dst, else isrc
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_min4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr)
+%{ // MinReductionV on the int lanes of a vecX source, combined with the scalar isrc.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);  // T_INT lanes only
+  match(Set dst (MinReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp, KILL cr);  // cmpw in the encoding rewrites the flags
+  format %{ "sminv $tmp, T4S, $vsrc\n\t"
+            "umov $dst, $tmp, S, 0\n\t"
+            "cmpw $dst, $isrc\n\t"
+            "cselw $dst, $dst, $isrc LT\t# min reduction4I"
+  %}
+  ins_encode %{
+    __ sminv($tmp$$FloatRegister, __ T4S, $vsrc$$FloatRegister);  // signed min across the lanes
+    __ umov($dst$$Register, $tmp$$FloatRegister, __ S, 0);        // S lane 0 of tmp into dst
+    __ cmpw($dst$$Register, $isrc$$Register);                     // vector result vs. scalar input
+    __ cselw($dst$$Register, $dst$$Register, $isrc$$Register, Assembler::LT);  // LT keeps dst, else isrc
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_max2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecX tmp, rFlagsReg cr)
+%{ // MaxReductionV on the int lanes of a vecD source; tmp is a full vecX scratch register.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);  // T_INT lanes only
+  match(Set dst (MaxReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp, KILL cr);  // cmpw in the encoding rewrites the flags
+  format %{ "dup $tmp, T2D, $vsrc\n\t"
+            "smaxv $tmp, T4S, $tmp\n\t"
+            "umov $dst, $tmp, S, 0\n\t"
+            "cmpw $dst, $isrc\n\t"
+            "cselw $dst, $dst, $isrc GT\t# max reduction2I"
+  %}
+  ins_encode %{
+    __ dup($tmp$$FloatRegister, __ T2D, $vsrc$$FloatRegister);   // widen to T2D so the T4S form below applies
+    __ smaxv($tmp$$FloatRegister, __ T4S, $tmp$$FloatRegister);  // signed max across the lanes
+    __ umov($dst$$Register, $tmp$$FloatRegister, __ S, 0);       // S lane 0 of tmp into dst
+    __ cmpw($dst$$Register, $isrc$$Register);                    // vector result vs. scalar input
+    __ cselw($dst$$Register, $dst$$Register, $isrc$$Register, Assembler::GT);  // GT keeps dst, else isrc
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_min2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecX tmp, rFlagsReg cr)
+%{ // MinReductionV on the int lanes of a vecD source; tmp is a full vecX scratch register.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);  // T_INT lanes only
+  match(Set dst (MinReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp, KILL cr);  // cmpw in the encoding rewrites the flags
+  format %{ "dup $tmp, T2D, $vsrc\n\t"
+            "sminv $tmp, T4S, $tmp\n\t"
+            "umov $dst, $tmp, S, 0\n\t"
+            "cmpw $dst, $isrc\n\t"
+            "cselw $dst, $dst, $isrc LT\t# min reduction2I"
+  %}
+  ins_encode %{
+    __ dup($tmp$$FloatRegister, __ T2D, $vsrc$$FloatRegister);   // widen to T2D so the T4S form below applies
+    __ sminv($tmp$$FloatRegister, __ T4S, $tmp$$FloatRegister);  // signed min across the lanes
+    __ umov($dst$$Register, $tmp$$FloatRegister, __ S, 0);       // S lane 0 of tmp into dst
+    __ cmpw($dst$$Register, $isrc$$Register);                    // vector result vs. scalar input
+    __ cselw($dst$$Register, $dst$$Register, $isrc$$Register, Assembler::LT);  // LT keeps dst, else isrc
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_max2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp, rFlagsReg cr)
+%{ // MaxReductionV on two long lanes, done with scalar compare/select on each lane.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);  // T_LONG lanes only
+  match(Set dst (MaxReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp, KILL cr);  // both cmp instructions rewrite the flags
+  format %{ "umov $tmp, $vsrc, D, 0\n\t"
+            "cmp $isrc,$tmp\n\t"
+            "csel $dst, $isrc, $tmp GT\n\t"
+            "umov $tmp, $vsrc, D, 1\n\t"
+            "cmp $dst, $tmp\n\t"
+            "csel $dst, $dst, $tmp GT\t# max reduction2L"
+  %}
+  ins_encode %{
+    __ umov($tmp$$Register, $vsrc$$FloatRegister, __ D, 0);                   // lane 0
+    __ cmp($isrc$$Register, $tmp$$Register);                                  // scalar input vs. lane 0
+    __ csel($dst$$Register, $isrc$$Register, $tmp$$Register, Assembler::GT);  // larger of the two
+    __ umov($tmp$$Register, $vsrc$$FloatRegister, __ D, 1);                   // lane 1
+    __ cmp($dst$$Register, $tmp$$Register);                                   // running max vs. lane 1
+    __ csel($dst$$Register, $dst$$Register, $tmp$$Register, Assembler::GT);   // larger of the two
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_min2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp, rFlagsReg cr)
+%{ // MinReductionV on two long lanes, done with scalar compare/select on each lane.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);  // T_LONG lanes only
+  match(Set dst (MinReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp, KILL cr);  // both cmp instructions rewrite the flags
+  format %{ "umov $tmp, $vsrc, D, 0\n\t"
+            "cmp $isrc,$tmp\n\t"
+            "csel $dst, $isrc, $tmp LT\n\t"
+            "umov $tmp, $vsrc, D, 1\n\t"
+            "cmp $dst, $tmp\n\t"
+            "csel $dst, $dst, $tmp LT\t# min reduction2L"
+  %}
+  ins_encode %{
+    __ umov($tmp$$Register, $vsrc$$FloatRegister, __ D, 0);                   // lane 0
+    __ cmp($isrc$$Register, $tmp$$Register);                                  // scalar input vs. lane 0
+    __ csel($dst$$Register, $isrc$$Register, $tmp$$Register, Assembler::LT);  // smaller of the two
+    __ umov($tmp$$Register, $vsrc$$FloatRegister, __ D, 1);                   // lane 1
+    __ cmp($dst$$Register, $tmp$$Register);                                   // running min vs. lane 1
+    __ csel($dst$$Register, $dst$$Register, $tmp$$Register, Assembler::LT);   // smaller of the two
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_and8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
+%{ // AndReductionV on the byte lanes of a vecD source, folded in general registers.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);  // T_BYTE lanes only
+  match(Set dst (AndReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp);  // tmp holds the low word while dst holds the high word
+  format %{ "umov $tmp, $vsrc, S, 0\n\t"
+            "umov $dst, $vsrc, S, 1\n\t"
+            "andw $dst, $dst, $tmp\n\t"
+            "andw $dst, $dst, $dst, LSR #16\n\t"
+            "andw $dst, $dst, $dst, LSR #8\n\t"
+            "andw $dst, $isrc, $dst\n\t"
+            "sxtb $dst, $dst\t# and reduction8B"
+  %}
+  ins_encode %{
+    __ umov($tmp$$Register, $vsrc$$FloatRegister, __ S, 0);                       // S lane 0
+    __ umov($dst$$Register, $vsrc$$FloatRegister, __ S, 1);                       // S lane 1
+    __ andw($dst$$Register, $dst$$Register, $tmp$$Register);                      // combine the two words
+    __ andw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);  // fold upper half onto lower
+    __ andw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8);   // fold byte 1 onto byte 0
+    __ andw($dst$$Register, $isrc$$Register, $dst$$Register);                     // bring in the scalar input
+    __ sxtb($dst$$Register, $dst$$Register);                                      // result is the signed low byte
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_orr8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
+%{ // OrReductionV on the byte lanes of a vecD source, folded in general registers.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);  // T_BYTE lanes only
+  match(Set dst (OrReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp);  // tmp holds the low word while dst holds the high word
+  format %{ "umov $tmp, $vsrc, S, 0\n\t"
+            "umov $dst, $vsrc, S, 1\n\t"
+            "orrw $dst, $dst, $tmp\n\t"
+            "orrw $dst, $dst, $dst, LSR #16\n\t"
+            "orrw $dst, $dst, $dst, LSR #8\n\t"
+            "orrw $dst, $isrc, $dst\n\t"
+            "sxtb $dst, $dst\t# orr reduction8B"
+  %}
+  ins_encode %{
+    __ umov($tmp$$Register, $vsrc$$FloatRegister, __ S, 0);                       // S lane 0
+    __ umov($dst$$Register, $vsrc$$FloatRegister, __ S, 1);                       // S lane 1
+    __ orrw($dst$$Register, $dst$$Register, $tmp$$Register);                      // combine the two words
+    __ orrw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);  // fold upper half onto lower
+    __ orrw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8);   // fold byte 1 onto byte 0
+    __ orrw($dst$$Register, $isrc$$Register, $dst$$Register);                     // bring in the scalar input
+    __ sxtb($dst$$Register, $dst$$Register);                                      // result is the signed low byte
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_eor8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
+%{ // XorReductionV on the byte lanes of a vecD source, folded in general registers.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);  // T_BYTE lanes only
+  match(Set dst (XorReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp);  // tmp holds the low word while dst holds the high word
+  format %{ "umov $tmp, $vsrc, S, 0\n\t"
+            "umov $dst, $vsrc, S, 1\n\t"
+            "eorw $dst, $dst, $tmp\n\t"
+            "eorw $dst, $dst, $dst, LSR #16\n\t"
+            "eorw $dst, $dst, $dst, LSR #8\n\t"
+            "eorw $dst, $isrc, $dst\n\t"
+            "sxtb $dst, $dst\t# eor reduction8B"
+  %}
+  ins_encode %{
+    __ umov($tmp$$Register, $vsrc$$FloatRegister, __ S, 0);                       // S lane 0
+    __ umov($dst$$Register, $vsrc$$FloatRegister, __ S, 1);                       // S lane 1
+    __ eorw($dst$$Register, $dst$$Register, $tmp$$Register);                      // combine the two words
+    __ eorw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);  // fold upper half onto lower
+    __ eorw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8);   // fold byte 1 onto byte 0
+    __ eorw($dst$$Register, $isrc$$Register, $dst$$Register);                     // bring in the scalar input
+    __ sxtb($dst$$Register, $dst$$Register);                                      // result is the signed low byte
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_and16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
+%{ // AndReductionV on the byte lanes of a vecX source, folded in general registers.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);  // T_BYTE lanes only
+  match(Set dst (AndReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp);  // tmp holds the low half while dst holds the high half
+  format %{ "umov $tmp, $vsrc, D, 0\n\t"
+            "umov $dst, $vsrc, D, 1\n\t"
+            "andr $dst, $dst, $tmp\n\t"
+            "andr $dst, $dst, $dst, LSR #32\n\t"
+            "andw $dst, $dst, $dst, LSR #16\n\t"
+            "andw $dst, $dst, $dst, LSR #8\n\t"
+            "andw $dst, $isrc, $dst\n\t"
+            "sxtb $dst, $dst\t# and reduction16B"
+  %}
+  ins_encode %{
+    __ umov($tmp$$Register, $vsrc$$FloatRegister, __ D, 0);                       // D lane 0
+    __ umov($dst$$Register, $vsrc$$FloatRegister, __ D, 1);                       // D lane 1
+    __ andr($dst$$Register, $dst$$Register, $tmp$$Register);                      // combine the two halves
+    __ andr($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);  // fold upper word onto lower
+    __ andw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);  // fold upper half onto lower
+    __ andw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8);   // fold byte 1 onto byte 0
+    __ andw($dst$$Register, $isrc$$Register, $dst$$Register);                     // bring in the scalar input
+    __ sxtb($dst$$Register, $dst$$Register);                                      // result is the signed low byte
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_orr16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
+%{ // OrReductionV on the byte lanes of a vecX source, folded in general registers.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);  // T_BYTE lanes only
+  match(Set dst (OrReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp);  // tmp holds the low half while dst holds the high half
+  format %{ "umov $tmp, $vsrc, D, 0\n\t"
+            "umov $dst, $vsrc, D, 1\n\t"
+            "orr $dst, $dst, $tmp\n\t"
+            "orr $dst, $dst, $dst, LSR #32\n\t"
+            "orrw $dst, $dst, $dst, LSR #16\n\t"
+            "orrw $dst, $dst, $dst, LSR #8\n\t"
+            "orrw $dst, $isrc, $dst\n\t"
+            "sxtb $dst, $dst\t# orr reduction16B"
+  %}
+  ins_encode %{
+    __ umov($tmp$$Register, $vsrc$$FloatRegister, __ D, 0);                       // D lane 0
+    __ umov($dst$$Register, $vsrc$$FloatRegister, __ D, 1);                       // D lane 1
+    __ orr($dst$$Register, $dst$$Register, $tmp$$Register);                       // combine the two halves
+    __ orr($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);   // fold upper word onto lower
+    __ orrw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);  // fold upper half onto lower
+    __ orrw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8);   // fold byte 1 onto byte 0
+    __ orrw($dst$$Register, $isrc$$Register, $dst$$Register);                     // bring in the scalar input
+    __ sxtb($dst$$Register, $dst$$Register);                                      // result is the signed low byte
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_eor16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
+%{ // XorReductionV on the byte lanes of a vecX source, folded in general registers.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);  // T_BYTE lanes only
+  match(Set dst (XorReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp);  // tmp holds the low half while dst holds the high half
+  format %{ "umov $tmp, $vsrc, D, 0\n\t"
+            "umov $dst, $vsrc, D, 1\n\t"
+            "eor $dst, $dst, $tmp\n\t"
+            "eor $dst, $dst, $dst, LSR #32\n\t"
+            "eorw $dst, $dst, $dst, LSR #16\n\t"
+            "eorw $dst, $dst, $dst, LSR #8\n\t"
+            "eorw $dst, $isrc, $dst\n\t"
+            "sxtb $dst, $dst\t# eor reduction16B"
+  %}
+  ins_encode %{
+    __ umov($tmp$$Register, $vsrc$$FloatRegister, __ D, 0);                       // D lane 0
+    __ umov($dst$$Register, $vsrc$$FloatRegister, __ D, 1);                       // D lane 1
+    __ eor($dst$$Register, $dst$$Register, $tmp$$Register);                       // combine the two halves
+    __ eor($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);   // fold upper word onto lower
+    __ eorw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);  // fold upper half onto lower
+    __ eorw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8);   // fold byte 1 onto byte 0
+    __ eorw($dst$$Register, $isrc$$Register, $dst$$Register);                     // bring in the scalar input
+    __ sxtb($dst$$Register, $dst$$Register);                                      // result is the signed low byte
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_and4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
+%{ // AndReductionV on the short lanes of a vecD source, folded in general registers.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);  // T_SHORT lanes only
+  match(Set dst (AndReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp);  // tmp holds the low word while dst holds the high word
+  format %{ "umov $tmp, $vsrc, S, 0\n\t"
+            "umov $dst, $vsrc, S, 1\n\t"
+            "andw $dst, $dst, $tmp\n\t"
+            "andw $dst, $dst, $dst, LSR #16\n\t"
+            "andw $dst, $isrc, $dst\n\t"
+            "sxth $dst, $dst\t# and reduction4S"
+  %}
+  ins_encode %{
+    __ umov($tmp$$Register, $vsrc$$FloatRegister, __ S, 0);                       // S lane 0
+    __ umov($dst$$Register, $vsrc$$FloatRegister, __ S, 1);                       // S lane 1
+    __ andw($dst$$Register, $dst$$Register, $tmp$$Register);                      // combine the two words
+    __ andw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);  // fold upper half onto lower
+    __ andw($dst$$Register, $isrc$$Register, $dst$$Register);                     // bring in the scalar input
+    __ sxth($dst$$Register, $dst$$Register);                                      // result is the signed low half
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_orr4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
+%{ // OrReductionV on the short lanes of a vecD source, folded in general registers.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);  // T_SHORT lanes only
+  match(Set dst (OrReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp);  // tmp holds the low word while dst holds the high word
+  format %{ "umov $tmp, $vsrc, S, 0\n\t"
+            "umov $dst, $vsrc, S, 1\n\t"
+            "orrw $dst, $dst, $tmp\n\t"
+            "orrw $dst, $dst, $dst, LSR #16\n\t"
+            "orrw $dst, $isrc, $dst\n\t"
+            "sxth $dst, $dst\t# orr reduction4S"
+  %}
+  ins_encode %{
+    __ umov($tmp$$Register, $vsrc$$FloatRegister, __ S, 0);                       // S lane 0
+    __ umov($dst$$Register, $vsrc$$FloatRegister, __ S, 1);                       // S lane 1
+    __ orrw($dst$$Register, $dst$$Register, $tmp$$Register);                      // combine the two words
+    __ orrw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);  // fold upper half onto lower
+    __ orrw($dst$$Register, $isrc$$Register, $dst$$Register);                     // bring in the scalar input
+    __ sxth($dst$$Register, $dst$$Register);                                      // result is the signed low half
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_eor4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
+%{ // XorReductionV on the short lanes of a vecD source, folded in general registers.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);  // T_SHORT lanes only
+  match(Set dst (XorReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp);  // tmp holds the low word while dst holds the high word
+  format %{ "umov $tmp, $vsrc, S, 0\n\t"
+            "umov $dst, $vsrc, S, 1\n\t"
+            "eorw $dst, $dst, $tmp\n\t"
+            "eorw $dst, $dst, $dst, LSR #16\n\t"
+            "eorw $dst, $isrc, $dst\n\t"
+            "sxth $dst, $dst\t# eor reduction4S"
+  %}
+  ins_encode %{
+    __ umov($tmp$$Register, $vsrc$$FloatRegister, __ S, 0);                       // S lane 0
+    __ umov($dst$$Register, $vsrc$$FloatRegister, __ S, 1);                       // S lane 1
+    __ eorw($dst$$Register, $dst$$Register, $tmp$$Register);                      // combine the two words
+    __ eorw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);  // fold upper half onto lower
+    __ eorw($dst$$Register, $isrc$$Register, $dst$$Register);                     // bring in the scalar input
+    __ sxth($dst$$Register, $dst$$Register);                                      // result is the signed low half
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_and8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
+%{ // AndReductionV on the short lanes of a vecX source, folded in general registers.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);  // T_SHORT lanes only
+  match(Set dst (AndReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp);  // tmp holds the low half while dst holds the high half
+  format %{ "umov $tmp, $vsrc, D, 0\n\t"
+            "umov $dst, $vsrc, D, 1\n\t"
+            "andr $dst, $dst, $tmp\n\t"
+            "andr $dst, $dst, $dst, LSR #32\n\t"
+            "andw $dst, $dst, $dst, LSR #16\n\t"
+            "andw $dst, $isrc, $dst\n\t"
+            "sxth $dst, $dst\t# and reduction8S"
+  %}
+  ins_encode %{
+    __ umov($tmp$$Register, $vsrc$$FloatRegister, __ D, 0);                       // D lane 0
+    __ umov($dst$$Register, $vsrc$$FloatRegister, __ D, 1);                       // D lane 1
+    __ andr($dst$$Register, $dst$$Register, $tmp$$Register);                      // combine the two halves
+    __ andr($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);  // fold upper word onto lower
+    __ andw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);  // fold upper half onto lower
+    __ andw($dst$$Register, $isrc$$Register, $dst$$Register);                     // bring in the scalar input
+    __ sxth($dst$$Register, $dst$$Register);                                      // result is the signed low half
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_orr8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
+%{ // OrReductionV on the short lanes of a vecX source, folded in general registers.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);  // T_SHORT lanes only
+  match(Set dst (OrReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp);  // tmp holds the low half while dst holds the high half
+  format %{ "umov $tmp, $vsrc, D, 0\n\t"
+            "umov $dst, $vsrc, D, 1\n\t"
+            "orr $dst, $dst, $tmp\n\t"
+            "orr $dst, $dst, $dst, LSR #32\n\t"
+            "orrw $dst, $dst, $dst, LSR #16\n\t"
+            "orrw $dst, $isrc, $dst\n\t"
+            "sxth $dst, $dst\t# orr reduction8S"
+  %}
+  ins_encode %{
+    __ umov($tmp$$Register, $vsrc$$FloatRegister, __ D, 0);                       // D lane 0
+    __ umov($dst$$Register, $vsrc$$FloatRegister, __ D, 1);                       // D lane 1
+    __ orr($dst$$Register, $dst$$Register, $tmp$$Register);                       // combine the two halves
+    __ orr($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);   // fold upper word onto lower
+    __ orrw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);  // fold upper half onto lower
+    __ orrw($dst$$Register, $isrc$$Register, $dst$$Register);                     // bring in the scalar input
+    __ sxth($dst$$Register, $dst$$Register);                                      // result is the signed low half
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_eor8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
+%{ // XorReductionV on the short lanes of a vecX source, folded in general registers.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);  // T_SHORT lanes only
+  match(Set dst (XorReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp);  // tmp holds the low half while dst holds the high half
+  format %{ "umov $tmp, $vsrc, D, 0\n\t"
+            "umov $dst, $vsrc, D, 1\n\t"
+            "eor $dst, $dst, $tmp\n\t"
+            "eor $dst, $dst, $dst, LSR #32\n\t"
+            "eorw $dst, $dst, $dst, LSR #16\n\t"
+            "eorw $dst, $isrc, $dst\n\t"
+            "sxth $dst, $dst\t# eor reduction8S"
+  %}
+  ins_encode %{
+    __ umov($tmp$$Register, $vsrc$$FloatRegister, __ D, 0);                       // D lane 0
+    __ umov($dst$$Register, $vsrc$$FloatRegister, __ D, 1);                       // D lane 1
+    __ eor($dst$$Register, $dst$$Register, $tmp$$Register);                       // combine the two halves
+    __ eor($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);   // fold upper word onto lower
+    __ eorw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);  // fold upper half onto lower
+    __ eorw($dst$$Register, $isrc$$Register, $dst$$Register);                     // bring in the scalar input
+    __ sxth($dst$$Register, $dst$$Register);                                      // result is the signed low half
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_and2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
+%{ // AndReductionV on the two int lanes of a vecD source: dst = isrc & lane 0 & lane 1.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);  // T_INT lanes only
+  match(Set dst (AndReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp);  // tmp carries each lane into the scalar op
+  format %{ "umov $tmp, $vsrc, S, 0\n\t"
+            "andw $dst, $tmp, $isrc\n\t"
+            "umov $tmp, $vsrc, S, 1\n\t"
+            "andw $dst, $tmp, $dst\t# and reduction2I"
+  %}
+  ins_encode %{
+    __ umov(as_Register($tmp$$reg), $vsrc$$FloatRegister, __ S, 0);                    // lane 0
+    __ andw(as_Register($dst$$reg), as_Register($tmp$$reg), as_Register($isrc$$reg));  // lane 0 with scalar input
+    __ umov(as_Register($tmp$$reg), $vsrc$$FloatRegister, __ S, 1);                    // lane 1
+    __ andw(as_Register($dst$$reg), as_Register($tmp$$reg), as_Register($dst$$reg));   // lane 1 with running value
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_orr2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
+%{ // OrReductionV on the two int lanes of a vecD source: dst = isrc | lane 0 | lane 1.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);  // T_INT lanes only
+  match(Set dst (OrReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp);  // tmp carries each lane into the scalar op
+  format %{ "umov $tmp, $vsrc, S, 0\n\t"
+            "orrw $dst, $tmp, $isrc\n\t"
+            "umov $tmp, $vsrc, S, 1\n\t"
+            "orrw $dst, $tmp, $dst\t# orr reduction2I"
+  %}
+  ins_encode %{
+    __ umov(as_Register($tmp$$reg), $vsrc$$FloatRegister, __ S, 0);                    // lane 0
+    __ orrw(as_Register($dst$$reg), as_Register($tmp$$reg), as_Register($isrc$$reg));  // lane 0 with scalar input
+    __ umov(as_Register($tmp$$reg), $vsrc$$FloatRegister, __ S, 1);                    // lane 1
+    __ orrw(as_Register($dst$$reg), as_Register($tmp$$reg), as_Register($dst$$reg));   // lane 1 with running value
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_eor2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
+%{ // XorReductionV on the two int lanes of a vecD source: dst = isrc ^ lane 0 ^ lane 1.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);  // T_INT lanes only
+  match(Set dst (XorReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp);  // tmp carries each lane into the scalar op
+  format %{ "umov $tmp, $vsrc, S, 0\n\t"
+            "eorw $dst, $tmp, $isrc\n\t"
+            "umov $tmp, $vsrc, S, 1\n\t"
+            "eorw $dst, $tmp, $dst\t# eor reduction2I"
+  %}
+  ins_encode %{
+    __ umov(as_Register($tmp$$reg), $vsrc$$FloatRegister, __ S, 0);                    // lane 0
+    __ eorw(as_Register($dst$$reg), as_Register($tmp$$reg), as_Register($isrc$$reg));  // lane 0 with scalar input
+    __ umov(as_Register($tmp$$reg), $vsrc$$FloatRegister, __ S, 1);                    // lane 1
+    __ eorw(as_Register($dst$$reg), as_Register($tmp$$reg), as_Register($dst$$reg));   // lane 1 with running value
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_and4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
+%{ // AndReductionV on the int lanes of a vecX source, folded in general registers.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);  // T_INT lanes only
+  match(Set dst (AndReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp);  // tmp holds the low half while dst holds the high half
+  format %{ "umov $tmp, $vsrc, D, 0\n\t"
+            "umov $dst, $vsrc, D, 1\n\t"
+            "andr $dst, $dst, $tmp\n\t"
+            "andr $dst, $dst, $dst, LSR #32\n\t"
+            "andw $dst, $isrc, $dst\t# and reduction4I"
+  %}
+  ins_encode %{
+    __ umov($tmp$$Register, $vsrc$$FloatRegister, __ D, 0);                       // D lane 0
+    __ umov($dst$$Register, $vsrc$$FloatRegister, __ D, 1);                       // D lane 1
+    __ andr($dst$$Register, $dst$$Register, $tmp$$Register);                      // combine the two halves
+    __ andr($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);  // fold upper word onto lower
+    __ andw($dst$$Register, $isrc$$Register, $dst$$Register);                     // bring in the scalar input
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_orr4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
+%{ // OrReductionV on the int lanes of a vecX source, folded in general registers.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);  // T_INT lanes only
+  match(Set dst (OrReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp);  // tmp holds the low half while dst holds the high half
+  format %{ "umov $tmp, $vsrc, D, 0\n\t"
+            "umov $dst, $vsrc, D, 1\n\t"
+            "orr $dst, $dst, $tmp\n\t"
+            "orr $dst, $dst, $dst, LSR #32\n\t"
+            "orrw $dst, $isrc, $dst\t# orr reduction4I"
+  %}
+  ins_encode %{
+    __ umov($tmp$$Register, $vsrc$$FloatRegister, __ D, 0);                      // D lane 0
+    __ umov($dst$$Register, $vsrc$$FloatRegister, __ D, 1);                      // D lane 1
+    __ orr($dst$$Register, $dst$$Register, $tmp$$Register);                      // combine the two halves
+    __ orr($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);  // fold upper word onto lower
+    __ orrw($dst$$Register, $isrc$$Register, $dst$$Register);                    // bring in the scalar input
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_eor4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
+%{ // XorReductionV on the int lanes of a vecX source, folded in general registers.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);  // T_INT lanes only
+  match(Set dst (XorReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp);  // tmp holds the low half while dst holds the high half
+  format %{ "umov $tmp, $vsrc, D, 0\n\t"
+            "umov $dst, $vsrc, D, 1\n\t"
+            "eor $dst, $dst, $tmp\n\t"
+            "eor $dst, $dst, $dst, LSR #32\n\t"
+            "eorw $dst, $isrc, $dst\t# eor reduction4I"
+  %}
+  ins_encode %{
+    __ umov($tmp$$Register, $vsrc$$FloatRegister, __ D, 0);                      // D lane 0
+    __ umov($dst$$Register, $vsrc$$FloatRegister, __ D, 1);                      // D lane 1
+    __ eor($dst$$Register, $dst$$Register, $tmp$$Register);                      // combine the two halves
+    __ eor($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);  // fold upper word onto lower
+    __ eorw($dst$$Register, $isrc$$Register, $dst$$Register);                    // bring in the scalar input
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct reduce_and2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp)
+%{ // AndReductionV on the two long lanes of a vecX source: dst = isrc & lane 0 & lane 1.
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);  // T_LONG lanes only
+  match(Set dst (AndReductionV isrc vsrc));
+  ins_cost(INSN_COST);
+  effect(TEMP_DEF dst, TEMP tmp);  // tmp carries each lane into the scalar op
+  format %{ "umov $tmp, $vsrc, D, 0\n\t"
+            "andr $dst, $isrc, $tmp\n\t"
+            "umov $tmp, $vsrc, D, 1\n\t"
+            "andr $dst, $dst, $tmp\t# and reduction2L"
+  %}
+  ins_encode %{
+    __ umov(as_Register($tmp$$reg), $vsrc$$FloatRegister, __ D, 0);                    // lane 0
+    __ andr(as_Register($dst$$reg), as_Register($isrc$$reg), as_Register($tmp$$reg));  // scalar input with lane 0
+    __ umov(as_Register($tmp$$reg), $vsrc$$FloatRegister, __ D, 1);                    // lane 1
+    __ andr(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($tmp$$reg));   // running value with lane 1
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// OR-reduction over a vecX vector of T_LONG elements, combined with the scalar isrc.
+// Each doubleword lane is moved to tmp in turn and OR-ed into dst, starting
+// from isrc. dst is TEMP_DEF and tmp is a TEMP scratch register.
+instruct reduce_orr2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp)
+%{
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+  match(Set dst (OrReductionV isrc vsrc));
+  effect(TEMP_DEF dst, TEMP tmp);
+  ins_cost(INSN_COST);
+  format %{ "umov $tmp, $vsrc, D, 0\n\t"
+            "orr $dst, $isrc, $tmp\n\t"
+            "umov $tmp, $vsrc, D, 1\n\t"
+            "orr $dst, $dst, $tmp\t# orr reduction2L"
+  %}
+  ins_encode %{
+    Register      acc     = $dst$$Register;
+    Register      scratch = $tmp$$Register;
+    FloatRegister vec     = as_FloatRegister($vsrc$$reg);
+    // Lane 0 combined with the scalar input.
+    __ umov(scratch, vec, __ D, 0);
+    __ orr (acc, $isrc$$Register, scratch);
+    // Lane 1 combined with the running result.
+    __ umov(scratch, vec, __ D, 1);
+    __ orr (acc, acc, scratch);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// XOR-reduction over a vecX vector of T_LONG elements, combined with the scalar isrc.
+// Each doubleword lane is moved to tmp in turn and XOR-ed into dst, starting
+// from isrc. dst is TEMP_DEF and tmp is a TEMP scratch register.
+instruct reduce_eor2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp)
+%{
+  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+  match(Set dst (XorReductionV isrc vsrc));
+  effect(TEMP_DEF dst, TEMP tmp);
+  ins_cost(INSN_COST);
+  format %{ "umov $tmp, $vsrc, D, 0\n\t"
+            "eor $dst, $isrc, $tmp\n\t"
+            "umov $tmp, $vsrc, D, 1\n\t"
+            "eor $dst, $dst, $tmp\t# eor reduction2L"
+  %}
+  ins_encode %{
+    Register      acc     = $dst$$Register;
+    Register      scratch = $tmp$$Register;
+    FloatRegister vec     = as_FloatRegister($vsrc$$reg);
+    // Lane 0 combined with the scalar input.
+    __ umov(scratch, vec, __ D, 0);
+    __ eor (acc, $isrc$$Register, scratch);
+    // Lane 1 combined with the running result.
+    __ umov(scratch, vec, __ D, 1);
+    __ eor (acc, acc, scratch);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// ------------------------------ Vector insert ---------------------------------
+
+// VectorInsert for T_BYTE vectors held in vecD operands (T8B arrangement).
+// The source vector is copied to dst unless both already share a register,
+// then lane idx of dst is overwritten with the general-register value val.
+instruct insert8B(vecD dst, vecD src, iRegIorL2I val, immI idx)
+%{
+  match(Set dst (VectorInsert (Binary src val) idx));
+  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+  ins_cost(INSN_COST);
+  format %{ "orr $dst, T8B, $src, $src\n\t"
+            "mov $dst, T8B, $idx, $val\t# insert into vector(8B)" %}
+  ins_encode %{
+    FloatRegister result = as_FloatRegister($dst$$reg);
+    FloatRegister input  = as_FloatRegister($src$$reg);
+    if (result != input) {
+      // ORR of a register with itself acts as a vector register copy.
+      __ orr(result, __ T8B, input, input);
+    }
+    __ mov(result, __ T8B, $idx$$constant, $val$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// VectorInsert for T_BYTE vectors held in vecX operands (T16B arrangement).
+// The source vector is copied to dst unless both already share a register,
+// then lane idx of dst is overwritten with the general-register value val.
+instruct insert16B(vecX dst, vecX src, iRegIorL2I val, immI idx)
+%{
+  match(Set dst (VectorInsert (Binary src val) idx));
+  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+  ins_cost(INSN_COST);
+  format %{ "orr $dst, T16B, $src, $src\n\t"
+            "mov $dst, T16B, $idx, $val\t# insert into vector(16B)" %}
+  ins_encode %{
+    FloatRegister result = as_FloatRegister($dst$$reg);
+    FloatRegister input  = as_FloatRegister($src$$reg);
+    if (result != input) {
+      // ORR of a register with itself acts as a vector register copy.
+      __ orr(result, __ T16B, input, input);
+    }
+    __ mov(result, __ T16B, $idx$$constant, $val$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// VectorInsert for T_SHORT vectors held in vecD operands (T4H arrangement).
+// The source vector is copied to dst unless both already share a register,
+// then lane idx of dst is overwritten with the general-register value val.
+instruct insert4S(vecD dst, vecD src, iRegIorL2I val, immI idx)
+%{
+  match(Set dst (VectorInsert (Binary src val) idx));
+  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+  ins_cost(INSN_COST);
+  format %{ "orr $dst, T8B, $src, $src\n\t"
+            "mov $dst, T4H, $idx, $val\t# insert into vector(4S)" %}
+  ins_encode %{
+    FloatRegister result = as_FloatRegister($dst$$reg);
+    FloatRegister input  = as_FloatRegister($src$$reg);
+    if (result != input) {
+      // The copy is a bitwise ORR, so the byte arrangement covers the whole register.
+      __ orr(result, __ T8B, input, input);
+    }
+    __ mov(result, __ T4H, $idx$$constant, $val$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// VectorInsert for T_SHORT vectors held in vecX operands (T8H arrangement).
+// The source vector is copied to dst unless both already share a register,
+// then lane idx of dst is overwritten with the general-register value val.
+instruct insert8S(vecX dst, vecX src, iRegIorL2I val, immI idx)
+%{
+  match(Set dst (VectorInsert (Binary src val) idx));
+  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+  ins_cost(INSN_COST);
+  format %{ "orr $dst, T16B, $src, $src\n\t"
+            "mov $dst, T8H, $idx, $val\t# insert into vector(8S)" %}
+  ins_encode %{
+    FloatRegister result = as_FloatRegister($dst$$reg);
+    FloatRegister input  = as_FloatRegister($src$$reg);
+    if (result != input) {
+      // The copy is a bitwise ORR, so the byte arrangement covers the whole register.
+      __ orr(result, __ T16B, input, input);
+    }
+    __ mov(result, __ T8H, $idx$$constant, $val$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// VectorInsert for T_INT vectors held in vecD operands (T2S arrangement).
+// The source vector is copied to dst unless both already share a register,
+// then lane idx of dst is overwritten with the general-register value val.
+instruct insert2I(vecD dst, vecD src, iRegIorL2I val, immI idx)
+%{
+  match(Set dst (VectorInsert (Binary src val) idx));
+  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_INT);
+  ins_cost(INSN_COST);
+  format %{ "orr $dst, T8B, $src, $src\n\t"
+            "mov $dst, T2S, $idx, $val\t# insert into vector(2I)" %}
+  ins_encode %{
+    FloatRegister result = as_FloatRegister($dst$$reg);
+    FloatRegister input  = as_FloatRegister($src$$reg);
+    if (result != input) {
+      // The copy is a bitwise ORR, so the byte arrangement covers the whole register.
+      __ orr(result, __ T8B, input, input);
+    }
+    __ mov(result, __ T2S, $idx$$constant, $val$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// VectorInsert for T_INT vectors held in vecX operands (T4S arrangement).
+// The source vector is copied to dst unless both already share a register,
+// then lane idx of dst is overwritten with the general-register value val.
+instruct insert4I(vecX dst, vecX src, iRegIorL2I val, immI idx)
+%{
+  match(Set dst (VectorInsert (Binary src val) idx));
+  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_INT);
+  ins_cost(INSN_COST);
+  format %{ "orr $dst, T16B, $src, $src\n\t"
+            "mov $dst, T4S, $idx, $val\t# insert into vector(4I)" %}
+  ins_encode %{
+    FloatRegister result = as_FloatRegister($dst$$reg);
+    FloatRegister input  = as_FloatRegister($src$$reg);
+    if (result != input) {
+      // The copy is a bitwise ORR, so the byte arrangement covers the whole register.
+      __ orr(result, __ T16B, input, input);
+    }
+    __ mov(result, __ T4S, $idx$$constant, $val$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// VectorInsert for T_LONG vectors held in vecX operands (T2D arrangement).
+// The source vector is copied to dst unless both already share a register,
+// then lane idx of dst is overwritten with the 64-bit general-register value val.
+instruct insert2L(vecX dst, vecX src, iRegL val, immI idx)
+%{
+  match(Set dst (VectorInsert (Binary src val) idx));
+  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+  ins_cost(INSN_COST);
+  format %{ "orr $dst, T16B, $src, $src\n\t"
+            "mov $dst, T2D, $idx, $val\t# insert into vector(2L)" %}
+  ins_encode %{
+    FloatRegister result = as_FloatRegister($dst$$reg);
+    FloatRegister input  = as_FloatRegister($src$$reg);
+    if (result != input) {
+      // The copy is a bitwise ORR, so the byte arrangement covers the whole register.
+      __ orr(result, __ T16B, input, input);
+    }
+    __ mov(result, __ T2D, $idx$$constant, $val$$Register);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// VectorInsert for T_FLOAT vectors held in vecD operands.
+// dst is declared TEMP_DEF; the source vector is always copied to dst, then
+// element 0 of the FP register val is inserted into S lane idx of dst.
+instruct insert2F(vecD dst, vecD src, vRegF val, immI idx)
+%{
+  match(Set dst (VectorInsert (Binary src val) idx));
+  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+  effect(TEMP_DEF dst);
+  ins_cost(INSN_COST);
+  format %{ "orr $dst, T8B, $src, $src\n\t"
+            "ins $dst, S, $val, $idx, 0\t# insert into vector(2F)" %}
+  ins_encode %{
+    FloatRegister result = as_FloatRegister($dst$$reg);
+    FloatRegister input  = as_FloatRegister($src$$reg);
+    FloatRegister scalar = as_FloatRegister($val$$reg);
+    // ORR of a register with itself acts as a vector register copy.
+    __ orr(result, __ T8B, input, input);
+    __ ins(result, __ S, scalar, $idx$$constant, 0);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// VectorInsert for T_FLOAT vectors held in vecX operands.
+// dst is declared TEMP_DEF; the source vector is always copied to dst, then
+// element 0 of the FP register val is inserted into S lane idx of dst.
+instruct insert4F(vecX dst, vecX src, vRegF val, immI idx)
+%{
+  match(Set dst (VectorInsert (Binary src val) idx));
+  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+  effect(TEMP_DEF dst);
+  ins_cost(INSN_COST);
+  format %{ "orr $dst, T16B, $src, $src\n\t"
+            "ins $dst, S, $val, $idx, 0\t# insert into vector(4F)" %}
+  ins_encode %{
+    FloatRegister result = as_FloatRegister($dst$$reg);
+    FloatRegister input  = as_FloatRegister($src$$reg);
+    FloatRegister scalar = as_FloatRegister($val$$reg);
+    // ORR of a register with itself acts as a vector register copy.
+    __ orr(result, __ T16B, input, input);
+    __ ins(result, __ S, scalar, $idx$$constant, 0);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// VectorInsert for T_DOUBLE vectors held in vecX operands.
+// dst is declared TEMP_DEF; the source vector is always copied to dst, then
+// element 0 of the FP register val is inserted into D lane idx of dst.
+instruct insert2D(vecX dst, vecX src, vRegD val, immI idx)
+%{
+  match(Set dst (VectorInsert (Binary src val) idx));
+  predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+  effect(TEMP_DEF dst);
+  ins_cost(INSN_COST);
+  format %{ "orr $dst, T16B, $src, $src\n\t"
+            "ins $dst, D, $val, $idx, 0\t# insert into vector(2D)" %}
+  ins_encode %{
+    FloatRegister result = as_FloatRegister($dst$$reg);
+    FloatRegister input  = as_FloatRegister($src$$reg);
+    FloatRegister scalar = as_FloatRegister($val$$reg);
+    // ORR of a register with itself acts as a vector register copy.
+    __ orr(result, __ T16B, input, input);
+    __ ins(result, __ D, scalar, $idx$$constant, 0);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// ------------------------------ Vector extract ---------------------------------
+
+// ExtractB from a vecD vector of length 8: moves byte lane idx of src into the
+// general register dst with a single SMOV.
+instruct extract8B(iRegINoSp dst, vecD src, immI idx)
+%{
+  match(Set dst (ExtractB src idx));
+  predicate(n->in(1)->bottom_type()->is_vect()->length() == 8);
+  ins_cost(INSN_COST);
+  format %{ "smov $dst, $src, B, $idx\t# extract from vector(8B)" %}
+  ins_encode %{
+    FloatRegister vec = as_FloatRegister($src$$reg);
+    __ smov($dst$$Register, vec, __ B, $idx$$constant);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// ExtractB from a vecX vector of length 16: moves byte lane idx of src into the
+// general register dst with a single SMOV.
+instruct extract16B(iRegINoSp dst, vecX src, immI idx)
+%{
+  match(Set dst (ExtractB src idx));
+  predicate(n->in(1)->bottom_type()->is_vect()->length() == 16);
+  ins_cost(INSN_COST);
+  format %{ "smov $dst, $src, B, $idx\t# extract from vector(16B)" %}
+  ins_encode %{
+    FloatRegister vec = as_FloatRegister($src$$reg);
+    __ smov($dst$$Register, vec, __ B, $idx$$constant);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// ExtractS from a vecD vector of length 4: moves halfword lane idx of src into
+// the general register dst with a single SMOV.
+instruct extract4S(iRegINoSp dst, vecD src, immI idx)
+%{
+  match(Set dst (ExtractS src idx));
+  predicate(n->in(1)->bottom_type()->is_vect()->length() == 4);
+  ins_cost(INSN_COST);
+  format %{ "smov $dst, $src, H, $idx\t# extract from vector(4S)" %}
+  ins_encode %{
+    FloatRegister vec = as_FloatRegister($src$$reg);
+    __ smov($dst$$Register, vec, __ H, $idx$$constant);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// ExtractS from a vecX vector of length 8: moves halfword lane idx of src into
+// the general register dst with a single SMOV.
+instruct extract8S(iRegINoSp dst, vecX src, immI idx)
+%{
+  match(Set dst (ExtractS src idx));
+  predicate(n->in(1)->bottom_type()->is_vect()->length() == 8);
+  ins_cost(INSN_COST);
+  format %{ "smov $dst, $src, H, $idx\t# extract from vector(8S)" %}
+  ins_encode %{
+    FloatRegister vec = as_FloatRegister($src$$reg);
+    __ smov($dst$$Register, vec, __ H, $idx$$constant);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// ExtractI from a vecD vector of length 2: moves word lane idx of src into the
+// general register dst with a single UMOV.
+instruct extract2I(iRegINoSp dst, vecD src, immI idx)
+%{
+  match(Set dst (ExtractI src idx));
+  predicate(n->in(1)->bottom_type()->is_vect()->length() == 2);
+  ins_cost(INSN_COST);
+  format %{ "umov $dst, $src, S, $idx\t# extract from vector(2I)" %}
+  ins_encode %{
+    FloatRegister vec = as_FloatRegister($src$$reg);
+    __ umov($dst$$Register, vec, __ S, $idx$$constant);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// ExtractI from a vecX vector of length 4: moves word lane idx of src into the
+// general register dst with a single UMOV.
+instruct extract4I(iRegINoSp dst, vecX src, immI idx)
+%{
+  match(Set dst (ExtractI src idx));
+  predicate(n->in(1)->bottom_type()->is_vect()->length() == 4);
+  ins_cost(INSN_COST);
+  format %{ "umov $dst, $src, S, $idx\t# extract from vector(4I)" %}
+  ins_encode %{
+    FloatRegister vec = as_FloatRegister($src$$reg);
+    __ umov($dst$$Register, vec, __ S, $idx$$constant);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// ExtractL from a vecX vector of length 2: moves doubleword lane idx of src into
+// the 64-bit general register dst with a single UMOV.
+instruct extract2L(iRegLNoSp dst, vecX src, immI idx)
+%{
+  match(Set dst (ExtractL src idx));
+  predicate(n->in(1)->bottom_type()->is_vect()->length() == 2);
+  ins_cost(INSN_COST);
+  format %{ "umov $dst, $src, D, $idx\t# extract from vector(2L)" %}
+  ins_encode %{
+    FloatRegister vec = as_FloatRegister($src$$reg);
+    __ umov($dst$$Register, vec, __ D, $idx$$constant);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// ExtractF from a vecD vector of length 2: copies S lane idx of src into
+// element 0 of the FP register dst with a single INS.
+instruct extract2F(vRegF dst, vecD src, immI idx)
+%{
+  match(Set dst (ExtractF src idx));
+  predicate(n->in(1)->bottom_type()->is_vect()->length() == 2);
+  ins_cost(INSN_COST);
+  format %{ "ins $dst, S, $src, 0, $idx\t# extract from vector(2F)" %}
+  ins_encode %{
+    FloatRegister out = as_FloatRegister($dst$$reg);
+    FloatRegister vec = as_FloatRegister($src$$reg);
+    __ ins(out, __ S, vec, 0, $idx$$constant);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// ExtractF from a vecX vector of length 4: copies S lane idx of src into
+// element 0 of the FP register dst with a single INS.
+instruct extract4F(vRegF dst, vecX src, immI idx)
+%{
+  match(Set dst (ExtractF src idx));
+  predicate(n->in(1)->bottom_type()->is_vect()->length() == 4);
+  ins_cost(INSN_COST);
+  format %{ "ins $dst, S, $src, 0, $idx\t# extract from vector(4F)" %}
+  ins_encode %{
+    FloatRegister out = as_FloatRegister($dst$$reg);
+    FloatRegister vec = as_FloatRegister($src$$reg);
+    __ ins(out, __ S, vec, 0, $idx$$constant);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// ExtractD from a vecX vector of length 2: copies D lane idx of src into
+// element 0 of the FP register dst with a single INS.
+instruct extract2D(vRegD dst, vecX src, immI idx)
+%{
+  match(Set dst (ExtractD src idx));
+  predicate(n->in(1)->bottom_type()->is_vect()->length() == 2);
+  ins_cost(INSN_COST);
+  format %{ "ins $dst, D, $src, 0, $idx\t# extract from vector(2D)" %}
+  ins_encode %{
+    FloatRegister out = as_FloatRegister($dst$$reg);
+    FloatRegister vec = as_FloatRegister($src$$reg);
+    __ ins(out, __ D, vec, 0, $idx$$constant);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// ------------------------------ Vector comparison ---------------------------------
+
+// VectorMaskCmp with BoolTest::eq for length-8 vectors whose first compared
+// input has T_BYTE elements. Emits a single CMEQ with the T8B arrangement.
+instruct vcmeq8B(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 8 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+  ins_cost(INSN_COST);
+  format %{ "cmeq $dst, $src1, $src2\t# vector cmp (8B)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ cmeq(mask, __ T8B, lhs, rhs);
+  %}
+  ins_pipe(vdop64);
+%}
+
+// VectorMaskCmp with BoolTest::eq for length-16 vectors whose first compared
+// input has T_BYTE elements. Emits a single CMEQ with the T16B arrangement.
+instruct vcmeq16B(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 16 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+  ins_cost(INSN_COST);
+  format %{ "cmeq $dst, $src1, $src2\t# vector cmp (16B)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ cmeq(mask, __ T16B, lhs, rhs);
+  %}
+  ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp with BoolTest::eq for length-4 vectors whose first compared
+// input has T_SHORT elements. Emits a single CMEQ with the T4H arrangement.
+instruct vcmeq4S(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 4 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+  ins_cost(INSN_COST);
+  format %{ "cmeq $dst, $src1, $src2\t# vector cmp (4S)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ cmeq(mask, __ T4H, lhs, rhs);
+  %}
+  ins_pipe(vdop64);
+%}
+
+// VectorMaskCmp with BoolTest::eq for length-8 vectors whose first compared
+// input has T_SHORT elements. Emits a single CMEQ with the T8H arrangement.
+instruct vcmeq8S(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 8 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+  ins_cost(INSN_COST);
+  format %{ "cmeq $dst, $src1, $src2\t# vector cmp (8S)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ cmeq(mask, __ T8H, lhs, rhs);
+  %}
+  ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp with BoolTest::eq for length-2 vectors whose first compared
+// input has T_INT elements. Emits a single CMEQ with the T2S arrangement.
+instruct vcmeq2I(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 2 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+  ins_cost(INSN_COST);
+  format %{ "cmeq $dst, $src1, $src2\t# vector cmp (2I)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ cmeq(mask, __ T2S, lhs, rhs);
+  %}
+  ins_pipe(vdop64);
+%}
+
+// VectorMaskCmp with BoolTest::eq for length-4 vectors whose first compared
+// input has T_INT elements. Emits a single CMEQ with the T4S arrangement.
+instruct vcmeq4I(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 4 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+  ins_cost(INSN_COST);
+  format %{ "cmeq $dst, $src1, $src2\t# vector cmp (4I)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ cmeq(mask, __ T4S, lhs, rhs);
+  %}
+  ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp with BoolTest::eq for length-2 vectors whose first compared
+// input has T_LONG elements. Emits a single CMEQ with the T2D arrangement.
+instruct vcmeq2L(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 2 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+  ins_cost(INSN_COST);
+  format %{ "cmeq $dst, $src1, $src2\t# vector cmp (2L)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ cmeq(mask, __ T2D, lhs, rhs);
+  %}
+  ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp with BoolTest::eq for length-2 vectors whose first compared
+// input has T_FLOAT elements. Emits a single FCMEQ with the T2S arrangement.
+instruct vcmeq2F(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 2 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+  ins_cost(INSN_COST);
+  format %{ "fcmeq $dst, $src1, $src2\t# vector cmp (2F)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ fcmeq(mask, __ T2S, lhs, rhs);
+  %}
+  ins_pipe(vdop64);
+%}
+
+// VectorMaskCmp with BoolTest::eq for length-4 vectors whose first compared
+// input has T_FLOAT elements. Emits a single FCMEQ with the T4S arrangement.
+instruct vcmeq4F(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 4 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+  ins_cost(INSN_COST);
+  format %{ "fcmeq $dst, $src1, $src2\t# vector cmp (4F)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ fcmeq(mask, __ T4S, lhs, rhs);
+  %}
+  ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp with BoolTest::eq for length-2 vectors whose first compared
+// input has T_DOUBLE elements. Emits a single FCMEQ with the T2D arrangement.
+instruct vcmeq2D(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 2 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+  ins_cost(INSN_COST);
+  format %{ "fcmeq $dst, $src1, $src2\t# vector cmp (2D)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ fcmeq(mask, __ T2D, lhs, rhs);
+  %}
+  ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp with BoolTest::gt for length-8 vectors whose first compared
+// input has T_BYTE elements. Emits a single CMGT with the T8B arrangement.
+instruct vcmgt8B(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 8 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+  ins_cost(INSN_COST);
+  format %{ "cmgt $dst, $src1, $src2\t# vector cmp (8B)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ cmgt(mask, __ T8B, lhs, rhs);
+  %}
+  ins_pipe(vdop64);
+%}
+
+// VectorMaskCmp with BoolTest::gt for length-16 vectors whose first compared
+// input has T_BYTE elements. Emits a single CMGT with the T16B arrangement.
+instruct vcmgt16B(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 16 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+  ins_cost(INSN_COST);
+  format %{ "cmgt $dst, $src1, $src2\t# vector cmp (16B)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ cmgt(mask, __ T16B, lhs, rhs);
+  %}
+  ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp with BoolTest::gt for length-4 vectors whose first compared
+// input has T_SHORT elements. Emits a single CMGT with the T4H arrangement.
+instruct vcmgt4S(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 4 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+  ins_cost(INSN_COST);
+  format %{ "cmgt $dst, $src1, $src2\t# vector cmp (4S)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ cmgt(mask, __ T4H, lhs, rhs);
+  %}
+  ins_pipe(vdop64);
+%}
+
+// VectorMaskCmp with BoolTest::gt for length-8 vectors whose first compared
+// input has T_SHORT elements. Emits a single CMGT with the T8H arrangement.
+instruct vcmgt8S(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 8 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+  ins_cost(INSN_COST);
+  format %{ "cmgt $dst, $src1, $src2\t# vector cmp (8S)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ cmgt(mask, __ T8H, lhs, rhs);
+  %}
+  ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp with BoolTest::gt for length-2 vectors whose first compared
+// input has T_INT elements. Emits a single CMGT with the T2S arrangement.
+instruct vcmgt2I(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 2 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+  ins_cost(INSN_COST);
+  format %{ "cmgt $dst, $src1, $src2\t# vector cmp (2I)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ cmgt(mask, __ T2S, lhs, rhs);
+  %}
+  ins_pipe(vdop64);
+%}
+
+// VectorMaskCmp with BoolTest::gt for length-4 vectors whose first compared
+// input has T_INT elements. Emits a single CMGT with the T4S arrangement.
+instruct vcmgt4I(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 4 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+  ins_cost(INSN_COST);
+  format %{ "cmgt $dst, $src1, $src2\t# vector cmp (4I)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ cmgt(mask, __ T4S, lhs, rhs);
+  %}
+  ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp with BoolTest::gt for length-2 vectors whose first compared
+// input has T_LONG elements. Emits a single CMGT with the T2D arrangement.
+instruct vcmgt2L(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 2 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+  ins_cost(INSN_COST);
+  format %{ "cmgt $dst, $src1, $src2\t# vector cmp (2L)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ cmgt(mask, __ T2D, lhs, rhs);
+  %}
+  ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp with BoolTest::gt for length-2 vectors whose first compared
+// input has T_FLOAT elements. Emits a single FCMGT with the T2S arrangement.
+instruct vcmgt2F(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 2 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+  ins_cost(INSN_COST);
+  format %{ "fcmgt $dst, $src1, $src2\t# vector cmp (2F)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ fcmgt(mask, __ T2S, lhs, rhs);
+  %}
+  ins_pipe(vdop64);
+%}
+
+// VectorMaskCmp with BoolTest::gt for length-4 vectors whose first compared
+// input has T_FLOAT elements. Emits a single FCMGT with the T4S arrangement.
+instruct vcmgt4F(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 4 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+  ins_cost(INSN_COST);
+  format %{ "fcmgt $dst, $src1, $src2\t# vector cmp (4F)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ fcmgt(mask, __ T4S, lhs, rhs);
+  %}
+  ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp with BoolTest::gt for length-2 vectors whose first compared
+// input has T_DOUBLE elements. Emits a single FCMGT with the T2D arrangement.
+instruct vcmgt2D(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 2 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+  ins_cost(INSN_COST);
+  format %{ "fcmgt $dst, $src1, $src2\t# vector cmp (2D)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ fcmgt(mask, __ T2D, lhs, rhs);
+  %}
+  ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp with BoolTest::ge for length-8 vectors whose first compared
+// input has T_BYTE elements. Emits a single CMGE with the T8B arrangement.
+instruct vcmge8B(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 8 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+  ins_cost(INSN_COST);
+  format %{ "cmge $dst, $src1, $src2\t# vector cmp (8B)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ cmge(mask, __ T8B, lhs, rhs);
+  %}
+  ins_pipe(vdop64);
+%}
+
+// VectorMaskCmp with BoolTest::ge for length-16 vectors whose first compared
+// input has T_BYTE elements. Emits a single CMGE with the T16B arrangement.
+instruct vcmge16B(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 16 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+  ins_cost(INSN_COST);
+  format %{ "cmge $dst, $src1, $src2\t# vector cmp (16B)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ cmge(mask, __ T16B, lhs, rhs);
+  %}
+  ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp with BoolTest::ge for length-4 vectors whose first compared
+// input has T_SHORT elements. Emits a single CMGE with the T4H arrangement.
+instruct vcmge4S(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 4 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+  ins_cost(INSN_COST);
+  format %{ "cmge $dst, $src1, $src2\t# vector cmp (4S)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ cmge(mask, __ T4H, lhs, rhs);
+  %}
+  ins_pipe(vdop64);
+%}
+
+// VectorMaskCmp with BoolTest::ge for length-8 vectors whose first compared
+// input has T_SHORT elements. Emits a single CMGE with the T8H arrangement.
+instruct vcmge8S(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 8 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+  ins_cost(INSN_COST);
+  format %{ "cmge $dst, $src1, $src2\t# vector cmp (8S)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ cmge(mask, __ T8H, lhs, rhs);
+  %}
+  ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp with BoolTest::ge for length-2 vectors whose first compared
+// input has T_INT elements. Emits a single CMGE with the T2S arrangement.
+instruct vcmge2I(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 2 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+  ins_cost(INSN_COST);
+  format %{ "cmge $dst, $src1, $src2\t# vector cmp (2I)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ cmge(mask, __ T2S, lhs, rhs);
+  %}
+  ins_pipe(vdop64);
+%}
+
+// VectorMaskCmp with BoolTest::ge for length-4 vectors whose first compared
+// input has T_INT elements. Emits a single CMGE with the T4S arrangement.
+instruct vcmge4I(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 4 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+  ins_cost(INSN_COST);
+  format %{ "cmge $dst, $src1, $src2\t# vector cmp (4I)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ cmge(mask, __ T4S, lhs, rhs);
+  %}
+  ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp with BoolTest::ge for length-2 vectors whose first compared
+// input has T_LONG elements. Emits a single CMGE with the T2D arrangement.
+instruct vcmge2L(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 2 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+  ins_cost(INSN_COST);
+  format %{ "cmge $dst, $src1, $src2\t# vector cmp (2L)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ cmge(mask, __ T2D, lhs, rhs);
+  %}
+  ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp with BoolTest::ge for length-2 vectors whose first compared
+// input has T_FLOAT elements. Emits a single FCMGE with the T2S arrangement.
+instruct vcmge2F(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 2 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+  ins_cost(INSN_COST);
+  format %{ "fcmge $dst, $src1, $src2\t# vector cmp (2F)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ fcmge(mask, __ T2S, lhs, rhs);
+  %}
+  ins_pipe(vdop64);
+%}
+
+// VectorMaskCmp with BoolTest::ge for length-4 vectors whose first compared
+// input has T_FLOAT elements. Emits a single FCMGE with the T4S arrangement.
+instruct vcmge4F(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 4 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+  ins_cost(INSN_COST);
+  format %{ "fcmge $dst, $src1, $src2\t# vector cmp (4F)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ fcmge(mask, __ T4S, lhs, rhs);
+  %}
+  ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp with BoolTest::ge for length-2 vectors whose first compared
+// input has T_DOUBLE elements. Emits a single FCMGE with the T2D arrangement.
+instruct vcmge2D(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  predicate(n->as_Vector()->length() == 2 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+  ins_cost(INSN_COST);
+  format %{ "fcmge $dst, $src1, $src2\t# vector cmp (2D)" %}
+  ins_encode %{
+    FloatRegister mask = as_FloatRegister($dst$$reg);
+    FloatRegister lhs  = as_FloatRegister($src1$$reg);
+    FloatRegister rhs  = as_FloatRegister($src2$$reg);
+    __ fcmge(mask, __ T2D, lhs, rhs);
+  %}
+  ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp with BoolTest::ne for length-8 vectors whose first compared
+// input has T_BYTE elements. The rule emits CMEQ (T8B) into dst and then
+// inverts every bit of dst with NOT, turning the equality result into "not equal".
+// The format text lists one instruction per line, with the trailing comment
+// attached to the last instruction.
+instruct vcmne8B(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+  predicate(n->as_Vector()->length() == 8 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  format %{ "cmeq $dst, $src1, $src2\n\t"
+            "not $dst, $dst\t# vector cmp (8B)" %}
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ cmeq(as_FloatRegister($dst$$reg), __ T8B,
+            as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+    // Bitwise inversion of the equality result.
+    __ notr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// VectorMaskCmp with BoolTest::ne for length-16 vectors whose first compared
+// input has T_BYTE elements. The rule emits CMEQ (T16B) into dst and then
+// inverts every bit of dst with NOT, turning the equality result into "not equal".
+// The format text lists one instruction per line, with the trailing comment
+// attached to the last instruction.
+instruct vcmne16B(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+  predicate(n->as_Vector()->length() == 16 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  format %{ "cmeq $dst, $src1, $src2\n\t"
+            "not $dst, $dst\t# vector cmp (16B)" %}
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ cmeq(as_FloatRegister($dst$$reg), __ T16B,
+            as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+    // Bitwise inversion of the equality result.
+    __ notr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// VectorMaskCmp with BoolTest::ne for length-4 vectors whose first compared
+// input has T_SHORT elements. The rule emits CMEQ (T4H) into dst and then
+// inverts every bit of dst with NOT (T8B), turning the equality result into
+// "not equal". The format text lists one instruction per line, with the
+// trailing comment attached to the last instruction.
+instruct vcmne4S(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+  predicate(n->as_Vector()->length() == 4 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  format %{ "cmeq $dst, $src1, $src2\n\t"
+            "not $dst, $dst\t# vector cmp (4S)" %}
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ cmeq(as_FloatRegister($dst$$reg), __ T4H,
+            as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+    // The inversion is bitwise, so it is issued with the byte arrangement.
+    __ notr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// VectorMaskCmp with BoolTest::ne for length-8 vectors whose first compared
+// input has T_SHORT elements. The rule emits CMEQ (T8H) into dst and then
+// inverts every bit of dst with NOT (T16B), turning the equality result into
+// "not equal". The format text lists one instruction per line, with the
+// trailing comment attached to the last instruction.
+instruct vcmne8S(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+  predicate(n->as_Vector()->length() == 8 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  format %{ "cmeq $dst, $src1, $src2\n\t"
+            "not $dst, $dst\t# vector cmp (8S)" %}
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ cmeq(as_FloatRegister($dst$$reg), __ T8H,
+            as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+    // The inversion is bitwise, so it is issued with the byte arrangement.
+    __ notr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// VectorMaskCmp with BoolTest::ne for length-2 vectors whose first compared
+// input has T_INT elements. The rule emits CMEQ (T2S) into dst and then
+// inverts every bit of dst with NOT (T8B), turning the equality result into
+// "not equal". The format text lists one instruction per line, with the
+// trailing comment attached to the last instruction.
+instruct vcmne2I(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+  predicate(n->as_Vector()->length() == 2 &&
+            n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
+            n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+  format %{ "cmeq $dst, $src1, $src2\n\t"
+            "not $dst, $dst\t# vector cmp (2I)" %}
+  ins_cost(INSN_COST);
+  ins_encode %{
+    __ cmeq(as_FloatRegister($dst$$reg), __ T2S,
+            as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+    // The inversion is bitwise, so it is issued with the byte arrangement.
+    __ notr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg));
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+// VectorMaskCmp ne, 4 x T_INT: cmeq (T4S) then notr (T16B); format prints one instruction per line.
+instruct vcmne4I(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmeq $dst, $src1, $src2\t# vector cmp (4I)\n\t"
+ "not $dst, $dst" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmeq($dst$$FloatRegister, __ T4S, $src1$$FloatRegister, $src2$$FloatRegister);
+ __ notr($dst$$FloatRegister, __ T16B, $dst$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// VectorMaskCmp ne, 2 x T_LONG: cmeq (T2D) then notr (T16B); format prints one instruction per line.
+instruct vcmne2L(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmeq $dst, $src1, $src2\t# vector cmp (2L)\n\t"
+ "not $dst, $dst" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmeq($dst$$FloatRegister, __ T2D, $src1$$FloatRegister, $src2$$FloatRegister);
+ __ notr($dst$$FloatRegister, __ T16B, $dst$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// VectorMaskCmp ne, 2 x T_FLOAT: fcmeq (T2S) then notr (T8B); format prints one instruction per line.
+instruct vcmne2F(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "fcmeq $dst, $src1, $src2\t# vector cmp (2F)\n\t"
+ "not $dst, $dst" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ fcmeq($dst$$FloatRegister, __ T2S, $src1$$FloatRegister, $src2$$FloatRegister);
+ __ notr($dst$$FloatRegister, __ T8B, $dst$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// VectorMaskCmp ne, 4 x T_FLOAT: fcmeq (T4S) then notr (T16B); format prints one instruction per line.
+instruct vcmne4F(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "fcmeq $dst, $src1, $src2\t# vector cmp (4F)\n\t"
+ "not $dst, $dst" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ fcmeq($dst$$FloatRegister, __ T4S, $src1$$FloatRegister, $src2$$FloatRegister);
+ __ notr($dst$$FloatRegister, __ T16B, $dst$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// VectorMaskCmp ne, 2 x T_DOUBLE: fcmeq (T2D) then notr (T16B); format prints one instruction per line.
+instruct vcmne2D(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "fcmeq $dst, $src1, $src2\t# vector cmp (2D)\n\t"
+ "not $dst, $dst" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ fcmeq($dst$$FloatRegister, __ T2D, $src1$$FloatRegister, $src2$$FloatRegister);
+ __ notr($dst$$FloatRegister, __ T16B, $dst$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// VectorMaskCmp lt, 8 x T_BYTE: src1 < src2 is emitted as cmgt (T8B) with the sources swapped.
+instruct vcmlt8B(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 8 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ ins_cost(INSN_COST);
+ format %{ "cmgt $dst, $src2, $src1\t# vector cmp (8B)" %}
+ ins_encode %{
+ __ cmgt($dst$$FloatRegister, __ T8B, $src2$$FloatRegister, $src1$$FloatRegister);
+ %}
+ ins_pipe(vdop64);
+%}
+
+// VectorMaskCmp lt, 16 x T_BYTE: src1 < src2 is emitted as cmgt (T16B) with the sources swapped.
+instruct vcmlt16B(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 16 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ ins_cost(INSN_COST);
+ format %{ "cmgt $dst, $src2, $src1\t# vector cmp (16B)" %}
+ ins_encode %{
+ __ cmgt($dst$$FloatRegister, __ T16B, $src2$$FloatRegister, $src1$$FloatRegister);
+ %}
+ ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp lt, 4 x T_SHORT: src1 < src2 is emitted as cmgt (T4H) with the sources swapped.
+instruct vcmlt4S(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ ins_cost(INSN_COST);
+ format %{ "cmgt $dst, $src2, $src1\t# vector cmp (4S)" %}
+ ins_encode %{
+ __ cmgt($dst$$FloatRegister, __ T4H, $src2$$FloatRegister, $src1$$FloatRegister);
+ %}
+ ins_pipe(vdop64);
+%}
+
+// VectorMaskCmp lt, 8 x T_SHORT: src1 < src2 is emitted as cmgt (T8H) with the sources swapped.
+instruct vcmlt8S(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 8 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ ins_cost(INSN_COST);
+ format %{ "cmgt $dst, $src2, $src1\t# vector cmp (8S)" %}
+ ins_encode %{
+ __ cmgt($dst$$FloatRegister, __ T8H, $src2$$FloatRegister, $src1$$FloatRegister);
+ %}
+ ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp lt, 2 x T_INT: src1 < src2 is emitted as cmgt (T2S) with the sources swapped.
+instruct vcmlt2I(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ ins_cost(INSN_COST);
+ format %{ "cmgt $dst, $src2, $src1\t# vector cmp (2I)" %}
+ ins_encode %{
+ __ cmgt($dst$$FloatRegister, __ T2S, $src2$$FloatRegister, $src1$$FloatRegister);
+ %}
+ ins_pipe(vdop64);
+%}
+
+// VectorMaskCmp lt, 4 x T_INT: src1 < src2 is emitted as cmgt (T4S) with the sources swapped.
+instruct vcmlt4I(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ ins_cost(INSN_COST);
+ format %{ "cmgt $dst, $src2, $src1\t# vector cmp (4I)" %}
+ ins_encode %{
+ __ cmgt($dst$$FloatRegister, __ T4S, $src2$$FloatRegister, $src1$$FloatRegister);
+ %}
+ ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp lt, 2 x T_LONG: src1 < src2 is emitted as cmgt (T2D) with the sources swapped.
+instruct vcmlt2L(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ ins_cost(INSN_COST);
+ format %{ "cmgt $dst, $src2, $src1\t# vector cmp (2L)" %}
+ ins_encode %{
+ __ cmgt($dst$$FloatRegister, __ T2D, $src2$$FloatRegister, $src1$$FloatRegister);
+ %}
+ ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp lt, 2 x T_FLOAT: src1 < src2 is emitted as fcmgt (T2S) with the sources swapped.
+instruct vcmlt2F(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ ins_cost(INSN_COST);
+ format %{ "fcmgt $dst, $src2, $src1\t# vector cmp (2F)" %}
+ ins_encode %{
+ __ fcmgt($dst$$FloatRegister, __ T2S, $src2$$FloatRegister, $src1$$FloatRegister);
+ %}
+ ins_pipe(vdop64);
+%}
+
+// VectorMaskCmp lt, 4 x T_FLOAT: src1 < src2 is emitted as fcmgt (T4S) with the sources swapped.
+instruct vcmlt4F(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ ins_cost(INSN_COST);
+ format %{ "fcmgt $dst, $src2, $src1\t# vector cmp (4F)" %}
+ ins_encode %{
+ __ fcmgt($dst$$FloatRegister, __ T4S, $src2$$FloatRegister, $src1$$FloatRegister);
+ %}
+ ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp lt, 2 x T_DOUBLE: src1 < src2 is emitted as fcmgt (T2D) with the sources swapped.
+instruct vcmlt2D(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ ins_cost(INSN_COST);
+ format %{ "fcmgt $dst, $src2, $src1\t# vector cmp (2D)" %}
+ ins_encode %{
+ __ fcmgt($dst$$FloatRegister, __ T2D, $src2$$FloatRegister, $src1$$FloatRegister);
+ %}
+ ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp le, 8 x T_BYTE: src1 <= src2 is emitted as cmge (T8B) with the sources swapped.
+instruct vcmle8B(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 8 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ ins_cost(INSN_COST);
+ format %{ "cmge $dst, $src2, $src1\t# vector cmp (8B)" %}
+ ins_encode %{
+ __ cmge($dst$$FloatRegister, __ T8B, $src2$$FloatRegister, $src1$$FloatRegister);
+ %}
+ ins_pipe(vdop64);
+%}
+
+// VectorMaskCmp le, 16 x T_BYTE: src1 <= src2 is emitted as cmge (T16B) with the sources swapped.
+instruct vcmle16B(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 16 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ ins_cost(INSN_COST);
+ format %{ "cmge $dst, $src2, $src1\t# vector cmp (16B)" %}
+ ins_encode %{
+ __ cmge($dst$$FloatRegister, __ T16B, $src2$$FloatRegister, $src1$$FloatRegister);
+ %}
+ ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp le, 4 x T_SHORT: src1 <= src2 is emitted as cmge (T4H) with the sources swapped.
+instruct vcmle4S(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ ins_cost(INSN_COST);
+ format %{ "cmge $dst, $src2, $src1\t# vector cmp (4S)" %}
+ ins_encode %{
+ __ cmge($dst$$FloatRegister, __ T4H, $src2$$FloatRegister, $src1$$FloatRegister);
+ %}
+ ins_pipe(vdop64);
+%}
+
+// VectorMaskCmp le, 8 x T_SHORT: src1 <= src2 is emitted as cmge (T8H) with the sources swapped.
+instruct vcmle8S(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 8 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ ins_cost(INSN_COST);
+ format %{ "cmge $dst, $src2, $src1\t# vector cmp (8S)" %}
+ ins_encode %{
+ __ cmge($dst$$FloatRegister, __ T8H, $src2$$FloatRegister, $src1$$FloatRegister);
+ %}
+ ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp le, 2 x T_INT: src1 <= src2 is emitted as cmge (T2S) with the sources swapped.
+instruct vcmle2I(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ ins_cost(INSN_COST);
+ format %{ "cmge $dst, $src2, $src1\t# vector cmp (2I)" %}
+ ins_encode %{
+ __ cmge($dst$$FloatRegister, __ T2S, $src2$$FloatRegister, $src1$$FloatRegister);
+ %}
+ ins_pipe(vdop64);
+%}
+
+// VectorMaskCmp le, 4 x T_INT: src1 <= src2 is emitted as cmge (T4S) with the sources swapped.
+instruct vcmle4I(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ ins_cost(INSN_COST);
+ format %{ "cmge $dst, $src2, $src1\t# vector cmp (4I)" %}
+ ins_encode %{
+ __ cmge($dst$$FloatRegister, __ T4S, $src2$$FloatRegister, $src1$$FloatRegister);
+ %}
+ ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp le, 2 x T_LONG: src1 <= src2 is emitted as cmge (T2D) with the sources swapped.
+instruct vcmle2L(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ ins_cost(INSN_COST);
+ format %{ "cmge $dst, $src2, $src1\t# vector cmp (2L)" %}
+ ins_encode %{
+ __ cmge($dst$$FloatRegister, __ T2D, $src2$$FloatRegister, $src1$$FloatRegister);
+ %}
+ ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp le, 2 x T_FLOAT: src1 <= src2 is emitted as fcmge (T2S) with the sources swapped.
+instruct vcmle2F(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ ins_cost(INSN_COST);
+ format %{ "fcmge $dst, $src2, $src1\t# vector cmp (2F)" %}
+ ins_encode %{
+ __ fcmge($dst$$FloatRegister, __ T2S, $src2$$FloatRegister, $src1$$FloatRegister);
+ %}
+ ins_pipe(vdop64);
+%}
+
+// VectorMaskCmp le, 4 x T_FLOAT: src1 <= src2 is emitted as fcmge (T4S) with the sources swapped.
+instruct vcmle4F(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ ins_cost(INSN_COST);
+ format %{ "fcmge $dst, $src2, $src1\t# vector cmp (4F)" %}
+ ins_encode %{
+ __ fcmge($dst$$FloatRegister, __ T4S, $src2$$FloatRegister, $src1$$FloatRegister);
+ %}
+ ins_pipe(vdop128);
+%}
+
+// VectorMaskCmp le, 2 x T_DOUBLE: src1 <= src2 is emitted as fcmge (T2D) with the sources swapped.
+instruct vcmle2D(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ ins_cost(INSN_COST);
+ format %{ "fcmge $dst, $src2, $src1\t# vector cmp (2D)" %}
+ ins_encode %{
+ __ fcmge($dst$$FloatRegister, __ T2D, $src2$$FloatRegister, $src1$$FloatRegister);
+ %}
+ ins_pipe(vdop128);
+%}
+
+// ------------------------------ Vector mul -----------------------------------
+
+// MulVL, 2 lanes: each 64-bit lane is moved to general registers, multiplied there, and inserted into dst.
+instruct vmul2L(vecX dst, vecX src1, vecX src2, iRegLNoSp tmp1, iRegLNoSp tmp2)
+%{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (MulVL src1 src2));
+ ins_cost(INSN_COST);
+ effect(TEMP tmp1, TEMP tmp2);
+ format %{ "umov $tmp1, $src1, D, 0\n\t"
+ "umov $tmp2, $src2, D, 0\n\t"
+ "mul $tmp2, $tmp2, $tmp1\n\t"
+ "mov $dst, T2D, 0, $tmp2\t# insert into vector(2L)\n\t"
+ "umov $tmp1, $src1, D, 1\n\t"
+ "umov $tmp2, $src2, D, 1\n\t"
+ "mul $tmp2, $tmp2, $tmp1\n\t"
+ "mov $dst, T2D, 1, $tmp2\t# insert into vector(2L)" %}
+ ins_encode %{
+ __ umov($tmp1$$Register, $src1$$FloatRegister, __ D, 0);
+ __ umov($tmp2$$Register, $src2$$FloatRegister, __ D, 0);
+ __ mul($tmp2$$Register, $tmp2$$Register, $tmp1$$Register);
+ __ mov($dst$$FloatRegister, __ T2D, 0, $tmp2$$Register);
+ __ umov($tmp1$$Register, $src1$$FloatRegister, __ D, 1);
+ __ umov($tmp2$$Register, $src2$$FloatRegister, __ D, 1);
+ __ mul($tmp2$$Register, $tmp2$$Register, $tmp1$$Register);
+ __ mov($dst$$FloatRegister, __ T2D, 1, $tmp2$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// --------------------------------- Vector not --------------------------------
+
+// 8-byte vector not: XorV of src with a replicated -1 (byte, short or int) is emitted as notr (T8B).
+instruct vnot2I(vecD dst, vecD src, immI_M1 m1)
+%{
+ predicate(n->as_Vector()->length_in_bytes() == 8);
+ match(Set dst (XorV src (ReplicateB m1)));
+ match(Set dst (XorV src (ReplicateS m1)));
+ match(Set dst (XorV src (ReplicateI m1)));
+ format %{ "not $dst, $src\t# vector (8B)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ notr($dst$$FloatRegister, __ T8B, $src$$FloatRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+// 16-byte vector not: XorV of src with a replicated -1 (byte, short or int) is emitted as notr (T16B).
+instruct vnot4I(vecX dst, vecX src, immI_M1 m1)
+%{
+ predicate(n->as_Vector()->length_in_bytes() == 16);
+ match(Set dst (XorV src (ReplicateB m1)));
+ match(Set dst (XorV src (ReplicateS m1)));
+ match(Set dst (XorV src (ReplicateI m1)));
+ format %{ "not $dst, $src\t# vector (16B)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ notr($dst$$FloatRegister, __ T16B, $src$$FloatRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+// 16-byte vector not: XorV of src with a replicated long -1 is emitted as notr (T16B).
+instruct vnot2L(vecX dst, vecX src, immL_M1 m1)
+%{
+ predicate(n->as_Vector()->length_in_bytes() == 16);
+ match(Set dst (XorV src (ReplicateL m1)));
+ format %{ "not $dst, $src\t# vector (16B)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ notr($dst$$FloatRegister, __ T16B, $src$$FloatRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+// ------------------------------ Vector max/min -------------------------------
+
+// MaxV on T_BYTE lanes held in a D register, emitted as maxv over the 8B arrangement.
+// The predicate accepts vectors of 4 or 8 lanes; both use the same encoding.
+instruct vmax8B(vecD dst, vecD src1, vecD src2)
+%{
+ predicate((n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (MaxV src1 src2));
+ format %{ "maxv $dst, $src1, $src2\t# vector (8B)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ maxv($dst$$FloatRegister, __ T8B, $src1$$FloatRegister, $src2$$FloatRegister);
+ %}
+ ins_pipe(vdop64);
+%}
+
+// MaxV on 16 T_BYTE lanes, emitted as maxv over the 16B arrangement.
+instruct vmax16B(vecX dst, vecX src1, vecX src2)
+%{
+ predicate(n->as_Vector()->length() == 16 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (MaxV src1 src2));
+ format %{ "maxv $dst, $src1, $src2\t# vector (16B)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ maxv($dst$$FloatRegister, __ T16B, $src1$$FloatRegister, $src2$$FloatRegister);
+ %}
+ ins_pipe(vdop128);
+%}
+
+// MaxV on 4 T_SHORT lanes, emitted as maxv over the 4H arrangement.
+instruct vmax4S(vecD dst, vecD src1, vecD src2)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (MaxV src1 src2));
+ format %{ "maxv $dst, $src1, $src2\t# vector (4S)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ maxv($dst$$FloatRegister, __ T4H, $src1$$FloatRegister, $src2$$FloatRegister);
+ %}
+ ins_pipe(vdop64);
+%}
+
+// MaxV on 8 T_SHORT lanes, emitted as maxv over the 8H arrangement.
+instruct vmax8S(vecX dst, vecX src1, vecX src2)
+%{
+ predicate(n->as_Vector()->length() == 8 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (MaxV src1 src2));
+ format %{ "maxv $dst, $src1, $src2\t# vector (8S)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ maxv($dst$$FloatRegister, __ T8H, $src1$$FloatRegister, $src2$$FloatRegister);
+ %}
+ ins_pipe(vdop128);
+%}
+
+// MaxV on 2 T_INT lanes, emitted as maxv over the 2S arrangement.
+instruct vmax2I(vecD dst, vecD src1, vecD src2)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (MaxV src1 src2));
+ format %{ "maxv $dst, $src1, $src2\t# vector (2I)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ maxv($dst$$FloatRegister, __ T2S, $src1$$FloatRegister, $src2$$FloatRegister);
+ %}
+ ins_pipe(vdop64);
+%}
+
+// MaxV on 4 T_INT lanes, emitted as maxv over the 4S arrangement.
+instruct vmax4I(vecX dst, vecX src1, vecX src2)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (MaxV src1 src2));
+ format %{ "maxv $dst, $src1, $src2\t# vector (4I)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ maxv($dst$$FloatRegister, __ T4S, $src1$$FloatRegister, $src2$$FloatRegister);
+ %}
+ ins_pipe(vdop128);
+%}
+
+// MinV on T_BYTE lanes held in a D register, emitted as minv over the 8B arrangement.
+// The predicate accepts vectors of 4 or 8 lanes; both use the same encoding.
+instruct vmin8B(vecD dst, vecD src1, vecD src2)
+%{
+ predicate((n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (MinV src1 src2));
+ format %{ "minv $dst, $src1, $src2\t# vector (8B)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ minv($dst$$FloatRegister, __ T8B, $src1$$FloatRegister, $src2$$FloatRegister);
+ %}
+ ins_pipe(vdop64);
+%}
+
+// MinV on 16 T_BYTE lanes, emitted as minv over the 16B arrangement.
+instruct vmin16B(vecX dst, vecX src1, vecX src2)
+%{
+ predicate(n->as_Vector()->length() == 16 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (MinV src1 src2));
+ format %{ "minv $dst, $src1, $src2\t# vector (16B)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ minv($dst$$FloatRegister, __ T16B, $src1$$FloatRegister, $src2$$FloatRegister);
+ %}
+ ins_pipe(vdop128);
+%}
+
+// MinV on 4 T_SHORT lanes, emitted as minv over the 4H arrangement.
+instruct vmin4S(vecD dst, vecD src1, vecD src2)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (MinV src1 src2));
+ format %{ "minv $dst, $src1, $src2\t# vector (4S)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ minv($dst$$FloatRegister, __ T4H, $src1$$FloatRegister, $src2$$FloatRegister);
+ %}
+ ins_pipe(vdop64);
+%}
+
+// MinV on 8 T_SHORT lanes, emitted as minv over the 8H arrangement.
+instruct vmin8S(vecX dst, vecX src1, vecX src2)
+%{
+ predicate(n->as_Vector()->length() == 8 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (MinV src1 src2));
+ format %{ "minv $dst, $src1, $src2\t# vector (8S)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ minv($dst$$FloatRegister, __ T8H, $src1$$FloatRegister, $src2$$FloatRegister);
+ %}
+ ins_pipe(vdop128);
+%}
+
+// MinV on 2 T_INT lanes, emitted as minv over the 2S arrangement.
+instruct vmin2I(vecD dst, vecD src1, vecD src2)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (MinV src1 src2));
+ format %{ "minv $dst, $src1, $src2\t# vector (2I)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ minv($dst$$FloatRegister, __ T2S, $src1$$FloatRegister, $src2$$FloatRegister);
+ %}
+ ins_pipe(vdop64);
+%}
+
+// MinV on 4 T_INT lanes, emitted as minv over the 4S arrangement.
+instruct vmin4I(vecX dst, vecX src1, vecX src2)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (MinV src1 src2));
+ format %{ "minv $dst, $src1, $src2\t# vector (4I)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ minv($dst$$FloatRegister, __ T4S, $src1$$FloatRegister, $src2$$FloatRegister);
+ %}
+ ins_pipe(vdop128);
+%}
+
+
+// MaxV on 2 T_LONG lanes: cmgt(src1, src2) writes a mask into dst, then bsl(dst, src1, src2) selects with it.
+instruct vmax2L(vecX dst, vecX src1, vecX src2)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst (MaxV src1 src2));
+ effect(TEMP dst);
+ ins_cost(INSN_COST);
+ format %{ "cmgt $dst, $src1, $src2\t# vector (2L)\n\t"
+ "bsl $dst, $src1, $src2\t# vector (16B)" %}
+ ins_encode %{
+ __ cmgt($dst$$FloatRegister, __ T2D, $src1$$FloatRegister, $src2$$FloatRegister);
+ __ bsl($dst$$FloatRegister, __ T16B, $src1$$FloatRegister, $src2$$FloatRegister);
+ %}
+ ins_pipe(vdop128);
+%}
+
+// MinV on 2 T_LONG lanes: cmgt(src1, src2) writes a mask into dst, then bsl(dst, src2, src1) selects with it.
+instruct vmin2L(vecX dst, vecX src1, vecX src2)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst (MinV src1 src2));
+ effect(TEMP dst);
+ ins_cost(INSN_COST);
+ format %{ "cmgt $dst, $src1, $src2\t# vector (2L)\n\t"
+ "bsl $dst, $src2, $src1\t# vector (16B)" %}
+ ins_encode %{
+ __ cmgt($dst$$FloatRegister, __ T2D, $src1$$FloatRegister, $src2$$FloatRegister);
+ __ bsl($dst$$FloatRegister, __ T16B, $src2$$FloatRegister, $src1$$FloatRegister);
+ %}
+ ins_pipe(vdop128);
+%}
+
+// --------------------------------- blend (bsl) ----------------------------
+
+// VectorBlend on an 8-byte vector: dst is matched as the mask input and bsl (T8B) is issued with src2, src1.
+instruct vbsl8B(vecD dst, vecD src1, vecD src2)
+%{
+ predicate(n->as_Vector()->length_in_bytes() == 8);
+ match(Set dst (VectorBlend (Binary src1 src2) dst));
+ format %{ "bsl $dst, $src2, $src1\t# vector (8B)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ bsl($dst$$FloatRegister, __ T8B, $src2$$FloatRegister, $src1$$FloatRegister);
+ %}
+ ins_pipe(vlogical64);
+%}
+
+// VectorBlend on a 16-byte vector: dst is matched as the mask input and bsl (T16B) is issued with src2, src1.
+instruct vbsl16B(vecX dst, vecX src1, vecX src2)
+%{
+ predicate(n->as_Vector()->length_in_bytes() == 16);
+ match(Set dst (VectorBlend (Binary src1 src2) dst));
+ format %{ "bsl $dst, $src2, $src1\t# vector (16B)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ bsl($dst$$FloatRegister, __ T16B, $src2$$FloatRegister, $src1$$FloatRegister);
+ %}
+ ins_pipe(vlogical128);
+%}
+
+// --------------------------------- Load/store Mask ----------------------------
+
+// VectorLoadMask producing 8 T_BYTE lanes: a single negr over the 8B arrangement.
+instruct loadmask8B(vecD dst, vecD src) %{
+ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorLoadMask src));
+ format %{ "negr $dst, $src\t# load mask (8B to 8B)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ negr($dst$$FloatRegister, __ T8B, $src$$FloatRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+// VectorLoadMask producing 16 T_BYTE lanes: a single negr over the 16B arrangement.
+instruct loadmask16B(vecX dst, vecX src) %{
+ predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorLoadMask src));
+ format %{ "negr $dst, $src\t# load mask (16B to 16B)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ negr($dst$$FloatRegister, __ T16B, $src$$FloatRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+// VectorStoreMask of 8 lanes whose size operand matches immI_1: a single negr over the 8B arrangement.
+instruct storemask8B(vecD dst, vecD src, immI_1 size) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (VectorStoreMask src size));
+ format %{ "negr $dst, $src\t# store mask (8B to 8B)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ negr($dst$$FloatRegister, __ T8B, $src$$FloatRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+// VectorStoreMask of 16 lanes whose size operand matches immI_1: a single negr over the 16B arrangement.
+instruct storemask16B(vecX dst, vecX src, immI_1 size) %{
+ predicate(n->as_Vector()->length() == 16);
+ match(Set dst (VectorStoreMask src size));
+ format %{ "negr $dst, $src\t# store mask (16B to 16B)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ negr($dst$$FloatRegister, __ T16B, $src$$FloatRegister);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+// VectorLoadMask producing 4 T_SHORT lanes: uxtl widens 8B to 8H, then negr is applied over 8H.
+instruct loadmask4S(vecD dst, vecD src) %{
+ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorLoadMask src));
+ format %{ "uxtl $dst, $src\n\t"
+ "negr $dst, $dst\t# load mask (4B to 4H)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ uxtl($dst$$FloatRegister, __ T8H, $src$$FloatRegister, __ T8B);
+ __ negr($dst$$FloatRegister, __ T8H, $dst$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// VectorLoadMask producing 8 T_SHORT lanes: uxtl widens 8B to 8H, then negr is applied over 8H.
+instruct loadmask8S(vecX dst, vecD src) %{
+ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorLoadMask src));
+ format %{ "uxtl $dst, $src\n\t"
+ "negr $dst, $dst\t# load mask (8B to 8H)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ uxtl($dst$$FloatRegister, __ T8H, $src$$FloatRegister, __ T8B);
+ __ negr($dst$$FloatRegister, __ T8H, $dst$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// VectorStoreMask of 4 lanes whose size operand matches immI_2: xtn narrows 8H to 8B, then negr over 8B.
+instruct storemask4S(vecD dst, vecD src, immI_2 size) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (VectorStoreMask src size));
+ format %{ "xtn $dst, $src\n\t"
+ "negr $dst, $dst\t# store mask (4H to 4B)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ xtn($dst$$FloatRegister, __ T8B, $src$$FloatRegister, __ T8H);
+ __ negr($dst$$FloatRegister, __ T8B, $dst$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// VectorStoreMask of 8 lanes whose size operand matches immI_2: xtn narrows 8H to 8B, then negr over 8B.
+instruct storemask8S(vecD dst, vecX src, immI_2 size) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (VectorStoreMask src size));
+ format %{ "xtn $dst, $src\n\t"
+ "negr $dst, $dst\t# store mask (8H to 8B)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ xtn($dst$$FloatRegister, __ T8B, $src$$FloatRegister, __ T8H);
+ __ negr($dst$$FloatRegister, __ T8B, $dst$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// VectorLoadMask producing 2 T_INT or T_FLOAT lanes: two uxtl widenings (8B to 8H, 4H to 4S), then negr over 4S.
+instruct loadmask2I(vecD dst, vecD src) %{
+ predicate(n->as_Vector()->length() == 2 &&
+ (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
+ n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
+ match(Set dst (VectorLoadMask src));
+ format %{ "uxtl $dst, $src\t# 2B to 2H\n\t"
+ "uxtl $dst, $dst\t# 2H to 2S\n\t"
+ "negr $dst, $dst\t# load mask (2B to 2S)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ uxtl($dst$$FloatRegister, __ T8H, $src$$FloatRegister, __ T8B);
+ __ uxtl($dst$$FloatRegister, __ T4S, $dst$$FloatRegister, __ T4H);
+ __ negr($dst$$FloatRegister, __ T4S, $dst$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// VectorLoadMask producing 4 T_INT or T_FLOAT lanes: two uxtl widenings (8B to 8H, 4H to 4S), then negr over 4S.
+instruct loadmask4I(vecX dst, vecD src) %{
+ predicate(n->as_Vector()->length() == 4 &&
+ (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
+ n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
+ match(Set dst (VectorLoadMask src));
+ format %{ "uxtl $dst, $src\t# 4B to 4H\n\t"
+ "uxtl $dst, $dst\t# 4H to 4S\n\t"
+ "negr $dst, $dst\t# load mask (4B to 4S)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ uxtl($dst$$FloatRegister, __ T8H, $src$$FloatRegister, __ T8B);
+ __ uxtl($dst$$FloatRegister, __ T4S, $dst$$FloatRegister, __ T4H);
+ __ negr($dst$$FloatRegister, __ T4S, $dst$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// VectorStoreMask of 2 lanes whose size operand matches immI_4: two xtn narrowings (4S to 4H, 8H to 8B), then negr over 8B.
+instruct storemask2I(vecD dst, vecD src, immI_4 size) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (VectorStoreMask src size));
+ format %{ "xtn $dst, $src\t# 2S to 2H\n\t"
+ "xtn $dst, $dst\t# 2H to 2B\n\t"
+ "negr $dst, $dst\t# store mask (2S to 2B)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ xtn($dst$$FloatRegister, __ T4H, $src$$FloatRegister, __ T4S);
+ __ xtn($dst$$FloatRegister, __ T8B, $dst$$FloatRegister, __ T8H);
+ __ negr($dst$$FloatRegister, __ T8B, $dst$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// VectorStoreMask of 4 lanes whose size operand matches immI_4: two xtn narrowings (4S to 4H, 8H to 8B), then negr over 8B.
+instruct storemask4I(vecD dst, vecX src, immI_4 size) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (VectorStoreMask src size));
+ format %{ "xtn $dst, $src\t# 4S to 4H\n\t"
+ "xtn $dst, $dst\t# 4H to 4B\n\t"
+ "negr $dst, $dst\t# store mask (4S to 4B)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ xtn($dst$$FloatRegister, __ T4H, $src$$FloatRegister, __ T4S);
+ __ xtn($dst$$FloatRegister, __ T8B, $dst$$FloatRegister, __ T8H);
+ __ negr($dst$$FloatRegister, __ T8B, $dst$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// VectorLoadMask producing 2 T_LONG or T_DOUBLE lanes: three uxtl widenings (8B to 8H, 4H to 4S, 2S to 2D), then negr over 2D.
+instruct loadmask2L(vecX dst, vecD src) %{
+ predicate(n->as_Vector()->length() == 2 &&
+ (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
+ n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
+ match(Set dst (VectorLoadMask src));
+ ins_cost(INSN_COST);
+ format %{ "uxtl $dst, $src\t# 2B to 2H\n\t"
+ "uxtl $dst, $dst\t# 2H to 2S\n\t"
+ "uxtl $dst, $dst\t# 2S to 2D\n\t"
+ "negr $dst, $dst\t# load mask (2B to 2D)" %}
+ ins_encode %{
+ __ uxtl($dst$$FloatRegister, __ T8H, $src$$FloatRegister, __ T8B);
+ __ uxtl($dst$$FloatRegister, __ T4S, $dst$$FloatRegister, __ T4H);
+ __ uxtl($dst$$FloatRegister, __ T2D, $dst$$FloatRegister, __ T2S);
+ __ negr($dst$$FloatRegister, __ T2D, $dst$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// VectorStoreMask of 2 lanes whose size operand matches immI_8: three xtn narrowings (2D to 2S, 4S to 4H, 8H to 8B), then negr over 8B.
+instruct storemask2L(vecD dst, vecX src, immI_8 size) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (VectorStoreMask src size));
+ ins_cost(INSN_COST);
+ format %{ "xtn $dst, $src\t# 2D to 2S\n\t"
+ "xtn $dst, $dst\t# 2S to 2H\n\t"
+ "xtn $dst, $dst\t# 2H to 2B\n\t"
+ "negr $dst, $dst\t# store mask (2D to 2B)" %}
+ ins_encode %{
+ __ xtn($dst$$FloatRegister, __ T2S, $src$$FloatRegister, __ T2D);
+ __ xtn($dst$$FloatRegister, __ T4H, $dst$$FloatRegister, __ T4S);
+ __ xtn($dst$$FloatRegister, __ T8B, $dst$$FloatRegister, __ T8H);
+ __ negr($dst$$FloatRegister, __ T8B, $dst$$FloatRegister);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+//-------------------------------- LOAD_IOTA_INDICES----------------------------------
+
+instruct loadcon8B(vecD dst, immI0 src)
+%{ // VectorLoadConst for 2, 4 or 8 byte lanes: loads the iota index table into a vecD register.
+ predicate((n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4 ||
+ n->as_Vector()->length() == 8) &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorLoadConst src));
+ ins_cost(INSN_COST);
+ format %{ "ldr $dst, CONSTANT_MEMORY\t# load iota indices" %}
+ ins_encode %{
+ __ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices())); // table address; uses rscratch1
+ __ ldrd(as_FloatRegister($dst$$reg), rscratch1); // load the table through rscratch1 into dst
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct loadcon16B(vecX dst, immI0 src)
+%{ // VectorLoadConst for 16 byte lanes: loads the iota index table into a vecX register.
+ predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorLoadConst src));
+ ins_cost(INSN_COST);
+ format %{ "ldr $dst, CONSTANT_MEMORY\t# load iota indices" %}
+ ins_encode %{
+ __ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices())); // table address; uses rscratch1
+ __ ldrq(as_FloatRegister($dst$$reg), rscratch1); // load the table through rscratch1 into dst
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+//-------------------------------- LOAD_SHUFFLE ----------------------------------
+
+instruct loadshuffle8B(vecD dst, vecD src)
+%{ // VectorLoadShuffle for 8 byte lanes: the byte indices are used as is, so this is a register copy.
+ predicate(n->as_Vector()->length() == 8 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorLoadShuffle src));
+ ins_cost(INSN_COST);
+ format %{ "mov $dst, $src\t# get 8B shuffle" %}
+ ins_encode %{
+ __ orr(as_FloatRegister($dst$$reg), __ T8B, // orr of src with itself acts as the mov shown in the format
+ as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct loadshuffle16B(vecX dst, vecX src)
+%{ // VectorLoadShuffle for 16 byte lanes: the byte indices are used as is, so this is a register copy.
+ predicate(n->as_Vector()->length() == 16 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorLoadShuffle src));
+ ins_cost(INSN_COST);
+ format %{ "mov $dst, $src\t# get 16B shuffle" %}
+ ins_encode %{
+ __ orr(as_FloatRegister($dst$$reg), __ T16B, // orr of src with itself acts as the mov shown in the format
+ as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct loadshuffle4S(vecD dst, vecD src)
+%{ // VectorLoadShuffle for 4 short lanes: zero-extend the byte indices to halfwords.
+ predicate(n->as_Vector()->length() == 4 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorLoadShuffle src));
+ ins_cost(INSN_COST);
+ format %{ "uxtl $dst, $src\t# 4B to 4H" %}
+ ins_encode %{
+ __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B); // bytes to halfwords
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct loadshuffle8S(vecX dst, vecD src)
+%{ // VectorLoadShuffle for 8 short lanes: zero-extend the byte indices to halfwords.
+ predicate(n->as_Vector()->length() == 8 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorLoadShuffle src));
+ ins_cost(INSN_COST);
+ format %{ "uxtl $dst, $src\t# 8B to 8H" %}
+ ins_encode %{
+ __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B); // bytes to halfwords
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct loadshuffle4I(vecX dst, vecD src)
+%{ // VectorLoadShuffle for 4 int or float lanes: zero-extend the byte indices to words in two steps.
+ predicate(n->as_Vector()->length() == 4 &&
+ (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
+ n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
+ match(Set dst (VectorLoadShuffle src));
+ ins_cost(INSN_COST);
+ format %{ "uxtl $dst, $src\t# 4B to 4H \n\t"
+ "uxtl $dst, $dst\t# 4H to 4S" %}
+ ins_encode %{
+ __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B); // bytes to halfwords
+ __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H); // halfwords to words
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+//-------------------------------- Rearrange -------------------------------------
+// Here is an example that rearranges a NEON vector with 4 ints:
+// Rearrange V1 int[a0, a1, a2, a3] to V2 int[a2, a3, a0, a1]
+// 1. Get the indices of V1 and store them as Vi byte[0, 1, 2, 3].
+// 2. Convert Vi byte[0, 1, 2, 3] to the indices of V2 and also store them as Vi byte[2, 3, 0, 1].
+// 3. Unsigned extend Long Vi from byte[2, 3, 0, 1] to int[2, 3, 0, 1].
+// 4. Multiply Vi int[2, 3, 0, 1] with constant int[0x04040404, 0x04040404, 0x04040404, 0x04040404]
+// and get tbl base Vm int[0x08080808, 0x0c0c0c0c, 0x00000000, 0x04040404].
+// 5. Add Vm with constant int[0x03020100, 0x03020100, 0x03020100, 0x03020100]
+// and get tbl index Vm int[0x0b0a0908, 0x0f0e0d0c, 0x03020100, 0x07060504]
+// 6. Use Vm as index register, and use V1 as table register.
+// Then get V2 as the result by tbl NEON instructions.
+// Notes:
+// Step 1 matches VectorLoadConst.
+// Step 3 matches VectorLoadShuffle.
+// Step 4, 5, 6 match VectorRearrange.
+// For VectorRearrange short/int, this more complex calculation is required
+// because NEON tbl supports byte tables only, so for short/int we need to
+// look up 2/4 bytes as a group. For VectorRearrange long, we use bsl
+// to implement rearrange.
+
+instruct rearrange8B(vecD dst, vecD src, vecD shuffle)
+%{ // VectorRearrange for 8 byte lanes: one tbl lookup with src as the table and shuffle as the indices.
+ predicate(n->as_Vector()->length() == 8 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorRearrange src shuffle));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst);
+ format %{ "tbl $dst, {$src}, $shuffle\t# rearrange 8B" %}
+ ins_encode %{
+ __ tbl(as_FloatRegister($dst$$reg), __ T8B, // table is the single register src, as the format now prints
+ as_FloatRegister($src$$reg), 1, as_FloatRegister($shuffle$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct rearrange16B(vecX dst, vecX src, vecX shuffle)
+%{ // VectorRearrange for 16 byte lanes: one tbl lookup with src as the table and shuffle as the indices.
+ predicate(n->as_Vector()->length() == 16 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorRearrange src shuffle));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst);
+ format %{ "tbl $dst, {$src}, $shuffle\t# rearrange 16B" %}
+ ins_encode %{
+ __ tbl(as_FloatRegister($dst$$reg), __ T16B, // table is the single register src, as the format now prints
+ as_FloatRegister($src$$reg), 1, as_FloatRegister($shuffle$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct rearrange4S(vecD dst, vecD src, vecD shuffle, vecD tmp0, vecD tmp1)
+%{ // VectorRearrange for 4 short lanes: turn halfword indices into byte-pair tbl indices, then look up src.
+ predicate(n->as_Vector()->length() == 4 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorRearrange src shuffle));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp0, TEMP tmp1);
+ format %{ "mov $tmp0, CONSTANT\t# constant 0x0202020202020202\n\t"
+ "mov $tmp1, CONSTANT\t# constant 0x0100010001000100\n\t"
+ "mulv $dst, T4H, $shuffle, $tmp0\n\t"
+ "addv $dst, T8B, $dst, $tmp1\n\t"
+ "tbl $dst, {$src}, $dst\t# rearrange 4S" %}
+ ins_encode %{
+ __ mov(as_FloatRegister($tmp0$$reg), __ T8B, 0x02); // multiplier: 0x02 in every byte
+ __ mov(as_FloatRegister($tmp1$$reg), __ T4H, 0x0100); // per-halfword byte offsets 0 and 1
+ __ mulv(as_FloatRegister($dst$$reg), __ T4H, // tbl base = index * 0x0202 per halfword
+ as_FloatRegister($shuffle$$reg), as_FloatRegister($tmp0$$reg));
+ __ addv(as_FloatRegister($dst$$reg), __ T8B, // tbl index = base + byte offsets
+ as_FloatRegister($dst$$reg), as_FloatRegister($tmp1$$reg));
+ __ tbl(as_FloatRegister($dst$$reg), __ T8B, // byte lookup in src using the computed indices
+ as_FloatRegister($src$$reg), 1, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct rearrange8S(vecX dst, vecX src, vecX shuffle, vecX tmp0, vecX tmp1)
+%{ // VectorRearrange for 8 short lanes: turn halfword indices into byte-pair tbl indices, then look up src.
+ predicate(n->as_Vector()->length() == 8 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorRearrange src shuffle));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp0, TEMP tmp1);
+ format %{ "mov $tmp0, CONSTANT\t# constant 0x0202020202020202\n\t"
+ "mov $tmp1, CONSTANT\t# constant 0x0100010001000100\n\t"
+ "mulv $dst, T8H, $shuffle, $tmp0\n\t"
+ "addv $dst, T16B, $dst, $tmp1\n\t"
+ "tbl $dst, {$src}, $dst\t# rearrange 8S" %}
+ ins_encode %{
+ __ mov(as_FloatRegister($tmp0$$reg), __ T16B, 0x02); // multiplier: 0x02 in every byte
+ __ mov(as_FloatRegister($tmp1$$reg), __ T8H, 0x0100); // per-halfword byte offsets 0 and 1
+ __ mulv(as_FloatRegister($dst$$reg), __ T8H, // tbl base = index * 0x0202 per halfword
+ as_FloatRegister($shuffle$$reg), as_FloatRegister($tmp0$$reg));
+ __ addv(as_FloatRegister($dst$$reg), __ T16B, // tbl index = base + byte offsets
+ as_FloatRegister($dst$$reg), as_FloatRegister($tmp1$$reg));
+ __ tbl(as_FloatRegister($dst$$reg), __ T16B, // byte lookup in src using the computed indices
+ as_FloatRegister($src$$reg), 1, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct rearrange4I(vecX dst, vecX src, vecX shuffle, vecX tmp0, vecX tmp1)
+%{ // VectorRearrange for 4 int or float lanes: turn word indices into 4-byte-group tbl indices, then look up src.
+ predicate(n->as_Vector()->length() == 4 &&
+ (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
+ n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
+ match(Set dst (VectorRearrange src shuffle));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp0, TEMP tmp1);
+ format %{ "mov $tmp0, CONSTANT\t# constant 0x0404040404040404\n\t"
+ "mov $tmp1, CONSTANT\t# constant 0x0302010003020100\n\t"
+ "mulv $dst, T4S, $shuffle, $tmp0\n\t"
+ "addv $dst, T16B, $dst, $tmp1\n\t"
+ "tbl $dst, {$src}, $dst\t# rearrange 4I" %}
+ ins_encode %{
+ __ mov(as_FloatRegister($tmp0$$reg), __ T16B, 0x04); // multiplier: 0x04 in every byte
+ __ mov(as_FloatRegister($tmp1$$reg), __ T4S, 0x03020100); // per-word byte offsets 0 to 3
+ __ mulv(as_FloatRegister($dst$$reg), __ T4S, // tbl base = index * 0x04040404 per word (T4S, as the format now prints)
+ as_FloatRegister($shuffle$$reg), as_FloatRegister($tmp0$$reg));
+ __ addv(as_FloatRegister($dst$$reg), __ T16B, // tbl index = base + byte offsets
+ as_FloatRegister($dst$$reg), as_FloatRegister($tmp1$$reg));
+ __ tbl(as_FloatRegister($dst$$reg), __ T16B, // byte lookup in src using the computed indices
+ as_FloatRegister($src$$reg), 1, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+//-------------------------------- Anytrue/alltrue -----------------------------
+
+instruct anytrue_in_mask8B(iRegINoSp dst, vecD src1, vecD src2, vecD tmp, rFlagsReg cr)
+%{ // VectorTest with the ne predicate on a vecD mask: dst is 1 when the across-lane byte sum of src1 is nonzero.
+ predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
+ match(Set dst (VectorTest src1 src2 ));
+ ins_cost(INSN_COST);
+ effect(TEMP tmp, KILL cr);
+ format %{ "addv $tmp, T8B, $src1\t# src1 and src2 are the same\n\t"
+ "umov $dst, $tmp, B, 0\n\t"
+ "cmp $dst, 0\n\t"
+ "cset $dst" %}
+ ins_encode %{
+ __ addv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($src1$$reg)); // sum the byte lanes of src1 into tmp
+ __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0); // byte lane 0 of tmp to dst
+ __ cmpw($dst$$Register, zr); // sets the flags, hence KILL cr
+ __ csetw($dst$$Register, Assembler::NE); // dst = (sum != 0)
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct anytrue_in_mask16B(iRegINoSp dst, vecX src1, vecX src2, vecX tmp, rFlagsReg cr)
+%{ // VectorTest with the ne predicate on a vecX mask: dst is 1 when the across-lane byte sum of src1 is nonzero.
+ predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
+ match(Set dst (VectorTest src1 src2 ));
+ ins_cost(INSN_COST);
+ effect(TEMP tmp, KILL cr);
+ format %{ "addv $tmp, T16B, $src1\t# src1 and src2 are the same\n\t"
+ "umov $dst, $tmp, B, 0\n\t"
+ "cmp $dst, 0\n\t"
+ "cset $dst" %}
+ ins_encode %{
+ __ addv(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($src1$$reg)); // sum the byte lanes of src1 into tmp
+ __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0); // byte lane 0 of tmp to dst
+ __ cmpw($dst$$Register, zr); // sets the flags, hence KILL cr
+ __ csetw($dst$$Register, Assembler::NE); // dst = (sum != 0)
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct alltrue_in_mask8B(iRegINoSp dst, vecD src1, vecD src2, vecD tmp, rFlagsReg cr)
+%{ // VectorTest with the overflow predicate on a vecD mask: dst is 1 when the byte sum of NOT(src1 AND src2) is zero.
+ predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
+ match(Set dst (VectorTest src1 src2 ));
+ ins_cost(INSN_COST);
+ effect(TEMP tmp, KILL cr);
+ format %{ "andr $tmp, T8B, $src1, $src2\t# src2 is maskAllTrue\n\t"
+ "notr $tmp, T8B, $tmp\n\t"
+ "addv $tmp, T8B, $tmp\n\t"
+ "umov $dst, $tmp, B, 0\n\t"
+ "cmp $dst, 0\n\t"
+ "cset $dst" %}
+ ins_encode %{
+ __ andr(as_FloatRegister($tmp$$reg), __ T8B, // tmp = src1 AND src2
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ __ notr(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($tmp$$reg)); // invert, so set bits mark lanes that were not all ones
+ __ addv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($tmp$$reg)); // sum the byte lanes
+ __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0); // byte lane 0 of tmp to dst
+ __ cmpw($dst$$Register, zr); // sets the flags, hence KILL cr
+ __ csetw($dst$$Register, Assembler::EQ); // dst = (sum == 0)
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct alltrue_in_mask16B(iRegINoSp dst, vecX src1, vecX src2, vecX tmp, rFlagsReg cr)
+%{ // VectorTest with the overflow predicate on a vecX mask: dst is 1 when the byte sum of NOT(src1 AND src2) is zero.
+ predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
+ match(Set dst (VectorTest src1 src2 ));
+ ins_cost(INSN_COST);
+ effect(TEMP tmp, KILL cr);
+ format %{ "andr $tmp, T16B, $src1, $src2\t# src2 is maskAllTrue\n\t"
+ "notr $tmp, T16B, $tmp\n\t"
+ "addv $tmp, T16B, $tmp\n\t"
+ "umov $dst, $tmp, B, 0\n\t"
+ "cmp $dst, 0\n\t"
+ "cset $dst" %}
+ ins_encode %{
+ __ andr(as_FloatRegister($tmp$$reg), __ T16B, // tmp = src1 AND src2
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ __ notr(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($tmp$$reg)); // invert, so set bits mark lanes that were not all ones
+ __ addv(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($tmp$$reg)); // sum the byte lanes
+ __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0); // byte lane 0 of tmp to dst
+ __ cmpw($dst$$Register, zr); // sets the flags, hence KILL cr
+ __ csetw($dst$$Register, Assembler::EQ); // dst = (sum == 0)
+ %}
+ ins_pipe(pipe_slow);
+%}
diff --git a/src/hotspot/cpu/aarch64/aarch64_neon_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_neon_ad.m4
new file mode 100644
index 00000000000..0b1dc5cb7c6
--- /dev/null
+++ b/src/hotspot/cpu/aarch64/aarch64_neon_ad.m4
@@ -0,0 +1,1424 @@
+// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2020, Arm Limited. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+
+dnl Generate the warning
+// This file is automatically generated by running "m4 aarch64_neon_ad.m4". Do not edit ----
+dnl
+
+// AArch64 NEON Architecture Description File
+
+dnl ORL2I(type): expands to orL2I when type is I, and to nothing for any other argument.
+define(`ORL2I', `ifelse($1,I,orL2I)')dnl
+dnl error(arg): expands to a program:file:line "Invalid argument" message quoting arg, then calls m4exit with status 1.
+define(`error', `__program__:__file__:__line__: Invalid argument ``$1''m4exit(`1')')dnl
+dnl iTYPE2SIMD(type): maps an integral element type letter B, S, I or L to the SIMD size letter B, H, S or D; any other argument goes to error().
+define(`iTYPE2SIMD',
+`ifelse($1, `B', `B',
+ $1, `S', `H',
+ $1, `I', `S',
+ $1, `L', `D',
+ `error($1)')')dnl
+dnl fTYPE2SIMD(type): maps a floating-point element type letter F or D to the SIMD size letter S or D; any other argument goes to error().
+define(`fTYPE2SIMD',
+`ifelse($1, `F', `S',
+ $1, `D', `D',
+ `error($1)')')dnl
+dnl TYPE2DATATYPE(type): maps B, S, I, L, F or D to BYTE, SHORT, INT, LONG, FLOAT or DOUBLE (appended to a T_ prefix in the predicates below); any other argument goes to error().
+define(`TYPE2DATATYPE',
+`ifelse($1, `B', `BYTE',
+ $1, `S', `SHORT',
+ $1, `I', `INT',
+ $1, `L', `LONG',
+ $1, `F', `FLOAT',
+ $1, `D', `DOUBLE',
+ `error($1)')')dnl
+dnl
+// ====================VECTOR INSTRUCTIONS==================================
+
+// ------------------------------ Load/store/reinterpret -----------------------
+
+// Load vector (16 bits)
+instruct loadV2(vecD dst, memory mem)
+%{ // Applies only when the LoadVector node reads exactly 2 bytes.
+ predicate(n->as_LoadVector()->memory_size() == 2);
+ match(Set dst (LoadVector mem));
+ ins_cost(4 * INSN_COST);
+ format %{ "ldrh $dst,$mem\t# vector (16 bits)" %}
+ ins_encode( aarch64_enc_ldrvH(dst, mem) ); // encoding class is defined outside this file section
+ ins_pipe(vload_reg_mem64);
+%}
+
+// Store Vector (16 bits)
+instruct storeV2(vecD src, memory mem)
+%{ // Applies only when the StoreVector node writes exactly 2 bytes.
+ predicate(n->as_StoreVector()->memory_size() == 2);
+ match(Set mem (StoreVector mem src));
+ ins_cost(4 * INSN_COST);
+ format %{ "strh $mem,$src\t# vector (16 bits)" %}
+ ins_encode( aarch64_enc_strvH(src, mem) ); // encoding class is defined outside this file section
+ ins_pipe(vstore_reg_mem64);
+%}
+dnl REINTERPRET(reg, bytes): emits rule reinterpret<reg> on operand class vec<reg>. It matches a VectorReinterpret whose input and result are both <bytes> long, works in place on dst, costs 0 and emits no code.
+define(`REINTERPRET', `
+instruct reinterpret$1`'(vec$1 dst)
+%{
+ predicate(n->bottom_type()->is_vect()->length_in_bytes() == $2 &&
+ n->in(1)->bottom_type()->is_vect()->length_in_bytes() == $2);
+ match(Set dst (VectorReinterpret dst));
+ ins_cost(0);
+ format %{ " # reinterpret $dst" %}
+ ins_encode %{
+ // empty
+ %}
+ ins_pipe(pipe_class_empty);
+%}')dnl
+dnl $1 $2
+REINTERPRET(D, 8)
+REINTERPRET(X, 16)
+dnl REINTERPRET_X(src_reg, dst_reg, dst_bytes, src_bytes): emits rule reinterpret<src_reg>2<dst_reg> for a VectorReinterpret between different vector sizes. It copies src to dst with an orr on the T8B arrangement unless both operands are the same register.
+define(`REINTERPRET_X', `
+instruct reinterpret$1`'2$2`'(vec$2 dst, vec$1 src)
+%{
+ predicate(n->bottom_type()->is_vect()->length_in_bytes() == $3 &&
+ n->in(1)->bottom_type()->is_vect()->length_in_bytes() == $4);
+ match(Set dst (VectorReinterpret src));
+ ins_cost(INSN_COST);
+ format %{ " # reinterpret $dst,$src" %}
+ ins_encode %{
+ // If register is the same, then move is not needed.
+ if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
+ __ orr(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ }
+ %}
+ ins_pipe(vlogical64);
+%}')dnl
+dnl $1 $2 $3 $4
+REINTERPRET_X(D, X, 16, 8)
+REINTERPRET_X(X, D, 8, 16)
+dnl
+
+// ------------------------------ Vector cast -------------------------------
+dnl VECTOR_CAST_I2I(lanes, src_type, dst_type, dst_reg, src_reg, insn, src_arr, dst_arr): emits rule vcvt<lanes><src_type>to<lanes><dst_type>, a one-instruction integral VectorCast<src_type>2X using <insn> (sxtl or xtn below) from arrangement src_arr to dst_arr.
+define(`VECTOR_CAST_I2I', `
+instruct vcvt$1$2to$1$3`'(vec$4 dst, vec$5 src)
+%{
+ predicate(n->as_Vector()->length() == $1 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
+ match(Set dst (VectorCast$2`'2X src));
+ format %{ "$6 $dst, T$8, $src, T$7\t# convert $1$2 to $1$3 vector" %}
+ ins_encode %{
+ __ $6(as_FloatRegister($dst$$reg), __ T$8, as_FloatRegister($src$$reg), __ T$7);
+ %}
+ ins_pipe(pipe_class_default);
+%}')dnl
+dnl $1 $2 $3 $4 $5 $6 $7 $8
+VECTOR_CAST_I2I(4, B, S, D, D, sxtl, 8B, 8H)
+VECTOR_CAST_I2I(8, B, S, X, D, sxtl, 8B, 8H)
+VECTOR_CAST_I2I(4, S, B, D, D, xtn, 8H, 8B)
+VECTOR_CAST_I2I(8, S, B, D, X, xtn, 8H, 8B)
+VECTOR_CAST_I2I(4, S, I, X, D, sxtl, 4H, 4S)
+VECTOR_CAST_I2I(4, I, S, D, X, xtn, 4S, 4H)
+VECTOR_CAST_I2I(2, I, L, X, D, sxtl, 2S, 2D)
+VECTOR_CAST_I2I(2, L, I, D, X, xtn, 2D, 2S)
+dnl VECTOR_CAST_B2I(src_type, dst_type, dst_reg, src_reg, insn, arr1_src, arr1_dst, arr2_src, arr2_dst): emits rule vcvt4<src_type>to4<dst_type>, a 4-lane cast done as two <insn> steps; the second step works in place on dst.
+define(`VECTOR_CAST_B2I', `
+instruct vcvt4$1to4$2`'(vec$3 dst, vec$4 src)
+%{
+ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
+ match(Set dst (VectorCast$1`'2X src));
+ format %{ "$5 $dst, T$7, $src, T$6\n\t"
+ "$5 $dst, T$9, $dst, T$8\t# convert 4$1 to 4$2 vector"
+ %}
+ ins_encode %{
+ __ $5(as_FloatRegister($dst$$reg), __ T$7, as_FloatRegister($src$$reg), __ T$6);
+ __ $5(as_FloatRegister($dst$$reg), __ T$9, as_FloatRegister($dst$$reg), __ T$8);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3 $4 $5 $6 $7 $8 $9
+VECTOR_CAST_B2I(B, I, X, D, sxtl, 8B, 8H, 4H, 4S)
+VECTOR_CAST_B2I(I, B, D, X, xtn, 4S, 4H, 8H, 8B)
+
+instruct vcvt4Bto4F(vecX dst, vecD src)
+%{ // VectorCastB2X producing 4 float lanes: sign-extend bytes to words, then convert the words to float.
+ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorCastB2X src));
+ format %{ "sxtl $dst, T8H, $src, T8B\n\t"
+ "sxtl $dst, T4S, $dst, T4H\n\t"
+ "scvtfv T4S, $dst, $dst\t# convert 4B to 4F vector"
+ %}
+ ins_encode %{
+ __ sxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B); // bytes to halfwords
+ __ sxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H); // halfwords to words
+ __ scvtfv(__ T4S, as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg)); // signed words to float, in place
+ %}
+ ins_pipe(pipe_slow);
+%}
+dnl VECTOR_CAST_I2F_L(lanes, src_type, dst_type, src_arr, dst_arr): emits rule vcvt<lanes><src_type>to<lanes><dst_type> from a vecD source to a vecX result; sxtl widens src_arr to dst_arr, then scvtfv converts in place on dst.
+define(`VECTOR_CAST_I2F_L', `
+instruct vcvt$1$2to$1$3`'(vecX dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == $1 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
+ match(Set dst (VectorCast$2`'2X src));
+ format %{ "sxtl $dst, T$5, $src, T$4\n\t"
+ "scvtfv T$5, $dst, $dst\t# convert $1$2 to $1$3 vector"
+ %}
+ ins_encode %{
+ __ sxtl(as_FloatRegister($dst$$reg), __ T$5, as_FloatRegister($src$$reg), __ T$4);
+ __ scvtfv(__ T$5, as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3 $4 $5
+VECTOR_CAST_I2F_L(4, S, F, 4H, 4S)
+VECTOR_CAST_I2F_L(2, I, D, 2S, 2D)
+dnl VECTOR_CAST_I2F(lanes, src_type, dst_type, reg, arr): emits rule vcvt<lanes><src_type>to<lanes><dst_type> where source and result share operand class vec<reg>; a single scvtfv on arrangement arr.
+define(`VECTOR_CAST_I2F', `
+instruct vcvt$1$2to$1$3`'(vec$4 dst, vec$4 src)
+%{
+ predicate(n->as_Vector()->length() == $1 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
+ match(Set dst (VectorCast$2`'2X src));
+ format %{ "scvtfv T$5, $dst, $src\t# convert $1$2 to $1$3 vector" %}
+ ins_encode %{
+ __ scvtfv(__ T$5, as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_class_default);
+%}')dnl
+dnl $1 $2 $3 $4 $5
+VECTOR_CAST_I2F(2, I, F, D, 2S)
+VECTOR_CAST_I2F(4, I, F, X, 4S)
+VECTOR_CAST_I2F(2, L, D, X, 2D)
+dnl VECTOR_CAST_F2F(src_type, dst_type, dst_reg, src_reg, insn, src_arr, dst_arr): emits rule vcvt2<src_type>to2<dst_type>, a 2-lane float/double cast using one <insn> (fcvtl or fcvtn below).
+define(`VECTOR_CAST_F2F', `
+instruct vcvt2$1to2$2`'(vec$3 dst, vec$4 src)
+%{
+ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
+ match(Set dst (VectorCast$1`'2X src));
+ format %{ "$5 $dst, T$7, $src, T$6\t# convert 2$1 to 2$2 vector" %}
+ ins_encode %{
+ __ $5(as_FloatRegister($dst$$reg), __ T$7, as_FloatRegister($src$$reg), __ T$6);
+ %}
+ ins_pipe(pipe_class_default);
+%}')dnl
+dnl $1 $2 $3 $4 $5 $6 $7
+VECTOR_CAST_F2F(F, D, X, D, fcvtl, 2S, 2D)
+VECTOR_CAST_F2F(D, F, D, X, fcvtn, 2D, 2S)
+dnl vcvt2Lto2F: VectorCastL2X producing 2 float lanes; scvtfv on T2D writes into dst first, then fcvtn narrows T2D to T2S in place.
+
+instruct vcvt2Lto2F(vecD dst, vecX src)
+%{
+ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorCastL2X src));
+ format %{ "scvtfv T2D, $dst, $src\n\t"
+ "fcvtn $dst, T2S, $dst, T2D\t# convert 2L to 2F vector"
+ %}
+ ins_encode %{
+ __ scvtfv(__ T2D, as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
+ __ fcvtn(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($dst$$reg), __ T2D);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// ------------------------------ Reduction -------------------------------
+dnl REDUCE_ADD_BORS(lanes, type, reg, sxt_suffix): emits rule reduce_add<lanes><type> for AddReductionVI on byte or short vectors. addv sums the lanes into tmp, smov moves lane 0 to dst, addw adds isrc, and sxt<sxt_suffix> (b or h) sign-extends the result.
+define(`REDUCE_ADD_BORS', `
+instruct reduce_add$1$2`'(iRegINoSp dst, iRegIorL2I isrc, vec$3 vsrc, vec$3 tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
+ match(Set dst (AddReductionVI isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "addv $tmp, T$1`'iTYPE2SIMD($2), $vsrc\n\t"
+ "smov $dst, $tmp, iTYPE2SIMD($2), 0\n\t"
+ "addw $dst, $dst, $isrc\n\t"
+ "sxt$4 $dst, $dst\t# add reduction$1$2"
+ %}
+ ins_encode %{
+ __ addv(as_FloatRegister($tmp$$reg), __ T$1`'iTYPE2SIMD($2), as_FloatRegister($vsrc$$reg));
+ __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ iTYPE2SIMD($2), 0);
+ __ addw($dst$$Register, $dst$$Register, $isrc$$Register);
+ __ sxt$4($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3 $4
+REDUCE_ADD_BORS(8, B, D, b)
+REDUCE_ADD_BORS(16, B, X, b)
+REDUCE_ADD_BORS(4, S, D, h)
+REDUCE_ADD_BORS(8, S, X, h)
+dnl reduce_add2L: AddReductionVL on a vecX source; addpd writes the pairwise sum of vsrc into tmp, umov moves doubleword lane 0 to dst, and add combines it with isrc.
+
+instruct reduce_add2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, vecX tmp)
+%{
+ match(Set dst (AddReductionVL isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "addpd $tmp, $vsrc\n\t"
+ "umov $dst, $tmp, D, 0\n\t"
+ "add $dst, $isrc, $dst\t# add reduction2L"
+ %}
+ ins_encode %{
+ __ addpd(as_FloatRegister($tmp$$reg), as_FloatRegister($vsrc$$reg));
+ __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ D, 0);
+ __ add($dst$$Register, $isrc$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_mul8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD vtmp1, vecD vtmp2, iRegINoSp itmp)
+%{ // MulReductionVI on a vecD byte vector: two vector folding steps, then two scalar multiplies, each followed by sxtb.
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (MulReductionVI isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp);
+ format %{ "ins $vtmp1, S, $vsrc, 0, 1\n\t"
+ "mulv $vtmp1, T8B, $vtmp1, $vsrc\n\t"
+ "ins $vtmp2, H, $vtmp1, 0, 1\n\t"
+ "mulv $vtmp2, T8B, $vtmp2, $vtmp1\n\t"
+ "umov $itmp, $vtmp2, B, 0\n\t"
+ "mulw $dst, $itmp, $isrc\n\t"
+ "sxtb $dst, $dst\n\t"
+ "umov $itmp, $vtmp2, B, 1\n\t"
+ "mulw $dst, $itmp, $dst\n\t"
+ "sxtb $dst, $dst\t# mul reduction8B"
+ %}
+ ins_encode %{
+ __ ins(as_FloatRegister($vtmp1$$reg), __ S, // presumably moves word lane 1 of vsrc to lane 0 of vtmp1 - TODO confirm index order
+ as_FloatRegister($vsrc$$reg), 0, 1);
+ __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B, // bytewise product of the two halves
+ as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg));
+ __ ins(as_FloatRegister($vtmp2$$reg), __ H, // same folding step at halfword granularity
+ as_FloatRegister($vtmp1$$reg), 0, 1);
+ __ mulv(as_FloatRegister($vtmp2$$reg), __ T8B,
+ as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
+ __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 0); // byte lane 0 to a general register
+ __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register); // fold in the scalar input
+ __ sxtb($dst$$Register, $dst$$Register);
+ __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 1); // byte lane 1 to a general register
+ __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
+ __ sxtb($dst$$Register, $dst$$Register); // result is sign-extended from a byte
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_mul16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp1, vecX vtmp2, iRegINoSp itmp)
+%{ // MulReductionVI on a vecX byte vector: three vector folding steps, then two scalar multiplies, each followed by sxtb.
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (MulReductionVI isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp);
+ format %{ "ins $vtmp1, D, $vsrc, 0, 1\n\t"
+ "mulv $vtmp1, T8B, $vtmp1, $vsrc\n\t"
+ "ins $vtmp2, S, $vtmp1, 0, 1\n\t"
+ "mulv $vtmp1, T8B, $vtmp2, $vtmp1\n\t"
+ "ins $vtmp2, H, $vtmp1, 0, 1\n\t"
+ "mulv $vtmp2, T8B, $vtmp2, $vtmp1\n\t"
+ "umov $itmp, $vtmp2, B, 0\n\t"
+ "mulw $dst, $itmp, $isrc\n\t"
+ "sxtb $dst, $dst\n\t"
+ "umov $itmp, $vtmp2, B, 1\n\t"
+ "mulw $dst, $itmp, $dst\n\t"
+ "sxtb $dst, $dst\t# mul reduction16B"
+ %}
+ ins_encode %{
+ __ ins(as_FloatRegister($vtmp1$$reg), __ D, // presumably moves doubleword lane 1 of vsrc to lane 0 of vtmp1 - TODO confirm index order
+ as_FloatRegister($vsrc$$reg), 0, 1);
+ __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B, // bytewise product of the two halves
+ as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg));
+ __ ins(as_FloatRegister($vtmp2$$reg), __ S, // same folding step at word granularity
+ as_FloatRegister($vtmp1$$reg), 0, 1);
+ __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B,
+ as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
+ __ ins(as_FloatRegister($vtmp2$$reg), __ H, // same folding step at halfword granularity
+ as_FloatRegister($vtmp1$$reg), 0, 1);
+ __ mulv(as_FloatRegister($vtmp2$$reg), __ T8B,
+ as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
+ __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 0); // byte lane 0 to a general register
+ __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register); // fold in the scalar input
+ __ sxtb($dst$$Register, $dst$$Register);
+ __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 1); // byte lane 1 to a general register
+ __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
+ __ sxtb($dst$$Register, $dst$$Register); // result is sign-extended from a byte
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_mul4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD vtmp, iRegINoSp itmp)
+%{ // MulReductionVI on a vecD short vector: one vector folding step, then two scalar multiplies, each followed by sxth.
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (MulReductionVI isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP vtmp, TEMP itmp);
+ format %{ "ins $vtmp, S, $vsrc, 0, 1\n\t"
+ "mulv $vtmp, T4H, $vtmp, $vsrc\n\t"
+ "umov $itmp, $vtmp, H, 0\n\t"
+ "mulw $dst, $itmp, $isrc\n\t"
+ "sxth $dst, $dst\n\t"
+ "umov $itmp, $vtmp, H, 1\n\t"
+ "mulw $dst, $itmp, $dst\n\t"
+ "sxth $dst, $dst\t# mul reduction4S"
+ %}
+ ins_encode %{
+ __ ins(as_FloatRegister($vtmp$$reg), __ S, // presumably moves word lane 1 of vsrc to lane 0 of vtmp - TODO confirm index order
+ as_FloatRegister($vsrc$$reg), 0, 1);
+ __ mulv(as_FloatRegister($vtmp$$reg), __ T4H, // halfword product of the two halves
+ as_FloatRegister($vtmp$$reg), as_FloatRegister($vsrc$$reg));
+ __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ H, 0); // halfword lane 0 to a general register
+ __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register); // fold in the scalar input
+ __ sxth($dst$$Register, $dst$$Register);
+ __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ H, 1); // halfword lane 1 to a general register
+ __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
+ __ sxth($dst$$Register, $dst$$Register); // result is sign-extended from a halfword
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_mul8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp1, vecX vtmp2, iRegINoSp itmp)
+%{ // MulReductionVI on a vecX short vector: two vector folding steps, then two scalar multiplies, each followed by sxth.
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (MulReductionVI isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp);
+ format %{ "ins $vtmp1, D, $vsrc, 0, 1\n\t"
+ "mulv $vtmp1, T4H, $vtmp1, $vsrc\n\t"
+ "ins $vtmp2, S, $vtmp1, 0, 1\n\t"
+ "mulv $vtmp2, T4H, $vtmp2, $vtmp1\n\t"
+ "umov $itmp, $vtmp2, H, 0\n\t"
+ "mulw $dst, $itmp, $isrc\n\t"
+ "sxth $dst, $dst\n\t"
+ "umov $itmp, $vtmp2, H, 1\n\t"
+ "mulw $dst, $itmp, $dst\n\t"
+ "sxth $dst, $dst\t# mul reduction8S"
+ %}
+ ins_encode %{
+ __ ins(as_FloatRegister($vtmp1$$reg), __ D, // presumably moves doubleword lane 1 of vsrc to lane 0 of vtmp1 - TODO confirm index order
+ as_FloatRegister($vsrc$$reg), 0, 1);
+ __ mulv(as_FloatRegister($vtmp1$$reg), __ T4H, // halfword product of the two halves
+ as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg));
+ __ ins(as_FloatRegister($vtmp2$$reg), __ S, // same folding step at word granularity
+ as_FloatRegister($vtmp1$$reg), 0, 1);
+ __ mulv(as_FloatRegister($vtmp2$$reg), __ T4H,
+ as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
+ __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ H, 0); // halfword lane 0 to a general register
+ __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register); // fold in the scalar input
+ __ sxth($dst$$Register, $dst$$Register);
+ __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ H, 1); // halfword lane 1 to a general register
+ __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
+ __ sxth($dst$$Register, $dst$$Register); // result is sign-extended from a halfword
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_mul2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp)
+%{ // MulReductionVL on a vecX source: multiply isrc by doubleword lanes 0 and 1 in general registers.
+ match(Set dst (MulReductionVL isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, D, 0\n\t"
+ "mul $dst, $isrc, $tmp\n\t"
+ "umov $tmp, $vsrc, D, 1\n\t"
+ "mul $dst, $dst, $tmp\t# mul reduction2L"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0); // lane 0 to tmp
+ __ mul($dst$$Register, $isrc$$Register, $tmp$$Register); // dst = isrc * lane 0
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 1); // lane 1 to tmp
+ __ mul($dst$$Register, $dst$$Register, $tmp$$Register); // dst = dst * lane 1
+ %}
+ ins_pipe(pipe_slow);
+%}
+dnl REDUCE_MAX_MIN_INT(op, lanes, type, reg, node_prefix, mov_prefix, cond): emits rule reduce_<op><lanes><type> for <node_prefix>ReductionV. s<op>v reduces vsrc into tmp, <mov_prefix>mov moves lane 0 to dst, cmpw compares dst with isrc, and cselw keeps dst when <cond> holds, else takes isrc.
+define(`REDUCE_MAX_MIN_INT', `
+instruct reduce_$1$2$3`'(iRegINoSp dst, iRegIorL2I isrc, vec$4 vsrc, vec$4 tmp, rFlagsReg cr)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
+ match(Set dst ($5ReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "s$1v $tmp, T$2`'iTYPE2SIMD($3), $vsrc\n\t"
+ "$6mov $dst, $tmp, iTYPE2SIMD($3), 0\n\t"
+ "cmpw $dst, $isrc\n\t"
+ "cselw $dst, $dst, $isrc $7\t# $1 reduction$2$3"
+ %}
+ ins_encode %{
+ __ s$1v(as_FloatRegister($tmp$$reg), __ T$2`'iTYPE2SIMD($3), as_FloatRegister($vsrc$$reg));
+ __ $6mov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ iTYPE2SIMD($3), 0);
+ __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
+ __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::$7);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3 $4 $5 $6 $7
+REDUCE_MAX_MIN_INT(max, 8, B, D, Max, s, GT)
+REDUCE_MAX_MIN_INT(max, 16, B, X, Max, s, GT)
+REDUCE_MAX_MIN_INT(max, 4, S, D, Max, s, GT)
+REDUCE_MAX_MIN_INT(max, 8, S, X, Max, s, GT)
+REDUCE_MAX_MIN_INT(max, 4, I, X, Max, u, GT)
+REDUCE_MAX_MIN_INT(min, 8, B, D, Min, s, LT)
+REDUCE_MAX_MIN_INT(min, 16, B, X, Min, s, LT)
+REDUCE_MAX_MIN_INT(min, 4, S, D, Min, s, LT)
+REDUCE_MAX_MIN_INT(min, 8, S, X, Min, s, LT)
+REDUCE_MAX_MIN_INT(min, 4, I, X, Min, u, LT)
+dnl REDUCE_MAX_MIN_2I(op, node_prefix, cond): emits rule reduce_<op>2I for <node_prefix>ReductionV on a vecD int vector. dup on T2D copies vsrc into the vecX tmp, s<op>v on T4S reduces it, umov moves word lane 0 to dst, and cmpw/cselw choose between dst and isrc using <cond>.
+define(`REDUCE_MAX_MIN_2I', `
+instruct reduce_$1`'2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecX tmp, rFlagsReg cr)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst ($2ReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "dup $tmp, T2D, $vsrc\n\t"
+ "s$1v $tmp, T4S, $tmp\n\t"
+ "umov $dst, $tmp, S, 0\n\t"
+ "cmpw $dst, $isrc\n\t"
+ "cselw $dst, $dst, $isrc $3\t# $1 reduction2I"
+ %}
+ ins_encode %{
+ __ dup(as_FloatRegister($tmp$$reg), __ T2D, as_FloatRegister($vsrc$$reg));
+ __ s$1v(as_FloatRegister($tmp$$reg), __ T4S, as_FloatRegister($tmp$$reg));
+ __ umov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ S, 0);
+ __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
+ __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::$3);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3
+REDUCE_MAX_MIN_2I(max, Max, GT)
+REDUCE_MAX_MIN_2I(min, Min, LT)
+dnl REDUCE_MAX_MIN_2L(op, node_prefix, cond): emits rule reduce_<op>2L for <node_prefix>ReductionV on a vecX long vector, done entirely in general registers. Each doubleword lane is moved to tmp with umov, then cmp/csel with <cond> selects between the running value (isrc first, then dst) and tmp.
+define(`REDUCE_MAX_MIN_2L', `
+instruct reduce_$1`'2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp, rFlagsReg cr)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst ($2ReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "umov $tmp, $vsrc, D, 0\n\t"
+ "cmp $isrc,$tmp\n\t"
+ "csel $dst, $isrc, $tmp $3\n\t"
+ "umov $tmp, $vsrc, D, 1\n\t"
+ "cmp $dst, $tmp\n\t"
+ "csel $dst, $dst, $tmp $3\t# $1 reduction2L"
+ %}
+ ins_encode %{
+ __ umov(as_Register($tmp$$reg), as_FloatRegister($vsrc$$reg), __ D, 0);
+ __ cmp(as_Register($isrc$$reg), as_Register($tmp$$reg));
+ __ csel(as_Register($dst$$reg), as_Register($isrc$$reg), as_Register($tmp$$reg), Assembler::$3);
+ __ umov(as_Register($tmp$$reg), as_FloatRegister($vsrc$$reg), __ D, 1);
+ __ cmp(as_Register($dst$$reg), as_Register($tmp$$reg));
+ __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($tmp$$reg), Assembler::$3);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3
+REDUCE_MAX_MIN_2L(max, Max, GT)
+REDUCE_MAX_MIN_2L(min, Min, LT)
+dnl
+dnl REDUCE_LOGIC_OP_8B(op, Op)
+dnl Emits instruct reduce_<op>8B, which matches <Op>ReductionV on a scalar int
+dnl (isrc) and a vecD source (vsrc) whose element type is T_BYTE.
+dnl   $1  assembler op stem (and/orr/eor); the 32-bit call is formed as <op>w
+dnl   $2  ideal-node prefix (And/Or/Xor)
+dnl The two S lanes are moved to general registers and combined, then dst is
+dnl folded onto itself with LSR 16 and LSR 8 so that its low byte combines all
+dnl eight source bytes. That value is combined with isrc and sign-extended
+dnl from a byte by sxtb.
+define(`REDUCE_LOGIC_OP_8B', `
+instruct reduce_$1`'8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst ($2ReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, S, 0\n\t"
+ "umov $dst, $vsrc, S, 1\n\t"
+ "$1w $dst, $dst, $tmp\n\t"
+ "$1w $dst, $dst, $dst, LSR #16\n\t"
+ "$1w $dst, $dst, $dst, LSR #8\n\t"
+ "$1w $dst, $isrc, $dst\n\t"
+ "sxtb $dst, $dst\t# $1 reduction8B"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
+ __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
+ __ $1w($dst$$Register, $dst$$Register, $tmp$$Register);
+ __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
+ __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8);
+ __ $1w($dst$$Register, $isrc$$Register, $dst$$Register);
+ __ sxtb($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2
+REDUCE_LOGIC_OP_8B(and, And)
+REDUCE_LOGIC_OP_8B(orr, Or)
+REDUCE_LOGIC_OP_8B(eor, Xor)
+dnl REDUCE_LOGIC_OP_16B(op, Op, op64)
+dnl Emits instruct reduce_<op>16B, which matches <Op>ReductionV on a scalar int
+dnl (isrc) and a vecX source (vsrc) whose element type is T_BYTE.
+dnl   $1  assembler op stem (and/orr/eor); the 32-bit call is formed as <op>w
+dnl   $2  ideal-node prefix (And/Or/Xor)
+dnl   $3  assembler call used for the first two steps (andr/orr/eor), which
+dnl       operate on the values read from the D lanes
+dnl The two D lanes are combined and folded with LSR 32 using $3, then folded
+dnl with LSR 16 and LSR 8 using <op>w, combined with isrc, and sign-extended
+dnl from a byte by sxtb.
+define(`REDUCE_LOGIC_OP_16B', `
+instruct reduce_$1`'16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst ($2ReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, D, 0\n\t"
+ "umov $dst, $vsrc, D, 1\n\t"
+ "$3 $dst, $dst, $tmp\n\t"
+ "$3 $dst, $dst, $dst, LSR #32\n\t"
+ "$1w $dst, $dst, $dst, LSR #16\n\t"
+ "$1w $dst, $dst, $dst, LSR #8\n\t"
+ "$1w $dst, $isrc, $dst\n\t"
+ "sxtb $dst, $dst\t# $1 reduction16B"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
+ __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
+ __ $3($dst$$Register, $dst$$Register, $tmp$$Register);
+ __ $3($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);
+ __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
+ __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8);
+ __ $1w($dst$$Register, $isrc$$Register, $dst$$Register);
+ __ sxtb($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3
+REDUCE_LOGIC_OP_16B(and, And, andr)
+REDUCE_LOGIC_OP_16B(orr, Or, orr )
+REDUCE_LOGIC_OP_16B(eor, Xor, eor )
+dnl
+dnl REDUCE_LOGIC_OP_4S(op, Op)
+dnl Emits instruct reduce_<op>4S, which matches <Op>ReductionV on a scalar int
+dnl (isrc) and a vecD source (vsrc) whose element type is T_SHORT.
+dnl   $1  assembler op stem (and/orr/eor); the 32-bit call is formed as <op>w
+dnl   $2  ideal-node prefix (And/Or/Xor)
+dnl The two S lanes are combined, dst is folded onto itself with LSR 16,
+dnl combined with isrc, and sign-extended from a halfword by sxth.
+define(`REDUCE_LOGIC_OP_4S', `
+instruct reduce_$1`'4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst ($2ReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, S, 0\n\t"
+ "umov $dst, $vsrc, S, 1\n\t"
+ "$1w $dst, $dst, $tmp\n\t"
+ "$1w $dst, $dst, $dst, LSR #16\n\t"
+ "$1w $dst, $isrc, $dst\n\t"
+ "sxth $dst, $dst\t# $1 reduction4S"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
+ __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
+ __ $1w($dst$$Register, $dst$$Register, $tmp$$Register);
+ __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
+ __ $1w($dst$$Register, $isrc$$Register, $dst$$Register);
+ __ sxth($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2
+REDUCE_LOGIC_OP_4S(and, And)
+REDUCE_LOGIC_OP_4S(orr, Or)
+REDUCE_LOGIC_OP_4S(eor, Xor)
+dnl
+dnl REDUCE_LOGIC_OP_8S(op, Op, op64)
+dnl Emits instruct reduce_<op>8S, which matches <Op>ReductionV on a scalar int
+dnl (isrc) and a vecX source (vsrc) whose element type is T_SHORT.
+dnl   $1  assembler op stem (and/orr/eor); the 32-bit call is formed as <op>w
+dnl   $2  ideal-node prefix (And/Or/Xor)
+dnl   $3  assembler call used for the first two steps (andr/orr/eor), which
+dnl       operate on the values read from the D lanes
+dnl The two D lanes are combined and folded with LSR 32 using $3, folded with
+dnl LSR 16 using <op>w, combined with isrc, and sign-extended by sxth.
+define(`REDUCE_LOGIC_OP_8S', `
+instruct reduce_$1`'8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst ($2ReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, D, 0\n\t"
+ "umov $dst, $vsrc, D, 1\n\t"
+ "$3 $dst, $dst, $tmp\n\t"
+ "$3 $dst, $dst, $dst, LSR #32\n\t"
+ "$1w $dst, $dst, $dst, LSR #16\n\t"
+ "$1w $dst, $isrc, $dst\n\t"
+ "sxth $dst, $dst\t# $1 reduction8S"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
+ __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
+ __ $3($dst$$Register, $dst$$Register, $tmp$$Register);
+ __ $3($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);
+ __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
+ __ $1w($dst$$Register, $isrc$$Register, $dst$$Register);
+ __ sxth($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3
+REDUCE_LOGIC_OP_8S(and, And, andr)
+REDUCE_LOGIC_OP_8S(orr, Or, orr )
+REDUCE_LOGIC_OP_8S(eor, Xor, eor )
+dnl
+dnl REDUCE_LOGIC_OP_2I(op, Op)
+dnl Emits instruct reduce_<op>2I, which matches <Op>ReductionV on a scalar int
+dnl (isrc) and a vecD source (vsrc) whose element type is T_INT.
+dnl   $1  assembler op stem (and/orr/eor); the 32-bit call is formed as <op>w
+dnl   $2  ideal-node prefix (And/Or/Xor)
+dnl Each S lane is moved to tmp in turn and combined into dst, starting
+dnl from isrc.
+define(`REDUCE_LOGIC_OP_2I', `
+instruct reduce_$1`'2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst ($2ReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, S, 0\n\t"
+ "$1w $dst, $tmp, $isrc\n\t"
+ "umov $tmp, $vsrc, S, 1\n\t"
+ "$1w $dst, $tmp, $dst\t# $1 reduction2I"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
+ __ $1w($dst$$Register, $tmp$$Register, $isrc$$Register);
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
+ __ $1w($dst$$Register, $tmp$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2
+REDUCE_LOGIC_OP_2I(and, And)
+REDUCE_LOGIC_OP_2I(orr, Or)
+REDUCE_LOGIC_OP_2I(eor, Xor)
+dnl
+dnl REDUCE_LOGIC_OP_4I(op, Op, op64)
+dnl Emits instruct reduce_<op>4I, which matches <Op>ReductionV on a scalar int
+dnl (isrc) and a vecX source (vsrc) whose element type is T_INT.
+dnl   $1  assembler op stem (and/orr/eor); the 32-bit call is formed as <op>w
+dnl   $2  ideal-node prefix (And/Or/Xor)
+dnl   $3  assembler call used for the first two steps (andr/orr/eor), which
+dnl       operate on the values read from the D lanes
+dnl The two D lanes are combined and folded with LSR 32 using $3; the final
+dnl <op>w combines the result with isrc.
+define(`REDUCE_LOGIC_OP_4I', `
+instruct reduce_$1`'4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst ($2ReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, D, 0\n\t"
+ "umov $dst, $vsrc, D, 1\n\t"
+ "$3 $dst, $dst, $tmp\n\t"
+ "$3 $dst, $dst, $dst, LSR #32\n\t"
+ "$1w $dst, $isrc, $dst\t# $1 reduction4I"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
+ __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
+ __ $3($dst$$Register, $dst$$Register, $tmp$$Register);
+ __ $3($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);
+ __ $1w($dst$$Register, $isrc$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3
+REDUCE_LOGIC_OP_4I(and, And, andr)
+REDUCE_LOGIC_OP_4I(orr, Or, orr )
+REDUCE_LOGIC_OP_4I(eor, Xor, eor )
+dnl
+dnl REDUCE_LOGIC_OP_2L(op, Op, op64)
+dnl Emits instruct reduce_<op>2L, which matches <Op>ReductionV on a scalar long
+dnl (isrc) and a vecX source (vsrc) whose element type is T_LONG.
+dnl   $1  op stem (and/orr/eor), used in the instruct name and format comment
+dnl   $2  ideal-node prefix (And/Or/Xor)
+dnl   $3  assembler call that performs the combination (andr/orr/eor)
+dnl Each D lane is moved to tmp in turn and combined into dst, starting
+dnl from isrc.
+define(`REDUCE_LOGIC_OP_2L', `
+instruct reduce_$1`'2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst ($2ReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, D, 0\n\t"
+ "$3 $dst, $isrc, $tmp\n\t"
+ "umov $tmp, $vsrc, D, 1\n\t"
+ "$3 $dst, $dst, $tmp\t# $1 reduction2L"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
+ __ $3($dst$$Register, $isrc$$Register, $tmp$$Register);
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
+ __ $3($dst$$Register, $dst$$Register, $tmp$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3
+REDUCE_LOGIC_OP_2L(and, And, andr)
+REDUCE_LOGIC_OP_2L(orr, Or, orr )
+REDUCE_LOGIC_OP_2L(eor, Xor, eor )
+dnl
+
+// ------------------------------ Vector insert ---------------------------------
+dnl VECTOR_INSERT_I(lanes, type, vec, reg, arrangement)
+dnl Emits instruct insert<lanes><type>, which matches VectorInsert of a general
+dnl register value (val) into lane idx of a copy of src.
+dnl   $1  lane count, used in the name and the insert arrangement
+dnl   $2  element type letter (B/S/I/L); passed to the helper macros
+dnl       TYPE2DATATYPE and iTYPE2SIMD, which are defined outside this chunk
+dnl   $3  vector operand class suffix (D/X)
+dnl   $4  scalar register class letter (I/L); also passed to the ORL2I helper
+dnl   $5  arrangement used for the whole-register copy (8B/16B)
+dnl src is copied into dst with orr only when the two registers differ, then
+dnl mov writes val into the selected lane.
+define(`VECTOR_INSERT_I', `
+instruct insert$1$2`'(vec$3 dst, vec$3 src, iReg$4`'ORL2I($4) val, immI idx)
+%{
+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
+ match(Set dst (VectorInsert (Binary src val) idx));
+ ins_cost(INSN_COST);
+ format %{ "orr $dst, T$5, $src, $src\n\t"
+ "mov $dst, T$1`'iTYPE2SIMD($2), $idx, $val\t# insert into vector($1$2)" %}
+ ins_encode %{
+ if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
+ __ orr(as_FloatRegister($dst$$reg), __ T$5,
+ as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
+ }
+ __ mov(as_FloatRegister($dst$$reg), __ T$1`'iTYPE2SIMD($2), $idx$$constant, $val$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3 $4 $5
+VECTOR_INSERT_I(8, B, D, I, 8B)
+VECTOR_INSERT_I(16, B, X, I, 16B)
+VECTOR_INSERT_I(4, S, D, I, 8B)
+VECTOR_INSERT_I(8, S, X, I, 16B)
+VECTOR_INSERT_I(2, I, D, I, 8B)
+VECTOR_INSERT_I(4, I, X, I, 16B)
+VECTOR_INSERT_I(2, L, X, L, 16B)
+dnl
+dnl VECTOR_INSERT_F(name, vec, type, arrangement, size)
+dnl Emits instruct insert<name>, which matches VectorInsert of a floating-point
+dnl register value (val) into lane idx of a copy of src.
+dnl   $1  name suffix (2F/4F/2D)
+dnl   $2  vector operand class suffix (D/X)
+dnl   $3  element type letter (F/D); selects the vReg operand class and, via
+dnl       the TYPE2DATATYPE helper defined outside this chunk, the predicate
+dnl   $4  arrangement used for the whole-register copy (8B/16B)
+dnl   $5  lane size passed to ins (S/D)
+dnl dst is declared TEMP_DEF and the orr copy is unconditional; ins is then
+dnl called with indices idx and 0.
+define(`VECTOR_INSERT_F', `
+instruct insert$1`'(vec$2 dst, vec$2 src, vReg$3 val, immI idx)
+%{
+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
+ match(Set dst (VectorInsert (Binary src val) idx));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst);
+ format %{ "orr $dst, T$4, $src, $src\n\t"
+ "ins $dst, $5, $val, $idx, 0\t# insert into vector($1)" %}
+ ins_encode %{
+ __ orr(as_FloatRegister($dst$$reg), __ T$4,
+ as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
+ __ ins(as_FloatRegister($dst$$reg), __ $5,
+ as_FloatRegister($val$$reg), $idx$$constant, 0);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3 $4 $5
+VECTOR_INSERT_F(2F, D, F, 8B, S)
+VECTOR_INSERT_F(4F, X, F, 16B, S)
+VECTOR_INSERT_F(2D, X, D, 16B, D)
+dnl
+
+// ------------------------------ Vector extract ---------------------------------
+dnl VECTOR_EXTRACT_I(lanes, type, reg, vec, sign, size)
+dnl Emits instruct extract<lanes><type>, which matches Extract<type> of lane
+dnl idx from src into a general register.
+dnl   $1  lane count checked by the predicate
+dnl   $2  element type letter (B/S/I/L), selects the Extract node
+dnl   $3  destination register class letter (I/L)
+dnl   $4  vector operand class suffix (D/X)
+dnl   $5  s or u, selecting the smov or umov assembler call
+dnl   $6  lane size passed to that call (B/H/S/D)
+define(`VECTOR_EXTRACT_I', `
+instruct extract$1$2`'(iReg$3NoSp dst, vec$4 src, immI idx)
+%{
+ predicate(n->in(1)->bottom_type()->is_vect()->length() == $1);
+ match(Set dst (Extract$2 src idx));
+ ins_cost(INSN_COST);
+ format %{ "$5mov $dst, $src, $6, $idx\t# extract from vector($1$2)" %}
+ ins_encode %{
+ __ $5mov($dst$$Register, as_FloatRegister($src$$reg), __ $6, $idx$$constant);
+ %}
+ ins_pipe(pipe_class_default);
+%}')dnl
+dnl $1 $2 $3 $4 $5 $6
+VECTOR_EXTRACT_I(8, B, I, D, s, B)
+VECTOR_EXTRACT_I(16, B, I, X, s, B)
+VECTOR_EXTRACT_I(4, S, I, D, s, H)
+VECTOR_EXTRACT_I(8, S, I, X, s, H)
+VECTOR_EXTRACT_I(2, I, I, D, u, S)
+VECTOR_EXTRACT_I(4, I, I, X, u, S)
+VECTOR_EXTRACT_I(2, L, L, X, u, D)
+dnl
+dnl VECTOR_EXTRACT_F(lanes, type, vec, size)
+dnl Emits instruct extract<lanes><type>, which matches Extract<type> of lane
+dnl idx from src into a floating-point register.
+dnl   $1  lane count checked by the predicate
+dnl   $2  element type letter (F/D); selects the Extract node and vReg class
+dnl   $3  vector operand class suffix (D/X)
+dnl   $4  lane size passed to ins (S/D)
+dnl ins is called with indices 0 and idx, presumably destination lane 0 and
+dnl source lane idx.
+define(`VECTOR_EXTRACT_F', `
+instruct extract$1$2`'(vReg$2 dst, vec$3 src, immI idx)
+%{
+ predicate(n->in(1)->bottom_type()->is_vect()->length() == $1);
+ match(Set dst (Extract$2 src idx));
+ ins_cost(INSN_COST);
+ format %{ "ins $dst, $4, $src, 0, $idx\t# extract from vector($1$2)" %}
+ ins_encode %{
+ __ ins(as_FloatRegister($dst$$reg), __ $4,
+ as_FloatRegister($src$$reg), 0, $idx$$constant);
+ %}
+ ins_pipe(pipe_class_default);
+%}')dnl
+dnl $1 $2 $3 $4
+VECTOR_EXTRACT_F(2, F, D, S)
+VECTOR_EXTRACT_F(4, F, X, S)
+VECTOR_EXTRACT_F(2, D, X, D)
+dnl
+
+// ------------------------------ Vector comparison ---------------------------------
+dnl VECTOR_CMP_EQ_GT_GE(cond, lanes, type, vec, size, fp, pipe)
+dnl Emits instruct vcm<cond><lanes><type>, which matches VectorMaskCmp when the
+dnl node predicate is BoolTest::<cond> and the inputs have the given lane
+dnl count and element type.
+dnl   $1  condition (eq/gt/ge); also forms the cm<cond> assembler call
+dnl   $2  lane count
+dnl   $3  element type letter, mapped by the TYPE2DATATYPE helper (defined
+dnl       outside this chunk)
+dnl   $4  vector operand class suffix (D/X)
+dnl   $5  arrangement size letter (B/H/S/D)
+dnl   $6  empty for integer types, f for floating point (fcm<cond>)
+dnl   $7  pipeline class suffix (64/128), giving vdop64 or vdop128
+define(`VECTOR_CMP_EQ_GT_GE', `
+instruct vcm$1$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == $2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::$1 &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "$6cm$1 $dst, $src1, $src2\t# vector cmp ($2$3)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ $6cm$1(as_FloatRegister($dst$$reg), __ T$2$5,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop$7);
+%}')dnl
+dnl $1 $2 $3 $4 $5 $6 $7
+VECTOR_CMP_EQ_GT_GE(eq, 8, B, D, B, , 64)
+VECTOR_CMP_EQ_GT_GE(eq, 16,B, X, B, , 128)
+VECTOR_CMP_EQ_GT_GE(eq, 4, S, D, H, , 64)
+VECTOR_CMP_EQ_GT_GE(eq, 8, S, X, H, , 128)
+VECTOR_CMP_EQ_GT_GE(eq, 2, I, D, S, , 64)
+VECTOR_CMP_EQ_GT_GE(eq, 4, I, X, S, , 128)
+VECTOR_CMP_EQ_GT_GE(eq, 2, L, X, D, , 128)
+VECTOR_CMP_EQ_GT_GE(eq, 2, F, D, S, f, 64)
+VECTOR_CMP_EQ_GT_GE(eq, 4, F, X, S, f, 128)
+VECTOR_CMP_EQ_GT_GE(eq, 2, D, X, D, f, 128)
+VECTOR_CMP_EQ_GT_GE(gt, 8, B, D, B, , 64)
+VECTOR_CMP_EQ_GT_GE(gt, 16,B, X, B, , 128)
+VECTOR_CMP_EQ_GT_GE(gt, 4, S, D, H, , 64)
+VECTOR_CMP_EQ_GT_GE(gt, 8, S, X, H, , 128)
+VECTOR_CMP_EQ_GT_GE(gt, 2, I, D, S, , 64)
+VECTOR_CMP_EQ_GT_GE(gt, 4, I, X, S, , 128)
+VECTOR_CMP_EQ_GT_GE(gt, 2, L, X, D, , 128)
+VECTOR_CMP_EQ_GT_GE(gt, 2, F, D, S, f, 64)
+VECTOR_CMP_EQ_GT_GE(gt, 4, F, X, S, f, 128)
+VECTOR_CMP_EQ_GT_GE(gt, 2, D, X, D, f, 128)
+VECTOR_CMP_EQ_GT_GE(ge, 8, B, D, B, , 64)
+VECTOR_CMP_EQ_GT_GE(ge, 16,B, X, B, , 128)
+VECTOR_CMP_EQ_GT_GE(ge, 4, S, D, H, , 64)
+VECTOR_CMP_EQ_GT_GE(ge, 8, S, X, H, , 128)
+VECTOR_CMP_EQ_GT_GE(ge, 2, I, D, S, , 64)
+VECTOR_CMP_EQ_GT_GE(ge, 4, I, X, S, , 128)
+VECTOR_CMP_EQ_GT_GE(ge, 2, L, X, D, , 128)
+VECTOR_CMP_EQ_GT_GE(ge, 2, F, D, S, f, 64)
+VECTOR_CMP_EQ_GT_GE(ge, 4, F, X, S, f, 128)
+VECTOR_CMP_EQ_GT_GE(ge, 2, D, X, D, f, 128)
+dnl
+dnl VECTOR_CMP_NE(lanes, type, vec, size, fp, arrangement)
+dnl Emits instruct vcmne<lanes><type>, which matches VectorMaskCmp when the node
+dnl predicate is BoolTest::ne. The result is built as an equality compare
+dnl followed by a bitwise not of dst.
+dnl   $1  lane count
+dnl   $2  element type letter, mapped by the TYPE2DATATYPE helper (defined
+dnl       outside this chunk)
+dnl   $3  vector operand class suffix (D/X)
+dnl   $4  arrangement size letter for the compare (B/H/S/D)
+dnl   $5  empty for integer types, f for floating point (fcmeq)
+dnl   $6  arrangement for the notr call (8B/16B)
+dnl The format string keeps the trailing comment on the compare line and
+dnl puts the line break after it, so the two instructions print on two lines.
+define(`VECTOR_CMP_NE', `
+instruct vcmne$1$2`'(vec$3 dst, vec$3 src1, vec$3 src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == $1 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "$5cmeq $dst, $src1, $src2\t# vector cmp ($1$2)\n\t"
+ "not $dst, $dst" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ $5cmeq(as_FloatRegister($dst$$reg), __ T$1$4,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ __ notr(as_FloatRegister($dst$$reg), __ T$6, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3 $4 $5 $6
+VECTOR_CMP_NE(8, B, D, B, , 8B)
+VECTOR_CMP_NE(16,B, X, B, , 16B)
+VECTOR_CMP_NE(4, S, D, H, , 8B)
+VECTOR_CMP_NE(8, S, X, H, , 16B)
+VECTOR_CMP_NE(2, I, D, S, , 8B)
+VECTOR_CMP_NE(4, I, X, S, , 16B)
+VECTOR_CMP_NE(2, L, X, D, , 16B)
+VECTOR_CMP_NE(2, F, D, S, f, 8B)
+VECTOR_CMP_NE(4, F, X, S, f, 16B)
+VECTOR_CMP_NE(2, D, X, D, f, 16B)
+dnl
+dnl VECTOR_CMP_LT_LE(cond, lanes, type, vec, size, fp, swapped, pipe)
+dnl Emits instruct vcm<cond><lanes><type>, which matches VectorMaskCmp when the
+dnl node predicate is BoolTest::<cond> (lt/le). The compare is issued as the
+dnl mirrored condition $7 with src2 and src1 passed in swapped order.
+dnl   $1  condition (lt/le)
+dnl   $2  lane count
+dnl   $3  element type letter, mapped by the TYPE2DATATYPE helper (defined
+dnl       outside this chunk)
+dnl   $4  vector operand class suffix (D/X)
+dnl   $5  arrangement size letter (B/H/S/D)
+dnl   $6  empty for integer types, f for floating point
+dnl   $7  condition actually emitted (gt for lt, ge for le)
+dnl   $8  pipeline class suffix (64/128), giving vdop64 or vdop128
+define(`VECTOR_CMP_LT_LE', `
+instruct vcm$1$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == $2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::$1 &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "$6cm$7 $dst, $src2, $src1\t# vector cmp ($2$3)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ $6cm$7(as_FloatRegister($dst$$reg), __ T$2$5,
+ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(vdop$8);
+%}')dnl
+dnl $1 $2 $3 $4 $5 $6 $7 $8
+VECTOR_CMP_LT_LE(lt, 8, B, D, B, , gt, 64)
+VECTOR_CMP_LT_LE(lt, 16,B, X, B, , gt, 128)
+VECTOR_CMP_LT_LE(lt, 4, S, D, H, , gt, 64)
+VECTOR_CMP_LT_LE(lt, 8, S, X, H, , gt, 128)
+VECTOR_CMP_LT_LE(lt, 2, I, D, S, , gt, 64)
+VECTOR_CMP_LT_LE(lt, 4, I, X, S, , gt, 128)
+VECTOR_CMP_LT_LE(lt, 2, L, X, D, , gt, 128)
+VECTOR_CMP_LT_LE(lt, 2, F, D, S, f, gt, 64)
+VECTOR_CMP_LT_LE(lt, 4, F, X, S, f, gt, 128)
+VECTOR_CMP_LT_LE(lt, 2, D, X, D, f, gt, 128)
+VECTOR_CMP_LT_LE(le, 8, B, D, B, , ge, 64)
+VECTOR_CMP_LT_LE(le, 16,B, X, B, , ge, 128)
+VECTOR_CMP_LT_LE(le, 4, S, D, H, , ge, 64)
+VECTOR_CMP_LT_LE(le, 8, S, X, H, , ge, 128)
+VECTOR_CMP_LT_LE(le, 2, I, D, S, , ge, 64)
+VECTOR_CMP_LT_LE(le, 4, I, X, S, , ge, 128)
+VECTOR_CMP_LT_LE(le, 2, L, X, D, , ge, 128)
+VECTOR_CMP_LT_LE(le, 2, F, D, S, f, ge, 64)
+VECTOR_CMP_LT_LE(le, 4, F, X, S, f, ge, 128)
+VECTOR_CMP_LT_LE(le, 2, D, X, D, f, ge, 128)
+dnl
+
+// ------------------------------ Vector mul -----------------------------------
+
+dnl vmul2L: matches MulVL for two-lane vectors. Each pair of D lanes is moved
+dnl to the general registers tmp1 and tmp2, multiplied with the scalar mul,
+dnl and the product is written back into the same lane of dst. Lane 1 of
+dnl both sources is read only after lane 0 of dst has been written.
+dnl The last format line carries no trailing line break.
+instruct vmul2L(vecX dst, vecX src1, vecX src2, iRegLNoSp tmp1, iRegLNoSp tmp2)
+%{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (MulVL src1 src2));
+ ins_cost(INSN_COST);
+ effect(TEMP tmp1, TEMP tmp2);
+ format %{ "umov $tmp1, $src1, D, 0\n\t"
+ "umov $tmp2, $src2, D, 0\n\t"
+ "mul $tmp2, $tmp2, $tmp1\n\t"
+ "mov $dst, T2D, 0, $tmp2\t# insert into vector(2L)\n\t"
+ "umov $tmp1, $src1, D, 1\n\t"
+ "umov $tmp2, $src2, D, 1\n\t"
+ "mul $tmp2, $tmp2, $tmp1\n\t"
+ "mov $dst, T2D, 1, $tmp2\t# insert into vector(2L)"
+ %}
+ ins_encode %{
+ __ umov($tmp1$$Register, as_FloatRegister($src1$$reg), __ D, 0);
+ __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ D, 0);
+ __ mul(as_Register($tmp2$$reg), as_Register($tmp2$$reg), as_Register($tmp1$$reg));
+ __ mov(as_FloatRegister($dst$$reg), __ T2D, 0, $tmp2$$Register);
+ __ umov($tmp1$$Register, as_FloatRegister($src1$$reg), __ D, 1);
+ __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ D, 1);
+ __ mul(as_Register($tmp2$$reg), as_Register($tmp2$$reg), as_Register($tmp1$$reg));
+ __ mov(as_FloatRegister($dst$$reg), __ T2D, 1, $tmp2$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// --------------------------------- Vector not --------------------------------
+dnl
+dnl MATCH_RULE(type)
+dnl Helper for VECTOR_NOT. For type I it emits three match rules, XorV of src
+dnl with ReplicateB, ReplicateS and ReplicateI of m1; for any other type it
+dnl emits one rule using ReplicateL. It is undefined again after the
+dnl VECTOR_NOT invocations.
+define(`MATCH_RULE', `ifelse($1, I,
+`match(Set dst (XorV src (ReplicateB m1)));
+ match(Set dst (XorV src (ReplicateS m1)));
+ match(Set dst (XorV src (ReplicateI m1)));',
+`match(Set dst (XorV src (ReplicateL m1)));')')dnl
+dnl
+dnl VECTOR_NOT(lanes, type, vec, bytes, arrangement)
+dnl Emits instruct vnot<lanes><type>, which replaces an XorV with a replicated
+dnl m1 operand (operand class imm<type>_M1) by a single notr.
+dnl   $1  lane count, used in the name only
+dnl   $2  I or L; selects the imm operand class and the match rules
+dnl   $3  vector operand class suffix (D/X)
+dnl   $4  vector length in bytes checked by the predicate
+dnl   $5  arrangement passed to notr (8B/16B)
+define(`VECTOR_NOT', `
+instruct vnot$1$2`'(vec$3 dst, vec$3 src, imm$2_M1 m1)
+%{
+ predicate(n->as_Vector()->length_in_bytes() == $4);
+ MATCH_RULE($2)
+ ins_cost(INSN_COST);
+ format %{ "not $dst, $src\t# vector ($5)" %}
+ ins_encode %{
+ __ notr(as_FloatRegister($dst$$reg), __ T$5,
+ as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_class_default);
+%}')dnl
+dnl $1 $2 $3 $4 $5
+VECTOR_NOT(2, I, D, 8, 8B)
+VECTOR_NOT(4, I, X, 16, 16B)
+VECTOR_NOT(2, L, X, 16, 16B)
+undefine(MATCH_RULE)
+dnl
+// ------------------------------ Vector max/min -------------------------------
+dnl
+dnl PREDICATE(name, lanes, basic_type)
+dnl Helper for VECTOR_MAX_MIN_INT. When name is 8B the predicate accepts
+dnl T_BYTE vectors of length 4 or 8; otherwise it requires exactly $2 lanes
+dnl of element type T_$3. It is undefined again after the invocations.
+define(`PREDICATE', `ifelse($1, 8B,
+`predicate((n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);',
+`predicate(n->as_Vector()->length() == $2 && n->bottom_type()->is_vect()->element_basic_type() == T_$3);')')dnl
+dnl
+dnl VECTOR_MAX_MIN_INT(op, lanes, type, vec, Op, pipe)
+dnl Emits instruct v<op><lanes><type>, which matches <Op>V with a single
+dnl <op>v assembler call.
+dnl   $1  lower-case op name (max/min); also forms the <op>v call
+dnl   $2  lane count
+dnl   $3  element type letter (B/S/I); passed to the helper macros
+dnl       TYPE2DATATYPE and iTYPE2SIMD, which are defined outside this chunk
+dnl   $4  vector operand class suffix (D/X)
+dnl   $5  ideal-node prefix (Max/Min)
+dnl   $6  pipeline class suffix (64/128), giving vdop64 or vdop128
+define(`VECTOR_MAX_MIN_INT', `
+instruct v$1$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2)
+%{
+ PREDICATE(`$2$3', $2, TYPE2DATATYPE($3))
+ match(Set dst ($5V src1 src2));
+ ins_cost(INSN_COST);
+ format %{ "$1v $dst, $src1, $src2\t# vector ($2$3)" %}
+ ins_encode %{
+ __ $1v(as_FloatRegister($dst$$reg), __ T$2`'iTYPE2SIMD($3),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop$6);
+%}')dnl
+dnl $1 $2 $3 $4 $5 $6
+VECTOR_MAX_MIN_INT(max, 8, B, D, Max, 64)
+VECTOR_MAX_MIN_INT(max, 16, B, X, Max, 128)
+VECTOR_MAX_MIN_INT(max, 4, S, D, Max, 64)
+VECTOR_MAX_MIN_INT(max, 8, S, X, Max, 128)
+VECTOR_MAX_MIN_INT(max, 2, I, D, Max, 64)
+VECTOR_MAX_MIN_INT(max, 4, I, X, Max, 128)
+VECTOR_MAX_MIN_INT(min, 8, B, D, Min, 64)
+VECTOR_MAX_MIN_INT(min, 16, B, X, Min, 128)
+VECTOR_MAX_MIN_INT(min, 4, S, D, Min, 64)
+VECTOR_MAX_MIN_INT(min, 8, S, X, Min, 128)
+VECTOR_MAX_MIN_INT(min, 2, I, D, Min, 64)
+VECTOR_MAX_MIN_INT(min, 4, I, X, Min, 128)
+undefine(PREDICATE)
+dnl
+dnl VECTOR_MAX_MIN_LONG(op, Op, first, second)
+dnl Emits instruct v<op>2L, which matches <Op>V for two-lane T_LONG vectors.
+dnl dst first receives the result of cmgt(src1, src2) and is then used as the
+dnl destination of bsl with the operands named by $3 and $4.
+dnl   $1  lower-case op name (max/min)
+dnl   $2  ideal-node prefix (Max/Min)
+dnl   $3  name of the operand passed first to bsl (src1 for max, src2 for min)
+dnl   $4  name of the operand passed second to bsl
+dnl In the body, a doubled dollar sign followed by 3 or 4 expands to a dollar
+dnl sign plus the operand name, producing an ADL operand reference.
+dnl dst is declared TEMP because it is written before the sources are last
+dnl read.
+define(`VECTOR_MAX_MIN_LONG', `
+instruct v$1`'2L`'(vecX dst, vecX src1, vecX src2)
+%{
+ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst ($2V src1 src2));
+ ins_cost(INSN_COST);
+ effect(TEMP dst);
+ format %{ "cmgt $dst, $src1, $src2\t# vector (2L)\n\t"
+ "bsl $dst, $$3, $$4\t# vector (16B)" %}
+ ins_encode %{
+ __ cmgt(as_FloatRegister($dst$$reg), __ T2D,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ __ bsl(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($$3$$reg), as_FloatRegister($$4$$reg));
+ %}
+ ins_pipe(vdop128);
+%}')dnl
+dnl $1 $2 $3 $4
+VECTOR_MAX_MIN_LONG(max, Max, src1, src2)
+VECTOR_MAX_MIN_LONG(min, Min, src2, src1)
+dnl
+
+// --------------------------------- blend (bsl) ----------------------------
+dnl
+dnl VECTOR_BSL(bytes, vec, pipe)
+dnl Emits instruct vbsl<bytes>B, which matches VectorBlend. In the match rule
+dnl dst is both the third input of the blend and the result register; bsl is
+dnl called with src2 before src1.
+dnl   $1  vector length in bytes (8/16); also forms the T<bytes>B arrangement
+dnl   $2  vector operand class suffix (D/X)
+dnl   $3  pipeline class suffix (64/128), giving vlogical64 or vlogical128
+define(`VECTOR_BSL', `
+instruct vbsl$1B`'(vec$2 dst, vec$2 src1, vec$2 src2)
+%{
+ predicate(n->as_Vector()->length_in_bytes() == $1);
+ match(Set dst (VectorBlend (Binary src1 src2) dst));
+ ins_cost(INSN_COST);
+ format %{ "bsl $dst, $src2, $src1\t# vector ($1B)" %}
+ ins_encode %{
+ __ bsl(as_FloatRegister($dst$$reg), __ T$1B,
+ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(vlogical$3);
+%}')dnl
+dnl $1 $2 $3
+VECTOR_BSL(8, D, 64)
+VECTOR_BSL(16, X, 128)
+dnl
+
+// --------------------------------- Load/store Mask ----------------------------
+dnl
+dnl PREDICATE(kind, lanes)
+dnl Helper for VECTOR_LOAD_STORE_MASK_B. For kind load the predicate checks
+dnl the lane count and a T_BYTE element type; for any other kind it checks
+dnl the lane count only. It is undefined again after the invocations.
+define(`PREDICATE', `ifelse($1, load,
+`predicate(n->as_Vector()->length() == $2 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);',
+`predicate(n->as_Vector()->length() == $2);')')dnl
+dnl
+dnl VECTOR_LOAD_STORE_MASK_B(kind, lanes, vec, Kind, extra_operand, extra_name)
+dnl Emits instruct <kind>mask<lanes>B, which matches Vector<Kind>Mask and
+dnl negates every byte lane of src into dst with negr.
+dnl   $1  load or store
+dnl   $2  lane count
+dnl   $3  vector operand class suffix (D/X)
+dnl   $4  Load or Store, selecting the ideal node
+dnl   $5  extra operand declaration, passed only for store (an immI_1 operand)
+dnl   $6  name of that operand (size), also appended to the match rule
+define(`VECTOR_LOAD_STORE_MASK_B', `
+instruct $1mask$2B`'(vec$3 dst, vec$3 src $5 $6)
+%{
+ PREDICATE($1, $2)
+ match(Set dst (Vector$4Mask src $6));
+ ins_cost(INSN_COST);
+ format %{ "negr $dst, $src\t# $1 mask ($2B to $2B)" %}
+ ins_encode %{
+ __ negr(as_FloatRegister($dst$$reg), __ T$2B, as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_class_default);
+%}')dnl
+dnl $1 $2 $3 $4 $5 $6
+VECTOR_LOAD_STORE_MASK_B(load, 8, D, Load)
+VECTOR_LOAD_STORE_MASK_B(load, 16, X, Load)
+VECTOR_LOAD_STORE_MASK_B(store, 8, D, Store, `, immI_1', size)
+VECTOR_LOAD_STORE_MASK_B(store, 16, X, Store, `, immI_1', size)
+undefine(PREDICATE)dnl
+dnl
+dnl PREDICATE(kind, lanes)
+dnl Helper for VECTOR_LOAD_STORE_MASK_S. For kind load the predicate checks
+dnl the lane count and a T_SHORT element type; for any other kind it checks
+dnl the lane count only. It is undefined again after the invocations.
+define(`PREDICATE', `ifelse($1, load,
+`predicate(n->as_Vector()->length() == $2 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);',
+`predicate(n->as_Vector()->length() == $2);')')dnl
+dnl
+dnl VECTOR_LOAD_STORE_MASK_S(kind, lanes, dstvec, srcvec, Kind, insn, from, to,
+dnl                          extra_operand, extra_name)
+dnl Emits instruct <kind>mask<lanes>S, which matches Vector<Kind>Mask. The
+dnl lanes are resized with one $6 call (uxtl for load, xtn for store) and the
+dnl result is negated with negr.
+dnl   $1  load or store
+dnl   $2  lane count, used in the name, predicate and format
+dnl   $3  destination vector operand class suffix
+dnl   $4  source vector operand class suffix
+dnl   $5  Load or Store, selecting the ideal node
+dnl   $6  resize assembler call (uxtl/xtn)
+dnl   $7  source lane size letter
+dnl   $8  destination lane size letter
+dnl   $9  extra operand declaration, passed only for store (an immI_2 operand)
+dnl   $10 name of that operand (size), also appended to the match rule
+dnl The encoding always uses the 8-lane arrangements T8<to> and T8<from>.
+define(`VECTOR_LOAD_STORE_MASK_S', `
+instruct $1mask$2S`'(vec$3 dst, vec$4 src $9 $10)
+%{
+ PREDICATE($1, $2)
+ match(Set dst (Vector$5Mask src $10));
+ ins_cost(INSN_COST);
+ format %{ "$6 $dst, $src\n\t"
+ "negr $dst, $dst\t# $1 mask ($2$7 to $2$8)" %}
+ ins_encode %{
+ __ $6(as_FloatRegister($dst$$reg), __ T8$8, as_FloatRegister($src$$reg), __ T8$7);
+ __ negr(as_FloatRegister($dst$$reg), __ T8$8, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3 $4 $5 $6 $7 $8 $9 $10
+VECTOR_LOAD_STORE_MASK_S(load, 4, D, D, Load, uxtl, B, H)
+VECTOR_LOAD_STORE_MASK_S(load, 8, X, D, Load, uxtl, B, H)
+VECTOR_LOAD_STORE_MASK_S(store, 4, D, D, Store, xtn, H, B, `, immI_2', size)
+VECTOR_LOAD_STORE_MASK_S(store, 8, D, X, Store, xtn, H, B, `, immI_2', size)
+undefine(PREDICATE)dnl
+dnl
+dnl PREDICATE(kind, lanes)
+dnl Helper for VECTOR_LOAD_STORE_MASK_I. For kind load the predicate checks
+dnl the lane count and an element type of T_INT or T_FLOAT; for any other
+dnl kind it checks the lane count only. It is undefined again after the
+dnl invocations.
+define(`PREDICATE', `ifelse($1, load,
+`predicate(n->as_Vector()->length() == $2 &&
+ (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
+ n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));',
+`predicate(n->as_Vector()->length() == $2);')')dnl
+dnl
+dnl VECTOR_LOAD_STORE_MASK_I(kind, lanes, dstvec, srcvec, Kind, insn, from,
+dnl                          mid, to, n1, n2, extra_operand, extra_name)
+dnl Emits instruct <kind>mask<lanes>I, which matches Vector<Kind>Mask. The
+dnl lanes are resized in two $6 steps (from -> mid -> to) and the result is
+dnl negated with negr.
+dnl   $1  load or store
+dnl   $2  lane count, used in the name, predicate and format
+dnl   $3  destination vector operand class suffix
+dnl   $4  source vector operand class suffix
+dnl   $5  Load or Store, selecting the ideal node
+dnl   $6  resize assembler call (uxtl/xtn)
+dnl   $7, $8, $9  lane size letters before, between and after the two steps
+dnl   $10 lane count used in the arrangements of the first step
+dnl   $11 lane count used in the arrangements of the second step and negr
+dnl   $12 extra operand declaration, passed only for store (an immI_4 operand)
+dnl   $13 name of that operand (size), also appended to the match rule
+define(`VECTOR_LOAD_STORE_MASK_I', `
+instruct $1mask$2I`'(vec$3 dst, vec$4 src $12 $13)
+%{
+ PREDICATE($1, $2)
+ match(Set dst (Vector$5Mask src $13));
+ ins_cost(INSN_COST);
+ format %{ "$6 $dst, $src\t# $2$7 to $2$8\n\t"
+ "$6 $dst, $dst\t# $2$8 to $2$9\n\t"
+ "negr $dst, $dst\t# $1 mask ($2$7 to $2$9)" %}
+ ins_encode %{
+ __ $6(as_FloatRegister($dst$$reg), __ T$10$8, as_FloatRegister($src$$reg), __ T$10$7);
+ __ $6(as_FloatRegister($dst$$reg), __ T$11$9, as_FloatRegister($dst$$reg), __ T$11$8);
+ __ negr(as_FloatRegister($dst$$reg), __ T$11$9, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3 $4 $5 $6 $7 $8 $9 $10$11 $12 $13
+VECTOR_LOAD_STORE_MASK_I(load, 2, D, D, Load, uxtl, B, H, S, 8, 4)
+VECTOR_LOAD_STORE_MASK_I(load, 4, X, D, Load, uxtl, B, H, S, 8, 4)
+VECTOR_LOAD_STORE_MASK_I(store, 2, D, D, Store, xtn, S, H, B, 4, 8, `, immI_4', size)
+VECTOR_LOAD_STORE_MASK_I(store, 4, D, X, Store, xtn, S, H, B, 4, 8, `, immI_4', size)
+undefine(PREDICATE)
+dnl
+dnl loadmask2L: matches VectorLoadMask for two-lane vectors whose element type
+dnl is T_LONG or T_DOUBLE. The byte lanes of src are widened with three uxtl
+dnl steps (T8B -> T8H, T4H -> T4S, T2S -> T2D) and the result is negated with
+dnl negr. The format prints negr to match the call that is emitted.
+instruct loadmask2L(vecX dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
+ n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
+ match(Set dst (VectorLoadMask src));
+ ins_cost(INSN_COST);
+ format %{ "uxtl $dst, $src\t# 2B to 2S\n\t"
+ "uxtl $dst, $dst\t# 2S to 2I\n\t"
+ "uxtl $dst, $dst\t# 2I to 2L\n\t"
+ "negr $dst, $dst\t# load mask (2B to 2L)" %}
+ ins_encode %{
+ __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
+ __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
+ __ uxtl(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($dst$$reg), __ T2S);
+ __ negr(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+dnl storemask2L: matches VectorStoreMask for two-lane vectors with an immI_8
+dnl size operand. The D lanes of src are narrowed with three xtn steps
+dnl (T2D -> T2S, T4S -> T4H, T8H -> T8B) and the result is negated with negr.
+dnl The format prints negr to match the call that is emitted.
+instruct storemask2L(vecD dst, vecX src, immI_8 size)
+%{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (VectorStoreMask src size));
+ ins_cost(INSN_COST);
+ format %{ "xtn $dst, $src\t# 2L to 2I\n\t"
+ "xtn $dst, $dst\t# 2I to 2S\n\t"
+ "xtn $dst, $dst\t# 2S to 2B\n\t"
+ "negr $dst, $dst\t# store mask (2L to 2B)" %}
+ ins_encode %{
+ __ xtn(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg), __ T2D);
+ __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($dst$$reg), __ T4S);
+ __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg), __ T8H);
+ __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+//-------------------------------- LOAD_IOTA_INDICES----------------------------------
+dnl
+dnl PREDICATE(bytes)
+dnl Helper for VECTOR_LOAD_CON. For 8 the predicate accepts T_BYTE vectors of
+dnl length 2, 4 or 8; otherwise it requires a T_BYTE vector of length 16.
+dnl It is undefined again after the invocations.
+define(`PREDICATE', `ifelse($1, 8,
+`predicate((n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4 ||
+ n->as_Vector()->length() == 8) &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);',
+`predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);')')dnl
+dnl
+dnl VECTOR_LOAD_CON(bytes, vec, suffix)
+dnl Emits instruct loadcon<bytes>B, which matches VectorLoadConst with an immI0
+dnl operand. The address returned by
+dnl StubRoutines::aarch64::vector_iota_indices() is placed in rscratch1 with
+dnl lea and the vector is loaded from it.
+dnl   $1  8 or 16, used in the name and to select the predicate
+dnl   $2  vector operand class suffix (D/X)
+dnl   $3  suffix of the load call (d/q), giving ldrd or ldrq
+define(`VECTOR_LOAD_CON', `
+instruct loadcon$1B`'(vec$2 dst, immI0 src)
+%{
+ PREDICATE($1)
+ match(Set dst (VectorLoadConst src));
+ ins_cost(INSN_COST);
+ format %{ "ldr $dst, CONSTANT_MEMORY\t# load iota indices" %}
+ ins_encode %{
+ __ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices()));
+ __ ldr$3(as_FloatRegister($dst$$reg), rscratch1);
+ %}
+ ins_pipe(pipe_class_memory);
+%}')dnl
+dnl $1 $2 $3
+VECTOR_LOAD_CON(8, D, d)
+VECTOR_LOAD_CON(16, X, q)
+undefine(PREDICATE)
+dnl
+//-------------------------------- LOAD_SHUFFLE ----------------------------------
+dnl
+dnl VECTOR_LOAD_SHUFFLE_B(lanes, vec)
+dnl Emits instruct loadshuffle<lanes>B, which matches VectorLoadShuffle for
+dnl T_BYTE vectors of the given length. The encoding is a plain register copy
+dnl done with orr of src with itself.
+dnl   $1  lane count (8/16); also forms the T<lanes>B arrangement
+dnl   $2  vector operand class suffix (D/X)
+define(`VECTOR_LOAD_SHUFFLE_B', `
+instruct loadshuffle$1B`'(vec$2 dst, vec$2 src)
+%{
+ predicate(n->as_Vector()->length() == $1 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorLoadShuffle src));
+ ins_cost(INSN_COST);
+ format %{ "mov $dst, $src\t# get $1B shuffle" %}
+ ins_encode %{
+ __ orr(as_FloatRegister($dst$$reg), __ T$1B,
+ as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_class_default);
+%}')dnl
+dnl $1 $2
+VECTOR_LOAD_SHUFFLE_B(8, D)
+VECTOR_LOAD_SHUFFLE_B(16, X)
+dnl
+dnl VECTOR_LOAD_SHUFFLE_S(lanes, dstvec, srcvec)
+dnl Emits instruct loadshuffle<lanes>S, which matches VectorLoadShuffle for
+dnl T_SHORT vectors of the given length. The byte indices in src are widened
+dnl to halfwords with one uxtl (T8B -> T8H) regardless of the lane count.
+dnl   $1  lane count (4/8)
+dnl   $2  destination vector operand class suffix
+dnl   $3  source vector operand class suffix
+define(`VECTOR_LOAD_SHUFFLE_S', `
+instruct loadshuffle$1S`'(vec$2 dst, vec$3 src)
+%{
+ predicate(n->as_Vector()->length() == $1 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorLoadShuffle src));
+ ins_cost(INSN_COST);
+ format %{ "uxtl $dst, $src\t# $1B to $1H" %}
+ ins_encode %{
+ __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
+ %}
+ ins_pipe(pipe_class_default);
+%}')dnl
+dnl $1 $2 $3
+VECTOR_LOAD_SHUFFLE_S(4, D, D)
+VECTOR_LOAD_SHUFFLE_S(8, X, D)
+dnl
+
+dnl loadshuffle4I: matches VectorLoadShuffle for four-lane vectors whose
+dnl element type is T_INT or T_FLOAT. The byte indices in src are widened in
+dnl two uxtl steps (T8B -> T8H, then T4H -> T4S).
+instruct loadshuffle4I(vecX dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
+ n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
+ match(Set dst (VectorLoadShuffle src));
+ ins_cost(INSN_COST);
+ format %{ "uxtl $dst, $src\t# 4B to 4H \n\t"
+ "uxtl $dst, $dst\t# 4H to 4S" %}
+ ins_encode %{
+ __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
+ __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+//-------------------------------- Rearrange -------------------------------------
+// Here is an example that rearranges a NEON vector with 4 ints:
+// Rearrange V1 int[a0, a1, a2, a3] to V2 int[a2, a3, a0, a1]
+// 1. Get the indices of V1 and store them as Vi byte[0, 1, 2, 3].
+// 2. Convert Vi byte[0, 1, 2, 3] to the indices of V2 and also store them as Vi byte[2, 3, 0, 1].
+// 3. Unsigned extend Long Vi from byte[2, 3, 0, 1] to int[2, 3, 0, 1].
+// 4. Multiply Vi int[2, 3, 0, 1] with constant int[0x04040404, 0x04040404, 0x04040404, 0x04040404]
+// and get tbl base Vm int[0x08080808, 0x0c0c0c0c, 0x00000000, 0x04040404].
+// 5. Add Vm with constant int[0x03020100, 0x03020100, 0x03020100, 0x03020100]
+// and get tbl index Vm int[0x0b0a0908, 0x0f0e0d0c, 0x03020100, 0x07060504]
+// 6. Use Vm as index register, and use V1 as table register.
+// Then get V2 as the result by tbl NEON instructions.
+// Notes:
+// Step 1 matches VectorLoadConst.
+// Step 3 matches VectorLoadShuffle.
+// Step 4, 5, 6 match VectorRearrange.
+// For VectorRearrange short/int, such a complex calculation is required
+// because NEON tbl supports byte tables only, so for short/int we need to
+// look up 2/4 bytes as a group. For VectorRearrange long, we use bsl
+// to implement rearrange.
+dnl VECTOR_REARRANGE_B(lanes, vec)
+dnl Emits instruct rearrange<lanes>B, which matches VectorRearrange for T_BYTE
+dnl vectors of the given length with a single tbl call. src is the table
+dnl register (one register) and shuffle supplies the indices; the format
+dnl string prints src as the table to match the encoding.
+dnl   $1  lane count (8/16); also forms the T<lanes>B arrangement
+dnl   $2  vector operand class suffix (D/X)
+define(`VECTOR_REARRANGE_B', `
+instruct rearrange$1B`'(vec$2 dst, vec$2 src, vec$2 shuffle)
+%{
+ predicate(n->as_Vector()->length() == $1 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorRearrange src shuffle));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst);
+ format %{ "tbl $dst, {$src}, $shuffle\t# rearrange $1B" %}
+ ins_encode %{
+ __ tbl(as_FloatRegister($dst$$reg), __ T$1B,
+ as_FloatRegister($src$$reg), 1, as_FloatRegister($shuffle$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2
+VECTOR_REARRANGE_B(8, D)
+VECTOR_REARRANGE_B(16, X)
+dnl
+dnl VECTOR_REARRANGE_S(lanes, vec, bytes)
+dnl Emits instruct rearrange<lanes>S, which matches VectorRearrange for T_SHORT
+dnl vectors of the given length.
+dnl   $1  lane count (4/8); forms the T<lanes>H arrangement
+dnl   $2  vector operand class suffix (D/X)
+dnl   $3  byte count (8/16); forms the T<bytes>B arrangement
+dnl tmp0 is filled with 0x02 in every byte and tmp1 with 0x0100 in every
+dnl halfword. The halfword indices in shuffle are multiplied by tmp0 and
+dnl tmp1 is added bytewise, turning each index into a pair of byte indices;
+dnl tbl then uses the result to look up bytes in src.
+define(`VECTOR_REARRANGE_S', `
+instruct rearrange$1S`'(vec$2 dst, vec$2 src, vec$2 shuffle, vec$2 tmp0, vec$2 tmp1)
+%{
+ predicate(n->as_Vector()->length() == $1 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorRearrange src shuffle));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp0, TEMP tmp1);
+ format %{ "mov $tmp0, CONSTANT\t# constant 0x0202020202020202\n\t"
+ "mov $tmp1, CONSTANT\t# constant 0x0100010001000100\n\t"
+ "mulv $dst, T$1H, $shuffle, $tmp0\n\t"
+ "addv $dst, T$3B, $dst, $tmp1\n\t"
+ "tbl $dst, {$src}, $dst\t# rearrange $1S" %}
+ ins_encode %{
+ __ mov(as_FloatRegister($tmp0$$reg), __ T$3B, 0x02);
+ __ mov(as_FloatRegister($tmp1$$reg), __ T$1H, 0x0100);
+ __ mulv(as_FloatRegister($dst$$reg), __ T$1H,
+ as_FloatRegister($shuffle$$reg), as_FloatRegister($tmp0$$reg));
+ __ addv(as_FloatRegister($dst$$reg), __ T$3B,
+ as_FloatRegister($dst$$reg), as_FloatRegister($tmp1$$reg));
+ __ tbl(as_FloatRegister($dst$$reg), __ T$3B,
+ as_FloatRegister($src$$reg), 1, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3
+VECTOR_REARRANGE_S(4, D, 8)
+VECTOR_REARRANGE_S(8, X, 16)
+
+dnl rearrange4I: matches VectorRearrange for four-lane vectors whose element
+dnl type is T_INT or T_FLOAT. tmp0 is filled with 0x04 in every byte and tmp1
+dnl with 0x03020100 in every word. The word indices in shuffle are multiplied
+dnl by tmp0 (T4S) and tmp1 is added bytewise, turning each index into four
+dnl byte indices; tbl then uses the result to look up bytes in src.
+dnl The format prints the T4S arrangement that the mulv call really uses.
+instruct rearrange4I(vecX dst, vecX src, vecX shuffle, vecX tmp0, vecX tmp1)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
+ n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
+ match(Set dst (VectorRearrange src shuffle));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp0, TEMP tmp1);
+ format %{ "mov $tmp0, CONSTANT\t# constant 0x0404040404040404\n\t"
+ "mov $tmp1, CONSTANT\t# constant 0x0302010003020100\n\t"
+ "mulv $dst, T4S, $shuffle, $tmp0\n\t"
+ "addv $dst, T16B, $dst, $tmp1\n\t"
+ "tbl $dst, {$src}, $dst\t# rearrange 4I" %}
+ ins_encode %{
+ __ mov(as_FloatRegister($tmp0$$reg), __ T16B, 0x04);
+ __ mov(as_FloatRegister($tmp1$$reg), __ T4S, 0x03020100);
+ __ mulv(as_FloatRegister($dst$$reg), __ T4S,
+ as_FloatRegister($shuffle$$reg), as_FloatRegister($tmp0$$reg));
+ __ addv(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($dst$$reg), as_FloatRegister($tmp1$$reg));
+ __ tbl(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src$$reg), 1, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+//-------------------------------- Anytrue/alltrue -----------------------------
+dnl
+dnl ANYTRUE_IN_MASK(bytes, vec)
+dnl Emits instruct anytrue_in_mask<bytes>B, which matches VectorTest when the
+dnl node predicate is BoolTest::ne. The bytes of src1 are summed into tmp with
+dnl addv, byte 0 of tmp is moved to dst and compared with zero, and dst is
+dnl set by csetw with condition NE. src2 is not used by the encoding. The
+dnl compare is why cr is declared as KILL.
+dnl   $1  byte count (8/16); forms the T<bytes>B arrangement
+dnl   $2  vector operand class suffix (D/X)
+dnl The cast names the node class explicitly; VectorTestNode is assumed from
+dnl the matched VectorTest node - confirm against the node declaration.
+define(`ANYTRUE_IN_MASK', `
+instruct anytrue_in_mask$1B`'(iRegINoSp dst, vec$2 src1, vec$2 src2, vec$2 tmp, rFlagsReg cr)
+%{
+ predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
+ match(Set dst (VectorTest src1 src2 ));
+ ins_cost(INSN_COST);
+ effect(TEMP tmp, KILL cr);
+ format %{ "addv $tmp, T$1B, $src1\t# src1 and src2 are the same\n\t"
+ "umov $dst, $tmp, B, 0\n\t"
+ "cmp $dst, 0\n\t"
+ "cset $dst" %}
+ ins_encode %{
+ __ addv(as_FloatRegister($tmp$$reg), __ T$1B, as_FloatRegister($src1$$reg));
+ __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
+ __ cmpw($dst$$Register, zr);
+ __ csetw($dst$$Register, Assembler::NE);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2
+ANYTRUE_IN_MASK(8, D)
+ANYTRUE_IN_MASK(16, X)
+dnl
+dnl ALLTRUE_IN_MASK(bytes, vec)
+dnl Emits instruct alltrue_in_mask<bytes>B, which matches VectorTest when the
+dnl node predicate is BoolTest::overflow. tmp receives src1 and-ed with src2
+dnl and is then inverted; its bytes are summed with addv, byte 0 is moved to
+dnl dst and compared with zero, and dst is set by csetw with condition EQ.
+dnl The compare is why cr is declared as KILL.
+dnl   $1  byte count (8/16); forms the T<bytes>B arrangement
+dnl   $2  vector operand class suffix (D/X)
+dnl The cast names the node class explicitly; VectorTestNode is assumed from
+dnl the matched VectorTest node - confirm against the node declaration.
+define(`ALLTRUE_IN_MASK', `
+instruct alltrue_in_mask$1B`'(iRegINoSp dst, vec$2 src1, vec$2 src2, vec$2 tmp, rFlagsReg cr)
+%{
+ predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
+ match(Set dst (VectorTest src1 src2 ));
+ ins_cost(INSN_COST);
+ effect(TEMP tmp, KILL cr);
+ format %{ "andr $tmp, T$1B, $src1, $src2\t# src2 is maskAllTrue\n\t"
+ "notr $tmp, T$1B, $tmp\n\t"
+ "addv $tmp, T$1B, $tmp\n\t"
+ "umov $dst, $tmp, B, 0\n\t"
+ "cmp $dst, 0\n\t"
+ "cset $dst" %}
+ ins_encode %{
+ __ andr(as_FloatRegister($tmp$$reg), __ T$1B,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ __ notr(as_FloatRegister($tmp$$reg), __ T$1B, as_FloatRegister($tmp$$reg));
+ __ addv(as_FloatRegister($tmp$$reg), __ T$1B, as_FloatRegister($tmp$$reg));
+ __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
+ __ cmpw($dst$$Register, zr);
+ __ csetw($dst$$Register, Assembler::EQ);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2
+ALLTRUE_IN_MASK(8, D)
+ALLTRUE_IN_MASK(16, X)
+dnl
diff --git a/src/hotspot/cpu/aarch64/aarch64_sve.ad b/src/hotspot/cpu/aarch64/aarch64_sve.ad
index 90442c7b8b6..f34d4890c70 100644
--- a/src/hotspot/cpu/aarch64/aarch64_sve.ad
+++ b/src/hotspot/cpu/aarch64/aarch64_sve.ad
@@ -159,6 +159,31 @@ source %{
case Op_ExtractL:
case Op_ExtractS:
case Op_ExtractUB:
+ // Vector API specific
+ case Op_AndReductionV:
+ case Op_OrReductionV:
+ case Op_XorReductionV:
+ case Op_MaxReductionV:
+ case Op_MinReductionV:
+ case Op_LoadVectorGather:
+ case Op_StoreVectorScatter:
+ case Op_VectorBlend:
+ case Op_VectorCast:
+ case Op_VectorCastB2X:
+ case Op_VectorCastD2X:
+ case Op_VectorCastF2X:
+ case Op_VectorCastI2X:
+ case Op_VectorCastL2X:
+ case Op_VectorCastS2X:
+ case Op_VectorInsert:
+ case Op_VectorLoadConst:
+ case Op_VectorLoadMask:
+ case Op_VectorLoadShuffle:
+ case Op_VectorMaskCmp:
+ case Op_VectorRearrange:
+ case Op_VectorReinterpret:
+ case Op_VectorStoreMask:
+ case Op_VectorTest:
return false;
default:
return true;
@@ -846,9 +871,49 @@ instruct vpopcountI(vReg dst, vReg src) %{
// vector add reduction
+instruct reduce_addB(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
+ n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (AddReductionVI src1 src2));
+ effect(TEMP_DEF dst, TEMP tmp);
+ ins_cost(SVE_COST);
+ format %{ "sve_uaddv $tmp, $src2\t# vector (sve) (B)\n\t"
+ "smov $dst, $tmp, B, 0\n\t"
+ "addw $dst, $dst, $src1\n\t"
+ "sxtb $dst, $dst\t # add reduction B" %}
+ ins_encode %{
+ __ sve_uaddv(as_FloatRegister($tmp$$reg), __ B,
+ ptrue, as_FloatRegister($src2$$reg)); // add-reduce the B lanes of src2 into tmp
+ __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0); // signed move of byte lane 0 of tmp into dst
+ __ addw($dst$$Register, $dst$$Register, $src1$$Register); // fold in the scalar input src1
+ __ sxtb($dst$$Register, $dst$$Register); // sign-extend the low byte so dst holds a byte-range result
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_addS(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
+ n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (AddReductionVI src1 src2));
+ effect(TEMP_DEF dst, TEMP tmp);
+ ins_cost(SVE_COST);
+ format %{ "sve_uaddv $tmp, $src2\t# vector (sve) (H)\n\t"
+ "smov $dst, $tmp, H, 0\n\t"
+ "addw $dst, $dst, $src1\n\t"
+ "sxth $dst, $dst\t # add reduction H" %}
+ ins_encode %{
+ __ sve_uaddv(as_FloatRegister($tmp$$reg), __ H,
+ ptrue, as_FloatRegister($src2$$reg)); // add-reduce the H lanes of src2 into tmp
+ __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ H, 0); // signed move of halfword lane 0 of tmp into dst
+ __ addw($dst$$Register, $dst$$Register, $src1$$Register); // fold in the scalar input src1
+ __ sxth($dst$$Register, $dst$$Register); // sign-extend the low halfword so dst holds a short-range result
+ %}
+ ins_pipe(pipe_slow);
+%}
+
instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{
predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
- (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT));
+ n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP_DEF dst, TEMP tmp);
ins_cost(SVE_COST);
@@ -866,7 +931,7 @@ instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{
instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp) %{
predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
- (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG));
+ n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
match(Set dst (AddReductionVL src1 src2));
effect(TEMP_DEF dst, TEMP tmp);
ins_cost(SVE_COST);
@@ -1264,7 +1329,7 @@ instruct vlsrL(vReg dst, vReg shift) %{
instruct vasrB_imm(vReg dst, vReg src, immI shift) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
- match(Set dst (RShiftVB src shift));
+ match(Set dst (RShiftVB src (RShiftCntV shift)));
ins_cost(SVE_COST);
format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (B)" %}
ins_encode %{
@@ -1283,7 +1348,7 @@ instruct vasrB_imm(vReg dst, vReg src, immI shift) %{
instruct vasrS_imm(vReg dst, vReg src, immI shift) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
- match(Set dst (RShiftVS src shift));
+ match(Set dst (RShiftVS src (RShiftCntV shift)));
ins_cost(SVE_COST);
format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (H)" %}
ins_encode %{
@@ -1302,7 +1367,7 @@ instruct vasrS_imm(vReg dst, vReg src, immI shift) %{
instruct vasrI_imm(vReg dst, vReg src, immI shift) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
- match(Set dst (RShiftVI src shift));
+ match(Set dst (RShiftVI src (RShiftCntV shift)));
ins_cost(SVE_COST);
format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (S)" %}
ins_encode %{
@@ -1320,7 +1385,7 @@ instruct vasrI_imm(vReg dst, vReg src, immI shift) %{
instruct vasrL_imm(vReg dst, vReg src, immI shift) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
- match(Set dst (RShiftVL src shift));
+ match(Set dst (RShiftVL src (RShiftCntV shift)));
ins_cost(SVE_COST);
format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (D)" %}
ins_encode %{
@@ -1338,7 +1403,7 @@ instruct vasrL_imm(vReg dst, vReg src, immI shift) %{
instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
- match(Set dst (URShiftVB src shift));
+ match(Set dst (URShiftVB src (RShiftCntV shift)));
ins_cost(SVE_COST);
format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (B)" %}
ins_encode %{
@@ -1361,7 +1426,7 @@ instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{
instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
- match(Set dst (URShiftVS src shift));
+ match(Set dst (URShiftVS src (RShiftCntV shift)));
ins_cost(SVE_COST);
format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (H)" %}
ins_encode %{
@@ -1371,7 +1436,7 @@ instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{
as_FloatRegister($src$$reg));
return;
}
- if (con >= 8) {
+ if (con >= 16) {
__ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
as_FloatRegister($src$$reg));
return;
@@ -1384,7 +1449,7 @@ instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{
instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
- match(Set dst (URShiftVI src shift));
+ match(Set dst (URShiftVI src (RShiftCntV shift)));
ins_cost(SVE_COST);
format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (S)" %}
ins_encode %{
@@ -1402,7 +1467,7 @@ instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{
instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
- match(Set dst (URShiftVL src shift));
+ match(Set dst (URShiftVL src (RShiftCntV shift)));
ins_cost(SVE_COST);
format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (D)" %}
ins_encode %{
@@ -1420,7 +1485,7 @@ instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{
instruct vlslB_imm(vReg dst, vReg src, immI shift) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
- match(Set dst (LShiftVB src shift));
+ match(Set dst (LShiftVB src (LShiftCntV shift)));
ins_cost(SVE_COST);
format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (B)" %}
ins_encode %{
@@ -1438,12 +1503,12 @@ instruct vlslB_imm(vReg dst, vReg src, immI shift) %{
instruct vlslS_imm(vReg dst, vReg src, immI shift) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
- match(Set dst (LShiftVS src shift));
+ match(Set dst (LShiftVS src (LShiftCntV shift)));
ins_cost(SVE_COST);
format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (H)" %}
ins_encode %{
int con = (int)$shift$$constant;
- if (con >= 8) {
+ if (con >= 16) {
__ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
as_FloatRegister($src$$reg));
return;
@@ -1456,7 +1521,7 @@ instruct vlslS_imm(vReg dst, vReg src, immI shift) %{
instruct vlslI_imm(vReg dst, vReg src, immI shift) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
- match(Set dst (LShiftVI src shift));
+ match(Set dst (LShiftVI src (LShiftCntV shift)));
ins_cost(SVE_COST);
format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (S)" %}
ins_encode %{
@@ -1469,7 +1534,7 @@ instruct vlslI_imm(vReg dst, vReg src, immI shift) %{
instruct vlslL_imm(vReg dst, vReg src, immI shift) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
- match(Set dst (LShiftVL src shift));
+ match(Set dst (LShiftVL src (LShiftCntV shift)));
ins_cost(SVE_COST);
format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (D)" %}
ins_encode %{
diff --git a/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4
index 7bb76cc5941..7fe0861a717 100644
--- a/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4
+++ b/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4
@@ -146,6 +146,31 @@ source %{
case Op_ExtractL:
case Op_ExtractS:
case Op_ExtractUB:
+ // Vector API specific
+ case Op_AndReductionV:
+ case Op_OrReductionV:
+ case Op_XorReductionV:
+ case Op_MaxReductionV:
+ case Op_MinReductionV:
+ case Op_LoadVectorGather:
+ case Op_StoreVectorScatter:
+ case Op_VectorBlend:
+ case Op_VectorCast:
+ case Op_VectorCastB2X:
+ case Op_VectorCastD2X:
+ case Op_VectorCastF2X:
+ case Op_VectorCastI2X:
+ case Op_VectorCastL2X:
+ case Op_VectorCastS2X:
+ case Op_VectorInsert:
+ case Op_VectorLoadConst:
+ case Op_VectorLoadMask:
+ case Op_VectorLoadShuffle:
+ case Op_VectorMaskCmp:
+ case Op_VectorRearrange:
+ case Op_VectorReinterpret:
+ case Op_VectorStoreMask:
+ case Op_VectorTest:
return false;
default:
return true;
@@ -507,15 +532,38 @@ instruct vpopcountI(vReg dst, vReg src) %{
__ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg));
%}
ins_pipe(pipe_slow);
-%}
+%}dnl
+dnl
+dnl REDUCE_ADD_EXT($1, $2, $3, $4, $5, $6, $7 ) -- add reduction for sub-word element types
+dnl REDUCE_ADD_EXT(insn_name, op_name, reg_dst, reg_src, size, elem_type, insn1) -- insn1 is the final extend applied to dst (sxtb/sxth at the call sites)
+define(`REDUCE_ADD_EXT', `
+instruct $1($3 dst, $4 src1, vReg src2, vRegD tmp) %{
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
+ n->in(2)->bottom_type()->is_vect()->element_basic_type() == $6);
+ match(Set dst ($2 src1 src2));
+ effect(TEMP_DEF dst, TEMP tmp);
+ ins_cost(SVE_COST);
+ format %{ "sve_uaddv $tmp, $src2\t# vector (sve) ($5)\n\t"
+ "smov $dst, $tmp, $5, 0\n\t"
+ "addw $dst, $dst, $src1\n\t"
+ "$7 $dst, $dst\t # add reduction $5" %}
+ ins_encode %{
+ __ sve_uaddv(as_FloatRegister($tmp$$reg), __ $5,
+ ptrue, as_FloatRegister($src2$$reg));
+ __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ $5, 0);
+ __ addw($dst$$Register, $dst$$Register, $src1$$Register);
+ __ $7($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
dnl
dnl REDUCE_ADD($1, $2, $3, $4, $5, $6, $7 )
dnl REDUCE_ADD(insn_name, op_name, reg_dst, reg_src, size, elem_type, insn1)
define(`REDUCE_ADD', `
instruct $1($3 dst, $4 src1, vReg src2, vRegD tmp) %{
predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
- ELEMENT_SHORT_CHAR($6, n->in(2)));
+ n->in(2)->bottom_type()->is_vect()->element_basic_type() == $6);
match(Set dst ($2 src1 src2));
effect(TEMP_DEF dst, TEMP tmp);
ins_cost(SVE_COST);
@@ -545,8 +593,10 @@ instruct $1($3 src1_dst, vReg src2) %{
%}
ins_pipe(pipe_slow);
%}')dnl
-dnl
+
// vector add reduction
+REDUCE_ADD_EXT(reduce_addB, AddReductionVI, iRegINoSp, iRegIorL2I, B, T_BYTE, sxtb)
+REDUCE_ADD_EXT(reduce_addS, AddReductionVI, iRegINoSp, iRegIorL2I, H, T_SHORT, sxth)
REDUCE_ADD(reduce_addI, AddReductionVI, iRegINoSp, iRegIorL2I, S, T_INT, addw)
REDUCE_ADD(reduce_addL, AddReductionVL, iRegLNoSp, iRegL, D, T_LONG, add)
REDUCE_ADDF(reduce_addF, AddReductionVF, vRegF, S)
@@ -677,14 +727,14 @@ instruct $1(vReg dst, vReg shift) %{
ins_pipe(pipe_slow);
%}')dnl
dnl
-dnl VSHIFT_IMM_UNPREDICATE($1, $2, $3, $4, $5 )
-dnl VSHIFT_IMM_UNPREDICATE(insn_name, op_name, size, min_vec_len, insn)
+dnl VSHIFT_IMM_UNPREDICATE($1, $2, $3, $4, $5, $6 )
+dnl VSHIFT_IMM_UNPREDICATE(insn_name, op_name, op_name2, size, min_vec_len, insn)
define(`VSHIFT_IMM_UNPREDICATE', `
instruct $1(vReg dst, vReg src, immI shift) %{
- predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
- match(Set dst ($2 src shift));
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $5);
+ match(Set dst ($2 src ($3 shift)));
ins_cost(SVE_COST);
- format %{ "$5 $dst, $src, $shift\t# vector (sve) ($3)" %}
+ format %{ "$6 $dst, $src, $shift\t# vector (sve) ($4)" %}
ins_encode %{
int con = (int)$shift$$constant;dnl
ifelse(eval(index(`$1', `vasr') == 0 || index(`$1', `vlsr') == 0), 1, `
@@ -693,16 +743,21 @@ ifelse(eval(index(`$1', `vasr') == 0 || index(`$1', `vlsr') == 0), 1, `
as_FloatRegister($src$$reg));
return;
}')dnl
-ifelse(eval(index(`$1', `vasr') == 0), 1, `ifelse(eval(index(`$3', `B') == 0), 1, `
- if (con >= 8) con = 7;')ifelse(eval(index(`$3', `H') == 0), 1, `
+ifelse(eval(index(`$1', `vasr') == 0), 1, `ifelse(eval(index(`$4', `B') == 0), 1, `
+ if (con >= 8) con = 7;')ifelse(eval(index(`$4', `H') == 0), 1, `
if (con >= 16) con = 15;')')dnl
-ifelse(eval((index(`$1', `vlsl') == 0 || index(`$1', `vlsr') == 0) && (index(`$3', `B') == 0 || index(`$3', `H') == 0)), 1, `
+ifelse(eval(index(`$1', `vlsl') == 0 || index(`$1', `vlsr') == 0), 1, `ifelse(eval(index(`$4', `B') == 0), 1, `
if (con >= 8) {
__ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
as_FloatRegister($src$$reg));
return;
- }')
- __ $5(as_FloatRegister($dst$$reg), __ $3,
+ }')ifelse(eval(index(`$4', `H') == 0), 1, `
+ if (con >= 16) {
+ __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }')')
+ __ $6(as_FloatRegister($dst$$reg), __ $4,
as_FloatRegister($src$$reg), con);
%}
ins_pipe(pipe_slow);
@@ -736,18 +791,18 @@ VSHIFT_TRUE_PREDICATE(vlsrB, URShiftVB, B, 16, sve_lsr)
VSHIFT_TRUE_PREDICATE(vlsrS, URShiftVS, H, 8, sve_lsr)
VSHIFT_TRUE_PREDICATE(vlsrI, URShiftVI, S, 4, sve_lsr)
VSHIFT_TRUE_PREDICATE(vlsrL, URShiftVL, D, 2, sve_lsr)
-VSHIFT_IMM_UNPREDICATE(vasrB_imm, RShiftVB, B, 16, sve_asr)
-VSHIFT_IMM_UNPREDICATE(vasrS_imm, RShiftVS, H, 8, sve_asr)
-VSHIFT_IMM_UNPREDICATE(vasrI_imm, RShiftVI, S, 4, sve_asr)
-VSHIFT_IMM_UNPREDICATE(vasrL_imm, RShiftVL, D, 2, sve_asr)
-VSHIFT_IMM_UNPREDICATE(vlsrB_imm, URShiftVB, B, 16, sve_lsr)
-VSHIFT_IMM_UNPREDICATE(vlsrS_imm, URShiftVS, H, 8, sve_lsr)
-VSHIFT_IMM_UNPREDICATE(vlsrI_imm, URShiftVI, S, 4, sve_lsr)
-VSHIFT_IMM_UNPREDICATE(vlsrL_imm, URShiftVL, D, 2, sve_lsr)
-VSHIFT_IMM_UNPREDICATE(vlslB_imm, LShiftVB, B, 16, sve_lsl)
-VSHIFT_IMM_UNPREDICATE(vlslS_imm, LShiftVS, H, 8, sve_lsl)
-VSHIFT_IMM_UNPREDICATE(vlslI_imm, LShiftVI, S, 4, sve_lsl)
-VSHIFT_IMM_UNPREDICATE(vlslL_imm, LShiftVL, D, 2, sve_lsl)
+VSHIFT_IMM_UNPREDICATE(vasrB_imm, RShiftVB, RShiftCntV, B, 16, sve_asr)
+VSHIFT_IMM_UNPREDICATE(vasrS_imm, RShiftVS, RShiftCntV, H, 8, sve_asr)
+VSHIFT_IMM_UNPREDICATE(vasrI_imm, RShiftVI, RShiftCntV, S, 4, sve_asr)
+VSHIFT_IMM_UNPREDICATE(vasrL_imm, RShiftVL, RShiftCntV, D, 2, sve_asr)
+VSHIFT_IMM_UNPREDICATE(vlsrB_imm, URShiftVB, RShiftCntV, B, 16, sve_lsr)
+VSHIFT_IMM_UNPREDICATE(vlsrS_imm, URShiftVS, RShiftCntV, H, 8, sve_lsr)
+VSHIFT_IMM_UNPREDICATE(vlsrI_imm, URShiftVI, RShiftCntV, S, 4, sve_lsr)
+VSHIFT_IMM_UNPREDICATE(vlsrL_imm, URShiftVL, RShiftCntV, D, 2, sve_lsr)
+VSHIFT_IMM_UNPREDICATE(vlslB_imm, LShiftVB, LShiftCntV, B, 16, sve_lsl)
+VSHIFT_IMM_UNPREDICATE(vlslS_imm, LShiftVS, LShiftCntV, H, 8, sve_lsl)
+VSHIFT_IMM_UNPREDICATE(vlslI_imm, LShiftVI, LShiftCntV, S, 4, sve_lsl)
+VSHIFT_IMM_UNPREDICATE(vlslL_imm, LShiftVL, LShiftCntV, D, 2, sve_lsl)
VSHIFT_COUNT(vshiftcntB, B, 16, T_BYTE)
VSHIFT_COUNT(vshiftcntS, H, 8, T_SHORT)
VSHIFT_COUNT(vshiftcntI, S, 4, T_INT)
diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp
index 29f63ba69a4..c7fac2836b7 100644
--- a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp
@@ -590,7 +590,7 @@ void entry(CodeBuffer *cb) {
__ stnp(r23, r29, Address(r12, 32)); // stnp x23, x29, [x12, #32]
__ ldnp(r0, r6, Address(r21, -80)); // ldnp x0, x6, [x21, #-80]
-// LdStSIMDOp
+// LdStNEONOp
__ ld1(v15, __ T8B, Address(r26)); // ld1 {v15.8B}, [x26]
__ ld1(v23, v24, __ T16B, Address(__ post(r11, 32))); // ld1 {v23.16B, v24.16B}, [x11], 32
__ ld1(v8, v9, v10, __ T1D, Address(__ post(r23, r7))); // ld1 {v8.1D, v9.1D, v10.1D}, [x23], x7
@@ -614,11 +614,146 @@ void entry(CodeBuffer *cb) {
__ ld4r(v0, v1, v2, v3, __ T4H, Address(__ post(r26, 8))); // ld4r {v0.4H, v1.4H, v2.4H, v3.4H}, [x26], 8
__ ld4r(v12, v13, v14, v15, __ T2S, Address(__ post(r25, r2))); // ld4r {v12.2S, v13.2S, v14.2S, v15.2S}, [x25], x2
-// SHA512SIMDOp
- __ sha512h(v22, __ T2D, v27, v4); // sha512h q22, q27, v4.2D
- __ sha512h2(v7, __ T2D, v6, v1); // sha512h2 q7, q6, v1.2D
- __ sha512su0(v26, __ T2D, v15); // sha512su0 v26.2D, v15.2D
- __ sha512su1(v2, __ T2D, v13, v13); // sha512su1 v2.2D, v13.2D, v13.2D
+// NEONReduceInstruction
+ __ addv(v22, __ T8B, v23); // addv b22, v23.8B
+ __ addv(v27, __ T16B, v28); // addv b27, v28.16B
+ __ addv(v4, __ T4H, v5); // addv h4, v5.4H
+ __ addv(v7, __ T8H, v8); // addv h7, v8.8H
+ __ addv(v6, __ T4S, v7); // addv s6, v7.4S
+ __ smaxv(v1, __ T8B, v2); // smaxv b1, v2.8B
+ __ smaxv(v26, __ T16B, v27); // smaxv b26, v27.16B
+ __ smaxv(v15, __ T4H, v16); // smaxv h15, v16.4H
+ __ smaxv(v2, __ T8H, v3); // smaxv h2, v3.8H
+ __ smaxv(v13, __ T4S, v14); // smaxv s13, v14.4S
+ __ fmaxv(v13, __ T4S, v14); // fmaxv s13, v14.4S
+ __ sminv(v24, __ T8B, v25); // sminv b24, v25.8B
+ __ sminv(v23, __ T16B, v24); // sminv b23, v24.16B
+ __ sminv(v4, __ T4H, v5); // sminv h4, v5.4H
+ __ sminv(v19, __ T8H, v20); // sminv h19, v20.8H
+ __ sminv(v15, __ T4S, v16); // sminv s15, v16.4S
+ __ fminv(v0, __ T4S, v1); // fminv s0, v1.4S
+
+// TwoRegNEONOp
+ __ absr(v4, __ T8B, v5); // abs v4.8B, v5.8B
+ __ absr(v20, __ T16B, v21); // abs v20.16B, v21.16B
+ __ absr(v11, __ T4H, v12); // abs v11.4H, v12.4H
+ __ absr(v29, __ T8H, v30); // abs v29.8H, v30.8H
+ __ absr(v15, __ T2S, v16); // abs v15.2S, v16.2S
+ __ absr(v21, __ T4S, v22); // abs v21.4S, v22.4S
+ __ absr(v4, __ T2D, v5); // abs v4.2D, v5.2D
+ __ fabs(v14, __ T2S, v15); // fabs v14.2S, v15.2S
+ __ fabs(v22, __ T4S, v23); // fabs v22.4S, v23.4S
+ __ fabs(v25, __ T2D, v26); // fabs v25.2D, v26.2D
+ __ fneg(v6, __ T2S, v7); // fneg v6.2S, v7.2S
+ __ fneg(v12, __ T4S, v13); // fneg v12.4S, v13.4S
+ __ fneg(v14, __ T2D, v15); // fneg v14.2D, v15.2D
+ __ fsqrt(v13, __ T2S, v14); // fsqrt v13.2S, v14.2S
+ __ fsqrt(v14, __ T4S, v15); // fsqrt v14.4S, v15.4S
+ __ fsqrt(v9, __ T2D, v10); // fsqrt v9.2D, v10.2D
+ __ notr(v25, __ T8B, v26); // not v25.8B, v26.8B
+ __ notr(v28, __ T16B, v29); // not v28.16B, v29.16B
+
+// ThreeRegNEONOp
+ __ andr(v10, __ T8B, v11, v12); // and v10.8B, v11.8B, v12.8B
+ __ andr(v19, __ T16B, v20, v21); // and v19.16B, v20.16B, v21.16B
+ __ orr(v11, __ T8B, v12, v13); // orr v11.8B, v12.8B, v13.8B
+ __ orr(v17, __ T16B, v18, v19); // orr v17.16B, v18.16B, v19.16B
+ __ eor(v21, __ T8B, v22, v23); // eor v21.8B, v22.8B, v23.8B
+ __ eor(v15, __ T16B, v16, v17); // eor v15.16B, v16.16B, v17.16B
+ __ addv(v20, __ T8B, v21, v22); // add v20.8B, v21.8B, v22.8B
+ __ addv(v23, __ T16B, v24, v25); // add v23.16B, v24.16B, v25.16B
+ __ addv(v26, __ T4H, v27, v28); // add v26.4H, v27.4H, v28.4H
+ __ addv(v5, __ T8H, v6, v7); // add v5.8H, v6.8H, v7.8H
+ __ addv(v6, __ T2S, v7, v8); // add v6.2S, v7.2S, v8.2S
+ __ addv(v15, __ T4S, v16, v17); // add v15.4S, v16.4S, v17.4S
+ __ addv(v15, __ T2D, v16, v17); // add v15.2D, v16.2D, v17.2D
+ __ fadd(v25, __ T2S, v26, v27); // fadd v25.2S, v26.2S, v27.2S
+ __ fadd(v16, __ T4S, v17, v18); // fadd v16.4S, v17.4S, v18.4S
+ __ fadd(v27, __ T2D, v28, v29); // fadd v27.2D, v28.2D, v29.2D
+ __ subv(v24, __ T8B, v25, v26); // sub v24.8B, v25.8B, v26.8B
+ __ subv(v15, __ T16B, v16, v17); // sub v15.16B, v16.16B, v17.16B
+ __ subv(v25, __ T4H, v26, v27); // sub v25.4H, v26.4H, v27.4H
+ __ subv(v14, __ T8H, v15, v16); // sub v14.8H, v15.8H, v16.8H
+ __ subv(v10, __ T2S, v11, v12); // sub v10.2S, v11.2S, v12.2S
+ __ subv(v13, __ T4S, v14, v15); // sub v13.4S, v14.4S, v15.4S
+ __ subv(v14, __ T2D, v15, v16); // sub v14.2D, v15.2D, v16.2D
+ __ fsub(v20, __ T2S, v21, v22); // fsub v20.2S, v21.2S, v22.2S
+ __ fsub(v1, __ T4S, v2, v3); // fsub v1.4S, v2.4S, v3.4S
+ __ fsub(v22, __ T2D, v23, v24); // fsub v22.2D, v23.2D, v24.2D
+ __ mulv(v30, __ T8B, v31, v0); // mul v30.8B, v31.8B, v0.8B
+ __ mulv(v14, __ T16B, v15, v16); // mul v14.16B, v15.16B, v16.16B
+ __ mulv(v2, __ T4H, v3, v4); // mul v2.4H, v3.4H, v4.4H
+ __ mulv(v6, __ T8H, v7, v8); // mul v6.8H, v7.8H, v8.8H
+ __ mulv(v3, __ T2S, v4, v5); // mul v3.2S, v4.2S, v5.2S
+ __ mulv(v7, __ T4S, v8, v9); // mul v7.4S, v8.4S, v9.4S
+ __ fmul(v24, __ T2S, v25, v26); // fmul v24.2S, v25.2S, v26.2S
+ __ fmul(v0, __ T4S, v1, v2); // fmul v0.4S, v1.4S, v2.4S
+ __ fmul(v27, __ T2D, v28, v29); // fmul v27.2D, v28.2D, v29.2D
+ __ mlav(v29, __ T4H, v30, v31); // mla v29.4H, v30.4H, v31.4H
+ __ mlav(v5, __ T8H, v6, v7); // mla v5.8H, v6.8H, v7.8H
+ __ mlav(v5, __ T2S, v6, v7); // mla v5.2S, v6.2S, v7.2S
+ __ mlav(v29, __ T4S, v30, v31); // mla v29.4S, v30.4S, v31.4S
+ __ fmla(v11, __ T2S, v12, v13); // fmla v11.2S, v12.2S, v13.2S
+ __ fmla(v25, __ T4S, v26, v27); // fmla v25.4S, v26.4S, v27.4S
+ __ fmla(v0, __ T2D, v1, v2); // fmla v0.2D, v1.2D, v2.2D
+ __ mlsv(v30, __ T4H, v31, v0); // mls v30.4H, v31.4H, v0.4H
+ __ mlsv(v0, __ T8H, v1, v2); // mls v0.8H, v1.8H, v2.8H
+ __ mlsv(v17, __ T2S, v18, v19); // mls v17.2S, v18.2S, v19.2S
+ __ mlsv(v28, __ T4S, v29, v30); // mls v28.4S, v29.4S, v30.4S
+ __ fmls(v25, __ T2S, v26, v27); // fmls v25.2S, v26.2S, v27.2S
+ __ fmls(v9, __ T4S, v10, v11); // fmls v9.4S, v10.4S, v11.4S
+ __ fmls(v25, __ T2D, v26, v27); // fmls v25.2D, v26.2D, v27.2D
+ __ fdiv(v12, __ T2S, v13, v14); // fdiv v12.2S, v13.2S, v14.2S
+ __ fdiv(v15, __ T4S, v16, v17); // fdiv v15.4S, v16.4S, v17.4S
+ __ fdiv(v11, __ T2D, v12, v13); // fdiv v11.2D, v12.2D, v13.2D
+ __ maxv(v10, __ T8B, v11, v12); // smax v10.8B, v11.8B, v12.8B
+ __ maxv(v17, __ T16B, v18, v19); // smax v17.16B, v18.16B, v19.16B
+ __ maxv(v24, __ T4H, v25, v26); // smax v24.4H, v25.4H, v26.4H
+ __ maxv(v21, __ T8H, v22, v23); // smax v21.8H, v22.8H, v23.8H
+ __ maxv(v23, __ T2S, v24, v25); // smax v23.2S, v24.2S, v25.2S
+ __ maxv(v0, __ T4S, v1, v2); // smax v0.4S, v1.4S, v2.4S
+ __ fmax(v16, __ T2S, v17, v18); // fmax v16.2S, v17.2S, v18.2S
+ __ fmax(v10, __ T4S, v11, v12); // fmax v10.4S, v11.4S, v12.4S
+ __ fmax(v6, __ T2D, v7, v8); // fmax v6.2D, v7.2D, v8.2D
+ __ minv(v28, __ T8B, v29, v30); // smin v28.8B, v29.8B, v30.8B
+ __ minv(v6, __ T16B, v7, v8); // smin v6.16B, v7.16B, v8.16B
+ __ minv(v5, __ T4H, v6, v7); // smin v5.4H, v6.4H, v7.4H
+ __ minv(v5, __ T8H, v6, v7); // smin v5.8H, v6.8H, v7.8H
+ __ minv(v20, __ T2S, v21, v22); // smin v20.2S, v21.2S, v22.2S
+ __ minv(v17, __ T4S, v18, v19); // smin v17.4S, v18.4S, v19.4S
+ __ fmin(v15, __ T2S, v16, v17); // fmin v15.2S, v16.2S, v17.2S
+ __ fmin(v17, __ T4S, v18, v19); // fmin v17.4S, v18.4S, v19.4S
+ __ fmin(v29, __ T2D, v30, v31); // fmin v29.2D, v30.2D, v31.2D
+ __ cmeq(v26, __ T8B, v27, v28); // cmeq v26.8B, v27.8B, v28.8B
+ __ cmeq(v28, __ T16B, v29, v30); // cmeq v28.16B, v29.16B, v30.16B
+ __ cmeq(v1, __ T4H, v2, v3); // cmeq v1.4H, v2.4H, v3.4H
+ __ cmeq(v27, __ T8H, v28, v29); // cmeq v27.8H, v28.8H, v29.8H
+ __ cmeq(v0, __ T2S, v1, v2); // cmeq v0.2S, v1.2S, v2.2S
+ __ cmeq(v20, __ T4S, v21, v22); // cmeq v20.4S, v21.4S, v22.4S
+ __ cmeq(v28, __ T2D, v29, v30); // cmeq v28.2D, v29.2D, v30.2D
+ __ fcmeq(v15, __ T2S, v16, v17); // fcmeq v15.2S, v16.2S, v17.2S
+ __ fcmeq(v12, __ T4S, v13, v14); // fcmeq v12.4S, v13.4S, v14.4S
+ __ fcmeq(v10, __ T2D, v11, v12); // fcmeq v10.2D, v11.2D, v12.2D
+ __ cmgt(v28, __ T8B, v29, v30); // cmgt v28.8B, v29.8B, v30.8B
+ __ cmgt(v28, __ T16B, v29, v30); // cmgt v28.16B, v29.16B, v30.16B
+ __ cmgt(v19, __ T4H, v20, v21); // cmgt v19.4H, v20.4H, v21.4H
+ __ cmgt(v22, __ T8H, v23, v24); // cmgt v22.8H, v23.8H, v24.8H
+ __ cmgt(v10, __ T2S, v11, v12); // cmgt v10.2S, v11.2S, v12.2S
+ __ cmgt(v4, __ T4S, v5, v6); // cmgt v4.4S, v5.4S, v6.4S
+ __ cmgt(v30, __ T2D, v31, v0); // cmgt v30.2D, v31.2D, v0.2D
+ __ fcmgt(v20, __ T2S, v21, v22); // fcmgt v20.2S, v21.2S, v22.2S
+ __ fcmgt(v8, __ T4S, v9, v10); // fcmgt v8.4S, v9.4S, v10.4S
+ __ fcmgt(v30, __ T2D, v31, v0); // fcmgt v30.2D, v31.2D, v0.2D
+ __ cmge(v17, __ T8B, v18, v19); // cmge v17.8B, v18.8B, v19.8B
+ __ cmge(v10, __ T16B, v11, v12); // cmge v10.16B, v11.16B, v12.16B
+ __ cmge(v27, __ T4H, v28, v29); // cmge v27.4H, v28.4H, v29.4H
+ __ cmge(v2, __ T8H, v3, v4); // cmge v2.8H, v3.8H, v4.8H
+ __ cmge(v24, __ T2S, v25, v26); // cmge v24.2S, v25.2S, v26.2S
+ __ cmge(v4, __ T4S, v5, v6); // cmge v4.4S, v5.4S, v6.4S
+ __ cmge(v3, __ T2D, v4, v5); // cmge v3.2D, v4.2D, v5.2D
+ __ fcmge(v8, __ T2S, v9, v10); // fcmge v8.2S, v9.2S, v10.2S
+ __ fcmge(v22, __ T4S, v23, v24); // fcmge v22.4S, v23.4S, v24.4S
+ __ fcmge(v17, __ T2D, v18, v19); // fcmge v17.2D, v18.2D, v19.2D
// SpecialCases
__ ccmn(zr, zr, 3u, Assembler::LE); // ccmn xzr, xzr, #3, LE
@@ -705,774 +840,160 @@ void entry(CodeBuffer *cb) {
__ fmovd(v0, -1.0625); // fmov d0, #-1.0625
// LSEOp
- __ swp(Assembler::xword, r24, r24, r4); // swp x24, x24, [x4]
- __ ldadd(Assembler::xword, r20, r16, r0); // ldadd x20, x16, [x0]
- __ ldbic(Assembler::xword, r4, r21, r11); // ldclr x4, x21, [x11]
- __ ldeor(Assembler::xword, r30, r16, r22); // ldeor x30, x16, [x22]
- __ ldorr(Assembler::xword, r4, r15, r23); // ldset x4, x15, [x23]
- __ ldsmin(Assembler::xword, r26, r6, r12); // ldsmin x26, x6, [x12]
- __ ldsmax(Assembler::xword, r15, r14, r15); // ldsmax x15, x14, [x15]
- __ ldumin(Assembler::xword, r9, r25, r29); // ldumin x9, x25, [x29]
- __ ldumax(Assembler::xword, r11, r20, r12); // ldumax x11, x20, [x12]
+ __ swp(Assembler::xword, r13, r5, r29); // swp x13, x5, [x29]
+ __ ldadd(Assembler::xword, r24, r21, r26); // ldadd x24, x21, [x26]
+ __ ldbic(Assembler::xword, r24, r3, r24); // ldclr x24, x3, [x24]
+ __ ldeor(Assembler::xword, r26, r23, r15); // ldeor x26, x23, [x15]
+ __ ldorr(Assembler::xword, r21, r3, r24); // ldset x21, x3, [x24]
+ __ ldsmin(Assembler::xword, r8, r25, r20); // ldsmin x8, x25, [x20]
+ __ ldsmax(Assembler::xword, r16, r17, r2); // ldsmax x16, x17, [x2]
+ __ ldumin(Assembler::xword, r1, r0, r24); // ldumin x1, x0, [x24]
+ __ ldumax(Assembler::xword, r4, r3, r12); // ldumax x4, x3, [x12]
// LSEOp
- __ swpa(Assembler::xword, r16, r22, r16); // swpa x16, x22, [x16]
- __ ldadda(Assembler::xword, r21, r24, r26); // ldadda x21, x24, [x26]
- __ ldbica(Assembler::xword, r6, r6, r16); // ldclra x6, x6, [x16]
- __ ldeora(Assembler::xword, r16, r25, r16); // ldeora x16, x25, [x16]
- __ ldorra(Assembler::xword, r28, r24, r16); // ldseta x28, x24, [x16]
- __ ldsmina(Assembler::xword, r26, r15, r10); // ldsmina x26, x15, [x10]
- __ ldsmaxa(Assembler::xword, r13, r14, r20); // ldsmaxa x13, x14, [x20]
- __ ldumina(Assembler::xword, r1, r23, r30); // ldumina x1, x23, [x30]
- __ ldumaxa(Assembler::xword, r14, r2, r6); // ldumaxa x14, x2, [x6]
+ __ swpa(Assembler::xword, zr, r28, r10); // swpa xzr, x28, [x10]
+ __ ldadda(Assembler::xword, r26, r2, r12); // ldadda x26, x2, [x12]
+ __ ldbica(Assembler::xword, r16, zr, r1); // ldclra x16, xzr, [x1]
+ __ ldeora(Assembler::xword, r13, r29, r0); // ldeora x13, x29, [x0]
+ __ ldorra(Assembler::xword, r19, r12, r17); // ldseta x19, x12, [x17]
+ __ ldsmina(Assembler::xword, r22, r13, r28); // ldsmina x22, x13, [x28]
+ __ ldsmaxa(Assembler::xword, r30, zr, r1); // ldsmaxa x30, xzr, [x1]
+ __ ldumina(Assembler::xword, r26, r28, r4); // ldumina x26, x28, [x4]
+ __ ldumaxa(Assembler::xword, r30, r4, r6); // ldumaxa x30, x4, [x6]
// LSEOp
- __ swpal(Assembler::xword, r3, r8, r25); // swpal x3, x8, [x25]
- __ ldaddal(Assembler::xword, r0, r27, r30); // ldaddal x0, x27, [x30]
- __ ldbical(Assembler::xword, r5, r5, r30); // ldclral x5, x5, [x30]
- __ ldeoral(Assembler::xword, r11, r25, r0); // ldeoral x11, x25, [x0]
- __ ldorral(Assembler::xword, zr, r0, r19); // ldsetal xzr, x0, [x19]
- __ ldsminal(Assembler::xword, r29, r26, r9); // ldsminal x29, x26, [x9]
- __ ldsmaxal(Assembler::xword, r26, r12, r15); // ldsmaxal x26, x12, [x15]
- __ lduminal(Assembler::xword, r11, r11, r15); // lduminal x11, x11, [x15]
- __ ldumaxal(Assembler::xword, r25, r22, r24); // ldumaxal x25, x22, [x24]
+ __ swpal(Assembler::xword, r30, r26, r15); // swpal x30, x26, [x15]
+ __ ldaddal(Assembler::xword, r9, r8, r12); // ldaddal x9, x8, [x12]
+ __ ldbical(Assembler::xword, r0, r20, r1); // ldclral x0, x20, [x1]
+ __ ldeoral(Assembler::xword, r24, r2, r0); // ldeoral x24, x2, [x0]
+ __ ldorral(Assembler::xword, r9, r24, r26); // ldsetal x9, x24, [x26]
+ __ ldsminal(Assembler::xword, r16, r30, r3); // ldsminal x16, x30, [x3]
+ __ ldsmaxal(Assembler::xword, r10, r23, r10); // ldsmaxal x10, x23, [x10]
+ __ lduminal(Assembler::xword, r4, r16, r2); // lduminal x4, x16, [x2]
+ __ ldumaxal(Assembler::xword, r11, r8, r10); // ldumaxal x11, x8, [x10]
// LSEOp
- __ swpl(Assembler::xword, r0, r17, r11); // swpl x0, x17, [x11]
- __ ldaddl(Assembler::xword, r6, r29, r6); // ldaddl x6, x29, [x6]
- __ ldbicl(Assembler::xword, r5, r5, r21); // ldclrl x5, x5, [x21]
- __ ldeorl(Assembler::xword, r19, r16, r15); // ldeorl x19, x16, [x15]
- __ ldorrl(Assembler::xword, r30, r27, r28); // ldsetl x30, x27, [x28]
- __ ldsminl(Assembler::xword, r1, r28, r1); // ldsminl x1, x28, [x1]
- __ ldsmaxl(Assembler::xword, r20, r29, r16); // ldsmaxl x20, x29, [x16]
- __ lduminl(Assembler::xword, r13, r10, r29); // lduminl x13, x10, [x29]
- __ ldumaxl(Assembler::xword, r29, r19, r22); // ldumaxl x29, x19, [x22]
+ __ swpl(Assembler::xword, r15, r17, r2); // swpl x15, x17, [x2]
+ __ ldaddl(Assembler::xword, r10, r12, r12); // ldaddl x10, x12, [x12]
+ __ ldbicl(Assembler::xword, r15, r13, r2); // ldclrl x15, x13, [x2]
+ __ ldeorl(Assembler::xword, r7, r20, r26); // ldeorl x7, x20, [x26]
+ __ ldorrl(Assembler::xword, r16, r4, r2); // ldsetl x16, x4, [x2]
+ __ ldsminl(Assembler::xword, r4, r12, r15); // ldsminl x4, x12, [x15]
+ __ ldsmaxl(Assembler::xword, r21, r16, r15); // ldsmaxl x21, x16, [x15]
+ __ lduminl(Assembler::xword, r11, r21, r23); // lduminl x11, x21, [x23]
+ __ ldumaxl(Assembler::xword, r12, r26, r23); // ldumaxl x12, x26, [x23]
// LSEOp
- __ swp(Assembler::word, r10, r4, sp); // swp w10, w4, [sp]
- __ ldadd(Assembler::word, r21, r8, sp); // ldadd w21, w8, [sp]
- __ ldbic(Assembler::word, r19, r10, r28); // ldclr w19, w10, [x28]
- __ ldeor(Assembler::word, r2, r25, r5); // ldeor w2, w25, [x5]
- __ ldorr(Assembler::word, r3, r8, r22); // ldset w3, w8, [x22]
- __ ldsmin(Assembler::word, r19, r13, r5); // ldsmin w19, w13, [x5]
- __ ldsmax(Assembler::word, r29, r24, r21); // ldsmax w29, w24, [x21]
- __ ldumin(Assembler::word, r26, r24, r3); // ldumin w26, w24, [x3]
- __ ldumax(Assembler::word, r24, r26, r23); // ldumax w24, w26, [x23]
+ __ swp(Assembler::word, r28, r14, r11); // swp w28, w14, [x11]
+ __ ldadd(Assembler::word, r24, r1, r12); // ldadd w24, w1, [x12]
+ __ ldbic(Assembler::word, zr, r10, r16); // ldclr wzr, w10, [x16]
+ __ ldeor(Assembler::word, r7, r2, r3); // ldeor w7, w2, [x3]
+ __ ldorr(Assembler::word, r13, r19, r17); // ldset w13, w19, [x17]
+ __ ldsmin(Assembler::word, r16, r3, r1); // ldsmin w16, w3, [x1]
+ __ ldsmax(Assembler::word, r11, r30, r5); // ldsmax w11, w30, [x5]
+ __ ldumin(Assembler::word, r8, r15, r29); // ldumin w8, w15, [x29]
+ __ ldumax(Assembler::word, r30, r0, r20); // ldumax w30, w0, [x20]
// LSEOp
- __ swpa(Assembler::word, r15, r21, r3); // swpa w15, w21, [x3]
- __ ldadda(Assembler::word, r24, r8, r25); // ldadda w24, w8, [x25]
- __ ldbica(Assembler::word, r20, r16, r17); // ldclra w20, w16, [x17]
- __ ldeora(Assembler::word, r2, r1, r0); // ldeora w2, w1, [x0]
- __ ldorra(Assembler::word, r24, r4, r3); // ldseta w24, w4, [x3]
- __ ldsmina(Assembler::word, r12, zr, r28); // ldsmina w12, wzr, [x28]
- __ ldsmaxa(Assembler::word, r10, r26, r2); // ldsmaxa w10, w26, [x2]
- __ ldumina(Assembler::word, r12, r16, sp); // ldumina w12, w16, [sp]
- __ ldumaxa(Assembler::word, r1, r13, r29); // ldumaxa w1, w13, [x29]
+ __ swpa(Assembler::word, r7, r20, r23); // swpa w7, w20, [x23]
+ __ ldadda(Assembler::word, r28, r21, r27); // ldadda w28, w21, [x27]
+ __ ldbica(Assembler::word, r25, r5, r1); // ldclra w25, w5, [x1]
+ __ ldeora(Assembler::word, r23, r16, sp); // ldeora w23, w16, [sp]
+ __ ldorra(Assembler::word, r5, r12, r9); // ldseta w5, w12, [x9]
+ __ ldsmina(Assembler::word, r28, r15, r29); // ldsmina w28, w15, [x29]
+ __ ldsmaxa(Assembler::word, r22, zr, r19); // ldsmaxa w22, wzr, [x19]
+ __ ldumina(Assembler::word, zr, r5, r14); // ldumina wzr, w5, [x14]
+ __ ldumaxa(Assembler::word, r16, zr, r15); // ldumaxa w16, wzr, [x15]
// LSEOp
- __ swpal(Assembler::word, r0, r19, r12); // swpal w0, w19, [x12]
- __ ldaddal(Assembler::word, r17, r22, r13); // ldaddal w17, w22, [x13]
- __ ldbical(Assembler::word, r28, r30, sp); // ldclral w28, w30, [sp]
- __ ldeoral(Assembler::word, r1, r26, r28); // ldeoral w1, w26, [x28]
- __ ldorral(Assembler::word, r4, r30, r4); // ldsetal w4, w30, [x4]
- __ ldsminal(Assembler::word, r6, r30, r26); // ldsminal w6, w30, [x26]
- __ ldsmaxal(Assembler::word, r16, r9, r8); // ldsmaxal w16, w9, [x8]
- __ lduminal(Assembler::word, r12, r0, r20); // lduminal w12, w0, [x20]
- __ ldumaxal(Assembler::word, r1, r24, r2); // ldumaxal w1, w24, [x2]
+ __ swpal(Assembler::word, r27, r20, r16); // swpal w27, w20, [x16]
+ __ ldaddal(Assembler::word, r12, r11, r9); // ldaddal w12, w11, [x9]
+ __ ldbical(Assembler::word, r6, r30, r17); // ldclral w6, w30, [x17]
+ __ ldeoral(Assembler::word, r27, r28, r30); // ldeoral w27, w28, [x30]
+ __ ldorral(Assembler::word, r7, r10, r20); // ldsetal w7, w10, [x20]
+ __ ldsminal(Assembler::word, r10, r4, r24); // ldsminal w10, w4, [x24]
+ __ ldsmaxal(Assembler::word, r17, r17, r22); // ldsmaxal w17, w17, [x22]
+ __ lduminal(Assembler::word, r3, r29, r15); // lduminal w3, w29, [x15]
+ __ ldumaxal(Assembler::word, r22, r19, r19); // ldumaxal w22, w19, [x19]
// LSEOp
- __ swpl(Assembler::word, r0, r9, r24); // swpl w0, w9, [x24]
- __ ldaddl(Assembler::word, r26, r16, r30); // ldaddl w26, w16, [x30]
- __ ldbicl(Assembler::word, r3, r10, r23); // ldclrl w3, w10, [x23]
- __ ldeorl(Assembler::word, r10, r4, r15); // ldeorl w10, w4, [x15]
- __ ldorrl(Assembler::word, r2, r11, r8); // ldsetl w2, w11, [x8]
- __ ldsminl(Assembler::word, r10, r15, r17); // ldsminl w10, w15, [x17]
- __ ldsmaxl(Assembler::word, r2, r10, r12); // ldsmaxl w2, w10, [x12]
- __ lduminl(Assembler::word, r12, r15, r13); // lduminl w12, w15, [x13]
- __ ldumaxl(Assembler::word, r2, r7, r20); // ldumaxl w2, w7, [x20]
+ __ swpl(Assembler::word, r22, r2, r15); // swpl w22, w2, [x15]
+ __ ldaddl(Assembler::word, r6, r12, r16); // ldaddl w6, w12, [x16]
+ __ ldbicl(Assembler::word, r11, r13, r23); // ldclrl w11, w13, [x23]
+ __ ldeorl(Assembler::word, r1, r30, r19); // ldeorl w1, w30, [x19]
+ __ ldorrl(Assembler::word, r5, r17, r2); // ldsetl w5, w17, [x2]
+ __ ldsminl(Assembler::word, r16, r22, r13); // ldsminl w16, w22, [x13]
+ __ ldsmaxl(Assembler::word, r10, r21, r29); // ldsmaxl w10, w21, [x29]
+ __ lduminl(Assembler::word, r27, r12, r27); // lduminl w27, w12, [x27]
+ __ ldumaxl(Assembler::word, r3, r1, sp); // ldumaxl w3, w1, [sp]
+
+// SHA3SIMDOp
+ __ bcax(v23, __ T16B, v19, v17, v9); // bcax v23.16B, v19.16B, v17.16B, v9.16B
+ __ eor3(v27, __ T16B, v26, v14, v6); // eor3 v27.16B, v26.16B, v14.16B, v6.16B
+ __ rax1(v20, __ T2D, v22, v30); // rax1 v20.2D, v22.2D, v30.2D
+ __ xar(v24, __ T2D, v2, v30, 54); // xar v24.2D, v2.2D, v30.2D, #54
+
+// SHA512SIMDOp
+ __ sha512h(v17, __ T2D, v10, v22); // sha512h q17, q10, v22.2D
+ __ sha512h2(v17, __ T2D, v2, v17); // sha512h2 q17, q2, v17.2D
+ __ sha512su0(v0, __ T2D, v24); // sha512su0 v0.2D, v24.2D
+ __ sha512su1(v25, __ T2D, v22, v2); // sha512su1 v25.2D, v22.2D, v2.2D
// SVEVectorOp
- __ sve_add(z25, __ B, z15, z4); // add z25.b, z15.b, z4.b
- __ sve_sub(z4, __ S, z11, z17); // sub z4.s, z11.s, z17.s
- __ sve_fadd(z16, __ D, z17, z10); // fadd z16.d, z17.d, z10.d
- __ sve_fmul(z22, __ D, z12, z25); // fmul z22.d, z12.d, z25.d
- __ sve_fsub(z28, __ D, z14, z10); // fsub z28.d, z14.d, z10.d
- __ sve_abs(z1, __ H, p3, z30); // abs z1.h, p3/m, z30.h
- __ sve_add(z15, __ B, p1, z2); // add z15.b, p1/m, z15.b, z2.b
- __ sve_asr(z13, __ S, p4, z16); // asr z13.s, p4/m, z13.s, z16.s
- __ sve_cnt(z3, __ D, p0, z11); // cnt z3.d, p0/m, z11.d
- __ sve_lsl(z5, __ D, p2, z14); // lsl z5.d, p2/m, z5.d, z14.d
- __ sve_lsr(z29, __ B, p0, z20); // lsr z29.b, p0/m, z29.b, z20.b
- __ sve_mul(z20, __ S, p5, z27); // mul z20.s, p5/m, z20.s, z27.s
- __ sve_neg(z26, __ B, p6, z4); // neg z26.b, p6/m, z4.b
- __ sve_not(z22, __ B, p4, z30); // not z22.b, p4/m, z30.b
- __ sve_smax(z11, __ H, p2, z27); // smax z11.h, p2/m, z11.h, z27.h
- __ sve_smin(z28, __ S, p5, z30); // smin z28.s, p5/m, z28.s, z30.s
- __ sve_sub(z30, __ S, p1, z13); // sub z30.s, p1/m, z30.s, z13.s
- __ sve_fabs(z30, __ D, p4, z26); // fabs z30.d, p4/m, z26.d
- __ sve_fadd(z15, __ S, p3, z11); // fadd z15.s, p3/m, z15.s, z11.s
- __ sve_fdiv(z6, __ D, p7, z16); // fdiv z6.d, p7/m, z6.d, z16.d
- __ sve_fmax(z27, __ S, p7, z7); // fmax z27.s, p7/m, z27.s, z7.s
- __ sve_fmin(z19, __ D, p2, z4); // fmin z19.d, p2/m, z19.d, z4.d
- __ sve_fmul(z17, __ S, p4, z22); // fmul z17.s, p4/m, z17.s, z22.s
- __ sve_fneg(z28, __ D, p3, z21); // fneg z28.d, p3/m, z21.d
- __ sve_frintm(z17, __ S, p5, z2); // frintm z17.s, p5/m, z2.s
- __ sve_frintn(z6, __ S, p3, z15); // frintn z6.s, p3/m, z15.s
- __ sve_frintp(z12, __ D, p5, z1); // frintp z12.d, p5/m, z1.d
- __ sve_fsqrt(z17, __ S, p1, z17); // fsqrt z17.s, p1/m, z17.s
- __ sve_fsub(z15, __ S, p5, z13); // fsub z15.s, p5/m, z15.s, z13.s
- __ sve_fmla(z20, __ D, p7, z27, z11); // fmla z20.d, p7/m, z27.d, z11.d
- __ sve_fmls(z3, __ D, p0, z30, z23); // fmls z3.d, p0/m, z30.d, z23.d
- __ sve_fnmla(z17, __ S, p2, z27, z26); // fnmla z17.s, p2/m, z27.s, z26.s
- __ sve_fnmls(z6, __ D, p5, z22, z30); // fnmls z6.d, p5/m, z22.d, z30.d
- __ sve_mla(z2, __ H, p7, z26, z17); // mla z2.h, p7/m, z26.h, z17.h
- __ sve_mls(z22, __ B, p4, z2, z17); // mls z22.b, p4/m, z2.b, z17.b
- __ sve_and(z24, z25, z22); // and z24.d, z25.d, z22.d
- __ sve_eor(z17, z12, z3); // eor z17.d, z12.d, z3.d
- __ sve_orr(z29, z28, z16); // orr z29.d, z28.d, z16.d
+ __ sve_add(z17, __ D, z12, z3); // add z17.d, z12.d, z3.d
+ __ sve_sub(z29, __ D, z28, z16); // sub z29.d, z28.d, z16.d
+ __ sve_fadd(z6, __ D, z9, z28); // fadd z6.d, z9.d, z28.d
+ __ sve_fmul(z7, __ S, z4, z7); // fmul z7.s, z4.s, z7.s
+ __ sve_fsub(z9, __ S, z22, z8); // fsub z9.s, z22.s, z8.s
+ __ sve_abs(z27, __ B, p5, z30); // abs z27.b, p5/m, z30.b
+ __ sve_add(z26, __ H, p0, z16); // add z26.h, p0/m, z26.h, z16.h
+ __ sve_asr(z3, __ D, p6, z8); // asr z3.d, p6/m, z3.d, z8.d
+ __ sve_cnt(z21, __ D, p6, z26); // cnt z21.d, p6/m, z26.d
+ __ sve_lsl(z22, __ B, p0, z4); // lsl z22.b, p0/m, z22.b, z4.b
+ __ sve_lsr(z17, __ H, p0, z3); // lsr z17.h, p0/m, z17.h, z3.h
+ __ sve_mul(z1, __ B, p2, z6); // mul z1.b, p2/m, z1.b, z6.b
+ __ sve_neg(z9, __ S, p7, z7); // neg z9.s, p7/m, z7.s
+ __ sve_not(z22, __ H, p5, z5); // not z22.h, p5/m, z5.h
+ __ sve_smax(z8, __ B, p4, z30); // smax z8.b, p4/m, z8.b, z30.b
+ __ sve_smin(z17, __ D, p0, z11); // smin z17.d, p0/m, z17.d, z11.d
+ __ sve_sub(z28, __ S, p0, z26); // sub z28.s, p0/m, z28.s, z26.s
+ __ sve_fabs(z28, __ D, p3, z13); // fabs z28.d, p3/m, z13.d
+ __ sve_fadd(z16, __ S, p6, z5); // fadd z16.s, p6/m, z16.s, z5.s
+ __ sve_fdiv(z13, __ S, p2, z15); // fdiv z13.s, p2/m, z13.s, z15.s
+ __ sve_fmax(z26, __ S, p5, z11); // fmax z26.s, p5/m, z26.s, z11.s
+ __ sve_fmin(z22, __ S, p4, z4); // fmin z22.s, p4/m, z22.s, z4.s
+ __ sve_fmul(z19, __ S, p4, z17); // fmul z19.s, p4/m, z19.s, z17.s
+ __ sve_fneg(z14, __ D, p3, z2); // fneg z14.d, p3/m, z2.d
+ __ sve_frintm(z3, __ S, p5, z23); // frintm z3.s, p5/m, z23.s
+ __ sve_frintn(z6, __ S, p1, z17); // frintn z6.s, p1/m, z17.s
+ __ sve_frintp(z27, __ S, p4, z16); // frintp z27.s, p4/m, z16.s
+ __ sve_fsqrt(z2, __ S, p7, z3); // fsqrt z2.s, p7/m, z3.s
+ __ sve_fsub(z6, __ S, p4, z19); // fsub z6.s, p4/m, z6.s, z19.s
+ __ sve_fmla(z12, __ D, p5, z8, z24); // fmla z12.d, p5/m, z8.d, z24.d
+ __ sve_fmls(z17, __ S, p0, z10, z23); // fmls z17.s, p0/m, z10.s, z23.s
+ __ sve_fnmla(z19, __ S, p7, z13, z16); // fnmla z19.s, p7/m, z13.s, z16.s
+ __ sve_fnmls(z0, __ D, p1, z14, z17); // fnmls z0.d, p1/m, z14.d, z17.d
+ __ sve_mla(z8, __ S, p2, z22, z20); // mla z8.s, p2/m, z22.s, z20.s
+ __ sve_mls(z27, __ S, p0, z3, z15); // mls z27.s, p0/m, z3.s, z15.s
+ __ sve_and(z20, z7, z4); // and z20.d, z7.d, z4.d
+ __ sve_eor(z7, z0, z8); // eor z7.d, z0.d, z8.d
+ __ sve_orr(z19, z22, z4); // orr z19.d, z22.d, z4.d
// SVEReductionOp
- __ sve_andv(v6, __ S, p2, z28); // andv s6, p2, z28.s
- __ sve_orv(v7, __ H, p1, z7); // orv h7, p1, z7.h
- __ sve_eorv(v9, __ B, p5, z8); // eorv b9, p5, z8.b
- __ sve_smaxv(v27, __ B, p5, z30); // smaxv b27, p5, z30.b
- __ sve_sminv(v26, __ H, p0, z16); // sminv h26, p0, z16.h
- __ sve_fminv(v3, __ D, p6, z8); // fminv d3, p6, z8.d
- __ sve_fmaxv(v21, __ D, p6, z26); // fmaxv d21, p6, z26.d
- __ sve_fadda(v22, __ S, p0, z4); // fadda s22, p0, s22, z4.s
- __ sve_uaddv(v17, __ H, p0, z3); // uaddv d17, p0, z3.h
+ __ sve_andv(v9, __ D, p5, z11); // andv d9, p5, z11.d
+ __ sve_orv(v5, __ H, p7, z16); // orv h5, p7, z16.h
+ __ sve_eorv(v22, __ H, p3, z1); // eorv h22, p3, z1.h
+ __ sve_smaxv(v8, __ D, p5, z16); // smaxv d8, p5, z16.d
+ __ sve_sminv(v15, __ S, p1, z4); // sminv s15, p1, z4.s
+ __ sve_fminv(v8, __ S, p1, z29); // fminv s8, p1, z29.s
+ __ sve_fmaxv(v28, __ D, p4, z29); // fmaxv d28, p4, z29.d
+ __ sve_fadda(v9, __ S, p3, z2); // fadda s9, p3, s9, z2.s
+ __ sve_uaddv(v28, __ B, p0, z7); // uaddv d28, p0, z7.b
__ bind(forth);
/*
-aarch64ops.o: file format elf64-littleaarch64
-
-
-Disassembly of section .text:
-
-0000000000000000 :
- 0: 8b0d82fa add x26, x23, x13, lsl #32
- 4: cb49970c sub x12, x24, x9, lsr #37
- 8: ab889dfc adds x28, x15, x8, asr #39
- c: eb9ee787 subs x7, x28, x30, asr #57
- 10: 0b9b3ec9 add w9, w22, w27, asr #15
- 14: 4b9179a3 sub w3, w13, w17, asr #30
- 18: 2b88474e adds w14, w26, w8, asr #17
- 1c: 6b8c56c0 subs w0, w22, w12, asr #21
- 20: 8a1a51e0 and x0, x15, x26, lsl #20
- 24: aa11f4ba orr x26, x5, x17, lsl #61
- 28: ca0281b8 eor x24, x13, x2, lsl #32
- 2c: ea918c7c ands x28, x3, x17, asr #35
- 30: 0a5d4a19 and w25, w16, w29, lsr #18
- 34: 2a4b262d orr w13, w17, w11, lsr #9
- 38: 4a513ca5 eor w5, w5, w17, lsr #15
- 3c: 6a9b6ae2 ands w2, w23, w27, asr #26
- 40: 8a70b79b bic x27, x28, x16, lsr #45
- 44: aaba9728 orn x8, x25, x26, asr #37
- 48: ca6dfe3d eon x29, x17, x13, lsr #63
- 4c: ea627f1c bics x28, x24, x2, lsr #31
- 50: 0aa70f53 bic w19, w26, w7, asr #3
- 54: 2aaa0f06 orn w6, w24, w10, asr #3
- 58: 4a6176a4 eon w4, w21, w1, lsr #29
- 5c: 6a604eb0 bics w16, w21, w0, lsr #19
- 60: 1105ed91 add w17, w12, #0x17b
- 64: 3100583e adds w30, w1, #0x16
- 68: 5101f8bd sub w29, w5, #0x7e
- 6c: 710f0306 subs w6, w24, #0x3c0
- 70: 9101a1a0 add x0, x13, #0x68
- 74: b10a5cc8 adds x8, x6, #0x297
- 78: d10810aa sub x10, x5, #0x204
- 7c: f10fd061 subs x1, x3, #0x3f4
- 80: 120cb166 and w6, w11, #0xfff1fff1
- 84: 321764bc orr w28, w5, #0xfffffe07
- 88: 52174681 eor w1, w20, #0x7fffe00
- 8c: 720c0227 ands w7, w17, #0x100000
- 90: 9241018e and x14, x12, #0x8000000000000000
- 94: b25a2969 orr x9, x11, #0x1ffc000000000
- 98: d278b411 eor x17, x0, #0x3fffffffffff00
- 9c: f26aad01 ands x1, x8, #0xffffffffffc00003
- a0: 14000000 b a0
- a4: 17ffffd7 b 0
- a8: 14000242 b 9b0
- ac: 94000000 bl ac
- b0: 97ffffd4 bl 0
- b4: 9400023f bl 9b0
- b8: 3400000a cbz w10, b8
- bc: 34fffa2a cbz w10, 0
- c0: 3400478a cbz w10, 9b0
- c4: 35000008 cbnz w8, c4
- c8: 35fff9c8 cbnz w8, 0
- cc: 35004728 cbnz w8, 9b0
- d0: b400000b cbz x11, d0
- d4: b4fff96b cbz x11, 0
- d8: b40046cb cbz x11, 9b0
- dc: b500001d cbnz x29, dc
- e0: b5fff91d cbnz x29, 0
- e4: b500467d cbnz x29, 9b0
- e8: 10000013 adr x19, e8
- ec: 10fff8b3 adr x19, 0
- f0: 10004613 adr x19, 9b0
- f4: 90000013 adrp x19, 0
- f8: 36300016 tbz w22, #6, f8
- fc: 3637f836 tbz w22, #6, 0
- 100: 36304596 tbz w22, #6, 9b0
- 104: 3758000c tbnz w12, #11, 104
- 108: 375ff7cc tbnz w12, #11, 0
- 10c: 3758452c tbnz w12, #11, 9b0
- 110: 128313a0 mov w0, #0xffffe762 // #-6302
- 114: 528a32c7 mov w7, #0x5196 // #20886
- 118: 7289173b movk w27, #0x48b9
- 11c: 92ab3acc mov x12, #0xffffffffa629ffff // #-1507196929
- 120: d2a0bf94 mov x20, #0x5fc0000 // #100401152
- 124: f2c285e8 movk x8, #0x142f, lsl #32
- 128: 9358722f sbfx x15, x17, #24, #5
- 12c: 330e652f bfxil w15, w9, #14, #12
- 130: 53067f3b lsr w27, w25, #6
- 134: 93577c53 sbfx x19, x2, #23, #9
- 138: b34a1aac bfi x12, x21, #54, #7
- 13c: d35a4016 ubfiz x22, x0, #38, #17
- 140: 13946c63 extr w3, w3, w20, #27
- 144: 93c3dbc8 extr x8, x30, x3, #54
- 148: 54000000 b.eq 148 // b.none
- 14c: 54fff5a0 b.eq 0 // b.none
- 150: 54004300 b.eq 9b0 // b.none
- 154: 54000001 b.ne 154 // b.any
- 158: 54fff541 b.ne 0 // b.any
- 15c: 540042a1 b.ne 9b0 // b.any
- 160: 54000002 b.cs 160 // b.hs, b.nlast
- 164: 54fff4e2 b.cs 0 // b.hs, b.nlast
- 168: 54004242 b.cs 9b0 // b.hs, b.nlast
- 16c: 54000002 b.cs 16c // b.hs, b.nlast
- 170: 54fff482 b.cs 0 // b.hs, b.nlast
- 174: 540041e2 b.cs 9b0 // b.hs, b.nlast
- 178: 54000003 b.cc 178 // b.lo, b.ul, b.last
- 17c: 54fff423 b.cc 0 // b.lo, b.ul, b.last
- 180: 54004183 b.cc 9b0 // b.lo, b.ul, b.last
- 184: 54000003 b.cc 184 // b.lo, b.ul, b.last
- 188: 54fff3c3 b.cc 0 // b.lo, b.ul, b.last
- 18c: 54004123 b.cc 9b0 // b.lo, b.ul, b.last
- 190: 54000004 b.mi 190 // b.first
- 194: 54fff364 b.mi 0 // b.first
- 198: 540040c4 b.mi 9b0 // b.first
- 19c: 54000005 b.pl 19c // b.nfrst
- 1a0: 54fff305 b.pl 0 // b.nfrst
- 1a4: 54004065 b.pl 9b0 // b.nfrst
- 1a8: 54000006 b.vs 1a8
- 1ac: 54fff2a6 b.vs 0
- 1b0: 54004006 b.vs 9b0
- 1b4: 54000007 b.vc 1b4
- 1b8: 54fff247 b.vc 0
- 1bc: 54003fa7 b.vc 9b0
- 1c0: 54000008 b.hi 1c0 // b.pmore
- 1c4: 54fff1e8 b.hi 0 // b.pmore
- 1c8: 54003f48 b.hi 9b0 // b.pmore
- 1cc: 54000009 b.ls 1cc // b.plast
- 1d0: 54fff189 b.ls 0 // b.plast
- 1d4: 54003ee9 b.ls 9b0 // b.plast
- 1d8: 5400000a b.ge 1d8 // b.tcont
- 1dc: 54fff12a b.ge 0 // b.tcont
- 1e0: 54003e8a b.ge 9b0 // b.tcont
- 1e4: 5400000b b.lt 1e4 // b.tstop
- 1e8: 54fff0cb b.lt 0 // b.tstop
- 1ec: 54003e2b b.lt 9b0 // b.tstop
- 1f0: 5400000c b.gt 1f0
- 1f4: 54fff06c b.gt 0
- 1f8: 54003dcc b.gt 9b0
- 1fc: 5400000d b.le 1fc
- 200: 54fff00d b.le 0
- 204: 54003d6d b.le 9b0
- 208: 5400000e b.al 208
- 20c: 54ffefae b.al 0
- 210: 54003d0e b.al 9b0
- 214: 5400000f b.nv 214
- 218: 54ffef4f b.nv 0
- 21c: 54003caf b.nv 9b0
- 220: d40658e1 svc #0x32c7
- 224: d4014d22 hvc #0xa69
- 228: d4046543 smc #0x232a
- 22c: d4273f60 brk #0x39fb
- 230: d44cad80 hlt #0x656c
- 234: d503201f nop
- 238: d69f03e0 eret
- 23c: d6bf03e0 drps
- 240: d5033fdf isb
- 244: d5033e9f dsb st
- 248: d50332bf dmb oshst
- 24c: d61f0200 br x16
- 250: d63f0280 blr x20
- 254: c80a7d1b stxr w10, x27, [x8]
- 258: c800fea1 stlxr w0, x1, [x21]
- 25c: c85f7fb1 ldxr x17, [x29]
- 260: c85fff9d ldaxr x29, [x28]
- 264: c89ffee1 stlr x1, [x23]
- 268: c8dffe95 ldar x21, [x20]
- 26c: 88167e7b stxr w22, w27, [x19]
- 270: 880bfcd0 stlxr w11, w16, [x6]
- 274: 885f7c11 ldxr w17, [x0]
- 278: 885ffd44 ldaxr w4, [x10]
- 27c: 889ffed8 stlr w24, [x22]
- 280: 88dffe6a ldar w10, [x19]
- 284: 48017fc5 stxrh w1, w5, [x30]
- 288: 4808fe2c stlxrh w8, w12, [x17]
- 28c: 485f7dc9 ldxrh w9, [x14]
- 290: 485ffc27 ldaxrh w7, [x1]
- 294: 489ffe05 stlrh w5, [x16]
- 298: 48dffd82 ldarh w2, [x12]
- 29c: 080a7c6c stxrb w10, w12, [x3]
- 2a0: 081cff4e stlxrb w28, w14, [x26]
- 2a4: 085f7d5e ldxrb w30, [x10]
- 2a8: 085ffeae ldaxrb w14, [x21]
- 2ac: 089ffd2d stlrb w13, [x9]
- 2b0: 08dfff76 ldarb w22, [x27]
- 2b4: c87f4d7c ldxp x28, x19, [x11]
- 2b8: c87fcc5e ldaxp x30, x19, [x2]
- 2bc: c8220417 stxp w2, x23, x1, [x0]
- 2c0: c82cb5f0 stlxp w12, x16, x13, [x15]
- 2c4: 887f55b1 ldxp w17, w21, [x13]
- 2c8: 887ff90b ldaxp w11, w30, [x8]
- 2cc: 88382c2d stxp w24, w13, w11, [x1]
- 2d0: 883aedb5 stlxp w26, w21, w27, [x13]
- 2d4: f819928b stur x11, [x20, #-103]
- 2d8: b803e21c stur w28, [x16, #62]
- 2dc: 381f713b sturb w27, [x9, #-9]
- 2e0: 781ce322 sturh w2, [x25, #-50]
- 2e4: f850f044 ldur x4, [x2, #-241]
- 2e8: b85e129e ldur w30, [x20, #-31]
- 2ec: 385e92f1 ldurb w17, [x23, #-23]
- 2f0: 785ff35d ldurh w29, [x26, #-1]
- 2f4: 39801921 ldrsb x1, [x9, #6]
- 2f8: 7881318b ldursh x11, [x12, #19]
- 2fc: 78dce02b ldursh w11, [x1, #-50]
- 300: b8829313 ldursw x19, [x24, #41]
- 304: fc45f318 ldur d24, [x24, #95]
- 308: bc5d50af ldur s15, [x5, #-43]
- 30c: fc001375 stur d21, [x27, #1]
- 310: bc1951b7 stur s23, [x13, #-107]
- 314: f8008c0b str x11, [x0, #8]!
- 318: b801dc03 str w3, [x0, #29]!
- 31c: 38009dcb strb w11, [x14, #9]!
- 320: 781fdf1d strh w29, [x24, #-3]!
- 324: f8570e2d ldr x13, [x17, #-144]!
- 328: b85faecc ldr w12, [x22, #-6]!
- 32c: 385f6d8d ldrb w13, [x12, #-10]!
- 330: 785ebea0 ldrh w0, [x21, #-21]!
- 334: 38804cf7 ldrsb x23, [x7, #4]!
- 338: 789cbce3 ldrsh x3, [x7, #-53]!
- 33c: 78df9cbc ldrsh w28, [x5, #-7]!
- 340: b89eed38 ldrsw x24, [x9, #-18]!
- 344: fc40cd6e ldr d14, [x11, #12]!
- 348: bc5bdd93 ldr s19, [x12, #-67]!
- 34c: fc103c14 str d20, [x0, #-253]!
- 350: bc040c08 str s8, [x0, #64]!
- 354: f81a2784 str x4, [x28], #-94
- 358: b81ca4ec str w12, [x7], #-54
- 35c: 381e855b strb w27, [x10], #-24
- 360: 7801b506 strh w6, [x8], #27
- 364: f853654e ldr x14, [x10], #-202
- 368: b85d74b0 ldr w16, [x5], #-41
- 36c: 384095c2 ldrb w2, [x14], #9
- 370: 785ec5bc ldrh w28, [x13], #-20
- 374: 389e15a9 ldrsb x9, [x13], #-31
- 378: 789dc703 ldrsh x3, [x24], #-36
- 37c: 78c06474 ldrsh w20, [x3], #6
- 380: b89ff667 ldrsw x7, [x19], #-1
- 384: fc57e51e ldr d30, [x8], #-130
- 388: bc4155f9 ldr s25, [x15], #21
- 38c: fc05a6ee str d14, [x23], #90
- 390: bc1df408 str s8, [x0], #-33
- 394: f835da2a str x10, [x17, w21, sxtw #3]
- 398: b836d9a4 str w4, [x13, w22, sxtw #2]
- 39c: 3833580d strb w13, [x0, w19, uxtw #0]
- 3a0: 7826cb6c strh w12, [x27, w6, sxtw]
- 3a4: f8706900 ldr x0, [x8, x16]
- 3a8: b87ae880 ldr w0, [x4, x26, sxtx]
- 3ac: 3865db2e ldrb w14, [x25, w5, sxtw #0]
- 3b0: 78714889 ldrh w9, [x4, w17, uxtw]
- 3b4: 38a7789b ldrsb x27, [x4, x7, lsl #0]
- 3b8: 78beca2f ldrsh x15, [x17, w30, sxtw]
- 3bc: 78f6c810 ldrsh w16, [x0, w22, sxtw]
- 3c0: b8bef956 ldrsw x22, [x10, x30, sxtx #2]
- 3c4: fc6afabd ldr d29, [x21, x10, sxtx #3]
- 3c8: bc734963 ldr s3, [x11, w19, uxtw]
- 3cc: fc3d5b8d str d13, [x28, w29, uxtw #3]
- 3d0: bc25fbb7 str s23, [x29, x5, sxtx #2]
- 3d4: f9189d05 str x5, [x8, #12600]
- 3d8: b91ecb1d str w29, [x24, #7880]
- 3dc: 39187a33 strb w19, [x17, #1566]
- 3e0: 791f226d strh w13, [x19, #3984]
- 3e4: f95aa2f3 ldr x19, [x23, #13632]
- 3e8: b9587bb7 ldr w23, [x29, #6264]
- 3ec: 395f7176 ldrb w22, [x11, #2012]
- 3f0: 795d9143 ldrh w3, [x10, #3784]
- 3f4: 399e7e08 ldrsb x8, [x16, #1951]
- 3f8: 799a2697 ldrsh x23, [x20, #3346]
- 3fc: 79df3422 ldrsh w2, [x1, #3994]
- 400: b99c2624 ldrsw x4, [x17, #7204]
- 404: fd5c2374 ldr d20, [x27, #14400]
- 408: bd5fa1d9 ldr s25, [x14, #8096]
- 40c: fd1d595a str d26, [x10, #15024]
- 410: bd1b1869 str s9, [x3, #6936]
- 414: 58002cfb ldr x27, 9b0
- 418: 1800000b ldr w11, 418
- 41c: f8945060 prfum pldl1keep, [x3, #-187]
- 420: d8000000 prfm pldl1keep, 420
- 424: f8ae6ba0 prfm pldl1keep, [x29, x14]
- 428: f99a0080 prfm pldl1keep, [x4, #13312]
- 42c: 1a070035 adc w21, w1, w7
- 430: 3a0700a8 adcs w8, w5, w7
- 434: 5a0e0367 sbc w7, w27, w14
- 438: 7a11009b sbcs w27, w4, w17
- 43c: 9a000380 adc x0, x28, x0
- 440: ba1e030c adcs x12, x24, x30
- 444: da0f0320 sbc x0, x25, x15
- 448: fa030301 sbcs x1, x24, x3
- 44c: 0b340b11 add w17, w24, w20, uxtb #2
- 450: 2b2a278d adds w13, w28, w10, uxth #1
- 454: cb22aa0f sub x15, x16, w2, sxth #2
- 458: 6b2d29bd subs w29, w13, w13, uxth #2
- 45c: 8b2cce8c add x12, x20, w12, sxtw #3
- 460: ab2b877e adds x30, x27, w11, sxtb #1
- 464: cb21c8ee sub x14, x7, w1, sxtw #2
- 468: eb3ba47d subs x29, x3, w27, sxth #1
- 46c: 3a4d400e ccmn w0, w13, #0xe, mi // mi = first
- 470: 7a5132c6 ccmp w22, w17, #0x6, cc // cc = lo, ul, last
- 474: ba5e622e ccmn x17, x30, #0xe, vs
- 478: fa53814c ccmp x10, x19, #0xc, hi // hi = pmore
- 47c: 3a52d8c2 ccmn w6, #0x12, #0x2, le
- 480: 7a4d8924 ccmp w9, #0xd, #0x4, hi // hi = pmore
- 484: ba4b3aab ccmn x21, #0xb, #0xb, cc // cc = lo, ul, last
- 488: fa4d7882 ccmp x4, #0xd, #0x2, vc
- 48c: 1a96804c csel w12, w2, w22, hi // hi = pmore
- 490: 1a912618 csinc w24, w16, w17, cs // cs = hs, nlast
- 494: 5a90b0e6 csinv w6, w7, w16, lt // lt = tstop
- 498: 5a96976b csneg w11, w27, w22, ls // ls = plast
- 49c: 9a9db06a csel x10, x3, x29, lt // lt = tstop
- 4a0: 9a9b374c csinc x12, x26, x27, cc // cc = lo, ul, last
- 4a4: da95c14f csinv x15, x10, x21, gt
- 4a8: da89c6fe csneg x30, x23, x9, gt
- 4ac: 5ac0015e rbit w30, w10
- 4b0: 5ac005fd rev16 w29, w15
- 4b4: 5ac00bdd rev w29, w30
- 4b8: 5ac012b9 clz w25, w21
- 4bc: 5ac01404 cls w4, w0
- 4c0: dac002b1 rbit x17, x21
- 4c4: dac0061d rev16 x29, x16
- 4c8: dac00a95 rev32 x21, x20
- 4cc: dac00e66 rev x6, x19
- 4d0: dac0107e clz x30, x3
- 4d4: dac01675 cls x21, x19
- 4d8: 1ac00b0b udiv w11, w24, w0
- 4dc: 1ace0f3b sdiv w27, w25, w14
- 4e0: 1ad121c3 lsl w3, w14, w17
- 4e4: 1ad825e7 lsr w7, w15, w24
- 4e8: 1ad92a3c asr w28, w17, w25
- 4ec: 1adc2f42 ror w2, w26, w28
- 4f0: 9ada0b25 udiv x5, x25, x26
- 4f4: 9ad10e1b sdiv x27, x16, x17
- 4f8: 9acc22a6 lsl x6, x21, x12
- 4fc: 9acc2480 lsr x0, x4, x12
- 500: 9adc2a3b asr x27, x17, x28
- 504: 9ad12c5c ror x28, x2, x17
- 508: 9bce7dea umulh x10, x15, x14
- 50c: 9b597c6e smulh x14, x3, x25
- 510: 1b0e166f madd w15, w19, w14, w5
- 514: 1b1ae490 msub w16, w4, w26, w25
- 518: 9b023044 madd x4, x2, x2, x12
- 51c: 9b089e3d msub x29, x17, x8, x7
- 520: 9b391083 smaddl x3, w4, w25, x4
- 524: 9b24c73a smsubl x26, w25, w4, x17
- 528: 9bb15f40 umaddl x0, w26, w17, x23
- 52c: 9bbcc6af umsubl x15, w21, w28, x17
- 530: 1e23095b fmul s27, s10, s3
- 534: 1e3918e0 fdiv s0, s7, s25
- 538: 1e2f28c9 fadd s9, s6, s15
- 53c: 1e2a39fd fsub s29, s15, s10
- 540: 1e270a22 fmul s2, s17, s7
- 544: 1e77096b fmul d11, d11, d23
- 548: 1e771ba7 fdiv d7, d29, d23
- 54c: 1e6b2b6e fadd d14, d27, d11
- 550: 1e78388b fsub d11, d4, d24
- 554: 1e6e09ec fmul d12, d15, d14
- 558: 1f1c3574 fmadd s20, s11, s28, s13
- 55c: 1f17f98b fmsub s11, s12, s23, s30
- 560: 1f2935da fnmadd s26, s14, s9, s13
- 564: 1f2574ea fnmadd s10, s7, s5, s29
- 568: 1f4b306f fmadd d15, d3, d11, d12
- 56c: 1f5ec7cf fmsub d15, d30, d30, d17
- 570: 1f6f3e93 fnmadd d19, d20, d15, d15
- 574: 1f6226a9 fnmadd d9, d21, d2, d9
- 578: 1e2040fb fmov s27, s7
- 57c: 1e20c3dd fabs s29, s30
- 580: 1e214031 fneg s17, s1
- 584: 1e21c0c2 fsqrt s2, s6
- 588: 1e22c06a fcvt d10, s3
- 58c: 1e604178 fmov d24, d11
- 590: 1e60c027 fabs d7, d1
- 594: 1e61400b fneg d11, d0
- 598: 1e61c223 fsqrt d3, d17
- 59c: 1e6240dc fcvt s28, d6
- 5a0: 1e3800d6 fcvtzs w22, s6
- 5a4: 9e380360 fcvtzs x0, s27
- 5a8: 1e78005a fcvtzs w26, d2
- 5ac: 9e7800e5 fcvtzs x5, d7
- 5b0: 1e22017c scvtf s28, w11
- 5b4: 9e2201b9 scvtf s25, x13
- 5b8: 1e6202eb scvtf d11, w23
- 5bc: 9e620113 scvtf d19, x8
- 5c0: 1e2602b1 fmov w17, s21
- 5c4: 9e660299 fmov x25, d20
- 5c8: 1e270233 fmov s19, w17
- 5cc: 9e6703a2 fmov d2, x29
- 5d0: 1e2822c0 fcmp s22, s8
- 5d4: 1e7322a0 fcmp d21, d19
- 5d8: 1e202288 fcmp s20, #0.0
- 5dc: 1e602168 fcmp d11, #0.0
- 5e0: 293c19f4 stp w20, w6, [x15, #-32]
- 5e4: 2966387b ldp w27, w14, [x3, #-208]
- 5e8: 69762971 ldpsw x17, x10, [x11, #-80]
- 5ec: a9041dc7 stp x7, x7, [x14, #64]
- 5f0: a9475c0c ldp x12, x23, [x0, #112]
- 5f4: 29b61ccd stp w13, w7, [x6, #-80]!
- 5f8: 29ee405e ldp w30, w16, [x2, #-144]!
- 5fc: 69ee0744 ldpsw x4, x1, [x26, #-144]!
- 600: a9843977 stp x23, x14, [x11, #64]!
- 604: a9f46ebd ldp x29, x27, [x21, #-192]!
- 608: 28ba16b6 stp w22, w5, [x21], #-48
- 60c: 28fc44db ldp w27, w17, [x6], #-32
- 610: 68f61831 ldpsw x17, x6, [x1], #-80
- 614: a8b352ad stp x13, x20, [x21], #-208
- 618: a8c56d5e ldp x30, x27, [x10], #80
- 61c: 28024565 stnp w5, w17, [x11, #16]
- 620: 2874134e ldnp w14, w4, [x26, #-96]
- 624: a8027597 stnp x23, x29, [x12, #32]
- 628: a87b1aa0 ldnp x0, x6, [x21, #-80]
- 62c: 0c40734f ld1 {v15.8b}, [x26]
- 630: 4cdfa177 ld1 {v23.16b, v24.16b}, [x11], #32
- 634: 0cc76ee8 ld1 {v8.1d-v10.1d}, [x23], x7
- 638: 4cdf2733 ld1 {v19.8h-v22.8h}, [x25], #64
- 63c: 0d40c23d ld1r {v29.8b}, [x17]
- 640: 4ddfcaf8 ld1r {v24.4s}, [x23], #4
- 644: 0dd9ccaa ld1r {v10.1d}, [x5], x25
- 648: 4c408d51 ld2 {v17.2d, v18.2d}, [x10]
- 64c: 0cdf85ec ld2 {v12.4h, v13.4h}, [x15], #16
- 650: 4d60c239 ld2r {v25.16b, v26.16b}, [x17]
- 654: 0dffcbc1 ld2r {v1.2s, v2.2s}, [x30], #8
- 658: 4de9ce30 ld2r {v16.2d, v17.2d}, [x17], x9
- 65c: 4cc24999 ld3 {v25.4s-v27.4s}, [x12], x2
- 660: 0c404a7a ld3 {v26.2s-v28.2s}, [x19]
- 664: 4d40e6af ld3r {v15.8h-v17.8h}, [x21]
- 668: 4ddfe9b9 ld3r {v25.4s-v27.4s}, [x13], #12
- 66c: 0dddef8e ld3r {v14.1d-v16.1d}, [x28], x29
- 670: 4cdf07b1 ld4 {v17.8h-v20.8h}, [x29], #64
- 674: 0cc000fb ld4 {v27.8b-v30.8b}, [x7], x0
- 678: 0d60e238 ld4r {v24.8b-v27.8b}, [x17]
- 67c: 0dffe740 ld4r {v0.4h-v3.4h}, [x26], #8
- 680: 0de2eb2c ld4r {v12.2s-v15.2s}, [x25], x2
- 684: ce648376 sha512h q22, q27, v4.2d
- 688: ce6184c7 sha512h2 q7, q6, v1.2d
- 68c: cec081fa sha512su0 v26.2d, v15.2d
- 690: ce6d89a2 sha512su1 v2.2d, v13.2d, v13.2d
- 694: ba5fd3e3 ccmn xzr, xzr, #0x3, le
- 698: 3a5f03e5 ccmn wzr, wzr, #0x5, eq // eq = none
- 69c: fa411be4 ccmp xzr, #0x1, #0x4, ne // ne = any
- 6a0: 7a42cbe2 ccmp wzr, #0x2, #0x2, gt
- 6a4: 93df03ff ror xzr, xzr, #0
- 6a8: c820ffff stlxp w0, xzr, xzr, [sp]
- 6ac: 8822fc7f stlxp w2, wzr, wzr, [x3]
- 6b0: c8247cbf stxp w4, xzr, xzr, [x5]
- 6b4: 88267fff stxp w6, wzr, wzr, [sp]
- 6b8: 4e010fe0 dup v0.16b, wzr
- 6bc: 4e081fe1 mov v1.d[0], xzr
- 6c0: 4e0c1fe1 mov v1.s[1], wzr
- 6c4: 4e0a1fe1 mov v1.h[2], wzr
- 6c8: 4e071fe1 mov v1.b[3], wzr
- 6cc: 4cc0ac3f ld1 {v31.2d, v0.2d}, [x1], x0
- 6d0: 05a08020 mov z0.s, p0/m, s1
- 6d4: 04b0e3e0 incw x0
- 6d8: 0470e7e1 dech x1
- 6dc: 042f9c20 lsl z0.b, z1.b, #7
- 6e0: 043f9c35 lsl z21.h, z1.h, #15
- 6e4: 047f9c20 lsl z0.s, z1.s, #31
- 6e8: 04ff9c20 lsl z0.d, z1.d, #63
- 6ec: 04299420 lsr z0.b, z1.b, #7
- 6f0: 04319160 asr z0.h, z11.h, #15
- 6f4: 0461943e lsr z30.s, z1.s, #31
- 6f8: 04a19020 asr z0.d, z1.d, #63
- 6fc: 042053ff addvl sp, x0, #31
- 700: 047f5401 addpl x1, sp, #-32
- 704: 25208028 cntp x8, p0, p1.b
- 708: 2538cfe0 mov z0.b, #127
- 70c: 2578d001 mov z1.h, #-128
- 710: 25b8efe2 mov z2.s, #32512
- 714: 25f8f007 mov z7.d, #-32768
- 718: a400a3e0 ld1b {z0.b}, p0/z, [sp]
- 71c: a4a8a7ea ld1h {z10.h}, p1/z, [sp, #-8, mul vl]
- 720: a547a814 ld1w {z20.s}, p2/z, [x0, #7, mul vl]
- 724: a4084ffe ld1b {z30.b}, p3/z, [sp, x8]
- 728: a55c53e0 ld1w {z0.s}, p4/z, [sp, x28, lsl #2]
- 72c: a5e1540b ld1d {z11.d}, p5/z, [x0, x1, lsl #3]
- 730: e400fbf6 st1b {z22.b}, p6, [sp]
- 734: e408ffff st1b {z31.b}, p7, [sp, #-8, mul vl]
- 738: e547e400 st1w {z0.s}, p1, [x0, #7, mul vl]
- 73c: e4014be0 st1b {z0.b}, p2, [sp, x1]
- 740: e4a84fe0 st1h {z0.h}, p3, [sp, x8, lsl #1]
- 744: e5f15000 st1d {z0.d}, p4, [x0, x17, lsl #3]
- 748: 858043e0 ldr z0, [sp]
- 74c: 85a043ff ldr z31, [sp, #-256, mul vl]
- 750: e59f5d08 str z8, [x8, #255, mul vl]
- 754: 1e601000 fmov d0, #2.000000000000000000e+00
- 758: 1e603000 fmov d0, #2.125000000000000000e+00
- 75c: 1e621000 fmov d0, #4.000000000000000000e+00
- 760: 1e623000 fmov d0, #4.250000000000000000e+00
- 764: 1e641000 fmov d0, #8.000000000000000000e+00
- 768: 1e643000 fmov d0, #8.500000000000000000e+00
- 76c: 1e661000 fmov d0, #1.600000000000000000e+01
- 770: 1e663000 fmov d0, #1.700000000000000000e+01
- 774: 1e681000 fmov d0, #1.250000000000000000e-01
- 778: 1e683000 fmov d0, #1.328125000000000000e-01
- 77c: 1e6a1000 fmov d0, #2.500000000000000000e-01
- 780: 1e6a3000 fmov d0, #2.656250000000000000e-01
- 784: 1e6c1000 fmov d0, #5.000000000000000000e-01
- 788: 1e6c3000 fmov d0, #5.312500000000000000e-01
- 78c: 1e6e1000 fmov d0, #1.000000000000000000e+00
- 790: 1e6e3000 fmov d0, #1.062500000000000000e+00
- 794: 1e701000 fmov d0, #-2.000000000000000000e+00
- 798: 1e703000 fmov d0, #-2.125000000000000000e+00
- 79c: 1e721000 fmov d0, #-4.000000000000000000e+00
- 7a0: 1e723000 fmov d0, #-4.250000000000000000e+00
- 7a4: 1e741000 fmov d0, #-8.000000000000000000e+00
- 7a8: 1e743000 fmov d0, #-8.500000000000000000e+00
- 7ac: 1e761000 fmov d0, #-1.600000000000000000e+01
- 7b0: 1e763000 fmov d0, #-1.700000000000000000e+01
- 7b4: 1e781000 fmov d0, #-1.250000000000000000e-01
- 7b8: 1e783000 fmov d0, #-1.328125000000000000e-01
- 7bc: 1e7a1000 fmov d0, #-2.500000000000000000e-01
- 7c0: 1e7a3000 fmov d0, #-2.656250000000000000e-01
- 7c4: 1e7c1000 fmov d0, #-5.000000000000000000e-01
- 7c8: 1e7c3000 fmov d0, #-5.312500000000000000e-01
- 7cc: 1e7e1000 fmov d0, #-1.000000000000000000e+00
- 7d0: 1e7e3000 fmov d0, #-1.062500000000000000e+00
- 7d4: f8388098 swp x24, x24, [x4]
- 7d8: f8340010 ldadd x20, x16, [x0]
- 7dc: f8241175 ldclr x4, x21, [x11]
- 7e0: f83e22d0 ldeor x30, x16, [x22]
- 7e4: f82432ef ldset x4, x15, [x23]
- 7e8: f83a5186 ldsmin x26, x6, [x12]
- 7ec: f82f41ee ldsmax x15, x14, [x15]
- 7f0: f82973b9 ldumin x9, x25, [x29]
- 7f4: f82b6194 ldumax x11, x20, [x12]
- 7f8: f8b08216 swpa x16, x22, [x16]
- 7fc: f8b50358 ldadda x21, x24, [x26]
- 800: f8a61206 ldclra x6, x6, [x16]
- 804: f8b02219 ldeora x16, x25, [x16]
- 808: f8bc3218 ldseta x28, x24, [x16]
- 80c: f8ba514f ldsmina x26, x15, [x10]
- 810: f8ad428e ldsmaxa x13, x14, [x20]
- 814: f8a173d7 ldumina x1, x23, [x30]
- 818: f8ae60c2 ldumaxa x14, x2, [x6]
- 81c: f8e38328 swpal x3, x8, [x25]
- 820: f8e003db ldaddal x0, x27, [x30]
- 824: f8e513c5 ldclral x5, x5, [x30]
- 828: f8eb2019 ldeoral x11, x25, [x0]
- 82c: f8ff3260 ldsetal xzr, x0, [x19]
- 830: f8fd513a ldsminal x29, x26, [x9]
- 834: f8fa41ec ldsmaxal x26, x12, [x15]
- 838: f8eb71eb lduminal x11, x11, [x15]
- 83c: f8f96316 ldumaxal x25, x22, [x24]
- 840: f8608171 swpl x0, x17, [x11]
- 844: f86600dd ldaddl x6, x29, [x6]
- 848: f86512a5 ldclrl x5, x5, [x21]
- 84c: f87321f0 ldeorl x19, x16, [x15]
- 850: f87e339b ldsetl x30, x27, [x28]
- 854: f861503c ldsminl x1, x28, [x1]
- 858: f874421d ldsmaxl x20, x29, [x16]
- 85c: f86d73aa lduminl x13, x10, [x29]
- 860: f87d62d3 ldumaxl x29, x19, [x22]
- 864: b82a83e4 swp w10, w4, [sp]
- 868: b83503e8 ldadd w21, w8, [sp]
- 86c: b833138a ldclr w19, w10, [x28]
- 870: b82220b9 ldeor w2, w25, [x5]
- 874: b82332c8 ldset w3, w8, [x22]
- 878: b83350ad ldsmin w19, w13, [x5]
- 87c: b83d42b8 ldsmax w29, w24, [x21]
- 880: b83a7078 ldumin w26, w24, [x3]
- 884: b83862fa ldumax w24, w26, [x23]
- 888: b8af8075 swpa w15, w21, [x3]
- 88c: b8b80328 ldadda w24, w8, [x25]
- 890: b8b41230 ldclra w20, w16, [x17]
- 894: b8a22001 ldeora w2, w1, [x0]
- 898: b8b83064 ldseta w24, w4, [x3]
- 89c: b8ac539f ldsmina w12, wzr, [x28]
- 8a0: b8aa405a ldsmaxa w10, w26, [x2]
- 8a4: b8ac73f0 ldumina w12, w16, [sp]
- 8a8: b8a163ad ldumaxa w1, w13, [x29]
- 8ac: b8e08193 swpal w0, w19, [x12]
- 8b0: b8f101b6 ldaddal w17, w22, [x13]
- 8b4: b8fc13fe ldclral w28, w30, [sp]
- 8b8: b8e1239a ldeoral w1, w26, [x28]
- 8bc: b8e4309e ldsetal w4, w30, [x4]
- 8c0: b8e6535e ldsminal w6, w30, [x26]
- 8c4: b8f04109 ldsmaxal w16, w9, [x8]
- 8c8: b8ec7280 lduminal w12, w0, [x20]
- 8cc: b8e16058 ldumaxal w1, w24, [x2]
- 8d0: b8608309 swpl w0, w9, [x24]
- 8d4: b87a03d0 ldaddl w26, w16, [x30]
- 8d8: b86312ea ldclrl w3, w10, [x23]
- 8dc: b86a21e4 ldeorl w10, w4, [x15]
- 8e0: b862310b ldsetl w2, w11, [x8]
- 8e4: b86a522f ldsminl w10, w15, [x17]
- 8e8: b862418a ldsmaxl w2, w10, [x12]
- 8ec: b86c71af lduminl w12, w15, [x13]
- 8f0: b8626287 ldumaxl w2, w7, [x20]
- 8f4: 042401f9 add z25.b, z15.b, z4.b
- 8f8: 04b10564 sub z4.s, z11.s, z17.s
- 8fc: 65ca0230 fadd z16.d, z17.d, z10.d
- 900: 65d90996 fmul z22.d, z12.d, z25.d
- 904: 65ca05dc fsub z28.d, z14.d, z10.d
- 908: 0456afc1 abs z1.h, p3/m, z30.h
- 90c: 0400044f add z15.b, p1/m, z15.b, z2.b
- 910: 0490920d asr z13.s, p4/m, z13.s, z16.s
- 914: 04daa163 cnt z3.d, p0/m, z11.d
- 918: 04d389c5 lsl z5.d, p2/m, z5.d, z14.d
- 91c: 0411829d lsr z29.b, p0/m, z29.b, z20.b
- 920: 04901774 mul z20.s, p5/m, z20.s, z27.s
- 924: 0417b89a neg z26.b, p6/m, z4.b
- 928: 041eb3d6 not z22.b, p4/m, z30.b
- 92c: 04480b6b smax z11.h, p2/m, z11.h, z27.h
- 930: 048a17dc smin z28.s, p5/m, z28.s, z30.s
- 934: 048105be sub z30.s, p1/m, z30.s, z13.s
- 938: 04dcb35e fabs z30.d, p4/m, z26.d
- 93c: 65808d6f fadd z15.s, p3/m, z15.s, z11.s
- 940: 65cd9e06 fdiv z6.d, p7/m, z6.d, z16.d
- 944: 65869cfb fmax z27.s, p7/m, z27.s, z7.s
- 948: 65c78893 fmin z19.d, p2/m, z19.d, z4.d
- 94c: 658292d1 fmul z17.s, p4/m, z17.s, z22.s
- 950: 04ddaebc fneg z28.d, p3/m, z21.d
- 954: 6582b451 frintm z17.s, p5/m, z2.s
- 958: 6580ade6 frintn z6.s, p3/m, z15.s
- 95c: 65c1b42c frintp z12.d, p5/m, z1.d
- 960: 658da631 fsqrt z17.s, p1/m, z17.s
- 964: 658195af fsub z15.s, p5/m, z15.s, z13.s
- 968: 65eb1f74 fmla z20.d, p7/m, z27.d, z11.d
- 96c: 65f723c3 fmls z3.d, p0/m, z30.d, z23.d
- 970: 65ba4b71 fnmla z17.s, p2/m, z27.s, z26.s
- 974: 65fe76c6 fnmls z6.d, p5/m, z22.d, z30.d
- 978: 04515f42 mla z2.h, p7/m, z26.h, z17.h
- 97c: 04117056 mls z22.b, p4/m, z2.b, z17.b
- 980: 04363338 and z24.d, z25.d, z22.d
- 984: 04a33191 eor z17.d, z12.d, z3.d
- 988: 0470339d orr z29.d, z28.d, z16.d
- 98c: 049a2b86 andv s6, p2, z28.s
- 990: 045824e7 orv h7, p1, z7.h
- 994: 04193509 eorv b9, p5, z8.b
- 998: 040837db smaxv b27, p5, z30.b
- 99c: 044a221a sminv h26, p0, z16.h
- 9a0: 65c73903 fminv d3, p6, z8.d
- 9a4: 65c63b55 fmaxv d21, p6, z26.d
- 9a8: 65982096 fadda s22, p0, s22, z4.s
- 9ac: 04412071 uaddv d17, p0, z3.h
- */
+*/
static const unsigned int insns[] =
{
@@ -1486,30 +1007,30 @@ Disassembly of section .text:
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
0x120cb166, 0x321764bc, 0x52174681, 0x720c0227,
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
- 0x14000000, 0x17ffffd7, 0x14000242, 0x94000000,
- 0x97ffffd4, 0x9400023f, 0x3400000a, 0x34fffa2a,
- 0x3400478a, 0x35000008, 0x35fff9c8, 0x35004728,
- 0xb400000b, 0xb4fff96b, 0xb40046cb, 0xb500001d,
- 0xb5fff91d, 0xb500467d, 0x10000013, 0x10fff8b3,
- 0x10004613, 0x90000013, 0x36300016, 0x3637f836,
- 0x36304596, 0x3758000c, 0x375ff7cc, 0x3758452c,
+ 0x14000000, 0x17ffffd7, 0x140002cd, 0x94000000,
+ 0x97ffffd4, 0x940002ca, 0x3400000a, 0x34fffa2a,
+ 0x340058ea, 0x35000008, 0x35fff9c8, 0x35005888,
+ 0xb400000b, 0xb4fff96b, 0xb400582b, 0xb500001d,
+ 0xb5fff91d, 0xb50057dd, 0x10000013, 0x10fff8b3,
+ 0x10005773, 0x90000013, 0x36300016, 0x3637f836,
+ 0x363056f6, 0x3758000c, 0x375ff7cc, 0x3758568c,
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
- 0x54004300, 0x54000001, 0x54fff541, 0x540042a1,
- 0x54000002, 0x54fff4e2, 0x54004242, 0x54000002,
- 0x54fff482, 0x540041e2, 0x54000003, 0x54fff423,
- 0x54004183, 0x54000003, 0x54fff3c3, 0x54004123,
- 0x54000004, 0x54fff364, 0x540040c4, 0x54000005,
- 0x54fff305, 0x54004065, 0x54000006, 0x54fff2a6,
- 0x54004006, 0x54000007, 0x54fff247, 0x54003fa7,
- 0x54000008, 0x54fff1e8, 0x54003f48, 0x54000009,
- 0x54fff189, 0x54003ee9, 0x5400000a, 0x54fff12a,
- 0x54003e8a, 0x5400000b, 0x54fff0cb, 0x54003e2b,
- 0x5400000c, 0x54fff06c, 0x54003dcc, 0x5400000d,
- 0x54fff00d, 0x54003d6d, 0x5400000e, 0x54ffefae,
- 0x54003d0e, 0x5400000f, 0x54ffef4f, 0x54003caf,
+ 0x54005460, 0x54000001, 0x54fff541, 0x54005401,
+ 0x54000002, 0x54fff4e2, 0x540053a2, 0x54000002,
+ 0x54fff482, 0x54005342, 0x54000003, 0x54fff423,
+ 0x540052e3, 0x54000003, 0x54fff3c3, 0x54005283,
+ 0x54000004, 0x54fff364, 0x54005224, 0x54000005,
+ 0x54fff305, 0x540051c5, 0x54000006, 0x54fff2a6,
+ 0x54005166, 0x54000007, 0x54fff247, 0x54005107,
+ 0x54000008, 0x54fff1e8, 0x540050a8, 0x54000009,
+ 0x54fff189, 0x54005049, 0x5400000a, 0x54fff12a,
+ 0x54004fea, 0x5400000b, 0x54fff0cb, 0x54004f8b,
+ 0x5400000c, 0x54fff06c, 0x54004f2c, 0x5400000d,
+ 0x54fff00d, 0x54004ecd, 0x5400000e, 0x54ffefae,
+ 0x54004e6e, 0x5400000f, 0x54ffef4f, 0x54004e0f,
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
0xd44cad80, 0xd503201f, 0xd69f03e0, 0xd6bf03e0,
0xd5033fdf, 0xd5033e9f, 0xd50332bf, 0xd61f0200,
@@ -1541,7 +1062,7 @@ Disassembly of section .text:
0x791f226d, 0xf95aa2f3, 0xb9587bb7, 0x395f7176,
0x795d9143, 0x399e7e08, 0x799a2697, 0x79df3422,
0xb99c2624, 0xfd5c2374, 0xbd5fa1d9, 0xfd1d595a,
- 0xbd1b1869, 0x58002cfb, 0x1800000b, 0xf8945060,
+ 0xbd1b1869, 0x58003e5b, 0x1800000b, 0xf8945060,
0xd8000000, 0xf8ae6ba0, 0xf99a0080, 0x1a070035,
0x3a0700a8, 0x5a0e0367, 0x7a11009b, 0x9a000380,
0xba1e030c, 0xda0f0320, 0xfa030301, 0x0b340b11,
@@ -1580,58 +1101,92 @@ Disassembly of section .text:
0x4d60c239, 0x0dffcbc1, 0x4de9ce30, 0x4cc24999,
0x0c404a7a, 0x4d40e6af, 0x4ddfe9b9, 0x0dddef8e,
0x4cdf07b1, 0x0cc000fb, 0x0d60e238, 0x0dffe740,
- 0x0de2eb2c, 0xce648376, 0xce6184c7, 0xcec081fa,
- 0xce6d89a2, 0xba5fd3e3, 0x3a5f03e5, 0xfa411be4,
- 0x7a42cbe2, 0x93df03ff, 0xc820ffff, 0x8822fc7f,
- 0xc8247cbf, 0x88267fff, 0x4e010fe0, 0x4e081fe1,
- 0x4e0c1fe1, 0x4e0a1fe1, 0x4e071fe1, 0x4cc0ac3f,
- 0x05a08020, 0x04b0e3e0, 0x0470e7e1, 0x042f9c20,
- 0x043f9c35, 0x047f9c20, 0x04ff9c20, 0x04299420,
- 0x04319160, 0x0461943e, 0x04a19020, 0x042053ff,
- 0x047f5401, 0x25208028, 0x2538cfe0, 0x2578d001,
- 0x25b8efe2, 0x25f8f007, 0xa400a3e0, 0xa4a8a7ea,
- 0xa547a814, 0xa4084ffe, 0xa55c53e0, 0xa5e1540b,
- 0xe400fbf6, 0xe408ffff, 0xe547e400, 0xe4014be0,
- 0xe4a84fe0, 0xe5f15000, 0x858043e0, 0x85a043ff,
- 0xe59f5d08, 0x1e601000, 0x1e603000, 0x1e621000,
- 0x1e623000, 0x1e641000, 0x1e643000, 0x1e661000,
- 0x1e663000, 0x1e681000, 0x1e683000, 0x1e6a1000,
- 0x1e6a3000, 0x1e6c1000, 0x1e6c3000, 0x1e6e1000,
- 0x1e6e3000, 0x1e701000, 0x1e703000, 0x1e721000,
- 0x1e723000, 0x1e741000, 0x1e743000, 0x1e761000,
- 0x1e763000, 0x1e781000, 0x1e783000, 0x1e7a1000,
- 0x1e7a3000, 0x1e7c1000, 0x1e7c3000, 0x1e7e1000,
- 0x1e7e3000, 0xf8388098, 0xf8340010, 0xf8241175,
- 0xf83e22d0, 0xf82432ef, 0xf83a5186, 0xf82f41ee,
- 0xf82973b9, 0xf82b6194, 0xf8b08216, 0xf8b50358,
- 0xf8a61206, 0xf8b02219, 0xf8bc3218, 0xf8ba514f,
- 0xf8ad428e, 0xf8a173d7, 0xf8ae60c2, 0xf8e38328,
- 0xf8e003db, 0xf8e513c5, 0xf8eb2019, 0xf8ff3260,
- 0xf8fd513a, 0xf8fa41ec, 0xf8eb71eb, 0xf8f96316,
- 0xf8608171, 0xf86600dd, 0xf86512a5, 0xf87321f0,
- 0xf87e339b, 0xf861503c, 0xf874421d, 0xf86d73aa,
- 0xf87d62d3, 0xb82a83e4, 0xb83503e8, 0xb833138a,
- 0xb82220b9, 0xb82332c8, 0xb83350ad, 0xb83d42b8,
- 0xb83a7078, 0xb83862fa, 0xb8af8075, 0xb8b80328,
- 0xb8b41230, 0xb8a22001, 0xb8b83064, 0xb8ac539f,
- 0xb8aa405a, 0xb8ac73f0, 0xb8a163ad, 0xb8e08193,
- 0xb8f101b6, 0xb8fc13fe, 0xb8e1239a, 0xb8e4309e,
- 0xb8e6535e, 0xb8f04109, 0xb8ec7280, 0xb8e16058,
- 0xb8608309, 0xb87a03d0, 0xb86312ea, 0xb86a21e4,
- 0xb862310b, 0xb86a522f, 0xb862418a, 0xb86c71af,
- 0xb8626287, 0x042401f9, 0x04b10564, 0x65ca0230,
- 0x65d90996, 0x65ca05dc, 0x0456afc1, 0x0400044f,
- 0x0490920d, 0x04daa163, 0x04d389c5, 0x0411829d,
- 0x04901774, 0x0417b89a, 0x041eb3d6, 0x04480b6b,
- 0x048a17dc, 0x048105be, 0x04dcb35e, 0x65808d6f,
- 0x65cd9e06, 0x65869cfb, 0x65c78893, 0x658292d1,
- 0x04ddaebc, 0x6582b451, 0x6580ade6, 0x65c1b42c,
- 0x658da631, 0x658195af, 0x65eb1f74, 0x65f723c3,
- 0x65ba4b71, 0x65fe76c6, 0x04515f42, 0x04117056,
- 0x04363338, 0x04a33191, 0x0470339d, 0x049a2b86,
- 0x045824e7, 0x04193509, 0x040837db, 0x044a221a,
- 0x65c73903, 0x65c63b55, 0x65982096, 0x04412071,
-
+ 0x0de2eb2c, 0x0e31baf6, 0x4e31bb9b, 0x0e71b8a4,
+ 0x4e71b907, 0x4eb1b8e6, 0x0e30a841, 0x4e30ab7a,
+ 0x0e70aa0f, 0x4e70a862, 0x4eb0a9cd, 0x6e30f9cd,
+ 0x0e31ab38, 0x4e31ab17, 0x0e71a8a4, 0x4e71aa93,
+ 0x4eb1aa0f, 0x6eb0f820, 0x0e20b8a4, 0x4e20bab4,
+ 0x0e60b98b, 0x4e60bbdd, 0x0ea0ba0f, 0x4ea0bad5,
+ 0x4ee0b8a4, 0x0ea0f9ee, 0x4ea0faf6, 0x4ee0fb59,
+ 0x2ea0f8e6, 0x6ea0f9ac, 0x6ee0f9ee, 0x2ea1f9cd,
+ 0x6ea1f9ee, 0x6ee1f949, 0x2e205b59, 0x6e205bbc,
+ 0x0e2c1d6a, 0x4e351e93, 0x0ead1d8b, 0x4eb31e51,
+ 0x2e371ed5, 0x6e311e0f, 0x0e3686b4, 0x4e398717,
+ 0x0e7c877a, 0x4e6784c5, 0x0ea884e6, 0x4eb1860f,
+ 0x4ef1860f, 0x0e3bd759, 0x4e32d630, 0x4e7dd79b,
+ 0x2e3a8738, 0x6e31860f, 0x2e7b8759, 0x6e7085ee,
+ 0x2eac856a, 0x6eaf85cd, 0x6ef085ee, 0x0eb6d6b4,
+ 0x4ea3d441, 0x4ef8d6f6, 0x0e209ffe, 0x4e309dee,
+ 0x0e649c62, 0x4e689ce6, 0x0ea59c83, 0x4ea99d07,
+ 0x2e3adf38, 0x6e22dc20, 0x6e7ddf9b, 0x0e7f97dd,
+ 0x4e6794c5, 0x0ea794c5, 0x4ebf97dd, 0x0e2dcd8b,
+ 0x4e3bcf59, 0x4e62cc20, 0x2e6097fe, 0x6e629420,
+ 0x2eb39651, 0x6ebe97bc, 0x0ebbcf59, 0x4eabcd49,
+ 0x4efbcf59, 0x2e2efdac, 0x6e31fe0f, 0x6e6dfd8b,
+ 0x0e2c656a, 0x4e336651, 0x0e7a6738, 0x4e7766d5,
+ 0x0eb96717, 0x4ea26420, 0x0e32f630, 0x4e2cf56a,
+ 0x4e68f4e6, 0x0e3e6fbc, 0x4e286ce6, 0x0e676cc5,
+ 0x4e676cc5, 0x0eb66eb4, 0x4eb36e51, 0x0eb1f60f,
+ 0x4eb3f651, 0x4efff7dd, 0x2e3c8f7a, 0x6e3e8fbc,
+ 0x2e638c41, 0x6e7d8f9b, 0x2ea28c20, 0x6eb68eb4,
+ 0x6efe8fbc, 0x0e31e60f, 0x4e2ee5ac, 0x4e6ce56a,
+ 0x0e3e37bc, 0x4e3e37bc, 0x0e753693, 0x4e7836f6,
+ 0x0eac356a, 0x4ea634a4, 0x4ee037fe, 0x2eb6e6b4,
+ 0x6eaae528, 0x6ee0e7fe, 0x0e333e51, 0x4e2c3d6a,
+ 0x0e7d3f9b, 0x4e643c62, 0x0eba3f38, 0x4ea63ca4,
+ 0x4ee53c83, 0x2e2ae528, 0x6e38e6f6, 0x6e73e651,
+ 0xba5fd3e3, 0x3a5f03e5, 0xfa411be4, 0x7a42cbe2,
+ 0x93df03ff, 0xc820ffff, 0x8822fc7f, 0xc8247cbf,
+ 0x88267fff, 0x4e010fe0, 0x4e081fe1, 0x4e0c1fe1,
+ 0x4e0a1fe1, 0x4e071fe1, 0x4cc0ac3f, 0x05a08020,
+ 0x04b0e3e0, 0x0470e7e1, 0x042f9c20, 0x043f9c35,
+ 0x047f9c20, 0x04ff9c20, 0x04299420, 0x04319160,
+ 0x0461943e, 0x04a19020, 0x042053ff, 0x047f5401,
+ 0x25208028, 0x2538cfe0, 0x2578d001, 0x25b8efe2,
+ 0x25f8f007, 0xa400a3e0, 0xa4a8a7ea, 0xa547a814,
+ 0xa4084ffe, 0xa55c53e0, 0xa5e1540b, 0xe400fbf6,
+ 0xe408ffff, 0xe547e400, 0xe4014be0, 0xe4a84fe0,
+ 0xe5f15000, 0x858043e0, 0x85a043ff, 0xe59f5d08,
+ 0x1e601000, 0x1e603000, 0x1e621000, 0x1e623000,
+ 0x1e641000, 0x1e643000, 0x1e661000, 0x1e663000,
+ 0x1e681000, 0x1e683000, 0x1e6a1000, 0x1e6a3000,
+ 0x1e6c1000, 0x1e6c3000, 0x1e6e1000, 0x1e6e3000,
+ 0x1e701000, 0x1e703000, 0x1e721000, 0x1e723000,
+ 0x1e741000, 0x1e743000, 0x1e761000, 0x1e763000,
+ 0x1e781000, 0x1e783000, 0x1e7a1000, 0x1e7a3000,
+ 0x1e7c1000, 0x1e7c3000, 0x1e7e1000, 0x1e7e3000,
+ 0xf82d83a5, 0xf8380355, 0xf8381303, 0xf83a21f7,
+ 0xf8353303, 0xf8285299, 0xf8304051, 0xf8217300,
+ 0xf8246183, 0xf8bf815c, 0xf8ba0182, 0xf8b0103f,
+ 0xf8ad201d, 0xf8b3322c, 0xf8b6538d, 0xf8be403f,
+ 0xf8ba709c, 0xf8be60c4, 0xf8fe81fa, 0xf8e90188,
+ 0xf8e01034, 0xf8f82002, 0xf8e93358, 0xf8f0507e,
+ 0xf8ea4157, 0xf8e47050, 0xf8eb6148, 0xf86f8051,
+ 0xf86a018c, 0xf86f104d, 0xf8672354, 0xf8703044,
+ 0xf86451ec, 0xf87541f0, 0xf86b72f5, 0xf86c62fa,
+ 0xb83c816e, 0xb8380181, 0xb83f120a, 0xb8272062,
+ 0xb82d3233, 0xb8305023, 0xb82b40be, 0xb82873af,
+ 0xb83e6280, 0xb8a782f4, 0xb8bc0375, 0xb8b91025,
+ 0xb8b723f0, 0xb8a5312c, 0xb8bc53af, 0xb8b6427f,
+ 0xb8bf71c5, 0xb8b061ff, 0xb8fb8214, 0xb8ec012b,
+ 0xb8e6123e, 0xb8fb23dc, 0xb8e7328a, 0xb8ea5304,
+ 0xb8f142d1, 0xb8e371fd, 0xb8f66273, 0xb87681e2,
+ 0xb866020c, 0xb86b12ed, 0xb861227e, 0xb8653051,
+ 0xb87051b6, 0xb86a43b5, 0xb87b736c, 0xb86363e1,
+ 0xce312677, 0xce0e1b5b, 0xce7e8ed4, 0xce9ed858,
+ 0xce768151, 0xce718451, 0xcec08300, 0xce628ad9,
+ 0x04e30191, 0x04f0079d, 0x65dc0126, 0x65870887,
+ 0x658806c9, 0x0416b7db, 0x0440021a, 0x04d09903,
+ 0x04dabb55, 0x04138096, 0x04518071, 0x041008c1,
+ 0x0497bce9, 0x045eb4b6, 0x040813c8, 0x04ca0171,
+ 0x0481035c, 0x04dcadbc, 0x658098b0, 0x658d89ed,
+ 0x6586957a, 0x65879096, 0x65829233, 0x04ddac4e,
+ 0x6582b6e3, 0x6580a626, 0x6581b21b, 0x658dbc62,
+ 0x65819266, 0x65f8150c, 0x65b72151, 0x65b05db3,
+ 0x65f165c0, 0x04944ac8, 0x048f607b, 0x042430f4,
+ 0x04a83007, 0x046432d3, 0x04da3569, 0x04583e05,
+ 0x04592c36, 0x04c83608, 0x048a248f, 0x658727a8,
+ 0x65c633bc, 0x65982c49, 0x040120fc,
};
// END Generated code -- do not edit
diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
index 711d9db07e5..7ff9c018bef 100644
--- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
@@ -217,7 +217,7 @@ class Instruction_aarch64 {
static void patch(address a, int msb, int lsb, uint64_t val) {
int nbits = msb - lsb + 1;
- guarantee(val < (1U << nbits), "Field too big for insn");
+ guarantee(val < (1ULL << nbits), "Field too big for insn");
assert_cond(msb >= lsb);
unsigned mask = (1U << nbits) - 1;
val <<= lsb;
@@ -445,8 +445,8 @@ class Address {
}
Register base() const {
- guarantee((_mode == base_plus_offset | _mode == base_plus_offset_reg
- | _mode == post | _mode == post_reg),
+ guarantee((_mode == base_plus_offset || _mode == base_plus_offset_reg
+ || _mode == post || _mode == post_reg),
"wrong mode");
return _base;
}
@@ -1371,6 +1371,21 @@ class Assembler : public AbstractAssembler {
#undef INSN
+// Generates FloatRegister loads that take only a base register Rn.
+// `size` is placed in bits 31:30 and `opc` in bits 23:22; bit 21 and the
+// nine-bit field 20:12 are emitted as zero and bits 11:10 as 0b01.
+// NOTE(review): this looks like the post-indexed LDR (immediate, SIMD&FP)
+// form with a zero offset -- confirm against the Arm ARM.
+#define INSN(NAME, size, opc) \
+ void NAME(FloatRegister Rt, Register Rn) { \
+ starti; \
+ f(size, 31, 30), f(0b111100, 29, 24), f(opc, 23, 22), f(0, 21); \
+ f(0, 20, 12), f(0b01, 11, 10); \
+ rf(Rn, 5), rf((Register)Rt, 0); \
+ }
+
+ // The name suffix selects the size/opc pair passed to the macro.
+ INSN(ldrs, 0b10, 0b01);
+ INSN(ldrd, 0b11, 0b01);
+ INSN(ldrq, 0b00, 0b11);
+
+#undef INSN
+
+
#define INSN(NAME, opc, V) \
void NAME(address dest, prfop op = PLDL1KEEP) { \
int64_t offset = (dest - pc()) >> 2; \
@@ -1508,6 +1523,21 @@ class Assembler : public AbstractAssembler {
#undef INSN
+/* SIMD extensions
+ *
+ * We just use FloatRegister in the following. They are exactly the same
+ * as SIMD registers.
+ */
+public:
+
+ enum SIMD_Arrangement {
+ T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D, T1Q
+ };
+
+ enum SIMD_RegVariant {
+ B, H, S, D, Q
+ };
+
enum shift_kind { LSL, LSR, ASR, ROR };
void op_shifted_reg(unsigned decode,
@@ -1887,6 +1917,30 @@ void mvnw(Register Rd, Register Rm,
i_fmovs(Vd, Vn);
}
+private:
+ // Shared encoder behind fcvtl (do_extend == true) and fcvtn
+ // (do_extend == false).
+ // Vd/Ta - destination register and its arrangement
+ // Vn/Tb - source register and its arrangement
+ // SIMD_Arrangement values are declared in pairs per element size
+ // (T8B,T16B / T4H,T8H / T2S,T4S / T1D,T2D), so `T >> 1` is the element-size
+ // class and `T & 1` picks the second arrangement of a pair.
+ void _fcvt_narrow_extend(FloatRegister Vd, SIMD_Arrangement Ta,
+ FloatRegister Vn, SIMD_Arrangement Tb, bool do_extend) {
+ // Extending requires the destination to be exactly one size class wider
+ // than the source; narrowing requires the opposite.
+ assert((do_extend && (Tb >> 1) + 1 == (Ta >> 1))
+ || (!do_extend && (Ta >> 1) + 1 == (Tb >> 1)), "Incompatible arrangement");
+ starti;
+ // Bit 30 comes from the narrower-element side (source when extending,
+ // destination when narrowing).
+ int op30 = (do_extend ? Tb : Ta) & 1;
+ // Bit 22 comes from the low bit of the wider side's size class.
+ int op22 = ((do_extend ? Ta : Tb) >> 1) & 1;
+ f(0, 31), f(op30, 30), f(0b0011100, 29, 23), f(op22, 22);
+ // Bit 12 is the only opcode bit that differs between the two directions.
+ f(0b100001011, 21, 13), f(do_extend ? 1 : 0, 12), f(0b10, 11, 10);
+ rf(Vn, 5), rf(Vd, 0);
+ }
+
+public:
+ void fcvtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) {
+ assert(Tb == T4H || Tb == T8H|| Tb == T2S || Tb == T4S, "invalid arrangement");
+ _fcvt_narrow_extend(Vd, Ta, Vn, Tb, true);
+ }
+
+ void fcvtn(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) {
+ assert(Ta == T4H || Ta == T8H|| Ta == T2S || Ta == T4S, "invalid arrangement");
+ _fcvt_narrow_extend(Vd, Ta, Vn, Tb, false);
+ }
+
#undef INSN
// Floating-point data-processing (2 source)
@@ -2023,6 +2077,43 @@ void mvnw(Register Rd, Register Rm,
#undef INSN
+ enum sign_kind { SIGNED, UNSIGNED };
+
+private:
+  // Shared encoder for the scalar SIMD&FP integer-to-floating-point converts
+  // generated below as scvtfs / scvtfd.
+  //   sign - SIGNED clears bit 29, UNSIGNED sets it
+  //   sz   - precision selector as passed by the INSN wrappers:
+  //          0 for the single-precision form, 1 for the double-precision form
+  //   Rd   - destination register (bits 4:0)
+  //   Rn   - source register (bits 9:5)
+  void _xcvtf_scalar_integer(sign_kind sign, unsigned sz,
+                             FloatRegister Rd, FloatRegister Rn) {
+    starti;
+    f(0b01, 31, 30), f(sign == SIGNED ? 0 : 1, 29);
+    // 0b111100 is six bits wide, so the field is 28:23 (as in the vector
+    // variant); the previous 27:23 range is only five bits wide.
+    f(0b111100, 28, 23);
+    // Callers pass sz as 0 or 1, so bit 22 must take sz directly. The former
+    // (sz >> 1) & 1 was always 0 and made scvtfd encode as single precision.
+    f(sz & 1, 22), f(0b100001110110, 21, 10);
+    rf(Rn, 5), rf(Rd, 0);
+  }
+
+public:
+#define INSN(NAME, sign, sz) \
+ void NAME(FloatRegister Rd, FloatRegister Rn) { \
+ _xcvtf_scalar_integer(sign, sz, Rd, Rn); \
+ }
+
+ INSN(scvtfs, SIGNED, 0);
+ INSN(scvtfd, SIGNED, 1);
+
+#undef INSN
+
+private:
+ // Encoder for the vector integer-to-floating-point convert used by scvtfv.
+ // sign - SIGNED clears bit 29, UNSIGNED sets it
+ // T - arrangement; only T2S, T4S and T2D are accepted
+ // Rd - destination register (bits 4:0), Rn - source register (bits 9:5)
+ void _xcvtf_vector_integer(sign_kind sign, SIMD_Arrangement T,
+ FloatRegister Rd, FloatRegister Rn) {
+ assert(T == T2S || T == T4S || T == T2D, "invalid arrangement");
+ starti;
+ // T & 1 is set for T4S and T2D (the second arrangement of each enum pair)
+ // and goes to bit 30.
+ f(0, 31), f(T & 1, 30), f(sign == SIGNED ? 0 : 1, 29);
+ // (T >> 1) & 1 is 0 for the S arrangements and 1 for T2D; it goes to bit 22.
+ f(0b011100, 28, 23), f((T >> 1) & 1, 22), f(0b100001110110, 21, 10);
+ rf(Rn, 5), rf(Rd, 0);
+ }
+
+public:
+ void scvtfv(SIMD_Arrangement T, FloatRegister Rd, FloatRegister Rn) {
+ _xcvtf_vector_integer(SIGNED, T, Rd, Rn);
+ }
+
// Floating-point compare
void float_compare(unsigned op31, unsigned type,
unsigned op, unsigned op2,
@@ -2152,21 +2243,6 @@ void mvnw(Register Rd, Register Rm,
INSN(frintzd, 0b01, 0b011);
#undef INSN
-/* SIMD extensions
- *
- * We just use FloatRegister in the following. They are exactly the same
- * as SIMD registers.
- */
- public:
-
- enum SIMD_Arrangement {
- T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D, T1Q
- };
-
- enum SIMD_RegVariant {
- B, H, S, D, Q
- };
-
private:
static short SIMD_Size_in_bytes[];
@@ -2324,6 +2400,11 @@ void mvnw(Register Rd, Register Rm,
INSN(smullv, 0, 0b110000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(umullv, 1, 0b110000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(umlalv, 1, 0b100000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
+ INSN(maxv, 0, 0b011001, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
+ INSN(minv, 0, 0b011011, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
+ INSN(cmeq, 1, 0b100011, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
+ INSN(cmgt, 0, 0b001101, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
+ INSN(cmge, 0, 0b001111, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
#undef INSN
@@ -2343,6 +2424,8 @@ void mvnw(Register Rd, Register Rm,
INSN(negr, 1, 0b100000101110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
INSN(notr, 1, 0b100000010110, 0); // accepted arrangements: T8B, T16B
INSN(addv, 0, 0b110001101110, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
+ INSN(smaxv, 0, 0b110000101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
+ INSN(sminv, 0, 0b110001101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
INSN(cls, 0, 0b100000010010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(clz, 1, 0b100000010010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(cnt, 0, 0b100000010110, 0); // accepted arrangements: T8B, T16B
@@ -2407,6 +2490,9 @@ void mvnw(Register Rd, Register Rm,
INSN(fmls, 0, 1, 0b110011);
INSN(fmax, 0, 0, 0b111101);
INSN(fmin, 0, 1, 0b111101);
+ INSN(fcmeq, 0, 0, 0b111001);
+ INSN(fcmgt, 1, 1, 0b111001);
+ INSN(fcmge, 1, 0, 0b111001);
#undef INSN
@@ -2464,6 +2550,40 @@ void mvnw(Register Rd, Register Rm,
#undef INSN
+#define INSN(NAME, opc) \
+ void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, FloatRegister Va) { \
+ starti; \
+ assert(T == T16B, "arrangement must be T16B"); \
+ f(0b11001110, 31, 24), f(opc, 23, 21), rf(Vm, 16), f(0b0, 15, 15), rf(Va, 10), rf(Vn, 5), rf(Vd, 0); \
+ }
+
+ INSN(eor3, 0b000);
+ INSN(bcax, 0b001);
+
+#undef INSN
+
+#define INSN(NAME, opc) \
+ void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, unsigned imm) { \
+ starti; \
+ assert(T == T2D, "arrangement must be T2D"); \
+ f(0b11001110, 31, 24), f(opc, 23, 21), rf(Vm, 16), f(imm, 15, 10), rf(Vn, 5), rf(Vd, 0); \
+ }
+
+ INSN(xar, 0b100);
+
+#undef INSN
+
+#define INSN(NAME, opc) \
+ void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
+ starti; \
+ assert(T == T2D, "arrangement must be T2D"); \
+ f(0b11001110, 31, 24), f(opc, 23, 21), rf(Vm, 16), f(0b100011, 15, 10), rf(Vn, 5), rf(Vd, 0); \
+ }
+
+ INSN(rax1, 0b011);
+
+#undef INSN
+
#define INSN(NAME, opc) \
void NAME(FloatRegister Vd, FloatRegister Vn) { \
starti; \
@@ -2506,10 +2626,20 @@ void mvnw(Register Rd, Register Rm,
rf(Vn, 5), rf(Vd, 0);
}
- // (double) {a, b} -> (a + b)
- void faddpd(FloatRegister Vd, FloatRegister Vn) {
+ // (long) {a, b} -> (a + b)
+ void addpd(FloatRegister Vd, FloatRegister Vn) {
starti;
- f(0b0111111001110000110110, 31, 10);
+ f(0b0101111011110001101110, 31, 10);
+ rf(Vn, 5), rf(Vd, 0);
+ }
+
+ // (Floating-point) {a, b} -> (a + b)
+ void faddp(FloatRegister Vd, FloatRegister Vn, SIMD_RegVariant type) {
+ assert(type == D || type == S, "Wrong type for faddp");
+ starti;
+ f(0b011111100, 31, 23);
+ f(type == D ? 1 : 0, 22);
+ f(0b110000110110, 21, 10);
rf(Vn, 5), rf(Vd, 0);
}
@@ -2558,6 +2688,8 @@ void mvnw(Register Rd, Register Rm,
INSN(shl, 0, 0b010101, /* isSHR = */ false);
INSN(sshr, 0, 0b000001, /* isSHR = */ true);
INSN(ushr, 1, 0b000001, /* isSHR = */ true);
+ INSN(usra, 1, 0b000101, /* isSHR = */ true);
+ INSN(ssra, 0, 0b000101, /* isSHR = */ true);
#undef INSN
@@ -2576,29 +2708,48 @@ void mvnw(Register Rd, Register Rm,
#undef INSN
private:
- void _ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
+ void _xshll(sign_kind sign, FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
starti;
/* The encodings for the immh:immb fields (bits 22:16) are
- * 0001 xxx 8H, 8B/16b shift = xxx
+ * 0001 xxx 8H, 8B/16B shift = xxx
* 001x xxx 4S, 4H/8H shift = xxxx
* 01xx xxx 2D, 2S/4S shift = xxxxx
* 1xxx xxx RESERVED
*/
assert((Tb >> 1) + 1 == (Ta >> 1), "Incompatible arrangement");
assert((1 << ((Tb>>1)+3)) > shift, "Invalid shift value");
- f(0, 31), f(Tb & 1, 30), f(0b1011110, 29, 23), f((1 << ((Tb>>1)+3))|shift, 22, 16);
+ f(0, 31), f(Tb & 1, 30), f(sign == SIGNED ? 0 : 1, 29), f(0b011110, 28, 23);
+ f((1 << ((Tb>>1)+3))|shift, 22, 16);
f(0b101001, 15, 10), rf(Vn, 5), rf(Vd, 0);
}
public:
void ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
assert(Tb == T8B || Tb == T4H || Tb == T2S, "invalid arrangement");
- _ushll(Vd, Ta, Vn, Tb, shift);
+ _xshll(UNSIGNED, Vd, Ta, Vn, Tb, shift);
}
void ushll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
assert(Tb == T16B || Tb == T8H || Tb == T4S, "invalid arrangement");
- _ushll(Vd, Ta, Vn, Tb, shift);
+ _xshll(UNSIGNED, Vd, Ta, Vn, Tb, shift);
+ }
+
+ void uxtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) {
+ ushll(Vd, Ta, Vn, Tb, 0);
+ }
+
+ void sshll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
+ assert(Tb == T8B || Tb == T4H || Tb == T2S, "invalid arrangement");
+ _xshll(SIGNED, Vd, Ta, Vn, Tb, shift);
+ }
+
+ void sshll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
+ assert(Tb == T16B || Tb == T8H || Tb == T4S, "invalid arrangement");
+ _xshll(SIGNED, Vd, Ta, Vn, Tb, shift);
+ }
+
+ void sxtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) {
+ sshll(Vd, Ta, Vn, Tb, 0);
}
// Move from general purpose register
@@ -2649,6 +2800,15 @@ void mvnw(Register Rd, Register Rm,
f(0b100001010010, 21, 10), rf(Vn, 5), rf(Vd, 0);
}
+ // Emits xtn from Vn (arrangement Ta) into Vd (arrangement Tb).
+ // The destination element size class must be exactly one step below the
+ // source's, and the destination may not be the D class (size_b < 3).
+ void xtn(FloatRegister Vd, SIMD_Arrangement Tb, FloatRegister Vn, SIMD_Arrangement Ta) {
+ starti;
+ // Arrangements are declared in pairs per element size, so `>> 1` yields the
+ // size class: 0 = B, 1 = H, 2 = S, 3 = D.
+ int size_b = (int)Tb >> 1;
+ int size_a = (int)Ta >> 1;
+ assert(size_b < 3 && size_b == size_a - 1, "Invalid size specifier");
+ // Bit 30 is the low bit of the destination arrangement; bits 23:22 carry the
+ // destination size class.
+ f(0, 31), f(Tb & 1, 30), f(0b001110, 29, 24), f(size_b, 23, 22);
+ f(0b100001001010, 21, 10), rf(Vn, 5), rf(Vd, 0);
+ }
+
void dup(FloatRegister Vd, SIMD_Arrangement T, Register Xs)
{
starti;
@@ -3062,13 +3222,6 @@ void mvnw(Register Rd, Register Rm,
Assembler(CodeBuffer* code) : AbstractAssembler(code) {
}
- virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
- Register tmp,
- int offset) {
- ShouldNotCallThis();
- return RegisterOrConstant();
- }
-
// Stack overflow checking
virtual void bang_stack_with_offset(int offset);
diff --git a/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp
index 99469bb04c0..119bc979e0a 100644
--- a/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp
@@ -38,6 +38,19 @@
#define __ ce->masm()->
+// Emits the out-of-line stub for a C1 safepoint poll: it records the
+// address of the poll site in the current thread and jumps to the shared
+// polling-page return handler.
+void C1SafepointPollStub::emit_code(LIR_Assembler* ce) {
+ __ bind(_entry);
+ // pc() - offset() is the start of the code being emitted; adding
+ // safepoint_offset() gives the absolute address of the recorded poll site.
+ InternalAddress safepoint_pc(ce->masm()->pc() - ce->masm()->offset() + safepoint_offset());
+ __ adr(rscratch1, safepoint_pc);
+ // Store that address in the thread's saved_exception_pc slot.
+ __ str(rscratch1, Address(rthread, JavaThread::saved_exception_pc_offset()));
+
+ assert(SharedRuntime::polling_page_return_handler_blob() != NULL,
+ "polling page return stub not created yet");
+ address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point();
+
+ // far_jump: a jump, not a call, into the handler blob's entry point.
+ __ far_jump(RuntimeAddress(stub));
+}
+
void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
Metadata *m = _method->as_constant_ptr()->as_metadata();
diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
index b5ab058d44c..8dac1d9ebe8 100644
--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
@@ -504,7 +504,7 @@ void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) {
}
}
-void LIR_Assembler::return_op(LIR_Opr result) {
+void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) {
assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == r0, "word returns are in r0,");
// Pop the stack before the safepoint code
@@ -514,7 +514,9 @@ void LIR_Assembler::return_op(LIR_Opr result) {
__ reserved_stack_check();
}
- __ fetch_and_read_polling_page(rscratch1, relocInfo::poll_return_type);
+ code_stub->set_safepoint_offset(__ offset());
+ __ relocate(relocInfo::poll_return_type);
+ __ safepoint_poll(*code_stub->entry(), true /* at_return */, false /* acquire */, true /* in_nmethod */);
__ ret(lr);
}
diff --git a/src/hotspot/cpu/aarch64/c1_globals_aarch64.hpp b/src/hotspot/cpu/aarch64/c1_globals_aarch64.hpp
index 4e4262d5d6d..d2520014ed1 100644
--- a/src/hotspot/cpu/aarch64/c1_globals_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/c1_globals_aarch64.hpp
@@ -34,8 +34,6 @@
#ifndef TIERED
define_pd_global(bool, BackgroundCompilation, true );
-define_pd_global(bool, UseTLAB, true );
-define_pd_global(bool, ResizeTLAB, true );
define_pd_global(bool, InlineIntrinsics, true );
define_pd_global(bool, PreferInterpreterNativeStubs, false);
define_pd_global(bool, ProfileTraps, false);
diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
index 24b32187b7c..032e9e80756 100644
--- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
@@ -538,6 +538,70 @@ void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1,
BIND(DONE);
}
+// Emits code that searches the cnt1 bytes starting at str1 for the byte in
+// ch and leaves the index of a match in result, or -1 when there is none.
+// Elements are handled as single bytes (ldrb / byte replication below);
+// NOTE(review): the "L" presumably stands for Latin-1 strings -- confirm.
+// str1, cnt1 and ch are modified; tmp1-tmp3, rscratch1 and rscratch2 are
+// used as temporaries.
+void C2_MacroAssembler::stringL_indexof_char(Register str1, Register cnt1,
+ Register ch, Register result,
+ Register tmp1, Register tmp2, Register tmp3)
+{
+ Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE;
+ // cnt1_neg aliases cnt1: the count register is reused as a negative offset
+ // that runs up towards zero.
+ Register cnt1_neg = cnt1;
+ Register ch1 = rscratch1;
+ Register result_tmp = rscratch2;
+
+ // An empty input can never match.
+ cbz(cnt1, NOMATCH);
+
+ // Fewer than 8 bytes: use the byte-at-a-time loop.
+ cmp(cnt1, (u1)8);
+ br(LT, DO1_SHORT);
+
+ // Replicate the search byte into all 8 bytes of ch.
+ orr(ch, ch, ch, LSL, 8);
+ orr(ch, ch, ch, LSL, 16);
+ orr(ch, ch, ch, LSL, 32);
+
+ // Point str1 at the last 8-byte window (start + cnt1 - 8) and index it with
+ // a negative offset; result_tmp keeps cnt1 - 8 to rebuild the index later.
+ sub(cnt1, cnt1, 8);
+ mov(result_tmp, cnt1);
+ lea(str1, Address(str1, cnt1));
+ sub(cnt1_neg, zr, cnt1);
+
+ mov(tmp3, 0x0101010101010101);
+
+ BIND(CH1_LOOP);
+ ldr(ch1, Address(str1, cnt1_neg));
+ // After the eor, a byte equal to the search byte becomes 0x00.
+ eor(ch1, ch, ch1);
+ // Zero-byte test: (x - 0x01..01) & ~(x | 0x7f..7f) is non-zero when x
+ // contains a zero byte; bics also sets the flags for the branch below.
+ sub(tmp1, ch1, tmp3);
+ orr(tmp2, ch1, 0x7f7f7f7f7f7f7f7f);
+ bics(tmp1, tmp1, tmp2);
+ br(NE, HAS_ZERO);
+ adds(cnt1_neg, cnt1_neg, 8);
+ br(LT, CH1_LOOP);
+
+ // The offset reached zero or above. If it is still below 8, run one more
+ // iteration on the final window at offset 0 (which may overlap bytes that
+ // were already examined); otherwise every window has been checked.
+ // The mov sits between cmp and br and relies on not changing the flags.
+ cmp(cnt1_neg, (u1)8);
+ mov(cnt1_neg, 0);
+ br(LT, CH1_LOOP);
+ b(NOMATCH);
+
+ BIND(HAS_ZERO);
+ // Byte-reverse and count leading zero bits, then divide by 8 (LSR 3) to get
+ // the byte position of the flagged byte within the loaded word.
+ // NOTE(review): assumes little-endian loads, so this is the lowest-addressed
+ // flagged byte -- confirm.
+ rev(tmp1, tmp1);
+ clz(tmp1, tmp1);
+ add(cnt1_neg, cnt1_neg, tmp1, LSR, 3);
+ b(MATCH);
+
+ BIND(DO1_SHORT);
+ // Short path: same negative-offset scheme, one byte per iteration.
+ mov(result_tmp, cnt1);
+ lea(str1, Address(str1, cnt1));
+ sub(cnt1_neg, zr, cnt1);
+ BIND(DO1_LOOP);
+ ldrb(ch1, Address(str1, cnt1_neg));
+ cmp(ch, ch1);
+ br(EQ, MATCH);
+ adds(cnt1_neg, cnt1_neg, 1);
+ br(LT, DO1_LOOP);
+ BIND(NOMATCH);
+ mov(result, -1);
+ b(DONE);
+ BIND(MATCH);
+ // result_tmp holds the distance from the original start to the adjusted
+ // str1, so adding the (negative) offset yields the index of the match.
+ add(result, result_tmp, cnt1_neg);
+ BIND(DONE);
+}
+
// Compare strings.
void C2_MacroAssembler::string_compare(Register str1, Register str2,
Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2,
diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
index f359e35974a..b2f6226bf9e 100644
--- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
@@ -45,4 +45,8 @@
Register ch, Register result,
Register tmp1, Register tmp2, Register tmp3);
+ void stringL_indexof_char(Register str1, Register cnt1,
+ Register ch, Register result,
+ Register tmp1, Register tmp2, Register tmp3);
+
#endif // CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP
diff --git a/src/hotspot/cpu/aarch64/c2_globals_aarch64.hpp b/src/hotspot/cpu/aarch64/c2_globals_aarch64.hpp
index 973cbe740bd..5a019eba6ae 100644
--- a/src/hotspot/cpu/aarch64/c2_globals_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/c2_globals_aarch64.hpp
@@ -33,8 +33,6 @@
// (see c2_globals.hpp). Alpha-sorted.
define_pd_global(bool, BackgroundCompilation, true);
-define_pd_global(bool, UseTLAB, true);
-define_pd_global(bool, ResizeTLAB, true);
define_pd_global(bool, CICompileOSR, true);
define_pd_global(bool, InlineIntrinsics, true);
define_pd_global(bool, PreferInterpreterNativeStubs, false);
diff --git a/src/hotspot/cpu/aarch64/c2_safepointPollStubTable_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_safepointPollStubTable_aarch64.cpp
new file mode 100644
index 00000000000..fb36406fbde
--- /dev/null
+++ b/src/hotspot/cpu/aarch64/c2_safepointPollStubTable_aarch64.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "opto/compile.hpp"
+#include "opto/node.hpp"
+#include "opto/output.hpp"
+#include "runtime/sharedRuntime.hpp"
+
+#define __ masm.
+// Emits one out-of-line safepoint poll stub for `entry`: binds the stub
+// label, records the address of the poll site in the current thread and
+// jumps to the shared polling-page return handler.
+void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointPollStub* entry) const {
+ assert(SharedRuntime::polling_page_return_handler_blob() != NULL,
+ "polling page return stub not created yet");
+ address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point();
+
+ RuntimeAddress callback_addr(stub);
+
+ __ bind(entry->_stub_label);
+ // masm.pc() - masm.offset() is the start of the code being emitted; adding
+ // the entry's _safepoint_offset gives the absolute address of the poll site.
+ InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset);
+ __ adr(rscratch1, safepoint_pc);
+ // Store that address in the thread's saved_exception_pc slot.
+ __ str(rscratch1, Address(rthread, JavaThread::saved_exception_pc_offset()));
+ __ far_jump(callback_addr);
+}
+#undef __
diff --git a/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp b/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp
index 75cc249cf08..2e89960778e 100644
--- a/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp
@@ -36,6 +36,9 @@
#define __ _masm.
address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) {
+ precond(cbuf.stubs()->start() != badAddress);
+ precond(cbuf.stubs()->end() != badAddress);
+
// Stub is fixed up when the corresponding call is converted from
// calling compiled code to calling interpreted code.
// mov rmethod, 0
diff --git a/src/hotspot/cpu/aarch64/frame_aarch64.cpp b/src/hotspot/cpu/aarch64/frame_aarch64.cpp
index 46261c70dbe..15c5e16f380 100644
--- a/src/hotspot/cpu/aarch64/frame_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/frame_aarch64.cpp
@@ -37,6 +37,7 @@
#include "runtime/monitorChunk.hpp"
#include "runtime/os.inline.hpp"
#include "runtime/signature.hpp"
+#include "runtime/stackWatermarkSet.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/stubRoutines.hpp"
#include "vmreg_aarch64.inline.hpp"
@@ -476,8 +477,8 @@ frame frame::sender_for_compiled_frame(RegisterMap* map) const {
}
//------------------------------------------------------------------------------
-// frame::sender
-frame frame::sender(RegisterMap* map) const {
+// frame::sender_raw
+frame frame::sender_raw(RegisterMap* map) const {
// Default is we done have to follow them. The sender_for_xxx will
// update it accordingly
map->set_include_argument_oops(false);
@@ -499,6 +500,16 @@ frame frame::sender(RegisterMap* map) const {
return frame(sender_sp(), link(), sender_pc());
}
+// Returns the sending frame. When the register map asks for frames to be
+// processed, the stack watermark set is notified about the sender before
+// it is returned; sender_raw() is the variant without that step.
+frame frame::sender(RegisterMap* map) const {
+  frame caller = sender_raw(map);
+
+  if (!map->process_frames()) {
+    return caller;
+  }
+
+  StackWatermarkSet::on_iteration(map->thread(), caller);
+  return caller;
+}
+
bool frame::is_interpreted_frame_valid(JavaThread* thread) const {
assert(is_interpreted_frame(), "Not an interpreted frame");
// These are reasonable sanity checks
@@ -651,11 +662,12 @@ intptr_t* frame::real_fp() const {
#undef DESCRIBE_FP_OFFSET
-#define DESCRIBE_FP_OFFSET(name) \
- { \
- uintptr_t *p = (uintptr_t *)fp; \
- printf("0x%016lx 0x%016lx %s\n", (uintptr_t)(p + frame::name##_offset), \
- p[frame::name##_offset], #name); \
+#define DESCRIBE_FP_OFFSET(name) \
+ { \
+ uintptr_t *p = (uintptr_t *)fp; \
+ printf(INTPTR_FORMAT " " INTPTR_FORMAT " %s\n", \
+ (uintptr_t)(p + frame::name##_offset), \
+ p[frame::name##_offset], #name); \
}
static THREAD_LOCAL uintptr_t nextfp;
diff --git a/src/hotspot/cpu/aarch64/frame_aarch64.hpp b/src/hotspot/cpu/aarch64/frame_aarch64.hpp
index 6c639a05961..e2490d28611 100644
--- a/src/hotspot/cpu/aarch64/frame_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/frame_aarch64.hpp
@@ -161,4 +161,7 @@
static jint interpreter_frame_expression_stack_direction() { return -1; }
+ // returns the sending frame, without applying any barriers
+ frame sender_raw(RegisterMap* map) const;
+
#endif // CPU_AARCH64_FRAME_AARCH64_HPP
diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/c1/shenandoahBarrierSetC1_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shenandoah/c1/shenandoahBarrierSetC1_aarch64.cpp
index b3530509b03..db9c7577e60 100644
--- a/src/hotspot/cpu/aarch64/gc/shenandoah/c1/shenandoahBarrierSetC1_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/gc/shenandoah/c1/shenandoahBarrierSetC1_aarch64.cpp
@@ -109,7 +109,7 @@ LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRIt
__ xchg(access.resolved_addr(), value_opr, result, tmp);
if (access.is_oop()) {
- result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), false);
+ result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), ShenandoahBarrierSet::AccessKind::NORMAL);
LIR_Opr tmp = gen->new_register(type);
__ move(result, tmp);
result = tmp;
diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp
index c2d53df4f67..840464b251f 100644
--- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp
@@ -43,8 +43,6 @@
#define __ masm->
-address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL;
-
void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
Register src, Register dst, Register count, RegSet saved_regs) {
if (is_oop) {
@@ -227,18 +225,18 @@ void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssemb
}
}
-void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr) {
+void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, ShenandoahBarrierSet::AccessKind kind) {
assert(ShenandoahLoadRefBarrier, "Should be enabled");
assert(dst != rscratch2, "need rscratch2");
assert_different_registers(load_addr.base(), load_addr.index(), rscratch1, rscratch2);
- Label done;
+ Label heap_stable, not_cset;
__ enter();
Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
__ ldrb(rscratch2, gc_state);
// Check for heap stability
- __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, done);
+ __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, heap_stable);
// use r1 for load address
Register result_dst = dst;
@@ -253,51 +251,48 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembl
__ lea(r1, load_addr);
__ mov(r0, dst);
- __ far_call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb())));
-
- __ mov(result_dst, r0);
- __ pop(to_save, sp);
-
- __ bind(done);
- __ leave();
-}
-
-void ShenandoahBarrierSetAssembler::load_reference_barrier_native(MacroAssembler* masm, Register dst, Address load_addr) {
- if (!ShenandoahLoadRefBarrier) {
- return;
+ // Test for in-cset
+ if (kind == ShenandoahBarrierSet::AccessKind::NORMAL) {
+ __ mov(rscratch2, ShenandoahHeap::in_cset_fast_test_addr());
+ __ lsr(rscratch1, r0, ShenandoahHeapRegion::region_size_bytes_shift_jint());
+ __ ldrb(rscratch2, Address(rscratch2, rscratch1));
+ __ tbz(rscratch2, 0, not_cset);
}
- assert(dst != rscratch2, "need rscratch2");
-
- Label is_null;
- Label done;
-
- __ block_comment("load_reference_barrier_native { ");
-
- __ cbz(dst, is_null);
-
- __ enter();
-
- Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
- __ ldrb(rscratch2, gc_state);
-
- // Check for heap in evacuation phase
- __ tbz(rscratch2, ShenandoahHeap::EVACUATION_BITPOS, done);
-
- __ mov(rscratch2, dst);
__ push_call_clobbered_registers();
- __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native));
- __ lea(r1, load_addr);
- __ mov(r0, rscratch2);
+ switch (kind) {
+ case ShenandoahBarrierSet::AccessKind::NORMAL:
+ if (UseCompressedOops) {
+ __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow));
+ } else {
+ __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier));
+ }
+ break;
+ case ShenandoahBarrierSet::AccessKind::WEAK:
+ if (UseCompressedOops) {
+ __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow));
+ } else {
+ __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak));
+ }
+ break;
+ case ShenandoahBarrierSet::AccessKind::NATIVE:
+ __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak));
+ break;
+ default:
+ ShouldNotReachHere();
+ }
__ blr(lr);
- __ mov(rscratch2, r0);
+ __ mov(rscratch1, r0);
__ pop_call_clobbered_registers();
- __ mov(dst, rscratch2);
+ __ mov(r0, rscratch1);
- __ bind(done);
+ __ bind(not_cset);
+
+ __ mov(result_dst, r0);
+ __ pop(to_save, sp);
+
+ __ bind(heap_stable);
__ leave();
- __ bind(is_null);
- __ block_comment("} load_reference_barrier_native");
}
void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Register dst, Register tmp) {
@@ -308,15 +303,6 @@ void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Regis
}
}
-void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr) {
- if (ShenandoahLoadRefBarrier) {
- Label is_null;
- __ cbz(dst, is_null);
- load_reference_barrier_not_null(masm, dst, load_addr);
- __ bind(is_null);
- }
-}
-
//
// Arguments:
//
@@ -352,11 +338,8 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet d
BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
- if (ShenandoahBarrierSet::use_load_reference_barrier_native(decorators, type)) {
- load_reference_barrier_native(masm, dst, src);
- } else {
- load_reference_barrier(masm, dst, src);
- }
+ ShenandoahBarrierSet::AccessKind kind = ShenandoahBarrierSet::access_kind(decorators, type);
+ load_reference_barrier(masm, dst, src, kind);
if (dst != result_dst) {
__ mov(result_dst, dst);
@@ -477,7 +460,8 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
bool is_narrow = UseCompressedOops;
Assembler::operand_size size = is_narrow ? Assembler::word : Assembler::xword;
- assert_different_registers(addr, expected, new_val, tmp1, tmp2);
+ assert_different_registers(addr, expected, tmp1, tmp2);
+ assert_different_registers(addr, new_val, tmp1, tmp2);
Label step4, done;
@@ -669,10 +653,18 @@ void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assemble
__ bind(slow_path);
ce->store_parameter(res, 0);
ce->store_parameter(addr, 1);
- if (stub->is_native()) {
- __ far_call(RuntimeAddress(bs->load_reference_barrier_native_rt_code_blob()->code_begin()));
- } else {
- __ far_call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin()));
+ switch (stub->kind()) {
+ case ShenandoahBarrierSet::AccessKind::NORMAL:
+ __ far_call(RuntimeAddress(bs->load_reference_barrier_normal_rt_code_blob()->code_begin()));
+ break;
+ case ShenandoahBarrierSet::AccessKind::WEAK:
+ __ far_call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin()));
+ break;
+ case ShenandoahBarrierSet::AccessKind::NATIVE:
+ __ far_call(RuntimeAddress(bs->load_reference_barrier_native_rt_code_blob()->code_begin()));
+ break;
+ default:
+ ShouldNotReachHere();
}
__ b(*stub->continuation());
@@ -728,19 +720,33 @@ void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAss
__ epilogue();
}
-void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, bool is_native) {
+void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, ShenandoahBarrierSet::AccessKind kind) {
__ prologue("shenandoah_load_reference_barrier", false);
// arg0 : object to be resolved
__ push_call_clobbered_registers();
__ load_parameter(0, r0);
__ load_parameter(1, r1);
- if (is_native) {
- __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native));
- } else if (UseCompressedOops) {
- __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow));
- } else {
- __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier));
+ switch (kind) {
+ case ShenandoahBarrierSet::AccessKind::NORMAL:
+ if (UseCompressedOops) {
+ __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow));
+ } else {
+ __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier));
+ }
+ break;
+ case ShenandoahBarrierSet::AccessKind::WEAK:
+ if (UseCompressedOops) {
+ __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow));
+ } else {
+ __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak));
+ }
+ break;
+ case ShenandoahBarrierSet::AccessKind::NATIVE:
+ __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak));
+ break;
+ default:
+ ShouldNotReachHere();
}
__ blr(lr);
__ mov(rscratch1, r0);
@@ -753,67 +759,3 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s
#undef __
#endif // COMPILER1
-
-address ShenandoahBarrierSetAssembler::shenandoah_lrb() {
- assert(_shenandoah_lrb != NULL, "need load reference barrier stub");
- return _shenandoah_lrb;
-}
-
-#define __ cgen->assembler()->
-
-// Shenandoah load reference barrier.
-//
-// Input:
-// r0: OOP to evacuate. Not null.
-// r1: load address
-//
-// Output:
-// r0: Pointer to evacuated OOP.
-//
-// Trash rscratch1, rscratch2. Preserve everything else.
-address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) {
-
- __ align(6);
- StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
- address start = __ pc();
-
- Label slow_path;
- __ mov(rscratch2, ShenandoahHeap::in_cset_fast_test_addr());
- __ lsr(rscratch1, r0, ShenandoahHeapRegion::region_size_bytes_shift_jint());
- __ ldrb(rscratch2, Address(rscratch2, rscratch1));
- __ tbnz(rscratch2, 0, slow_path);
- __ ret(lr);
-
- __ bind(slow_path);
- __ enter(); // required for proper stackwalking of RuntimeStub frame
-
- __ push_call_clobbered_registers();
-
- if (UseCompressedOops) {
- __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow));
- } else {
- __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier));
- }
- __ blr(lr);
- __ mov(rscratch1, r0);
- __ pop_call_clobbered_registers();
- __ mov(r0, rscratch1);
-
- __ leave(); // required for proper stackwalking of RuntimeStub frame
- __ ret(lr);
-
- return start;
-}
-
-#undef __
-
-void ShenandoahBarrierSetAssembler::barrier_stubs_init() {
- if (ShenandoahLoadRefBarrier) {
- int stub_code_size = 2048;
- ResourceMark rm;
- BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size);
- CodeBuffer buf(bb);
- StubCodeGenerator cgen(&buf);
- _shenandoah_lrb = generate_shenandoah_lrb(&cgen);
- }
-}
diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp
index 88aa9a2b95f..60303725fd8 100644
--- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp
@@ -27,6 +27,7 @@
#include "asm/macroAssembler.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
+#include "gc/shenandoah/shenandoahBarrierSet.hpp"
#ifdef COMPILER1
class LIR_Assembler;
class ShenandoahPreBarrierStub;
@@ -38,8 +39,6 @@ class StubCodeGenerator;
class ShenandoahBarrierSetAssembler: public BarrierSetAssembler {
private:
- static address _shenandoah_lrb;
-
void satb_write_barrier_pre(MacroAssembler* masm,
Register obj,
Register pre_val,
@@ -57,14 +56,9 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler {
void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg);
void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg);
- void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr);
- void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr);
- void load_reference_barrier_native(MacroAssembler* masm, Register dst, Address load_addr);
-
- address generate_shenandoah_lrb(StubCodeGenerator* cgen);
+ void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, ShenandoahBarrierSet::AccessKind kind);
public:
- static address shenandoah_lrb();
void storeval_barrier(MacroAssembler* masm, Register dst, Register tmp);
@@ -72,7 +66,7 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler {
void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub);
void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub);
void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
- void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, bool is_native);
+ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, ShenandoahBarrierSet::AccessKind kind);
#endif
virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
@@ -85,8 +79,6 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler {
Register obj, Register tmp, Label& slowpath);
void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val,
bool acquire, bool release, bool is_cae, Register result);
-
- virtual void barrier_stubs_init();
};
#endif // CPU_AARCH64_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_AARCH64_HPP
diff --git a/src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.hpp
index 35e261fa7ae..3187808b65a 100644
--- a/src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.hpp
@@ -24,10 +24,9 @@
#ifndef CPU_AARCH64_GC_Z_ZGLOBALS_AARCH64_HPP
#define CPU_AARCH64_GC_Z_ZGLOBALS_AARCH64_HPP
-const size_t ZPlatformGranuleSizeShift = 21; // 2MB
-const size_t ZPlatformHeapViews = 3;
-const size_t ZPlatformNMethodDisarmedOffset = 4;
-const size_t ZPlatformCacheLineSize = 64;
+const size_t ZPlatformGranuleSizeShift = 21; // 2MB
+const size_t ZPlatformHeapViews = 3;
+const size_t ZPlatformCacheLineSize = 64;
size_t ZPlatformAddressOffsetBits();
size_t ZPlatformAddressMetadataShift();
diff --git a/src/hotspot/cpu/aarch64/globals_aarch64.hpp b/src/hotspot/cpu/aarch64/globals_aarch64.hpp
index 294b6b13495..9ad1360fa91 100644
--- a/src/hotspot/cpu/aarch64/globals_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/globals_aarch64.hpp
@@ -93,6 +93,8 @@ define_pd_global(intx, InlineSmallCode, 1000);
"Use SIMD instructions in generated array equals code") \
product(bool, UseSimpleArrayEquals, false, \
"Use simpliest and shortest implementation for array equals") \
+ product(bool, UseSIMDForBigIntegerShiftIntrinsics, true, \
+ "Use SIMD instructions for left/right shift of BigInteger") \
product(bool, AvoidUnalignedAccesses, false, \
"Avoid generating unaligned memory accesses") \
product(bool, UseLSE, false, \
diff --git a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp
index 1d635429336..09632154630 100644
--- a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp
@@ -473,7 +473,7 @@ void InterpreterMacroAssembler::dispatch_base(TosState state,
if (needs_thread_local_poll) {
NOT_PRODUCT(block_comment("Thread-local Safepoint poll"));
- ldr(rscratch2, Address(rthread, Thread::polling_page_offset()));
+ ldr(rscratch2, Address(rthread, Thread::polling_word_offset()));
tbnz(rscratch2, exact_log2(SafepointMechanism::poll_bit()), safepoint);
}
@@ -521,6 +521,7 @@ void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) {
// remove activation
//
+// Apply stack watermark barrier.
// Unlock the receiver if this is a synchronized method.
// Unlock any Java monitors from syncronized blocks.
// Remove the activation from the stack.
@@ -541,6 +542,21 @@ void InterpreterMacroAssembler::remove_activation(
// result check if synchronized method
Label unlocked, unlock, no_unlock;
+ // The below poll is for the stack watermark barrier. It allows fixing up frames lazily,
+ // that would normally not be safe to use. Such bad returns into unsafe territory of
+ // the stack, will call InterpreterRuntime::at_unwind.
+ Label slow_path;
+ Label fast_path;
+ safepoint_poll(slow_path, true /* at_return */, false /* acquire */, false /* in_nmethod */);
+ br(Assembler::AL, fast_path);
+ bind(slow_path);
+ push(state);
+ set_last_Java_frame(esp, rfp, (address)pc(), rscratch1);
+ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), rthread);
+ reset_last_Java_frame(true);
+ pop(state);
+ bind(fast_path);
+
// get the value of _do_not_unlock_if_synchronized into r3
const Address do_not_unlock_if_synchronized(rthread,
in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()));
diff --git a/src/hotspot/cpu/aarch64/jvmciCodeInstaller_aarch64.cpp b/src/hotspot/cpu/aarch64/jvmciCodeInstaller_aarch64.cpp
index 3156b4b8e83..f41d79e1021 100644
--- a/src/hotspot/cpu/aarch64/jvmciCodeInstaller_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/jvmciCodeInstaller_aarch64.cpp
@@ -21,8 +21,9 @@
* questions.
*/
- #include "jvmci/jvmci.hpp"
- #include "jvmci/jvmciCodeInstaller.hpp"
+#include "precompiled.hpp"
+#include "jvmci/jvmci.hpp"
+#include "jvmci/jvmciCodeInstaller.hpp"
#include "jvmci/jvmciRuntime.hpp"
#include "jvmci/jvmciCompilerToVM.hpp"
#include "jvmci/jvmciJavaClasses.hpp"
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
index 81fd87614e5..005ad3f5930 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
@@ -288,27 +288,21 @@ address MacroAssembler::target_addr_for_insn(address insn_addr, unsigned insn) {
return address(((uint64_t)insn_addr + (offset << 2)));
}
-void MacroAssembler::safepoint_poll(Label& slow_path) {
- ldr(rscratch1, Address(rthread, Thread::polling_page_offset()));
- tbnz(rscratch1, exact_log2(SafepointMechanism::poll_bit()), slow_path);
-}
-
-// Just like safepoint_poll, but use an acquiring load for thread-
-// local polling.
-//
-// We need an acquire here to ensure that any subsequent load of the
-// global SafepointSynchronize::_state flag is ordered after this load
-// of the local Thread::_polling page. We don't want this poll to
-// return false (i.e. not safepointing) and a later poll of the global
-// SafepointSynchronize::_state spuriously to return true.
-//
-// This is to avoid a race when we're in a native->Java transition
-// racing the code which wakes up from a safepoint.
-//
-void MacroAssembler::safepoint_poll_acquire(Label& slow_path) {
- lea(rscratch1, Address(rthread, Thread::polling_page_offset()));
- ldar(rscratch1, rscratch1);
- tbnz(rscratch1, exact_log2(SafepointMechanism::poll_bit()), slow_path);
+void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) {
+ if (acquire) {
+ lea(rscratch1, Address(rthread, Thread::polling_word_offset()));
+ ldar(rscratch1, rscratch1);
+ } else {
+ ldr(rscratch1, Address(rthread, Thread::polling_word_offset()));
+ }
+ if (at_return) {
+ // Note that when in_nmethod is set, the stack pointer is incremented before the poll. Therefore,
+ // we may safely use the sp instead to perform the stack watermark check.
+ cmp(in_nmethod ? sp : rfp, rscratch1);
+ br(Assembler::HI, slow_path);
+ } else {
+ tbnz(rscratch1, exact_log2(SafepointMechanism::poll_bit()), slow_path);
+ }
}
void MacroAssembler::reset_last_Java_frame(bool clear_fp) {
@@ -711,7 +705,7 @@ void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, in
// Maybe emit a call via a trampoline. If the code cache is small
// trampolines won't be emitted.
-address MacroAssembler::trampoline_call(Address entry, CodeBuffer *cbuf) {
+address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) {
assert(JavaThread::current()->is_Compiler_thread(), "just checking");
assert(entry.rspec().type() == relocInfo::runtime_call_type
|| entry.rspec().type() == relocInfo::opt_virtual_call_type
@@ -732,6 +726,7 @@ address MacroAssembler::trampoline_call(Address entry, CodeBuffer *cbuf) {
if (!in_scratch_emit_size) {
address stub = emit_trampoline_stub(offset(), entry.target());
if (stub == NULL) {
+ postcond(pc() == badAddress);
return NULL; // CodeCache is full
}
}
@@ -745,6 +740,7 @@ address MacroAssembler::trampoline_call(Address entry, CodeBuffer *cbuf) {
bl(pc());
}
// just need to return a non-null address
+ postcond(pc() != badAddress);
return pc();
}
@@ -938,23 +934,6 @@ void MacroAssembler::check_and_handle_earlyret(Register java_thread) { }
void MacroAssembler::check_and_handle_popframe(Register java_thread) { }
-
-RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
- Register tmp,
- int offset) {
- intptr_t value = *delayed_value_addr;
- if (value != 0)
- return RegisterOrConstant(value + offset);
-
- // load indirectly to solve generation ordering problem
- ldr(tmp, ExternalAddress((address) delayed_value_addr));
-
- if (offset != 0)
- add(tmp, tmp, offset);
-
- return RegisterOrConstant(tmp);
-}
-
// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <intf_klass, itable_index>.
// The receiver klass is in recv_klass.
@@ -1834,7 +1813,7 @@ bool MacroAssembler::try_merge_ldst(Register rt, const Address &adr, size_t size
return true;
} else {
assert(size_in_bytes == 8 || size_in_bytes == 4, "only 8 bytes or 4 bytes load/store is supported.");
- const unsigned mask = size_in_bytes - 1;
+ const uint64_t mask = size_in_bytes - 1;
if (adr.getMode() == Address::base_plus_offset &&
(adr.offset() & mask) == 0) { // only supports base_plus_offset.
code()->set_last_insn(pc());
@@ -2898,7 +2877,7 @@ void MacroAssembler::merge_ldst(Register rt,
// Overwrite previous generated binary.
code_section()->set_end(prev);
- const int sz = prev_ldst->size_in_bytes();
+ const size_t sz = prev_ldst->size_in_bytes();
assert(sz == 8 || sz == 4, "only supports 64/32bit merging.");
if (!is_store) {
BLOCK_COMMENT("merged ldr pair");
@@ -4405,13 +4384,6 @@ void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype)
ldr(dest, Address(rthread, Thread::polling_page_offset()));
}
-// Move the address of the polling page into r, then read the polling
-// page.
-address MacroAssembler::fetch_and_read_polling_page(Register r, relocInfo::relocType rtype) {
- get_polling_page(r, rtype);
- return read_polling_page(r, rtype);
-}
-
// Read the polling page. The address of the polling page must
// already be in r.
address MacroAssembler::read_polling_page(Register r, relocInfo::relocType rtype) {
@@ -4503,7 +4475,7 @@ void MacroAssembler::remove_frame(int framesize) {
// This method checks if provided byte array contains byte with highest bit set.
-void MacroAssembler::has_negatives(Register ary1, Register len, Register result) {
+address MacroAssembler::has_negatives(Register ary1, Register len, Register result) {
// Simple and most common case of aligned small array which is not at the
// end of memory page is placed here. All other cases are in stub.
Label LOOP, END, STUB, STUB_LONG, SET_RESULT, DONE;
@@ -4540,27 +4512,38 @@ void MacroAssembler::has_negatives(Register ary1, Register len, Register result)
b(SET_RESULT);
BIND(STUB);
- RuntimeAddress has_neg = RuntimeAddress(StubRoutines::aarch64::has_negatives());
+ RuntimeAddress has_neg = RuntimeAddress(StubRoutines::aarch64::has_negatives());
assert(has_neg.target() != NULL, "has_negatives stub has not been generated");
- trampoline_call(has_neg);
+ address tpc1 = trampoline_call(has_neg);
+ if (tpc1 == NULL) {
+ DEBUG_ONLY(reset_labels(STUB_LONG, SET_RESULT, DONE));
+ postcond(pc() == badAddress);
+ return NULL;
+ }
b(DONE);
BIND(STUB_LONG);
- RuntimeAddress has_neg_long = RuntimeAddress(
- StubRoutines::aarch64::has_negatives_long());
+ RuntimeAddress has_neg_long = RuntimeAddress(StubRoutines::aarch64::has_negatives_long());
assert(has_neg_long.target() != NULL, "has_negatives stub has not been generated");
- trampoline_call(has_neg_long);
+ address tpc2 = trampoline_call(has_neg_long);
+ if (tpc2 == NULL) {
+ DEBUG_ONLY(reset_labels(SET_RESULT, DONE));
+ postcond(pc() == badAddress);
+ return NULL;
+ }
b(DONE);
BIND(SET_RESULT);
cset(result, NE); // set true or false
BIND(DONE);
+ postcond(pc() != badAddress);
+ return pc();
}
-void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
- Register tmp4, Register tmp5, Register result,
- Register cnt1, int elem_size) {
+address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
+ Register tmp4, Register tmp5, Register result,
+ Register cnt1, int elem_size) {
Label DONE, SAME;
Register tmp1 = rscratch1;
Register tmp2 = rscratch2;
@@ -4664,7 +4647,7 @@ void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
}
}
} else {
- Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB, EARLY_OUT,
+ Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB,
CSET_EQ, LAST_CHECK;
mov(result, false);
cbz(a1, DONE);
@@ -4723,10 +4706,14 @@ void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
cbnz(tmp5, DONE);
RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals());
assert(stub.target() != NULL, "array_equals_long stub has not been generated");
- trampoline_call(stub);
+ address tpc = trampoline_call(stub);
+ if (tpc == NULL) {
+ DEBUG_ONLY(reset_labels(SHORT, LAST_CHECK, CSET_EQ, SAME, DONE));
+ postcond(pc() == badAddress);
+ return NULL;
+ }
b(DONE);
- bind(EARLY_OUT);
// (a1 != null && a2 == null) || (a1 != null && a2 != null && a1 == a2)
// so, if a2 == null => return false(0), else return true, so we can return a2
mov(result, a2);
@@ -4753,6 +4740,8 @@ void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
bind(DONE);
BLOCK_COMMENT("} array_equals");
+ postcond(pc() != badAddress);
+ return pc();
}
// Compare Strings
@@ -4860,7 +4849,7 @@ const int MacroAssembler::zero_words_block_size = 8;
// cnt: Count in HeapWords.
//
// ptr, cnt, rscratch1, and rscratch2 are clobbered.
-void MacroAssembler::zero_words(Register ptr, Register cnt)
+address MacroAssembler::zero_words(Register ptr, Register cnt)
{
assert(is_power_of_2(zero_words_block_size), "adjust this");
assert(ptr == r10 && cnt == r11, "mismatch in register usage");
@@ -4870,10 +4859,15 @@ void MacroAssembler::zero_words(Register ptr, Register cnt)
Label around;
br(LO, around);
{
- RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::aarch64::zero_blocks());
+ RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::aarch64::zero_blocks());
assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated");
if (StubRoutines::aarch64::complete()) {
- trampoline_call(zero_blocks);
+ address tpc = trampoline_call(zero_blocks);
+ if (tpc == NULL) {
+ DEBUG_ONLY(reset_labels(around));
+ postcond(pc() == badAddress);
+ return NULL;
+ }
} else {
bl(zero_blocks);
}
@@ -4894,6 +4888,8 @@ void MacroAssembler::zero_words(Register ptr, Register cnt)
bind(l);
}
BLOCK_COMMENT("} zero_words");
+ postcond(pc() != badAddress);
+ return pc();
}
// base: Address of a buffer to be zeroed, 8 bytes aligned.
@@ -4906,14 +4902,15 @@ void MacroAssembler::zero_words(Register base, uint64_t cnt)
if (i) str(zr, Address(base));
if (cnt <= SmallArraySize / BytesPerLong) {
- for (; i < (int)cnt; i += 2)
+ for (; i < (int)cnt; i += 2) {
stp(zr, zr, Address(base, i * wordSize));
+ }
} else {
const int unroll = 4; // Number of stp(zr, zr) instructions we'll unroll
int remainder = cnt % (2 * unroll);
- for (; i < remainder; i += 2)
+ for (; i < remainder; i += 2) {
stp(zr, zr, Address(base, i * wordSize));
-
+ }
Label loop;
Register cnt_reg = rscratch1;
Register loop_base = rscratch2;
@@ -4923,8 +4920,9 @@ void MacroAssembler::zero_words(Register base, uint64_t cnt)
add(loop_base, base, (remainder - 2) * wordSize);
bind(loop);
sub(cnt_reg, cnt_reg, 2 * unroll);
- for (i = 1; i < unroll; i++)
+ for (i = 1; i < unroll; i++) {
stp(zr, zr, Address(loop_base, 2 * i * wordSize));
+ }
stp(zr, zr, Address(pre(loop_base, 2 * unroll * wordSize)));
cbnz(cnt_reg, loop);
}
@@ -5140,9 +5138,9 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,
// Inflate byte[] array to char[].
-void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
- FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
- Register tmp4) {
+address MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
+ FloatRegister vtmp1, FloatRegister vtmp2,
+ FloatRegister vtmp3, Register tmp4) {
Label big, done, after_init, to_stub;
assert_different_registers(src, dst, len, tmp4, rscratch1);
@@ -5179,9 +5177,14 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len
if (SoftwarePrefetchHintDistance >= 0) {
bind(to_stub);
- RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_byte_array_inflate());
+ RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_byte_array_inflate());
assert(stub.target() != NULL, "large_byte_array_inflate stub has not been generated");
- trampoline_call(stub);
+ address tpc = trampoline_call(stub);
+ if (tpc == NULL) {
+ DEBUG_ONLY(reset_labels(big, done));
+ postcond(pc() == badAddress);
+ return NULL;
+ }
b(after_init);
}
@@ -5235,6 +5238,8 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len
strq(vtmp3, Address(dst, -16));
bind(done);
+ postcond(pc() != badAddress);
+ return pc();
}
// Compress char[] array to byte[].
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
index 998f1afc1c7..1d597fb429c 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
@@ -102,8 +102,7 @@ class MacroAssembler: public Assembler {
virtual void check_and_handle_popframe(Register java_thread);
virtual void check_and_handle_earlyret(Register java_thread);
- void safepoint_poll(Label& slow_path);
- void safepoint_poll_acquire(Label& slow_path);
+ void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod);
// Biased locking support
// lock_reg and obj_reg must be loaded up with the appropriate values.
@@ -1014,10 +1013,6 @@ class MacroAssembler: public Assembler {
// Check for reserved stack access in method being exited (for JIT)
void reserved_stack_check();
- virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
- Register tmp,
- int offset);
-
// Arithmetics
void addptr(const Address &dst, int32_t src);
@@ -1063,10 +1058,24 @@ class MacroAssembler: public Assembler {
private:
void compare_eq(Register rn, Register rm, enum operand_size size);
+#ifdef ASSERT
+ // Template short-hand support to clean-up after a failed call to trampoline
+ // call generation (see trampoline_call() below), when a set of Labels must
+ // be reset (before returning). Resets lbl, then recurses on the remainder.
+ template <typename... More>
+ void reset_labels(Label &lbl, More&... more) {
+ lbl.reset(); reset_labels(more...);
+ }
+ // Base case of the recursion above: exactly one Label left to reset.
+ void reset_labels(Label &lbl) {
+ lbl.reset();
+ }
+#endif
+
public:
// Calls
- address trampoline_call(Address entry, CodeBuffer *cbuf = NULL);
+ address trampoline_call(Address entry, CodeBuffer* cbuf = NULL);
static bool far_branches() {
return ReservedCodeCacheSize > branch_range || UseAOT;
@@ -1231,7 +1240,6 @@ class MacroAssembler: public Assembler {
address read_polling_page(Register r, relocInfo::relocType rtype);
void get_polling_page(Register dest, relocInfo::relocType rtype);
- address fetch_and_read_polling_page(Register r, relocInfo::relocType rtype);
// CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic.
void update_byte_crc32(Register crc, Register val, Register table);
@@ -1239,24 +1247,24 @@ class MacroAssembler: public Assembler {
Register table0, Register table1, Register table2, Register table3,
bool upper = false);
- void has_negatives(Register ary1, Register len, Register result);
+ address has_negatives(Register ary1, Register len, Register result);
- void arrays_equals(Register a1, Register a2, Register result, Register cnt1,
- Register tmp1, Register tmp2, Register tmp3, int elem_size);
+ address arrays_equals(Register a1, Register a2, Register result, Register cnt1,
+ Register tmp1, Register tmp2, Register tmp3, int elem_size);
void string_equals(Register a1, Register a2, Register result, Register cnt1,
int elem_size);
void fill_words(Register base, Register cnt, Register value);
void zero_words(Register base, uint64_t cnt);
- void zero_words(Register ptr, Register cnt);
+ address zero_words(Register ptr, Register cnt);
void zero_dcache_blocks(Register base, Register cnt);
static const int zero_words_block_size;
- void byte_array_inflate(Register src, Register dst, Register len,
- FloatRegister vtmp1, FloatRegister vtmp2,
- FloatRegister vtmp3, Register tmp4);
+ address byte_array_inflate(Register src, Register dst, Register len,
+ FloatRegister vtmp1, FloatRegister vtmp2,
+ FloatRegister vtmp3, Register tmp4);
void char_array_compress(Register src, Register dst, Register len,
FloatRegister tmp1Reg, FloatRegister tmp2Reg,
diff --git a/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp b/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp
index d40c533a82c..dcf87913a88 100644
--- a/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp
@@ -655,7 +655,7 @@ class NativeLdSt : public NativeInstruction {
return 0;
}
}
- size_t size_in_bytes() { return 1 << size(); }
+ size_t size_in_bytes() { return 1ULL << size(); }
bool is_not_pre_post_index() { return (is_ldst_ur() || is_ldst_unsigned_offset()); }
bool is_load() {
assert(Instruction_aarch64::extract(uint_at(0), 23, 22) == 0b01 ||
diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
index d556d957e6b..92a07a84d2a 100644
--- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
@@ -38,6 +38,7 @@
#include "nativeInst_aarch64.hpp"
#include "oops/compiledICHolder.hpp"
#include "oops/klass.inline.hpp"
+#include "prims/methodHandles.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/vframeArray.hpp"
@@ -1080,20 +1081,6 @@ static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMR
}
}
-
-// Check GCLocker::needs_gc and enter the runtime if it's true. This
-// keeps a new JNI critical region from starting until a GC has been
-// forced. Save down any oops in registers and describe them in an
-// OopMap.
-static void check_needs_gc_for_critical_native(MacroAssembler* masm,
- int stack_slots,
- int total_c_args,
- int total_in_args,
- int arg_save_area,
- OopMapSet* oop_maps,
- VMRegPair* in_regs,
- BasicType* in_sig_bt) { Unimplemented(); }
-
// Unpack an array argument into a pointer to the body and the length
// if the array is non-null, otherwise pass 0 for both.
static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) { Unimplemented(); }
@@ -1259,25 +1246,12 @@ static void gen_special_dispatch(MacroAssembler* masm,
// Critical native functions are a shorthand for the use of
// GetPrimtiveArrayCritical and disallow the use of any other JNI
// functions. The wrapper is expected to unpack the arguments before
-// passing them to the callee and perform checks before and after the
-// native call to ensure that they GCLocker
-// lock_critical/unlock_critical semantics are followed. Some other
-// parts of JNI setup are skipped like the tear down of the JNI handle
+// passing them to the callee. Critical native functions leave the state _in_Java,
+// since they block out GC.
+// Some other parts of JNI setup are skipped like the tear down of the JNI handle
// block and the check for pending exceptions it's impossible for them
// to be thrown.
//
-// They are roughly structured like this:
-// if (GCLocker::needs_gc())
-// SharedRuntime::block_for_jni_critical();
-// tranistion to thread_in_native
-// unpack arrray arguments and call native entry point
-// check for safepoint in progress
-// check if any thread suspend flags are set
-// call into JVM and possible unlock the JNI critical
-// if a GC was suppressed while in the critical native.
-// transition back to thread_in_Java
-// return to caller
-//
nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
const methodHandle& method,
int compile_id,
@@ -1524,7 +1498,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
// Generate stack overflow check
if (UseStackBanging) {
- __ bang_stack_with_offset(StackOverflow::stack_shadow_zone_size());
+ __ bang_stack_with_offset(checked_cast(StackOverflow::stack_shadow_zone_size()));
} else {
Unimplemented();
}
@@ -1545,11 +1519,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
const Register oop_handle_reg = r20;
- if (is_critical_native) {
- check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args,
- oop_handle_offset, oop_maps, in_regs, in_sig_bt);
- }
-
//
// We immediately shuffle the arguments so that any vm call we have to
// make from here on out (sync slow path, jvmti, etc.) we will have
@@ -1822,12 +1791,12 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
// get JNIEnv* which is first argument to native
if (!is_critical_native) {
__ lea(c_rarg0, Address(rthread, in_bytes(JavaThread::jni_environment_offset())));
- }
- // Now set thread in native
- __ mov(rscratch1, _thread_in_native);
- __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
- __ stlrw(rscratch1, rscratch2);
+ // Now set thread in native
+ __ mov(rscratch1, _thread_in_native);
+ __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
+ __ stlrw(rscratch1, rscratch2);
+ }
rt_call(masm, native_func);
@@ -1855,6 +1824,21 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
default : ShouldNotReachHere();
}
+ Label safepoint_in_progress, safepoint_in_progress_done;
+ Label after_transition;
+
+ // If this is a critical native, check for a safepoint or suspend request after the call.
+ // If a safepoint is needed, transition to native, then to native_trans to handle
+ // safepoints like the native methods that are not critical natives.
+ if (is_critical_native) {
+ Label needs_safepoint;
+ __ safepoint_poll(needs_safepoint, false /* at_return */, true /* acquire */, false /* in_nmethod */);
+ __ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset()));
+ __ cbnzw(rscratch1, needs_safepoint);
+ __ b(after_transition);
+ __ bind(needs_safepoint);
+ }
+
// Switch thread to "native transition" state before reading the synchronization state.
// This additional state is necessary because reading and testing the synchronization
// state is not atomic w.r.t. GC, as this scenario demonstrates:
@@ -1875,16 +1859,23 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
}
// check for safepoint operation in progress and/or pending suspend requests
- Label safepoint_in_progress, safepoint_in_progress_done;
{
- __ safepoint_poll_acquire(safepoint_in_progress);
+ // We need an acquire here to ensure that any subsequent load of the
+ // global SafepointSynchronize::_state flag is ordered after this load
+ // of the thread-local polling word. We don't want this poll to
+ // return false (i.e. not safepointing) and a later poll of the global
+ // SafepointSynchronize::_state spuriously to return true.
+ //
+ // This is to avoid a race when we're in a native->Java transition
+ // racing the code which wakes up from a safepoint.
+
+ __ safepoint_poll(safepoint_in_progress, true /* at_return */, true /* acquire */, false /* in_nmethod */);
__ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset()));
__ cbnzw(rscratch1, safepoint_in_progress);
__ bind(safepoint_in_progress_done);
}
// change thread state
- Label after_transition;
__ mov(rscratch1, _thread_in_Java);
__ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
__ stlrw(rscratch1, rscratch2);
@@ -2089,22 +2080,12 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
#ifndef PRODUCT
assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
#endif
- if (!is_critical_native) {
- __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
- } else {
- __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)));
- }
+ __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
__ blr(rscratch1);
__ maybe_isb();
// Restore any method result value
restore_native_result(masm, ret_type, stack_slots);
- if (is_critical_native) {
- // The call above performed the transition to thread_in_Java so
- // skip the transition logic above.
- __ b(after_transition);
- }
-
__ b(safepoint_in_progress_done);
__ block_comment("} safepoint");
}
@@ -2153,12 +2134,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
oop_maps);
- if (is_critical_native) {
- nm->set_lazy_critical_native(true);
- }
-
return nm;
-
}
// this function returns the adjust size (in number of words) to a c2i adapter
@@ -2469,7 +2445,7 @@ void SharedRuntime::generate_deopt_blob() {
__ sub(sp, sp, r19);
// Push interpreter frames in a loop
- __ mov(rscratch1, (address)0xDEADDEAD); // Make a recognizable pattern
+ __ mov(rscratch1, (uint64_t)0xDEADDEAD); // Make a recognizable pattern
__ mov(rscratch2, rscratch1);
Label loop;
__ bind(loop);
diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
index 412578eea5c..09ea5387165 100644
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
@@ -611,6 +611,16 @@ class StubGenerator: public StubCodeGenerator {
void array_overlap_test(Label& L_no_overlap, Address::sxtw sf) { __ b(L_no_overlap); }
+ // Generate indices for iota vector: 16 bytes of data, returned by start address.
+ address generate_iota_indices(const char *stub_name) {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", stub_name);
+ address start = __ pc(); // address of the first data word emitted below
+ __ emit_data64(0x0706050403020100, relocInfo::none); // byte values 0x00..0x07
+ __ emit_data64(0x0F0E0D0C0B0A0908, relocInfo::none); // byte values 0x08..0x0F
+ return start;
+ }
+
// The inner part of zero_words(). This is the bulk operation,
// zeroing words in blocks, possibly using DC ZVA to do it. The
// caller is responsible for zeroing the last few words.
@@ -1295,14 +1305,14 @@ class StubGenerator: public StubCodeGenerator {
// Scan over array at a for count oops, verifying each one.
// Preserves a and count, clobbers rscratch1 and rscratch2.
- void verify_oop_array (size_t size, Register a, Register count, Register temp) {
+ void verify_oop_array (int size, Register a, Register count, Register temp) {
Label loop, end;
__ mov(rscratch1, a);
__ mov(rscratch2, zr);
__ bind(loop);
__ cmp(rscratch2, count);
__ br(Assembler::HS, end);
- if (size == (size_t)wordSize) {
+ if (size == wordSize) {
__ ldr(temp, Address(a, rscratch2, Address::lsl(exact_log2(size))));
__ verify_oop(temp);
} else {
@@ -1333,7 +1343,7 @@ class StubGenerator: public StubCodeGenerator {
// disjoint_int_copy_entry is set to the no-overlap entry point
// used by generate_conjoint_int_oop_copy().
//
- address generate_disjoint_copy(size_t size, bool aligned, bool is_oop, address *entry,
+ address generate_disjoint_copy(int size, bool aligned, bool is_oop, address *entry,
const char *name, bool dest_uninitialized = false) {
Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
RegSet saved_reg = RegSet::of(s, d, count);
@@ -1399,7 +1409,7 @@ class StubGenerator: public StubCodeGenerator {
// the hardware handle it. The two dwords within qwords that span
// cache line boundaries will still be loaded and stored atomicly.
//
- address generate_conjoint_copy(size_t size, bool aligned, bool is_oop, address nooverlap_target,
+ address generate_conjoint_copy(int size, bool aligned, bool is_oop, address nooverlap_target,
address *entry, const char *name,
bool dest_uninitialized = false) {
Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
@@ -1650,7 +1660,7 @@ class StubGenerator: public StubCodeGenerator {
address generate_disjoint_oop_copy(bool aligned, address *entry,
const char *name, bool dest_uninitialized) {
const bool is_oop = true;
- const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong);
+ const int size = UseCompressedOops ? sizeof (jint) : sizeof (jlong);
return generate_disjoint_copy(size, aligned, is_oop, entry, name, dest_uninitialized);
}
@@ -1668,7 +1678,7 @@ class StubGenerator: public StubCodeGenerator {
address nooverlap_target, address *entry,
const char *name, bool dest_uninitialized) {
const bool is_oop = true;
- const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong);
+ const int size = UseCompressedOops ? sizeof (jint) : sizeof (jlong);
return generate_conjoint_copy(size, aligned, is_oop, nooverlap_target, entry,
name, dest_uninitialized);
}
@@ -3299,6 +3309,225 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+ // Generates the SHA3 (Keccak) block-compression stub.
+ //
+ // Inputs:
+ // c_rarg0 - byte[] source+offset
+ // c_rarg1 - byte[] SHA.state (25 64-bit lanes, held in v0..v24)
+ // c_rarg2 - int digest_length (64, 48 or 28 per the branch comments below; presumably 32 for SHA3-256)
+ // c_rarg3 - int offset (read only when multi_block)
+ // c_rarg4 - int limit (read only when multi_block)
+ // Returns the stub entry point; the multi_block stub leaves the updated offset in c_rarg0.
+ address generate_sha3_implCompress(bool multi_block, const char *name) {
+ static const uint64_t round_consts[24] = { // one constant consumed per round
+ 0x0000000000000001L, 0x0000000000008082L, 0x800000000000808AL,
+ 0x8000000080008000L, 0x000000000000808BL, 0x0000000080000001L,
+ 0x8000000080008081L, 0x8000000000008009L, 0x000000000000008AL,
+ 0x0000000000000088L, 0x0000000080008009L, 0x000000008000000AL,
+ 0x000000008000808BL, 0x800000000000008BL, 0x8000000000008089L,
+ 0x8000000000008003L, 0x8000000000008002L, 0x8000000000000080L,
+ 0x000000000000800AL, 0x800000008000000AL, 0x8000000080008081L,
+ 0x8000000000008080L, 0x0000000080000001L, 0x8000000080008008L
+ };
+
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", name);
+ address start = __ pc();
+
+ Register buf = c_rarg0;
+ Register state = c_rarg1;
+ Register digest_length = c_rarg2;
+ Register ofs = c_rarg3;
+ Register limit = c_rarg4;
+
+ Label sha3_loop, rounds24_loop;
+ Label sha3_512, sha3_384_or_224, sha3_256;
+
+ __ stpd(v8, v9, __ pre(sp, -64)); // save d8..d15 in a 64-byte frame; reloaded before ret
+ __ stpd(v10, v11, Address(sp, 16));
+ __ stpd(v12, v13, Address(sp, 32));
+ __ stpd(v14, v15, Address(sp, 48));
+
+ // load state
+ __ add(rscratch1, state, 32); // rscratch1 walks the state; `state` itself stays unchanged for the store
+ __ ld1(v0, v1, v2, v3, __ T1D, state);
+ __ ld1(v4, v5, v6, v7, __ T1D, __ post(rscratch1, 32));
+ __ ld1(v8, v9, v10, v11, __ T1D, __ post(rscratch1, 32));
+ __ ld1(v12, v13, v14, v15, __ T1D, __ post(rscratch1, 32));
+ __ ld1(v16, v17, v18, v19, __ T1D, __ post(rscratch1, 32));
+ __ ld1(v20, v21, v22, v23, __ T1D, __ post(rscratch1, 32));
+ __ ld1(v24, __ T1D, rscratch1);
+
+ __ BIND(sha3_loop);
+
+ // 24 keccak rounds
+ __ movw(rscratch2, 24); // round counter, decremented at the top of rounds24_loop
+
+ // load round_constants base
+ __ lea(rscratch1, ExternalAddress((address) round_consts));
+
+ // load input
+ __ ld1(v25, v26, v27, v28, __ T8B, __ post(buf, 32)); // first 56 input bytes (7 lanes) are absorbed on every path
+ __ ld1(v29, v30, v31, __ T8B, __ post(buf, 24));
+ __ eor(v0, __ T8B, v0, v25);
+ __ eor(v1, __ T8B, v1, v26);
+ __ eor(v2, __ T8B, v2, v27);
+ __ eor(v3, __ T8B, v3, v28);
+ __ eor(v4, __ T8B, v4, v29);
+ __ eor(v5, __ T8B, v5, v30);
+ __ eor(v6, __ T8B, v6, v31);
+
+ // digest_length == 64, SHA3-512
+ __ tbnz(digest_length, 6, sha3_512); // of the listed lengths only 64 has bit 6 set
+
+ __ ld1(v25, v26, v27, v28, __ T8B, __ post(buf, 32)); // 6 more lanes: 13 lanes (104 bytes) absorbed so far
+ __ ld1(v29, v30, __ T8B, __ post(buf, 16));
+ __ eor(v7, __ T8B, v7, v25);
+ __ eor(v8, __ T8B, v8, v26);
+ __ eor(v9, __ T8B, v9, v27);
+ __ eor(v10, __ T8B, v10, v28);
+ __ eor(v11, __ T8B, v11, v29);
+ __ eor(v12, __ T8B, v12, v30);
+
+ // digest_length == 28, SHA3-224; digest_length == 48, SHA3-384
+ __ tbnz(digest_length, 4, sha3_384_or_224); // bit 4 is set in 28 and 48
+
+ // SHA3-256
+ __ ld1(v25, v26, v27, v28, __ T8B, __ post(buf, 32)); // 4 more lanes: 17 lanes (136 bytes) in total
+ __ eor(v13, __ T8B, v13, v25);
+ __ eor(v14, __ T8B, v14, v26);
+ __ eor(v15, __ T8B, v15, v27);
+ __ eor(v16, __ T8B, v16, v28);
+ __ b(rounds24_loop);
+
+ __ BIND(sha3_384_or_224);
+ __ tbz(digest_length, 2, rounds24_loop); // bit 2 cleared? SHA3-384 (13 lanes already absorbed)
+
+ // SHA3-224
+ __ ld1(v25, v26, v27, v28, __ T8B, __ post(buf, 32)); // 5 more lanes: 18 lanes (144 bytes) in total
+ __ ld1(v29, __ T8B, __ post(buf, 8));
+ __ eor(v13, __ T8B, v13, v25);
+ __ eor(v14, __ T8B, v14, v26);
+ __ eor(v15, __ T8B, v15, v27);
+ __ eor(v16, __ T8B, v16, v28);
+ __ eor(v17, __ T8B, v17, v29);
+ __ b(rounds24_loop);
+
+ __ BIND(sha3_512);
+ __ ld1(v25, v26, __ T8B, __ post(buf, 16)); // 2 more lanes: 9 lanes (72 bytes) in total
+ __ eor(v7, __ T8B, v7, v25);
+ __ eor(v8, __ T8B, v8, v26);
+
+ __ BIND(rounds24_loop);
+ __ subw(rscratch2, rscratch2, 1);
+
+ __ eor3(v29, __ T16B, v4, v9, v14); // v25..v29 accumulate the XOR of the five lanes of each column
+ __ eor3(v26, __ T16B, v1, v6, v11);
+ __ eor3(v28, __ T16B, v3, v8, v13);
+ __ eor3(v25, __ T16B, v0, v5, v10);
+ __ eor3(v27, __ T16B, v2, v7, v12);
+ __ eor3(v29, __ T16B, v29, v19, v24);
+ __ eor3(v26, __ T16B, v26, v16, v21);
+ __ eor3(v28, __ T16B, v28, v18, v23);
+ __ eor3(v25, __ T16B, v25, v15, v20);
+ __ eor3(v27, __ T16B, v27, v17, v22);
+
+ __ rax1(v30, __ T2D, v29, v26); // combine column parities pairwise (rax1: rotate-and-XOR, see Arm ISA reference)
+ __ rax1(v26, __ T2D, v26, v28);
+ __ rax1(v28, __ T2D, v28, v25);
+ __ rax1(v25, __ T2D, v25, v27);
+ __ rax1(v27, __ T2D, v27, v29);
+
+ __ eor(v0, __ T16B, v0, v30); // lane 0 only gets the XOR, no rotation
+ __ xar(v29, __ T2D, v1, v25, (64 - 1)); // xar: XOR then rotate; lanes also move to new registers here
+ __ xar(v1, __ T2D, v6, v25, (64 - 44));
+ __ xar(v6, __ T2D, v9, v28, (64 - 20));
+ __ xar(v9, __ T2D, v22, v26, (64 - 61));
+ __ xar(v22, __ T2D, v14, v28, (64 - 39));
+ __ xar(v14, __ T2D, v20, v30, (64 - 18));
+ __ xar(v31, __ T2D, v2, v26, (64 - 62));
+ __ xar(v2, __ T2D, v12, v26, (64 - 43));
+ __ xar(v12, __ T2D, v13, v27, (64 - 25));
+ __ xar(v13, __ T2D, v19, v28, (64 - 8));
+ __ xar(v19, __ T2D, v23, v27, (64 - 56));
+ __ xar(v23, __ T2D, v15, v30, (64 - 41));
+ __ xar(v15, __ T2D, v4, v28, (64 - 27));
+ __ xar(v28, __ T2D, v24, v28, (64 - 14));
+ __ xar(v24, __ T2D, v21, v25, (64 - 2));
+ __ xar(v8, __ T2D, v8, v27, (64 - 55));
+ __ xar(v4, __ T2D, v16, v25, (64 - 45));
+ __ xar(v16, __ T2D, v5, v30, (64 - 36));
+ __ xar(v5, __ T2D, v3, v27, (64 - 28));
+ __ xar(v27, __ T2D, v18, v27, (64 - 21));
+ __ xar(v3, __ T2D, v17, v26, (64 - 15));
+ __ xar(v25, __ T2D, v11, v25, (64 - 10));
+ __ xar(v26, __ T2D, v7, v26, (64 - 6));
+ __ xar(v30, __ T2D, v10, v30, (64 - 3));
+
+ __ bcax(v20, __ T16B, v31, v22, v8); // bcax (bit-clear-and-XOR) mixes each row of five lanes; rows done 5 at a time
+ __ bcax(v21, __ T16B, v8, v23, v22);
+ __ bcax(v22, __ T16B, v22, v24, v23);
+ __ bcax(v23, __ T16B, v23, v31, v24);
+ __ bcax(v24, __ T16B, v24, v8, v31);
+
+ __ ld1r(v31, __ T2D, __ post(rscratch1, 8)); // v31 is free again: load this round's constant and advance the table pointer
+
+ __ bcax(v17, __ T16B, v25, v19, v3);
+ __ bcax(v18, __ T16B, v3, v15, v19);
+ __ bcax(v19, __ T16B, v19, v16, v15);
+ __ bcax(v15, __ T16B, v15, v25, v16);
+ __ bcax(v16, __ T16B, v16, v3, v25);
+
+ __ bcax(v10, __ T16B, v29, v12, v26);
+ __ bcax(v11, __ T16B, v26, v13, v12);
+ __ bcax(v12, __ T16B, v12, v14, v13);
+ __ bcax(v13, __ T16B, v13, v29, v14);
+ __ bcax(v14, __ T16B, v14, v26, v29);
+
+ __ bcax(v7, __ T16B, v30, v9, v4);
+ __ bcax(v8, __ T16B, v4, v5, v9);
+ __ bcax(v9, __ T16B, v9, v6, v5);
+ __ bcax(v5, __ T16B, v5, v30, v6);
+ __ bcax(v6, __ T16B, v6, v4, v30);
+
+ __ bcax(v3, __ T16B, v27, v0, v28);
+ __ bcax(v4, __ T16B, v28, v1, v0);
+ __ bcax(v0, __ T16B, v0, v2, v1);
+ __ bcax(v1, __ T16B, v1, v27, v2);
+ __ bcax(v2, __ T16B, v2, v28, v27);
+
+ __ eor(v0, __ T16B, v0, v31); // XOR the round constant into lane 0
+
+ __ cbnzw(rscratch2, rounds24_loop); // repeat until all 24 rounds are done
+
+ if (multi_block) {
+ // block_size = 200 - 2 * digest_length, ofs += block_size
+ __ add(ofs, ofs, 200);
+ __ sub(ofs, ofs, digest_length, Assembler::LSL, 1);
+
+ __ cmp(ofs, limit);
+ __ br(Assembler::LE, sha3_loop); // absorb another block while ofs <= limit
+ __ mov(c_rarg0, ofs); // return ofs
+ }
+
+ __ st1(v0, v1, v2, v3, __ T1D, __ post(state, 32)); // write the 25 lanes back to SHA.state
+ __ st1(v4, v5, v6, v7, __ T1D, __ post(state, 32));
+ __ st1(v8, v9, v10, v11, __ T1D, __ post(state, 32));
+ __ st1(v12, v13, v14, v15, __ T1D, __ post(state, 32));
+ __ st1(v16, v17, v18, v19, __ T1D, __ post(state, 32));
+ __ st1(v20, v21, v22, v23, __ T1D, __ post(state, 32));
+ __ st1(v24, __ T1D, state);
+
+ __ ldpd(v14, v15, Address(sp, 48)); // reload d8..d15 saved on entry and pop the frame
+ __ ldpd(v12, v13, Address(sp, 32));
+ __ ldpd(v10, v11, Address(sp, 16));
+ __ ldpd(v8, v9, __ post(sp, 64));
+
+ __ ret(lr);
+
+ return start;
+ }
+
// Safefetch stubs.
void generate_safefetch(const char* name, int size, address* entry,
address* fault_pc, address* continuation_pc) {
@@ -3739,6 +3968,238 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+ // Generates the stub "bigIntegerRightShiftWorker" (arrays are accessed as 32-bit words).
+ //
+ // Input:
+ // c_rarg0 - newArr address
+ // c_rarg1 - oldArr address
+ // c_rarg2 - newIdx
+ // c_rarg3 - shiftCount
+ // c_rarg4 - numIter
+ // For i in [0, numIter): newArr[newIdx + i] = (oldArr[i + 1] >>> shiftCount) | (oldArr[i] << (32 - shiftCount)).
+ address generate_bigIntegerRightShift() {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker");
+ address start = __ pc();
+
+ Label ShiftSIMDLoop, ShiftTwoLoop, ShiftThree, ShiftTwo, ShiftOne, Exit;
+
+ Register newArr = c_rarg0;
+ Register oldArr = c_rarg1;
+ Register newIdx = c_rarg2;
+ Register shiftCount = c_rarg3;
+ Register numIter = c_rarg4;
+ Register idx = numIter; // alias: the remaining-word count doubles as the running index
+
+ Register newArrCur = rscratch1;
+ Register shiftRevCount = rscratch2;
+ Register oldArrCur = r13;
+ Register oldArrNext = r14;
+
+ FloatRegister oldElem0 = v0;
+ FloatRegister oldElem1 = v1;
+ FloatRegister newElem = v2;
+ FloatRegister shiftVCount = v3;
+ FloatRegister shiftVRevCount = v4;
+
+ __ cbz(idx, Exit); // nothing to do for numIter == 0
+
+ __ add(newArr, newArr, newIdx, Assembler::LSL, 2); // newArr += newIdx * 4 bytes
+
+ // left shift count
+ __ movw(shiftRevCount, 32);
+ __ subw(shiftRevCount, shiftRevCount, shiftCount); // shiftRevCount = 32 - shiftCount
+
+ // numIter too small to allow a 4-words SIMD loop, rolling back
+ __ cmp(numIter, (u1)4);
+ __ br(Assembler::LT, ShiftThree);
+
+ __ dup(shiftVCount, __ T4S, shiftCount);
+ __ dup(shiftVRevCount, __ T4S, shiftRevCount);
+ __ negr(shiftVCount, __ T4S, shiftVCount); // negated so that ushl below shifts right
+
+ __ BIND(ShiftSIMDLoop);
+
+ // Calculate the load addresses
+ __ sub(idx, idx, 4); // work downwards from the highest index
+ __ add(oldArrNext, oldArr, idx, Assembler::LSL, 2); // &oldArr[idx]
+ __ add(newArrCur, newArr, idx, Assembler::LSL, 2); // &newArr[idx]
+ __ add(oldArrCur, oldArrNext, 4); // &oldArr[idx + 1]
+
+ // Load 4 words and process
+ __ ld1(oldElem0, __ T4S, Address(oldArrCur));
+ __ ld1(oldElem1, __ T4S, Address(oldArrNext));
+ __ ushl(oldElem0, __ T4S, oldElem0, shiftVCount);
+ __ ushl(oldElem1, __ T4S, oldElem1, shiftVRevCount);
+ __ orr(newElem, __ T16B, oldElem0, oldElem1);
+ __ st1(newElem, __ T4S, Address(newArrCur));
+
+ __ cmp(idx, (u1)4);
+ __ br(Assembler::LT, ShiftTwoLoop); // fewer than 4 words left: finish with the narrower loop
+ __ b(ShiftSIMDLoop);
+
+ __ BIND(ShiftTwoLoop);
+ __ cbz(idx, Exit);
+ __ cmp(idx, (u1)1);
+ __ br(Assembler::EQ, ShiftOne); // a single word left is handled by the scalar tail
+
+ // Calculate the load addresses
+ __ sub(idx, idx, 2);
+ __ add(oldArrNext, oldArr, idx, Assembler::LSL, 2);
+ __ add(newArrCur, newArr, idx, Assembler::LSL, 2);
+ __ add(oldArrCur, oldArrNext, 4);
+
+ // Load 2 words and process
+ __ ld1(oldElem0, __ T2S, Address(oldArrCur));
+ __ ld1(oldElem1, __ T2S, Address(oldArrNext));
+ __ ushl(oldElem0, __ T2S, oldElem0, shiftVCount);
+ __ ushl(oldElem1, __ T2S, oldElem1, shiftVRevCount);
+ __ orr(newElem, __ T8B, oldElem0, oldElem1);
+ __ st1(newElem, __ T2S, Address(newArrCur));
+ __ b(ShiftTwoLoop);
+
+ __ BIND(ShiftThree); // reached only with 1 <= numIter <= 3
+ __ tbz(idx, 1, ShiftOne); // bit 1 clear: idx == 1
+ __ tbz(idx, 0, ShiftTwo); // bit 0 clear: idx == 2; otherwise idx == 3 and all three tails run
+ __ ldrw(r10, Address(oldArr, 12)); // oldArr[3]
+ __ ldrw(r11, Address(oldArr, 8)); // oldArr[2]
+ __ lsrvw(r10, r10, shiftCount);
+ __ lslvw(r11, r11, shiftRevCount);
+ __ orrw(r12, r10, r11);
+ __ strw(r12, Address(newArr, 8)); // word 2 of the destination
+
+ __ BIND(ShiftTwo);
+ __ ldrw(r10, Address(oldArr, 8));
+ __ ldrw(r11, Address(oldArr, 4));
+ __ lsrvw(r10, r10, shiftCount);
+ __ lslvw(r11, r11, shiftRevCount);
+ __ orrw(r12, r10, r11);
+ __ strw(r12, Address(newArr, 4)); // word 1 of the destination
+
+ __ BIND(ShiftOne);
+ __ ldrw(r10, Address(oldArr, 4));
+ __ ldrw(r11, Address(oldArr));
+ __ lsrvw(r10, r10, shiftCount);
+ __ lslvw(r11, r11, shiftRevCount);
+ __ orrw(r12, r10, r11);
+ __ strw(r12, Address(newArr)); // word 0 of the destination
+
+ __ BIND(Exit);
+ __ ret(lr);
+
+ return start;
+ }
+
+ // Generates the stub "bigIntegerLeftShiftWorker" (arrays are accessed as 32-bit words).
+ //
+ // Input:
+ // c_rarg0 - newArr address
+ // c_rarg1 - oldArr address
+ // c_rarg2 - newIdx
+ // c_rarg3 - shiftCount
+ // c_rarg4 - numIter
+ // For i in [0, numIter): newArr[newIdx + i] = (oldArr[i] << shiftCount) | (oldArr[i + 1] >>> (32 - shiftCount)).
+ address generate_bigIntegerLeftShift() {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "bigIntegerLeftShiftWorker");
+ address start = __ pc();
+
+ Label ShiftSIMDLoop, ShiftTwoLoop, ShiftThree, ShiftTwo, ShiftOne, Exit;
+
+ Register newArr = c_rarg0;
+ Register oldArr = c_rarg1;
+ Register newIdx = c_rarg2;
+ Register shiftCount = c_rarg3;
+ Register numIter = c_rarg4;
+
+ Register shiftRevCount = rscratch1;
+ Register oldArrNext = rscratch2;
+
+ FloatRegister oldElem0 = v0;
+ FloatRegister oldElem1 = v1;
+ FloatRegister newElem = v2;
+ FloatRegister shiftVCount = v3;
+ FloatRegister shiftVRevCount = v4;
+
+ __ cbz(numIter, Exit); // nothing to do for numIter == 0
+
+ __ add(oldArrNext, oldArr, 4); // oldArrNext stays one word ahead of oldArr
+ __ add(newArr, newArr, newIdx, Assembler::LSL, 2); // newArr += newIdx * 4 bytes
+
+ // right shift count
+ __ movw(shiftRevCount, 32);
+ __ subw(shiftRevCount, shiftRevCount, shiftCount); // shiftRevCount = 32 - shiftCount
+
+ // numIter too small to allow a 4-words SIMD loop, rolling back
+ __ cmp(numIter, (u1)4);
+ __ br(Assembler::LT, ShiftThree);
+
+ __ dup(shiftVCount, __ T4S, shiftCount);
+ __ dup(shiftVRevCount, __ T4S, shiftRevCount);
+ __ negr(shiftVRevCount, __ T4S, shiftVRevCount); // negated so that ushl below shifts right
+
+ __ BIND(ShiftSIMDLoop);
+
+ // load 4 words and process
+ __ ld1(oldElem0, __ T4S, __ post(oldArr, 16));
+ __ ld1(oldElem1, __ T4S, __ post(oldArrNext, 16));
+ __ ushl(oldElem0, __ T4S, oldElem0, shiftVCount);
+ __ ushl(oldElem1, __ T4S, oldElem1, shiftVRevCount);
+ __ orr(newElem, __ T16B, oldElem0, oldElem1);
+ __ st1(newElem, __ T4S, __ post(newArr, 16));
+ __ sub(numIter, numIter, 4);
+
+ __ cmp(numIter, (u1)4);
+ __ br(Assembler::LT, ShiftTwoLoop); // fewer than 4 words left: finish with the narrower loop
+ __ b(ShiftSIMDLoop);
+
+ __ BIND(ShiftTwoLoop);
+ __ cbz(numIter, Exit);
+ __ cmp(numIter, (u1)1);
+ __ br(Assembler::EQ, ShiftOne); // a single word left is handled by the scalar tail
+
+ // load 2 words and process
+ __ ld1(oldElem0, __ T2S, __ post(oldArr, 8));
+ __ ld1(oldElem1, __ T2S, __ post(oldArrNext, 8));
+ __ ushl(oldElem0, __ T2S, oldElem0, shiftVCount);
+ __ ushl(oldElem1, __ T2S, oldElem1, shiftVRevCount);
+ __ orr(newElem, __ T8B, oldElem0, oldElem1);
+ __ st1(newElem, __ T2S, __ post(newArr, 8));
+ __ sub(numIter, numIter, 2);
+ __ b(ShiftTwoLoop);
+
+ __ BIND(ShiftThree); // reached only with 1 <= numIter <= 3; always handles one word first
+ __ ldrw(r10, __ post(oldArr, 4));
+ __ ldrw(r11, __ post(oldArrNext, 4));
+ __ lslvw(r10, r10, shiftCount);
+ __ lsrvw(r11, r11, shiftRevCount);
+ __ orrw(r12, r10, r11);
+ __ strw(r12, __ post(newArr, 4));
+ __ tbz(numIter, 1, Exit); // bit 1 clear: numIter == 1, already done
+ __ tbz(numIter, 0, ShiftOne); // bit 0 clear: numIter == 2, one word left; otherwise 3, two words left
+
+ __ BIND(ShiftTwo);
+ __ ldrw(r10, __ post(oldArr, 4));
+ __ ldrw(r11, __ post(oldArrNext, 4));
+ __ lslvw(r10, r10, shiftCount);
+ __ lsrvw(r11, r11, shiftRevCount);
+ __ orrw(r12, r10, r11);
+ __ strw(r12, __ post(newArr, 4));
+
+ __ BIND(ShiftOne);
+ __ ldrw(r10, Address(oldArr));
+ __ ldrw(r11, Address(oldArrNext));
+ __ lslvw(r10, r10, shiftCount);
+ __ lsrvw(r11, r11, shiftRevCount);
+ __ orrw(r12, r10, r11);
+ __ strw(r12, Address(newArr));
+
+ __ BIND(Exit);
+ __ ret(lr);
+
+ return start;
+ }
+
void ghash_multiply(FloatRegister result_lo, FloatRegister result_hi,
FloatRegister a, FloatRegister b, FloatRegister a1_xor_a0,
FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3, FloatRegister tmp4) {
@@ -4942,6 +5403,150 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+ void generate_base64_encode_simdround(Register src, Register dst,
+ FloatRegister codec, u8 size) {
+ // Emits one SIMD encode round: reads 3 * size bytes at src, writes 4 * size bytes at dst; both are post-incremented.
+ FloatRegister in0 = v4, in1 = v5, in2 = v6;
+ FloatRegister out0 = v16, out1 = v17, out2 = v18, out3 = v19;
+ FloatRegister ind0 = v20, ind1 = v21, ind2 = v22, ind3 = v23;
+
+ Assembler::SIMD_Arrangement arrangement = size == 16 ? __ T16B : __ T8B; // any size other than 16 selects the 8-byte form
+
+ __ ld3(in0, in1, in2, arrangement, __ post(src, 3 * size)); // de-interleave: in0/in1/in2 get byte 0/1/2 of each 3-byte group
+
+ __ ushr(ind0, arrangement, in0, 2); // ind0 = top 6 bits of byte 0
+
+ __ ushr(ind1, arrangement, in1, 2); // ind1 = low 2 bits of byte 0, then top 4 bits of byte 1
+ __ shl(in0, arrangement, in0, 6);
+ __ orr(ind1, arrangement, ind1, in0);
+ __ ushr(ind1, arrangement, ind1, 2);
+
+ __ ushr(ind2, arrangement, in2, 4); // ind2 = low 4 bits of byte 1, then top 2 bits of byte 2
+ __ shl(in1, arrangement, in1, 4);
+ __ orr(ind2, arrangement, in1, ind2);
+ __ ushr(ind2, arrangement, ind2, 2);
+
+ __ shl(ind3, arrangement, in2, 2); // ind3 = low 6 bits of byte 2
+ __ ushr(ind3, arrangement, ind3, 2);
+
+ __ tbl(out0, arrangement, codec, 4, ind0); // look up each 6-bit index in the 4-register table starting at codec
+ __ tbl(out1, arrangement, codec, 4, ind1);
+ __ tbl(out2, arrangement, codec, 4, ind2);
+ __ tbl(out3, arrangement, codec, 4, ind3);
+
+ __ st4(out0, out1, out2, out3, arrangement, __ post(dst, 4 * size)); // re-interleave the four output characters per group
+ }
+
+ /**
+ * Generates the stub "encodeBlock" (Base64 encoding of a byte range).
+ *
+ * Input:
+ * c_rarg0 - src_start
+ * c_rarg1 - src_offset
+ * c_rarg2 - src_end (end offset; length is computed as src_end - src_offset)
+ * c_rarg3 - dest_start
+ * c_rarg4 - dest_offset
+ * c_rarg5 - isURL (non-zero selects the URL-safe table)
+ *
+ */
+ address generate_base64_encodeBlock() {
+
+ static const char toBase64[64] = {
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+ 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+ 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
+ };
+
+ static const char toBase64URL[64] = { // differs from toBase64 only in the last two entries
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+ 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+ 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_'
+ };
+
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "encodeBlock");
+ address start = __ pc();
+
+ Register src = c_rarg0; // source array
+ Register soff = c_rarg1; // source start offset
+ Register send = c_rarg2; // source end offset
+ Register dst = c_rarg3; // dest array
+ Register doff = c_rarg4; // position for writing to dest array
+ Register isURL = c_rarg5; // Base64 or URL character set
+
+ // c_rarg6 and c_rarg7 are free to use as temps
+ Register codec = c_rarg6;
+ Register length = c_rarg7;
+
+ Label ProcessData, Process48B, Process24B, Process3B, SIMDExit, Exit;
+
+ __ add(src, src, soff); // src and dst now point at the first byte to read / write
+ __ add(dst, dst, doff);
+ __ sub(length, send, soff); // number of source bytes still to encode
+
+ // load the codec base address
+ __ lea(codec, ExternalAddress((address) toBase64));
+ __ cbz(isURL, ProcessData);
+ __ lea(codec, ExternalAddress((address) toBase64URL));
+
+ __ BIND(ProcessData);
+
+ // too short to form a SIMD loop, roll back
+ __ cmp(length, (u1)24);
+ __ br(Assembler::LT, Process3B);
+
+ __ ld1(v0, v1, v2, v3, __ T16B, Address(codec)); // 64-byte table into v0..v3 for the tbl lookups
+
+ __ BIND(Process48B);
+ __ cmp(length, (u1)48);
+ __ br(Assembler::LT, Process24B);
+ generate_base64_encode_simdround(src, dst, v0, 16); // 48 source bytes per round
+ __ sub(length, length, 48);
+ __ b(Process48B);
+
+ __ BIND(Process24B);
+ __ cmp(length, (u1)24);
+ __ br(Assembler::LT, SIMDExit);
+ generate_base64_encode_simdround(src, dst, v0, 8); // at most one 24-byte round (length < 48 here)
+ __ sub(length, length, 24);
+
+ __ BIND(SIMDExit);
+ __ cbz(length, Exit);
+
+ __ BIND(Process3B);
+ // 3 src bytes, 24 bits
+ // NOTE(review): this loop only exits when length reaches exactly 0, so it appears to
+ // assume a positive multiple of 3 on entry - confirm the caller guarantees this.
+ __ ldrb(r10, __ post(src, 1));
+ __ ldrb(r11, __ post(src, 1));
+ __ ldrb(r12, __ post(src, 1));
+ __ orrw(r11, r11, r10, Assembler::LSL, 8);
+ __ orrw(r12, r12, r11, Assembler::LSL, 8); // r12 = byte0 << 16 | byte1 << 8 | byte2
+ // codec index
+ __ ubfmw(r15, r12, 18, 23); // four 6-bit fields, most significant first: r15, r14, r13, r12
+ __ ubfmw(r14, r12, 12, 17);
+ __ ubfmw(r13, r12, 6, 11);
+ __ andw(r12, r12, 63);
+ // get the code based on the codec
+ __ ldrb(r15, Address(codec, r15, Address::uxtw(0)));
+ __ ldrb(r14, Address(codec, r14, Address::uxtw(0)));
+ __ ldrb(r13, Address(codec, r13, Address::uxtw(0)));
+ __ ldrb(r12, Address(codec, r12, Address::uxtw(0)));
+ __ strb(r15, __ post(dst, 1));
+ __ strb(r14, __ post(dst, 1));
+ __ strb(r13, __ post(dst, 1));
+ __ strb(r12, __ post(dst, 1));
+ __ sub(length, length, 3);
+ __ cbnz(length, Process3B);
+
+ __ BIND(Exit);
+ __ ret(lr);
+
+ return start;
+ }
+
// Continuation point for throwing of implicit exceptions that are
// not handled in the current activation. Fabricates an exception
// oop and initiates normal exception dispatching in this
@@ -5958,6 +6563,8 @@ class StubGenerator: public StubCodeGenerator {
SharedRuntime::
throw_NullPointerException_at_call));
+ StubRoutines::aarch64::_vector_iota_indices = generate_iota_indices("iota_indices");
+
// arraycopy stubs used by compilers
generate_arraycopy_stubs();
@@ -5993,6 +6600,11 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_mulAdd = generate_mulAdd();
}
+ if (UseSIMDForBigIntegerShiftIntrinsics) {
+ StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift();
+ StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift();
+ }
+
if (UseMontgomeryMultiplyIntrinsic) {
StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply");
MontgomeryMultiplyGenerator g(_masm, /*squaring*/false);
@@ -6013,6 +6625,10 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
}
+ if (UseBASE64Intrinsics) {
+ StubRoutines::_base64_encodeBlock = generate_base64_encodeBlock();
+ }
+
// data cache line writeback
StubRoutines::_data_cache_writeback = generate_data_cache_writeback();
StubRoutines::_data_cache_writeback_sync = generate_data_cache_writeback_sync();
@@ -6036,6 +6652,10 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress");
StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB");
}
+ if (UseSHA3Intrinsics) {
+ StubRoutines::_sha3_implCompress = generate_sha3_implCompress(false, "sha3_implCompress");
+ StubRoutines::_sha3_implCompressMB = generate_sha3_implCompress(true, "sha3_implCompressMB");
+ }
// generate Adler32 intrinsics code
if (UseAdler32Intrinsics) {
diff --git a/src/hotspot/cpu/aarch64/stubRoutines_aarch64.cpp b/src/hotspot/cpu/aarch64/stubRoutines_aarch64.cpp
index b2d0d5dbff8..f471209a4c0 100644
--- a/src/hotspot/cpu/aarch64/stubRoutines_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/stubRoutines_aarch64.cpp
@@ -40,6 +40,7 @@ address StubRoutines::aarch64::_f2i_fixup = NULL;
address StubRoutines::aarch64::_f2l_fixup = NULL;
address StubRoutines::aarch64::_d2i_fixup = NULL;
address StubRoutines::aarch64::_d2l_fixup = NULL;
+address StubRoutines::aarch64::_vector_iota_indices = NULL;
address StubRoutines::aarch64::_float_sign_mask = NULL;
address StubRoutines::aarch64::_float_sign_flip = NULL;
address StubRoutines::aarch64::_double_sign_mask = NULL;
diff --git a/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp b/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp
index 4ace7b5c808..6960a19b3f5 100644
--- a/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp
@@ -51,6 +51,7 @@ class aarch64 {
static address _d2i_fixup;
static address _d2l_fixup;
+ static address _vector_iota_indices;
static address _float_sign_mask;
static address _float_sign_flip;
static address _double_sign_mask;
@@ -106,6 +107,10 @@ class aarch64 {
return _d2l_fixup;
}
+ static address vector_iota_indices() { // accessor: returns the address held in _vector_iota_indices
+ return _vector_iota_indices;
+ }
+
static address float_sign_mask()
{
return _float_sign_mask;
diff --git a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp
index 21566592a9f..874d8ce2766 100644
--- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp
@@ -980,7 +980,7 @@ address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
Label slow_path;
// If we need a safepoint check, generate full interpreter entry.
- __ safepoint_poll(slow_path);
+ __ safepoint_poll(slow_path, false /* at_return */, false /* acquire */, false /* in_nmethod */);
// We don't generate local frame and don't align stack because
// we call stub code and there is no safepoint on this path.
@@ -1029,7 +1029,7 @@ address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractI
Label slow_path;
// If we need a safepoint check, generate full interpreter entry.
- __ safepoint_poll(slow_path);
+ __ safepoint_poll(slow_path, false /* at_return */, false /* acquire */, false /* in_nmethod */);
// We don't generate local frame and don't align stack because
// we call stub code and there is no safepoint on this path.
@@ -1120,7 +1120,7 @@ void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) {
// an interpreter frame with greater than a page of locals, so each page
// needs to be checked. Only true for non-native.
if (UseStackBanging) {
- const int n_shadow_pages = StackOverflow::stack_shadow_zone_size() / os::vm_page_size();
+ const int n_shadow_pages = (int)(StackOverflow::stack_shadow_zone_size() / os::vm_page_size());
const int start_page = native_call ? n_shadow_pages : 1;
const int page_size = os::vm_page_size();
for (int pages = start_page; pages <= n_shadow_pages ; pages++) {
@@ -1388,7 +1388,16 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
// check for safepoint operation in progress and/or pending suspend requests
{
Label L, Continue;
- __ safepoint_poll_acquire(L);
+
+ // We need an acquire here to ensure that any subsequent load of the
+ // global SafepointSynchronize::_state flag is ordered after this load
+ // of the thread-local polling word. We don't want this poll to
+ // return false (i.e. not safepointing) and a later poll of the global
+ // SafepointSynchronize::_state spuriously to return true.
+ //
+ // This is to avoid a race when we're in a native->Java transition
+ // racing the code which wakes up from a safepoint.
+ __ safepoint_poll(L, true /* at_return */, true /* acquire */, false /* in_nmethod */);
__ ldrw(rscratch2, Address(rthread, JavaThread::suspend_flags_offset()));
__ cbz(rscratch2, Continue);
__ bind(L);
diff --git a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp
index 842f07ae9a0..811783fcb7d 100644
--- a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp
@@ -1906,7 +1906,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide)
__ dispatch_only(vtos, /*generate_poll*/true);
if (UseLoopCounter) {
- if (ProfileInterpreter) {
+ if (ProfileInterpreter && !TieredCompilation) {
// Out-of-line code to allocate method data oop.
__ bind(profile_method);
__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
index 343a2bbd50f..2a6553d9c21 100644
--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
@@ -181,10 +181,6 @@ void VM_Version::initialize() {
}
if (_cpu == CPU_ARM && (_model == 0xd07 || _model2 == 0xd07)) _features |= CPU_STXR_PREFETCH;
- // If an olde style /proc/cpuinfo (cores == 1) then if _model is an A57 (0xd07)
- // we assume the worst and assume we could be on a big little system and have
- // undisclosed A53 cores which we could be swapped to at any stage
- if (_cpu == CPU_ARM && os::processor_count() == 1 && _model == 0xd07) _features |= CPU_A53MAC;
char buf[512];
sprintf(buf, "0x%02x:0x%x:0x%03x:%d", _cpu, _variant, _model, _revision);
@@ -194,6 +190,7 @@ void VM_Version::initialize() {
if (_features & CPU_AES) strcat(buf, ", aes");
if (_features & CPU_SHA1) strcat(buf, ", sha1");
if (_features & CPU_SHA2) strcat(buf, ", sha256");
+ if (_features & CPU_SHA3) strcat(buf, ", sha3");
if (_features & CPU_SHA512) strcat(buf, ", sha512");
if (_features & CPU_LSE) strcat(buf, ", lse");
if (_features & CPU_SVE) strcat(buf, ", sve");
@@ -275,7 +272,7 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseMD5Intrinsics, false);
}
- if (_features & (CPU_SHA1 | CPU_SHA2)) {
+ if (_features & (CPU_SHA1 | CPU_SHA2 | CPU_SHA3 | CPU_SHA512)) {
if (FLAG_IS_DEFAULT(UseSHA)) {
FLAG_SET_DEFAULT(UseSHA, true);
}
@@ -302,6 +299,16 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
}
+ if (UseSHA && (_features & CPU_SHA3)) {
+ // Do not auto-enable UseSHA3Intrinsics until it has been fully tested on hardware
+ // if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
+ // FLAG_SET_DEFAULT(UseSHA3Intrinsics, true);
+ // }
+ } else if (UseSHA3Intrinsics) {
+ warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
+ FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
+ }
+
if (UseSHA && (_features & CPU_SHA512)) {
// Do not auto-enable UseSHA512Intrinsics until it has been fully tested on hardware
// if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
@@ -312,7 +319,7 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
}
- if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
+ if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA3Intrinsics || UseSHA512Intrinsics)) {
FLAG_SET_DEFAULT(UseSHA, false);
}
@@ -325,6 +332,10 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
}
+ if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
+ UseBASE64Intrinsics = true;
+ }
+
if (is_zva_enabled()) {
if (FLAG_IS_DEFAULT(UseBlockZeroing)) {
FLAG_SET_DEFAULT(UseBlockZeroing, true);
@@ -390,7 +401,7 @@ void VM_Version::initialize() {
warning("SVE does not support vector length less than 16 bytes. Disabling SVE.");
UseSVE = 0;
} else if ((MaxVectorSize % 16) == 0 && is_power_of_2(MaxVectorSize)) {
- int new_vl = set_and_get_current_sve_vector_lenght(MaxVectorSize);
+ int new_vl = set_and_get_current_sve_vector_length(MaxVectorSize);
_initial_sve_vector_length = new_vl;
// Update MaxVectorSize to the largest supported value.
if (new_vl < 0) {
diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp
index 292550529b4..45838f87072 100644
--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp
@@ -51,7 +51,7 @@ class VM_Version : public Abstract_VM_Version {
// Sets the SVE length and returns a new actual value or negative on error.
// If the len is larger than the system largest supported SVE vector length,
// the function sets the largest supported value.
- static int set_and_get_current_sve_vector_lenght(int len);
+ static int set_and_get_current_sve_vector_length(int len);
static int get_current_sve_vector_length();
public:
@@ -103,6 +103,7 @@ class VM_Version : public Abstract_VM_Version {
CPU_CRC32 = (1<<7),
CPU_LSE = (1<<8),
CPU_DCPOP = (1<<16),
+ CPU_SHA3 = (1<<17),
CPU_SHA512 = (1<<21),
CPU_SVE = (1<<22),
// flags above must follow Linux HWCAP
@@ -128,6 +129,7 @@ class VM_Version : public Abstract_VM_Version {
static int get_initial_sve_vector_length() { return _initial_sve_vector_length; };
static bool supports_fast_class_init_checks() { return true; }
+ constexpr static bool supports_stack_watermark_barrier() { return true; }
};
#endif // CPU_AARCH64_VM_VERSION_AARCH64_HPP
diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad
index 4c237673181..b7c6ec48896 100644
--- a/src/hotspot/cpu/arm/arm.ad
+++ b/src/hotspot/cpu/arm/arm.ad
@@ -993,6 +993,10 @@ const bool Matcher::has_predicated_vectors(void) {
return false;
}
+bool Matcher::supports_vector_variable_shifts(void) { // supported exactly when VM_Version::has_simd() reports true
+ return VM_Version::has_simd();
+}
+
const int Matcher::float_pressure(int default_pressure_threshold) {
return default_pressure_threshold;
}
@@ -1155,10 +1159,6 @@ const bool Matcher::rematerialize_float_constants = false;
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = false;
-// No-op on ARM.
-void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
-}
-
// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;
@@ -1663,7 +1663,6 @@ frame %{
// These two registers define part of the calling convention
// between compiled code and the interpreter.
inline_cache_reg(R_Ricklass); // Inline Cache Register or Method* for I2C
- interpreter_method_reg(R_Rmethod); // Method Register when calling interpreter
// Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
cisc_spilling_operand_name(indOffset);
@@ -2523,14 +2522,6 @@ operand inline_cache_regP(iRegP reg) %{
interface(REG_INTER);
%}
-operand interpreter_method_regP(iRegP reg) %{
- constraint(ALLOC_IN_RC(Rmethod_regP));
- match(reg);
- format %{ %}
- interface(REG_INTER);
-%}
-
-
//----------Complex Operands---------------------------------------------------
// Indirect Memory Reference
operand indirect(sp_ptr_RegP reg) %{
diff --git a/src/hotspot/cpu/arm/arm_32.ad b/src/hotspot/cpu/arm/arm_32.ad
index 177c1a7cae0..09fce8c4c4f 100644
--- a/src/hotspot/cpu/arm/arm_32.ad
+++ b/src/hotspot/cpu/arm/arm_32.ad
@@ -182,11 +182,11 @@ alloc_class chunk0(
alloc_class chunk1(
R_S16, R_S17, R_S18, R_S19, R_S20, R_S21, R_S22, R_S23,
R_S24, R_S25, R_S26, R_S27, R_S28, R_S29, R_S30, R_S31,
- R_S0, R_S1, R_S2, R_S3, R_S4, R_S5, R_S6, R_S7,
+ R_S0, R_S1, R_S2, R_S3, R_S4, R_S5, R_S6, R_S7,
R_S8, R_S9, R_S10, R_S11, R_S12, R_S13, R_S14, R_S15,
- R_D16, R_D16x,R_D17, R_D17x,R_D18, R_D18x,R_D19, R_D19x,
- R_D20, R_D20x,R_D21, R_D21x,R_D22, R_D22x,R_D23, R_D23x,
- R_D24, R_D24x,R_D25, R_D25x,R_D26, R_D26x,R_D27, R_D27x,
+ R_D16, R_D16x,R_D17, R_D17x,R_D18, R_D18x,R_D19, R_D19x,
+ R_D20, R_D20x,R_D21, R_D21x,R_D22, R_D22x,R_D23, R_D23x,
+ R_D24, R_D24x,R_D25, R_D25x,R_D26, R_D26x,R_D27, R_D27x,
R_D28, R_D28x,R_D29, R_D29x,R_D30, R_D30x,R_D31, R_D31x
);
@@ -196,8 +196,7 @@ alloc_class chunk2(APSR, FPSCR);
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( as defined in frame section )
-// 2) reg_class interpreter_method_reg ( as defined in frame section )
-// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
+// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// ----------------------------
@@ -223,7 +222,6 @@ reg_class ptr_reg(R_R0, R_R1, R_R2, R_R3, R_R4, R_R5, R_R6, R_R7, R_R8, R_R9, R_
reg_class sp_ptr_reg(R_R0, R_R1, R_R2, R_R3, R_R4, R_R5, R_R6, R_R7, R_R8, R_R9, R_R11, R_R12, R_R14, R_R10 /* TLS*/, R_R13 /* SP*/);
#define R_Ricklass R_R8
-#define R_Rmethod R_R9
#define R_Rthread R_R10
#define R_Rexception_obj R_R4
@@ -237,7 +235,6 @@ reg_class R9_regP(R_R9);
reg_class R12_regP(R_R12);
reg_class Rexception_regP(R_Rexception_obj);
reg_class Ricklass_regP(R_Ricklass);
-reg_class Rmethod_regP(R_Rmethod);
reg_class Rthread_regP(R_Rthread);
reg_class IP_regP(R_R12);
reg_class SP_regP(R_R13);
@@ -442,7 +439,7 @@ int MachCallStaticJavaNode::ret_addr_offset() {
int MachCallDynamicJavaNode::ret_addr_offset() {
bool far = !cache_reachable();
// mov_oop is always 2 words
- return (2 + (far ? 3 : 1)) * NativeInstruction::instruction_size;
+ return (2 + (far ? 3 : 1)) * NativeInstruction::instruction_size;
}
int MachCallRuntimeNode::ret_addr_offset() {
diff --git a/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp b/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp
index 915eb73730c..6b390c1cda3 100644
--- a/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp
+++ b/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp
@@ -38,6 +38,10 @@
#define __ ce->masm()->
+void C1SafepointPollStub::emit_code(LIR_Assembler* ce) { // body is only ShouldNotReachHere(): this stub is not expected to be emitted in the ARM port
+ ShouldNotReachHere();
+}
+
void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
ce->store_parameter(_bci, 0);
diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp
index 7b0794afc9f..f9b5fc69a89 100644
--- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp
+++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp
@@ -283,7 +283,7 @@ int LIR_Assembler::emit_deopt_handler() {
}
-void LIR_Assembler::return_op(LIR_Opr result) {
+void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) {
// Pop the frame before safepoint polling
__ remove_frame(initial_frame_size_in_bytes());
__ read_polling_page(Rtemp, relocInfo::poll_return_type);
diff --git a/src/hotspot/cpu/arm/c1_globals_arm.hpp b/src/hotspot/cpu/arm/c1_globals_arm.hpp
index 8141870536b..7077a87092c 100644
--- a/src/hotspot/cpu/arm/c1_globals_arm.hpp
+++ b/src/hotspot/cpu/arm/c1_globals_arm.hpp
@@ -35,8 +35,6 @@
#ifndef COMPILER2 // avoid duplicated definitions, favoring C2 version
define_pd_global(bool, BackgroundCompilation, true );
-define_pd_global(bool, UseTLAB, true );
-define_pd_global(bool, ResizeTLAB, true );
define_pd_global(bool, InlineIntrinsics, false); // TODO: ARM
define_pd_global(bool, PreferInterpreterNativeStubs, false);
define_pd_global(bool, ProfileTraps, false);
diff --git a/src/hotspot/cpu/arm/c2_globals_arm.hpp b/src/hotspot/cpu/arm/c2_globals_arm.hpp
index 3708e38da2e..525af8b1edc 100644
--- a/src/hotspot/cpu/arm/c2_globals_arm.hpp
+++ b/src/hotspot/cpu/arm/c2_globals_arm.hpp
@@ -54,8 +54,6 @@ define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K));
// (For _228_jack 16/16 is 2% better than 4/4, 16/4, 32/32, 32/16, or 16/32.)
//define_pd_global(intx, OptoLoopAlignment, 16); // = 4*wordSize
define_pd_global(intx, RegisterCostAreaRatio, 16000);
-define_pd_global(bool, UseTLAB, true);
-define_pd_global(bool, ResizeTLAB, true);
define_pd_global(intx, LoopUnrollLimit, 60); // Design center runs on 1.3.1
define_pd_global(intx, LoopPercentProfileLimit, 10);
define_pd_global(intx, MinJumpTableSize, 16);
diff --git a/src/hotspot/cpu/arm/interp_masm_arm.cpp b/src/hotspot/cpu/arm/interp_masm_arm.cpp
index 116d2d40b2e..01ff3a5d39c 100644
--- a/src/hotspot/cpu/arm/interp_masm_arm.cpp
+++ b/src/hotspot/cpu/arm/interp_masm_arm.cpp
@@ -580,7 +580,7 @@ void InterpreterMacroAssembler::dispatch_base(TosState state,
if (needs_thread_local_poll) {
NOT_PRODUCT(block_comment("Thread-local Safepoint poll"));
- ldr(Rtemp, Address(Rthread, Thread::polling_page_offset()));
+ ldr(Rtemp, Address(Rthread, Thread::polling_word_offset()));
tbnz(Rtemp, exact_log2(SafepointMechanism::poll_bit()), safepoint);
}
@@ -983,7 +983,7 @@ void InterpreterMacroAssembler::lock_object(Register Rlock) {
// Unlocks an object. Used in monitorexit bytecode and remove_activation.
//
-// Argument: R1: Points to BasicObjectLock structure for lock
+// Argument: R0: Points to BasicObjectLock structure for lock
// Throw an IllegalMonitorException if object is not locked by current thread
// Blows volatile registers R0-R3, Rtemp, LR. Calls VM.
void InterpreterMacroAssembler::unlock_object(Register Rlock) {
@@ -996,8 +996,7 @@ void InterpreterMacroAssembler::unlock_object(Register Rlock) {
const Register Robj = R2;
const Register Rmark = R3;
- const Register Rresult = R0;
- assert_different_registers(Robj, Rmark, Rlock, R0, Rtemp);
+ assert_different_registers(Robj, Rmark, Rlock, Rtemp);
const int obj_offset = BasicObjectLock::obj_offset_in_bytes();
const int lock_offset = BasicObjectLock::lock_offset_in_bytes ();
diff --git a/src/hotspot/cpu/arm/macroAssembler_arm.cpp b/src/hotspot/cpu/arm/macroAssembler_arm.cpp
index 14ac1163da0..067ec704376 100644
--- a/src/hotspot/cpu/arm/macroAssembler_arm.cpp
+++ b/src/hotspot/cpu/arm/macroAssembler_arm.cpp
@@ -85,20 +85,6 @@ void AddressLiteral::set_rspec(relocInfo::relocType rtype) {
}
}
-// Initially added to the Assembler interface as a pure virtual:
-// RegisterConstant delayed_value(..)
-// for:
-// 6812678 macro assembler needs delayed binding of a few constants (for 6655638)
-// this was subsequently modified to its present name and return type
-RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
- Register tmp,
- int offset) {
- ShouldNotReachHere();
- return RegisterOrConstant(-1);
-}
-
-
-
// virtual method calling
void MacroAssembler::lookup_virtual_method(Register recv_klass,
@@ -1914,7 +1900,7 @@ void MacroAssembler::resolve(DecoratorSet decorators, Register obj) {
}
void MacroAssembler::safepoint_poll(Register tmp1, Label& slow_path) {
- ldr_u32(tmp1, Address(Rthread, Thread::polling_page_offset()));
+ ldr_u32(tmp1, Address(Rthread, Thread::polling_word_offset()));
tst(tmp1, exact_log2(SafepointMechanism::poll_bit()));
b(slow_path, eq);
}
diff --git a/src/hotspot/cpu/arm/macroAssembler_arm.hpp b/src/hotspot/cpu/arm/macroAssembler_arm.hpp
index de40c5741a7..a07ca65d99e 100644
--- a/src/hotspot/cpu/arm/macroAssembler_arm.hpp
+++ b/src/hotspot/cpu/arm/macroAssembler_arm.hpp
@@ -222,14 +222,6 @@ class MacroAssembler: public Assembler {
// returning false to preserve all relocation information.
inline bool ignore_non_patchable_relocations() { return true; }
- // Initially added to the Assembler interface as a pure virtual:
- // RegisterConstant delayed_value(..)
- // for:
- // 6812678 macro assembler needs delayed binding of a few constants (for 6655638)
- // this was subsequently modified to its present name and return type
- virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, Register tmp, int offset);
-
-
void align(int modulus);
// Support for VM calls
diff --git a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp
index 7dd1f21a244..a4216785e4e 100644
--- a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp
+++ b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp
@@ -33,6 +33,7 @@
#include "memory/resourceArea.hpp"
#include "oops/compiledICHolder.hpp"
#include "oops/klass.inline.hpp"
+#include "prims/methodHandles.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/vframeArray.hpp"
diff --git a/src/hotspot/cpu/arm/templateTable_arm.cpp b/src/hotspot/cpu/arm/templateTable_arm.cpp
index a27bd25557c..d0bcfccbb8d 100644
--- a/src/hotspot/cpu/arm/templateTable_arm.cpp
+++ b/src/hotspot/cpu/arm/templateTable_arm.cpp
@@ -2101,7 +2101,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
const Address mask(Rcounters, in_bytes(MethodCounters::backedge_mask_offset()));
__ increment_mask_and_jump(Address(Rcounters, be_offset), increment, mask,
Rcnt, R4_tmp, eq, &backedge_counter_overflow);
- } else {
+ } else { // not TieredCompilation
// Increment backedge counter in MethodCounters*
__ get_method_counters(Rmethod, Rcounters, dispatch, true /*saveRegs*/,
Rdisp, R3_bytecode,
@@ -2166,7 +2166,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
__ dispatch_only(vtos, true);
if (UseLoopCounter) {
- if (ProfileInterpreter) {
+ if (ProfileInterpreter && !TieredCompilation) {
// Out-of-line code to allocate method data oop.
__ bind(profile_method);
diff --git a/src/hotspot/cpu/arm/vm_version_arm_32.cpp b/src/hotspot/cpu/arm/vm_version_arm_32.cpp
index 5331a20f2fe..e6fd8b98668 100644
--- a/src/hotspot/cpu/arm/vm_version_arm_32.cpp
+++ b/src/hotspot/cpu/arm/vm_version_arm_32.cpp
@@ -236,6 +236,11 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
}
+ if (UseSHA3Intrinsics) {
+ warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
+ FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
+ }
+
if (UseCRC32Intrinsics) {
if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
warning("CRC32 intrinsics are not available on this CPU");
diff --git a/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp b/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp
index b13e18efc12..6902c47d71b 100644
--- a/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp
+++ b/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp
@@ -38,6 +38,9 @@
#define __ ce->masm()->
+void C1SafepointPollStub::emit_code(LIR_Assembler* ce) { // body is only ShouldNotReachHere(): this stub is not expected to be emitted in the PPC port
+ ShouldNotReachHere();
+}
RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array)
: _index(index), _array(array), _throw_index_out_of_bounds_exception(false) {
diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
index 54e79f9d4bd..72adb74f4cc 100644
--- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
@@ -1324,7 +1324,7 @@ void LIR_Assembler::reg2mem(LIR_Opr from_reg, LIR_Opr dest, BasicType type,
}
-void LIR_Assembler::return_op(LIR_Opr result) {
+void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) {
const Register return_pc = R31; // Must survive C-call to enable_stack_reserved_zone().
const Register polling_page = R12;
diff --git a/src/hotspot/cpu/ppc/c1_globals_ppc.hpp b/src/hotspot/cpu/ppc/c1_globals_ppc.hpp
index 60b0005e034..f90c1e8b1d2 100644
--- a/src/hotspot/cpu/ppc/c1_globals_ppc.hpp
+++ b/src/hotspot/cpu/ppc/c1_globals_ppc.hpp
@@ -43,9 +43,7 @@ define_pd_global(bool, TieredCompilation, false);
define_pd_global(intx, CompileThreshold, 1000);
define_pd_global(intx, OnStackReplacePercentage, 1400);
-define_pd_global(bool, UseTLAB, true);
define_pd_global(bool, ProfileInterpreter, false);
-define_pd_global(bool, ResizeTLAB, true);
define_pd_global(uintx, ReservedCodeCacheSize, 32*M);
define_pd_global(uintx, NonProfiledCodeHeapSize, 13*M );
define_pd_global(uintx, ProfiledCodeHeapSize, 14*M );
diff --git a/src/hotspot/cpu/ppc/c2_globals_ppc.hpp b/src/hotspot/cpu/ppc/c2_globals_ppc.hpp
index 7a0c311e719..c576ddc95c4 100644
--- a/src/hotspot/cpu/ppc/c2_globals_ppc.hpp
+++ b/src/hotspot/cpu/ppc/c2_globals_ppc.hpp
@@ -51,8 +51,6 @@ define_pd_global(intx, INTPRESSURE, 26);
define_pd_global(intx, InteriorEntryAlignment, 16);
define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K));
define_pd_global(intx, RegisterCostAreaRatio, 16000);
-define_pd_global(bool, UseTLAB, true);
-define_pd_global(bool, ResizeTLAB, true);
define_pd_global(intx, LoopUnrollLimit, 60);
define_pd_global(intx, LoopPercentProfileLimit, 10);
diff --git a/src/hotspot/cpu/ppc/interp_masm_ppc.hpp b/src/hotspot/cpu/ppc/interp_masm_ppc.hpp
index d58740d5a74..67b18dc0e31 100644
--- a/src/hotspot/cpu/ppc/interp_masm_ppc.hpp
+++ b/src/hotspot/cpu/ppc/interp_masm_ppc.hpp
@@ -211,7 +211,7 @@ class InterpreterMacroAssembler: public MacroAssembler {
// Object locking
void lock_object (Register lock_reg, Register obj_reg);
- void unlock_object(Register lock_reg, bool check_for_exceptions = true);
+ void unlock_object(Register lock_reg);
// Interpreter profiling operations
void set_method_data_pointer_for_bcp();
diff --git a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
index 555cfd41418..292accb7852 100644
--- a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
+++ b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
@@ -223,7 +223,7 @@ void InterpreterMacroAssembler::dispatch_Lbyte_code(TosState state, Register byt
address *sfpt_tbl = Interpreter::safept_table(state);
if (table != sfpt_tbl) {
Label dispatch;
- ld(R0, in_bytes(Thread::polling_page_offset()), R16_thread);
+ ld(R0, in_bytes(Thread::polling_word_offset()), R16_thread);
// Armed page has poll_bit set, if poll bit is cleared just continue.
andi_(R0, R0, SafepointMechanism::poll_bit());
beq(CCR0, dispatch);
@@ -878,8 +878,7 @@ void InterpreterMacroAssembler::remove_activation(TosState state,
//
void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
if (UseHeavyMonitors) {
- call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
- monitor, /*check_for_exceptions=*/true);
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), monitor);
} else {
// template code:
//
@@ -980,8 +979,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
// None of the above fast optimizations worked so we have to get into the
// slow case of monitor enter.
bind(slow_case);
- call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
- monitor, /*check_for_exceptions=*/true);
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), monitor);
// }
align(32, 12);
bind(done);
@@ -995,7 +993,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
// which must be initialized with the object to lock.
//
// Throw IllegalMonitorException if object is not locked by current thread.
-void InterpreterMacroAssembler::unlock_object(Register monitor, bool check_for_exceptions) {
+void InterpreterMacroAssembler::unlock_object(Register monitor) {
if (UseHeavyMonitors) {
call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), monitor);
} else {
@@ -2401,8 +2399,7 @@ void InterpreterMacroAssembler::notify_method_entry() {
lwz(R0, in_bytes(JavaThread::interp_only_mode_offset()), R16_thread);
cmpwi(CCR0, R0, 0);
beq(CCR0, jvmti_post_done);
- call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_entry),
- /*check_exceptions=*/true);
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_entry));
bind(jvmti_post_done);
}
@@ -2437,8 +2434,7 @@ void InterpreterMacroAssembler::notify_method_exit(bool is_native_method, TosSta
cmpwi(CCR0, R0, 0);
beq(CCR0, jvmti_post_done);
if (!is_native_method) { push(state); } // Expose tos to GC.
- call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit),
- /*check_exceptions=*/check_exceptions);
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit), check_exceptions);
if (!is_native_method) { pop(state); }
align(32, 12);
diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
index 3d3c39cf5d5..ca1c0c24987 100644
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
@@ -382,25 +382,6 @@ AddressLiteral MacroAssembler::constant_oop_address(jobject obj) {
return AddressLiteral(address(obj), oop_Relocation::spec(oop_index));
}
-RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
- Register tmp, int offset) {
- intptr_t value = *delayed_value_addr;
- if (value != 0) {
- return RegisterOrConstant(value + offset);
- }
-
- // Load indirectly to solve generation ordering problem.
- // static address, no relocation
- int simm16_offset = load_const_optimized(tmp, delayed_value_addr, noreg, true);
- ld(tmp, simm16_offset, tmp); // must be aligned ((xa & 3) == 0)
-
- if (offset != 0) {
- addi(tmp, tmp, offset);
- }
-
- return RegisterOrConstant(tmp);
-}
-
#ifndef PRODUCT
void MacroAssembler::pd_print_patched_instruction(address branch) {
Unimplemented(); // TODO: PPC port
@@ -3044,7 +3025,7 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
}
void MacroAssembler::safepoint_poll(Label& slow_path, Register temp_reg) {
- ld(temp_reg, in_bytes(Thread::polling_page_offset()), R16_thread);
+ ld(temp_reg, in_bytes(Thread::polling_word_offset()), R16_thread);
// Armed page has poll_bit set.
andi_(temp_reg, temp_reg, SafepointMechanism::poll_bit());
bne(CCR0, slow_path);
diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
index a8e43cabdc4..1859483c470 100644
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
@@ -152,12 +152,6 @@ class MacroAssembler: public Assembler {
// Same as load_address.
inline void set_oop (AddressLiteral obj_addr, Register d);
- // Read runtime constant: Issue load if constant not yet established,
- // else use real constant.
- virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
- Register tmp,
- int offset);
-
//
// branch, jump
//
diff --git a/src/hotspot/cpu/ppc/nativeInst_ppc.cpp b/src/hotspot/cpu/ppc/nativeInst_ppc.cpp
index fbe956322a6..1134ed0366b 100644
--- a/src/hotspot/cpu/ppc/nativeInst_ppc.cpp
+++ b/src/hotspot/cpu/ppc/nativeInst_ppc.cpp
@@ -197,7 +197,11 @@ intptr_t NativeMovConstReg::data() const {
CodeBlob* cb = CodeCache::find_blob_unsafe(addr);
if (MacroAssembler::is_set_narrow_oop(addr, cb->content_begin())) {
narrowOop no = MacroAssembler::get_narrow_oop(addr, cb->content_begin());
- return cast_from_oop(CompressedOops::decode(no));
+ // We can reach here during GC with 'no' pointing to new object location
+ // while 'heap()->is_in' still reports false (e.g. with SerialGC).
+ // Therefore we use raw decoding.
+ if (CompressedOops::is_null(no)) return 0;
+ return cast_from_oop(CompressedOops::decode_raw(no));
} else {
assert(MacroAssembler::is_load_const_from_method_toc_at(addr), "must be load_const_from_pool");
diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad
index d9c7c350e8e..b8f4f26995f 100644
--- a/src/hotspot/cpu/ppc/ppc.ad
+++ b/src/hotspot/cpu/ppc/ppc.ad
@@ -535,9 +535,7 @@ alloc_class chunk4 (
// information in this architecture description.
// 1) reg_class inline_cache_reg ( as defined in frame section )
-// 2) reg_class compiler_method_reg ( as defined in frame section )
-// 2) reg_class interpreter_method_reg ( as defined in frame section )
-// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
+// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// ----------------------------
@@ -2064,103 +2062,88 @@ static int frame_slots_bias(int reg_enc, PhaseRegAlloc* ra_) {
}
const bool Matcher::match_rule_supported(int opcode) {
- if (!has_match_rule(opcode))
- return false;
+ if (!has_match_rule(opcode)) { // step 1: the AD file must define a rule at all; step 2 (switch below) applies per-opcode gates
+ return false; // no match rule present
+ }
- bool ret_value = true;
switch (opcode) {
- case Op_SqrtD:
- return VM_Version::has_fsqrt();
- case Op_CountLeadingZerosI:
- case Op_CountLeadingZerosL:
- if (!UseCountLeadingZerosInstructionsPPC64)
- return false;
- break;
- case Op_CountTrailingZerosI:
- case Op_CountTrailingZerosL:
- if (!UseCountLeadingZerosInstructionsPPC64 &&
- !UseCountTrailingZerosInstructionsPPC64)
- return false;
- break;
-
- case Op_PopCountI:
- case Op_PopCountL:
- return (UsePopCountInstruction && VM_Version::has_popcntw());
-
- case Op_StrComp:
- return SpecialStringCompareTo;
- case Op_StrEquals:
- return SpecialStringEquals;
- case Op_StrIndexOf:
- case Op_StrIndexOfChar:
- return SpecialStringIndexOf;
- case Op_AddVB:
- case Op_AddVS:
- case Op_AddVI:
- case Op_AddVF:
- case Op_AddVD:
- case Op_SubVB:
- case Op_SubVS:
- case Op_SubVI:
- case Op_SubVF:
- case Op_SubVD:
- case Op_MulVS:
- case Op_MulVF:
- case Op_MulVD:
- case Op_DivVF:
- case Op_DivVD:
- case Op_AbsVF:
- case Op_AbsVD:
- case Op_NegVF:
- case Op_NegVD:
- case Op_SqrtVF:
- case Op_SqrtVD:
- case Op_AddVL:
- case Op_SubVL:
- case Op_MulVI:
- case Op_RoundDoubleModeV:
- return SuperwordUseVSX;
- case Op_PopCountVI:
- return (SuperwordUseVSX && UsePopCountInstruction);
- case Op_FmaVF:
- case Op_FmaVD:
- return (SuperwordUseVSX && UseFMA);
- case Op_Digit:
- return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isDigit);
- case Op_LowerCase:
- return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isLowerCase);
- case Op_UpperCase:
- return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isUpperCase);
- case Op_Whitespace:
- return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isWhitespace);
-
- case Op_CacheWB:
- case Op_CacheWBPreSync:
- case Op_CacheWBPostSync:
- if (!VM_Version::supports_data_cache_line_flush()) {
- ret_value = false;
- }
- break;
+ case Op_SqrtD:
+ return VM_Version::has_fsqrt(); // decided solely by the VM_Version query
+ case Op_CountLeadingZerosI:
+ case Op_CountLeadingZerosL:
+ return UseCountLeadingZerosInstructionsPPC64; // gated by this flag alone
+ case Op_CountTrailingZerosI:
+ case Op_CountTrailingZerosL:
+ return (UseCountLeadingZerosInstructionsPPC64 || UseCountTrailingZerosInstructionsPPC64); // either flag is sufficient
+ case Op_PopCountI:
+ case Op_PopCountL:
+ return (UsePopCountInstruction && VM_Version::has_popcntw()); // flag and VM_Version query must both hold
+
+ case Op_AddVB:
+ case Op_AddVS:
+ case Op_AddVI:
+ case Op_AddVF:
+ case Op_AddVD:
+ case Op_SubVB:
+ case Op_SubVS:
+ case Op_SubVI:
+ case Op_SubVF:
+ case Op_SubVD:
+ case Op_MulVS:
+ case Op_MulVF:
+ case Op_MulVD:
+ case Op_DivVF:
+ case Op_DivVD:
+ case Op_AbsVF:
+ case Op_AbsVD:
+ case Op_NegVF:
+ case Op_NegVD:
+ case Op_SqrtVF:
+ case Op_SqrtVD:
+ case Op_AddVL:
+ case Op_SubVL:
+ case Op_MulVI:
+ case Op_RoundDoubleModeV:
+ return SuperwordUseVSX; // every vector opcode listed above shares this single gate
+ case Op_PopCountVI:
+ return (SuperwordUseVSX && UsePopCountInstruction); // vector gate plus the popcount flag
+ case Op_FmaVF:
+ case Op_FmaVD:
+ return (SuperwordUseVSX && UseFMA); // vector gate plus the FMA flag
+
+ case Op_Digit:
+ return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isDigit);
+ case Op_LowerCase:
+ return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isLowerCase);
+ case Op_UpperCase:
+ return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isUpperCase);
+ case Op_Whitespace:
+ return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isWhitespace);
+
+ case Op_CacheWB:
+ case Op_CacheWBPreSync:
+ case Op_CacheWBPostSync:
+ return VM_Version::supports_data_cache_line_flush(); // all three cache write-back opcodes share this check
}
- return ret_value; // Per default match rules are supported.
+ return true; // Per default match rules are supported.
}
const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
-
- // TODO
- // identify extra cases that we might want to provide match rules for
- // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
- bool ret_value = match_rule_supported(opcode);
- // Add rules here.
-
- return ret_value; // Per default match rules are supported.
+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { // reject if the opcode check fails or vector_size_supported() turns down (bt, vlen)
+ return false;
+ }
+ return true; // Per default match rules are supported.
}
const bool Matcher::has_predicated_vectors(void) {
return false;
}
+bool Matcher::supports_vector_variable_shifts(void) {
+ return false; // not supported
+}
+
const int Matcher::float_pressure(int default_pressure_threshold) {
return default_pressure_threshold;
}
@@ -2340,10 +2323,6 @@ const bool Matcher::rematerialize_float_constants = false;
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;
-void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
- Unimplemented();
-}
-
// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;
@@ -3855,9 +3834,6 @@ frame %{
// Inline Cache Register or method for I2C.
inline_cache_reg(R19); // R19_method
- // Method Register when calling interpreter.
- interpreter_method_reg(R19); // R19_method
-
// Optional: name the operand used by cisc-spilling to access
// [stack_pointer + offset].
cisc_spilling_operand_name(indOffset);
@@ -3912,7 +3888,7 @@ frame %{
// The `sig' array is to be updated. sig[j] represents the location
// of the j-th argument, either a register or a stack slot.
- // Comment taken from i486.ad:
+ // Comment taken from x86_32.ad:
// Body of function which returns an integer array locating
// arguments either in registers or in stack slots. Passed an array
// of ideal registers called "sig" and a "length" count. Stack-slot
@@ -3924,7 +3900,7 @@ frame %{
SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
%}
- // Comment taken from i486.ad:
+ // Comment taken from x86_32.ad:
// Body of function which returns an integer array locating
// arguments either in registers or in stack slots. Passed an array
// of ideal registers called "sig" and a "length" count. Stack-slot
@@ -4765,20 +4741,6 @@ operand inline_cache_regP(iRegPdst reg) %{
interface(REG_INTER);
%}
-operand compiler_method_regP(iRegPdst reg) %{
- constraint(ALLOC_IN_RC(rscratch1_bits64_reg)); // compiler_method_reg
- match(reg);
- format %{ %}
- interface(REG_INTER);
-%}
-
-operand interpreter_method_regP(iRegPdst reg) %{
- constraint(ALLOC_IN_RC(r19_bits64_reg)); // interpreter_method_reg
- match(reg);
- format %{ %}
- interface(REG_INTER);
-%}
-
// Operands to remove register moves in unscaled mode.
// Match read/write registers with an EncodeP node if neither shift nor add are required.
operand iRegP2N(iRegPsrc reg) %{
@@ -6588,6 +6550,23 @@ instruct storeV16(indirect mem, vecX src) %{
ins_pipe(pipe_class_default);
%}
+// Reinterpret: only one vector size used: either L or X
+instruct reinterpretL(iRegLdst dst) %{
+ match(Set dst (VectorReinterpret dst)); // input and result are the same operand
+ ins_cost(0); // zero cost: the encoding below is empty
+ format %{ "reinterpret $dst" %}
+ ins_encode( /*empty*/ ); // emits no instructions
+ ins_pipe(pipe_class_empty);
+%}
+
+instruct reinterpretX(vecX dst) %{ // VectorReinterpret on a vecX operand
+ match(Set dst (VectorReinterpret dst)); // input and result are the same operand
+ ins_cost(0); // zero cost: the encoding below is empty
+ format %{ "reinterpret $dst" %}
+ ins_encode( /*empty*/ ); // emits no instructions
+ ins_pipe(pipe_class_empty);
+%}
+
// Store Compressed Oop
instruct storeN(memory dst, iRegN_P2N src) %{
match(Set dst (StoreN dst src));
@@ -12618,9 +12597,10 @@ instruct indexOfChar_U(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
match(Set result (StrIndexOfChar (Binary haystack haycnt) ch));
effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U);
ins_cost(180);
- format %{ "String IndexOfChar $haystack[0..$haycnt], $ch"
+ format %{ "StringUTF16 IndexOfChar $haystack[0..$haycnt], $ch"
" -> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
ins_encode %{
__ string_indexof_char($result$$Register,
@@ -12631,6 +12611,25 @@ instruct indexOfChar_U(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
ins_pipe(pipe_class_compare);
%}
+instruct indexOfChar_L(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
+ iRegIsrc ch, iRegIdst tmp1, iRegIdst tmp2,
+ flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
+ match(Set result (StrIndexOfChar (Binary haystack haycnt) ch)); // matches StrIndexOfChar; the predicate below restricts which nodes use this rule
+ effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); // only nodes whose encoding is StrIntrinsicNode::L (the Latin1 variant, per the format text)
+ ins_cost(180);
+
+ format %{ "StringLatin1 IndexOfChar $haystack[0..$haycnt], $ch"
+ " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
+ ins_encode %{
+ __ string_indexof_char($result$$Register,
+ $haystack$$Register, $haycnt$$Register,
+ $ch$$Register, 0 /* this is not used if the character is already in a register */,
+ $tmp1$$Register, $tmp2$$Register, true /*is_byte*/); // is_byte is passed as true for this Latin1 rule
+ %}
+ ins_pipe(pipe_class_compare);
+%}
+
instruct indexOf_imm_U(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt,
iRegPsrc needle, uimmI15 needlecntImm,
iRegIdst tmp1, iRegIdst tmp2, iRegIdst tmp3, iRegIdst tmp4, iRegIdst tmp5,
diff --git a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
index add61ad738c..e8498ba0ed3 100644
--- a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
+++ b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
@@ -35,6 +35,7 @@
#include "memory/resourceArea.hpp"
#include "oops/compiledICHolder.hpp"
#include "oops/klass.inline.hpp"
+#include "prims/methodHandles.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/vframeArray.hpp"
@@ -1529,156 +1530,6 @@ void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_ty
}
}
-static void save_or_restore_arguments(MacroAssembler* masm,
- const int stack_slots,
- const int total_in_args,
- const int arg_save_area,
- OopMap* map,
- VMRegPair* in_regs,
- BasicType* in_sig_bt) {
- // If map is non-NULL then the code should store the values,
- // otherwise it should load them.
- int slot = arg_save_area;
- // Save down double word first.
- for (int i = 0; i < total_in_args; i++) {
- if (in_regs[i].first()->is_FloatRegister() && in_sig_bt[i] == T_DOUBLE) {
- int offset = slot * VMRegImpl::stack_slot_size;
- slot += VMRegImpl::slots_per_word;
- assert(slot <= stack_slots, "overflow (after DOUBLE stack slot)");
- if (map != NULL) {
- __ stfd(in_regs[i].first()->as_FloatRegister(), offset, R1_SP);
- } else {
- __ lfd(in_regs[i].first()->as_FloatRegister(), offset, R1_SP);
- }
- } else if (in_regs[i].first()->is_Register() &&
- (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) {
- int offset = slot * VMRegImpl::stack_slot_size;
- if (map != NULL) {
- __ std(in_regs[i].first()->as_Register(), offset, R1_SP);
- if (in_sig_bt[i] == T_ARRAY) {
- map->set_oop(VMRegImpl::stack2reg(slot));
- }
- } else {
- __ ld(in_regs[i].first()->as_Register(), offset, R1_SP);
- }
- slot += VMRegImpl::slots_per_word;
- assert(slot <= stack_slots, "overflow (after LONG/ARRAY stack slot)");
- }
- }
- // Save or restore single word registers.
- for (int i = 0; i < total_in_args; i++) {
- if (in_regs[i].first()->is_Register()) {
- int offset = slot * VMRegImpl::stack_slot_size;
- // Value lives in an input register. Save it on stack.
- switch (in_sig_bt[i]) {
- case T_BOOLEAN:
- case T_CHAR:
- case T_BYTE:
- case T_SHORT:
- case T_INT:
- if (map != NULL) {
- __ stw(in_regs[i].first()->as_Register(), offset, R1_SP);
- } else {
- __ lwa(in_regs[i].first()->as_Register(), offset, R1_SP);
- }
- slot++;
- assert(slot <= stack_slots, "overflow (after INT or smaller stack slot)");
- break;
- case T_ARRAY:
- case T_LONG:
- // handled above
- break;
- case T_OBJECT:
- default: ShouldNotReachHere();
- }
- } else if (in_regs[i].first()->is_FloatRegister()) {
- if (in_sig_bt[i] == T_FLOAT) {
- int offset = slot * VMRegImpl::stack_slot_size;
- slot++;
- assert(slot <= stack_slots, "overflow (after FLOAT stack slot)");
- if (map != NULL) {
- __ stfs(in_regs[i].first()->as_FloatRegister(), offset, R1_SP);
- } else {
- __ lfs(in_regs[i].first()->as_FloatRegister(), offset, R1_SP);
- }
- }
- } else if (in_regs[i].first()->is_stack()) {
- if (in_sig_bt[i] == T_ARRAY && map != NULL) {
- int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
- map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
- }
- }
- }
-}
-
-// Check GCLocker::needs_gc and enter the runtime if it's true. This
-// keeps a new JNI critical region from starting until a GC has been
-// forced. Save down any oops in registers and describe them in an
-// OopMap.
-static void check_needs_gc_for_critical_native(MacroAssembler* masm,
- const int stack_slots,
- const int total_in_args,
- const int arg_save_area,
- OopMapSet* oop_maps,
- VMRegPair* in_regs,
- BasicType* in_sig_bt,
- Register tmp_reg ) {
- __ block_comment("check GCLocker::needs_gc");
- Label cont;
- __ lbz(tmp_reg, (RegisterOrConstant)(intptr_t)GCLocker::needs_gc_address());
- __ cmplwi(CCR0, tmp_reg, 0);
- __ beq(CCR0, cont);
-
- // Save down any values that are live in registers and call into the
- // runtime to halt for a GC.
- OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
- save_or_restore_arguments(masm, stack_slots, total_in_args,
- arg_save_area, map, in_regs, in_sig_bt);
-
- __ mr(R3_ARG1, R16_thread);
- __ set_last_Java_frame(R1_SP, noreg);
-
- __ block_comment("block_for_jni_critical");
- address entry_point = CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical);
-#if defined(ABI_ELFv2)
- __ call_c(entry_point, relocInfo::runtime_call_type);
-#else
- __ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, entry_point), relocInfo::runtime_call_type);
-#endif
- address start = __ pc() - __ offset(),
- calls_return_pc = __ last_calls_return_pc();
- oop_maps->add_gc_map(calls_return_pc - start, map);
-
- __ reset_last_Java_frame();
-
- // Reload all the register arguments.
- save_or_restore_arguments(masm, stack_slots, total_in_args,
- arg_save_area, NULL, in_regs, in_sig_bt);
-
- __ BIND(cont);
-
-#ifdef ASSERT
- if (StressCriticalJNINatives) {
- // Stress register saving.
- OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
- save_or_restore_arguments(masm, stack_slots, total_in_args,
- arg_save_area, map, in_regs, in_sig_bt);
- // Destroy argument registers.
- for (int i = 0; i < total_in_args; i++) {
- if (in_regs[i].first()->is_Register()) {
- const Register reg = in_regs[i].first()->as_Register();
- __ neg(reg, reg);
- } else if (in_regs[i].first()->is_FloatRegister()) {
- __ fneg(in_regs[i].first()->as_FloatRegister(), in_regs[i].first()->as_FloatRegister());
- }
- }
-
- save_or_restore_arguments(masm, stack_slots, total_in_args,
- arg_save_area, NULL, in_regs, in_sig_bt);
- }
-#endif
-}
-
static void move_ptr(MacroAssembler* masm, VMRegPair src, VMRegPair dst, Register r_caller_sp, Register r_temp) {
if (src.first()->is_stack()) {
if (dst.first()->is_stack()) {
@@ -1820,25 +1671,12 @@ static void gen_special_dispatch(MacroAssembler* masm,
// Critical native functions are a shorthand for the use of
// GetPrimtiveArrayCritical and disallow the use of any other JNI
// functions. The wrapper is expected to unpack the arguments before
-// passing them to the callee and perform checks before and after the
-// native call to ensure that they GCLocker
-// lock_critical/unlock_critical semantics are followed. Some other
-// parts of JNI setup are skipped like the tear down of the JNI handle
+// passing them to the callee. Critical native functions leave the state _in_Java,
+// since they cannot stop for GC.
+// Some other parts of JNI setup are skipped like the tear down of the JNI handle
// block and the check for pending exceptions it's impossible for them
// to be thrown.
//
-// They are roughly structured like this:
-// if (GCLocker::needs_gc())
-// SharedRuntime::block_for_jni_critical();
-// tranistion to thread_in_native
-// unpack arrray arguments and call native entry point
-// check for safepoint in progress
-// check if any thread suspend flags are set
-// call into JVM and possible unlock the JNI critical
-// if a GC was suppressed while in the critical native.
-// transition back to thread_in_Java
-// return to caller
-//
nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
const methodHandle& method,
int compile_id,
@@ -2145,11 +1983,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
OopMapSet *oop_maps = new OopMapSet();
OopMap *oop_map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
- if (is_critical_native) {
- check_needs_gc_for_critical_native(masm, stack_slots, total_in_args, oop_handle_slot_offset,
- oop_maps, in_regs, in_sig_bt, r_temp_1);
- }
-
// Move arguments from register/stack to register/stack.
// --------------------------------------------------------------------------
//
@@ -2350,18 +2183,19 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
__ bind(locked);
}
-
- // Publish thread state
- // --------------------------------------------------------------------------
-
// Use that pc we placed in r_return_pc a while back as the current frame anchor.
__ set_last_Java_frame(R1_SP, r_return_pc);
- // Transition from _thread_in_Java to _thread_in_native.
- __ li(R0, _thread_in_native);
- __ release();
- // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
- __ stw(R0, thread_(thread_state));
+ if (!is_critical_native) {
+ // Publish thread state
+ // --------------------------------------------------------------------------
+
+ // Transition from _thread_in_Java to _thread_in_native.
+ __ li(R0, _thread_in_native);
+ __ release();
+ // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
+ __ stw(R0, thread_(thread_state));
+ }
// The JNI call
@@ -2421,6 +2255,22 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
break;
}
+ Label after_transition;
+
+ // If this is a critical native, check for a safepoint or suspend request after the call.
+ // If a safepoint is needed, transition to native, then to native_trans to handle
+ // safepoints like the native methods that are not critical natives.
+ if (is_critical_native) {
+ Label needs_safepoint;
+ Register sync_state = r_temp_5;
+ __ safepoint_poll(needs_safepoint, sync_state);
+
+ Register suspend_flags = r_temp_6;
+ __ lwz(suspend_flags, thread_(suspend_flags));
+ __ cmpwi(CCR1, suspend_flags, 0);
+ __ beq(CCR1, after_transition);
+ __ bind(needs_safepoint);
+ }
// Publish thread state
// --------------------------------------------------------------------------
@@ -2448,7 +2298,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
// Block, if necessary, before resuming in _thread_in_Java state.
// In order for GC to work, don't clear the last_Java_sp until after blocking.
- Label after_transition;
{
Label no_block, sync;
@@ -2476,31 +2325,27 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
__ bind(sync);
__ isync();
- address entry_point = is_critical_native
- ? CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)
- : CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
+ address entry_point =
+ CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
save_native_result(masm, ret_type, workspace_slot_offset);
__ call_VM_leaf(entry_point, R16_thread);
restore_native_result(masm, ret_type, workspace_slot_offset);
- if (is_critical_native) {
- __ b(after_transition); // No thread state transition here.
- }
__ bind(no_block);
- }
- // Publish thread state.
- // --------------------------------------------------------------------------
+ // Publish thread state.
+ // --------------------------------------------------------------------------
- // Thread state is thread_in_native_trans. Any safepoint blocking has
- // already happened so we can now change state to _thread_in_Java.
+ // Thread state is thread_in_native_trans. Any safepoint blocking has
+ // already happened so we can now change state to _thread_in_Java.
- // Transition from _thread_in_native_trans to _thread_in_Java.
- __ li(R0, _thread_in_Java);
- __ lwsync(); // Acquire safepoint and suspend state, release thread state.
- // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
- __ stw(R0, thread_(thread_state));
- __ bind(after_transition);
+ // Transition from _thread_in_native_trans to _thread_in_Java.
+ __ li(R0, _thread_in_Java);
+ __ lwsync(); // Acquire safepoint and suspend state, release thread state.
+ // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
+ __ stw(R0, thread_(thread_state));
+ __ bind(after_transition);
+ }
// Reguard any pages if necessary.
// --------------------------------------------------------------------------
@@ -2657,10 +2502,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
in_ByteSize(lock_offset),
oop_maps);
- if (is_critical_native) {
- nm->set_lazy_critical_native(true);
- }
-
return nm;
}
diff --git a/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp b/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp
index 994f0a93827..525e4f05255 100644
--- a/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp
+++ b/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp
@@ -1549,9 +1549,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
// Handle exceptions
if (synchronized) {
- // Don't check for exceptions since we're still in the i2n frame. Do that
- // manually afterwards.
- __ unlock_object(R26_monitor, false); // Can also unlock methods.
+ __ unlock_object(R26_monitor); // Can also unlock methods.
}
// Reset active handles after returning from native.
@@ -1592,9 +1590,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
BIND(exception_return_sync_check);
if (synchronized) {
- // Don't check for exceptions since we're still in the i2n frame. Do that
- // manually afterwards.
- __ unlock_object(R26_monitor, false); // Can also unlock methods.
+ __ unlock_object(R26_monitor); // Can also unlock methods.
}
BIND(exception_return_sync_check_already_unlocked);
@@ -2105,7 +2101,7 @@ void TemplateInterpreterGenerator::generate_throw_exception() {
// Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL.
__ ld(R4_ARG2, 0, R18_locals);
__ call_VM(R4_ARG2, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), R4_ARG2, R19_method, R14_bcp);
- __ restore_interpreter_state(R11_scratch1, /*bcp_and_mdx_only*/ true);
+
__ cmpdi(CCR0, R4_ARG2, 0);
__ beq(CCR0, L_done);
__ std(R4_ARG2, wordSize, R15_esp);
diff --git a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp
index e9ccfc7c481..cc341d83072 100644
--- a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp
+++ b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp
@@ -2173,7 +2173,7 @@ void TemplateTable::_return(TosState state) {
if (_desc->bytecode() != Bytecodes::_return_register_finalizer) {
Label no_safepoint;
- __ ld(R11_scratch1, in_bytes(Thread::polling_page_offset()), R16_thread);
+ __ ld(R11_scratch1, in_bytes(Thread::polling_word_offset()), R16_thread);
__ andi_(R11_scratch1, R11_scratch1, SafepointMechanism::poll_bit());
__ beq(CCR0, no_safepoint);
__ push(state);
diff --git a/src/hotspot/cpu/ppc/vm_version_ppc.cpp b/src/hotspot/cpu/ppc/vm_version_ppc.cpp
index fd62cb5813a..f64999d108a 100644
--- a/src/hotspot/cpu/ppc/vm_version_ppc.cpp
+++ b/src/hotspot/cpu/ppc/vm_version_ppc.cpp
@@ -331,6 +331,11 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
}
+ if (UseSHA3Intrinsics) {
+ warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
+ FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
+ }
+
if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
FLAG_SET_DEFAULT(UseSHA, false);
}
diff --git a/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp b/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp
index 367d2a43af5..329c163f313 100644
--- a/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp
+++ b/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp
@@ -41,6 +41,10 @@
#undef CHECK_BAILOUT
#define CHECK_BAILOUT() { if (ce->compilation()->bailed_out()) return; }
+void C1SafepointPollStub::emit_code(LIR_Assembler* ce) { // s390 definition of the stub's code emitter
+ ShouldNotReachHere(); // generates no code; NOTE(review): presumably C1 never instantiates this stub on s390 - confirm
+}
+
RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array)
: _index(index), _array(array), _throw_index_out_of_bounds_exception(false) {
assert(info != NULL, "must have info");
diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
index 24c8178f1dc..4c7dc79e5e7 100644
--- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
@@ -1207,7 +1207,7 @@ void LIR_Assembler::reg2mem(LIR_Opr from, LIR_Opr dest_opr, BasicType type,
}
-void LIR_Assembler::return_op(LIR_Opr result) {
+void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) {
assert(result->is_illegal() ||
(result->is_single_cpu() && result->as_register() == Z_R2) ||
(result->is_double_cpu() && result->as_register_lo() == Z_R2) ||
diff --git a/src/hotspot/cpu/s390/c1_globals_s390.hpp b/src/hotspot/cpu/s390/c1_globals_s390.hpp
index 99e26e5e3f8..7fcb1ee0617 100644
--- a/src/hotspot/cpu/s390/c1_globals_s390.hpp
+++ b/src/hotspot/cpu/s390/c1_globals_s390.hpp
@@ -43,9 +43,7 @@ define_pd_global(bool, TieredCompilation, false);
define_pd_global(intx, CompileThreshold, 1000);
define_pd_global(intx, OnStackReplacePercentage, 1400);
-define_pd_global(bool, UseTLAB, true);
define_pd_global(bool, ProfileInterpreter, false);
-define_pd_global(bool, ResizeTLAB, true);
define_pd_global(uintx, ReservedCodeCacheSize, 32*M);
define_pd_global(uintx, NonProfiledCodeHeapSize, 13*M);
define_pd_global(uintx, ProfiledCodeHeapSize, 14*M);
diff --git a/src/hotspot/cpu/s390/c2_globals_s390.hpp b/src/hotspot/cpu/s390/c2_globals_s390.hpp
index 2f44fa73a2e..64d5585d616 100644
--- a/src/hotspot/cpu/s390/c2_globals_s390.hpp
+++ b/src/hotspot/cpu/s390/c2_globals_s390.hpp
@@ -51,8 +51,6 @@ define_pd_global(intx, INTPRESSURE, 10); // Medium size registe
define_pd_global(intx, InteriorEntryAlignment, 2);
define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K));
define_pd_global(intx, RegisterCostAreaRatio, 12000);
-define_pd_global(bool, UseTLAB, true);
-define_pd_global(bool, ResizeTLAB, true);
define_pd_global(intx, LoopUnrollLimit, 60);
define_pd_global(intx, LoopPercentProfileLimit, 10);
define_pd_global(intx, MinJumpTableSize, 18);
diff --git a/src/hotspot/cpu/s390/interp_masm_s390.cpp b/src/hotspot/cpu/s390/interp_masm_s390.cpp
index d612d528c51..4f44359b04d 100644
--- a/src/hotspot/cpu/s390/interp_masm_s390.cpp
+++ b/src/hotspot/cpu/s390/interp_masm_s390.cpp
@@ -121,7 +121,7 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, address* table, bo
address *sfpt_tbl = Interpreter::safept_table(state);
if (table != sfpt_tbl) {
Label dispatch;
- const Address poll_byte_addr(Z_thread, in_bytes(Thread::polling_page_offset()) + 7 /* Big Endian */);
+ const Address poll_byte_addr(Z_thread, in_bytes(Thread::polling_word_offset()) + 7 /* Big Endian */);
// Armed page has poll_bit set, if poll bit is cleared just continue.
z_tm(poll_byte_addr, SafepointMechanism::poll_bit());
z_braz(dispatch);
@@ -969,8 +969,7 @@ void InterpreterMacroAssembler::remove_activation(TosState state,
void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
if (UseHeavyMonitors) {
- call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
- monitor, /*check_for_exceptions=*/false);
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), monitor);
return;
}
@@ -1061,9 +1060,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
// None of the above fast optimizations worked so we have to get into the
// slow case of monitor enter.
bind(slow_case);
-
- call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
- monitor, /*check_for_exceptions=*/false);
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), monitor);
// }
diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.cpp b/src/hotspot/cpu/s390/macroAssembler_s390.cpp
index c71a15daa7c..d7c95ee96ee 100644
--- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp
@@ -1810,34 +1810,6 @@ void MacroAssembler::c2bool(Register r, Register t) {
z_srl(r, 31); // Yields 0 if r was 0, 1 otherwise.
}
-RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
- Register tmp,
- int offset) {
- intptr_t value = *delayed_value_addr;
- if (value != 0) {
- return RegisterOrConstant(value + offset);
- }
-
- BLOCK_COMMENT("delayed_value {");
- // Load indirectly to solve generation ordering problem.
- load_absolute_address(tmp, (address) delayed_value_addr); // tmp = a;
- z_lg(tmp, 0, tmp); // tmp = *tmp;
-
-#ifdef ASSERT
- NearLabel L;
- compare64_and_branch(tmp, (intptr_t)0L, Assembler::bcondNotEqual, L);
- z_illtrap();
- bind(L);
-#endif
-
- if (offset != 0) {
- z_agfi(tmp, offset); // tmp = tmp + offset;
- }
-
- BLOCK_COMMENT("} delayed_value");
- return RegisterOrConstant(tmp);
-}
-
// Patch instruction `inst' at offset `inst_pos' to refer to `dest_pos'
// and return the resulting instruction.
// Dest_pos and inst_pos are 32 bit only. These parms can only designate
@@ -2680,7 +2652,7 @@ uint MacroAssembler::get_poll_register(address instr_loc) {
}
void MacroAssembler::safepoint_poll(Label& slow_path, Register temp_reg) {
- const Address poll_byte_addr(Z_thread, in_bytes(Thread::polling_page_offset()) + 7 /* Big Endian */);
+ const Address poll_byte_addr(Z_thread, in_bytes(Thread::polling_word_offset()) + 7 /* Big Endian */);
// Armed page has poll_bit set.
z_tm(poll_byte_addr, SafepointMechanism::poll_bit());
z_brnaz(slow_path);
diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.hpp b/src/hotspot/cpu/s390/macroAssembler_s390.hpp
index 41294b0fe87..113a1a3db2a 100644
--- a/src/hotspot/cpu/s390/macroAssembler_s390.hpp
+++ b/src/hotspot/cpu/s390/macroAssembler_s390.hpp
@@ -350,9 +350,6 @@ class MacroAssembler: public Assembler {
// Uses constant_metadata_address.
inline bool set_metadata_constant(Metadata* md, Register d);
- virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
- Register tmp,
- int offset);
//
// branch, jump
//
diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad
index bb98182d781..de1565194ed 100644
--- a/src/hotspot/cpu/s390/s390.ad
+++ b/src/hotspot/cpu/s390/s390.ad
@@ -278,9 +278,7 @@ alloc_class chunk2(
// information in this architecture description.
// 1) reg_class inline_cache_reg (as defined in frame section)
-// 2) reg_class compiler_method_reg (as defined in frame section)
-// 2) reg_class interpreter_method_reg (as defined in frame section)
-// 3) reg_class stack_slots(/* one chunk of stack-based "registers" */)
+// 2) reg_class stack_slots(/* one chunk of stack-based "registers" */)
// Integer Register Classes
reg_class z_int_reg(
@@ -1513,66 +1511,38 @@ static Register reg_to_register_object(int register_encoding) {
}
const bool Matcher::match_rule_supported(int opcode) {
- if (!has_match_rule(opcode)) return false;
+ if (!has_match_rule(opcode)) {
+ return false; // no match rule present
+ }
switch (opcode) {
- case Op_CountLeadingZerosI:
- case Op_CountLeadingZerosL:
- case Op_CountTrailingZerosI:
- case Op_CountTrailingZerosL:
- // Implementation requires FLOGR instruction, which is available since z9.
- return true;
-
case Op_ReverseBytesI:
case Op_ReverseBytesL:
return UseByteReverseInstruction;
-
- // PopCount supported by H/W from z/Architecture G5 (z196) on.
case Op_PopCountI:
case Op_PopCountL:
- return UsePopCountInstruction && VM_Version::has_PopCount();
-
- case Op_StrComp:
- return SpecialStringCompareTo;
- case Op_StrEquals:
- return SpecialStringEquals;
- case Op_StrIndexOf:
- case Op_StrIndexOfChar:
- return SpecialStringIndexOf;
-
- case Op_GetAndAddI:
- case Op_GetAndAddL:
- return true;
- // return VM_Version::has_AtomicMemWithImmALUOps();
- case Op_GetAndSetI:
- case Op_GetAndSetL:
- case Op_GetAndSetP:
- case Op_GetAndSetN:
- return true; // General CAS implementation, always available.
-
- default:
- return true; // Per default match rules are supported.
- // BUT: make sure match rule is not disabled by a false predicate!
+ // PopCount supported by H/W from z/Architecture G5 (z196) on.
+ return (UsePopCountInstruction && VM_Version::has_PopCount());
}
- return true; // Per default match rules are supported.
- // BUT: make sure match rule is not disabled by a false predicate!
+ return true; // Per default match rules are supported.
}
const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
- // TODO
- // Identify extra cases that we might want to provide match rules for
- // e.g. Op_ vector nodes and other intrinsics while guarding with vlen.
- bool ret_value = match_rule_supported(opcode);
- // Add rules here.
-
- return ret_value; // Per default match rules are supported.
+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) {
+ return false;
+ }
+ return true; // Per default match rules are supported.
}
const bool Matcher::has_predicated_vectors(void) {
return false;
}
+bool Matcher::supports_vector_variable_shifts(void) {
+ return false; // not supported
+}
+
const int Matcher::float_pressure(int default_pressure_threshold) {
return default_pressure_threshold;
}
@@ -2462,12 +2432,6 @@ frame %{
// Tos is loaded in run_compiled_code to Z_ARG5=Z_R6.
// interpreter_arg_ptr_reg(Z_R6);
- // Temporary in compiled entry-points
- // compiler_method_reg(Z_R1);//Z_R1_scratch
-
- // Method Register when calling interpreter
- interpreter_method_reg(Z_R9);//Z_method
-
// Optional: name the operand used by cisc-spilling to access
// [stack_pointer + offset].
cisc_spilling_operand_name(indOffset12);
@@ -3531,20 +3495,6 @@ operand inline_cache_regP(iRegP reg) %{
interface(REG_INTER);
%}
-operand compiler_method_regP(iRegP reg) %{
- constraint(ALLOC_IN_RC(z_r1_RegP)); // compiler_method_reg
- match(reg);
- format %{ %}
- interface(REG_INTER);
-%}
-
-operand interpreter_method_regP(iRegP reg) %{
- constraint(ALLOC_IN_RC(z_r9_regP)); // interpreter_method_reg
- match(reg);
- format %{ %}
- interface(REG_INTER);
-%}
-
// Operands to remove register moves in unscaled mode.
// Match read/write registers with an EncodeP node if neither shift nor add are required.
operand iRegP2N(iRegP reg) %{
@@ -10172,8 +10122,9 @@ instruct string_compareUL(iRegP str1, iRegP str2, rarg2RegI cnt1, rarg5RegI cnt2
instruct indexOfChar_U(iRegP haystack, iRegI haycnt, iRegI ch, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
match(Set result (StrIndexOfChar (Binary haystack haycnt) ch));
effect(TEMP_DEF result, TEMP evenReg, TEMP oddReg, KILL cr); // R0, R1 are killed, too.
+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U);
ins_cost(200);
- format %{ "String IndexOfChar [0..$haycnt]($haystack), $ch -> $result" %}
+ format %{ "StringUTF16 IndexOfChar [0..$haycnt]($haystack), $ch -> $result" %}
ins_encode %{
__ string_indexof_char($result$$Register,
$haystack$$Register, $haycnt$$Register,
@@ -10183,6 +10134,21 @@ instruct indexOfChar_U(iRegP haystack, iRegI haycnt, iRegI ch, iRegI result, rod
ins_pipe(pipe_class_dummy);
%}
+instruct indexOfChar_L(iRegP haystack, iRegI haycnt, iRegI ch, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+ match(Set result (StrIndexOfChar (Binary haystack haycnt) ch));
+ effect(TEMP_DEF result, TEMP evenReg, TEMP oddReg, KILL cr); // R0, R1 are killed, too.
+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L);
+ ins_cost(200);
+ format %{ "StringLatin1 IndexOfChar [0..$haycnt]($haystack), $ch -> $result" %}
+ ins_encode %{
+ __ string_indexof_char($result$$Register,
+ $haystack$$Register, $haycnt$$Register,
+ $ch$$Register, 0 /* unused, ch is in register */,
+ $oddReg$$Register, $evenReg$$Register, true /*is_byte*/);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
instruct indexOf_imm1_U(iRegP haystack, iRegI haycnt, immP needle, immI_1 needlecnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
effect(TEMP_DEF result, TEMP evenReg, TEMP oddReg, KILL cr); // R0, R1 are killed, too.
@@ -10809,7 +10775,7 @@ instruct Repl2F_imm0(iRegL dst, immFp0 src) %{
ins_pipe(pipe_class_dummy);
%}
-// Store
+// Load/Store vector
// Store Aligned Packed Byte register to memory (8 Bytes).
instruct storeA8B(memory mem, iRegL src) %{
@@ -10823,8 +10789,6 @@ instruct storeA8B(memory mem, iRegL src) %{
ins_pipe(pipe_class_dummy);
%}
-// Load
-
instruct loadV8(iRegL dst, memory mem) %{
match(Set dst (LoadVector mem));
predicate(n->as_LoadVector()->memory_size() == 8);
@@ -10836,6 +10800,15 @@ instruct loadV8(iRegL dst, memory mem) %{
ins_pipe(pipe_class_dummy);
%}
+// Reinterpret: only one vector size used
+instruct reinterpret(iRegL dst) %{
+ match(Set dst (VectorReinterpret dst));
+ ins_cost(0);
+ format %{ "reinterpret $dst" %}
+ ins_encode( /*empty*/ );
+ ins_pipe(pipe_class_dummy);
+%}
+
//----------POPULATION COUNT RULES--------------------------------------------
// Byte reverse
diff --git a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp
index 48ac8ae443c..a0c46b182ff 100644
--- a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp
+++ b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp
@@ -35,6 +35,7 @@
#include "nativeInst_s390.hpp"
#include "oops/compiledICHolder.hpp"
#include "oops/klass.inline.hpp"
+#include "prims/methodHandles.hpp"
#include "registerSaver_s390.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
@@ -1284,163 +1285,6 @@ static void move32_64(MacroAssembler *masm,
}
}
-static void save_or_restore_arguments(MacroAssembler *masm,
- const int stack_slots,
- const int total_in_args,
- const int arg_save_area,
- OopMap *map,
- VMRegPair *in_regs,
- BasicType *in_sig_bt) {
-
- // If map is non-NULL then the code should store the values,
- // otherwise it should load them.
- int slot = arg_save_area;
- // Handle double words first.
- for (int i = 0; i < total_in_args; i++) {
- if (in_regs[i].first()->is_FloatRegister() && in_sig_bt[i] == T_DOUBLE) {
- int offset = slot * VMRegImpl::stack_slot_size;
- slot += VMRegImpl::slots_per_word;
- assert(slot <= stack_slots, "overflow (after DOUBLE stack slot)");
- const FloatRegister freg = in_regs[i].first()->as_FloatRegister();
- Address stackaddr(Z_SP, offset);
- if (map != NULL) {
- __ freg2mem_opt(freg, stackaddr);
- } else {
- __ mem2freg_opt(freg, stackaddr);
- }
- } else if (in_regs[i].first()->is_Register() &&
- (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) {
- int offset = slot * VMRegImpl::stack_slot_size;
- const Register reg = in_regs[i].first()->as_Register();
- if (map != NULL) {
- __ z_stg(reg, offset, Z_SP);
- if (in_sig_bt[i] == T_ARRAY) {
- map->set_oop(VMRegImpl::stack2reg(slot));
- }
- } else {
- __ z_lg(reg, offset, Z_SP);
- }
- slot += VMRegImpl::slots_per_word;
- assert(slot <= stack_slots, "overflow (after LONG/ARRAY stack slot)");
- }
- }
-
- // Save or restore single word registers.
- for (int i = 0; i < total_in_args; i++) {
- if (in_regs[i].first()->is_Register()) {
- int offset = slot * VMRegImpl::stack_slot_size;
- // Value lives in an input register. Save it on stack.
- switch (in_sig_bt[i]) {
- case T_BOOLEAN:
- case T_CHAR:
- case T_BYTE:
- case T_SHORT:
- case T_INT: {
- const Register reg = in_regs[i].first()->as_Register();
- Address stackaddr(Z_SP, offset);
- if (map != NULL) {
- __ z_st(reg, stackaddr);
- } else {
- __ z_lgf(reg, stackaddr);
- }
- slot++;
- assert(slot <= stack_slots, "overflow (after INT or smaller stack slot)");
- break;
- }
- case T_ARRAY:
- case T_LONG:
- // handled above
- break;
- case T_OBJECT:
- default: ShouldNotReachHere();
- }
- } else if (in_regs[i].first()->is_FloatRegister()) {
- if (in_sig_bt[i] == T_FLOAT) {
- int offset = slot * VMRegImpl::stack_slot_size;
- slot++;
- assert(slot <= stack_slots, "overflow (after FLOAT stack slot)");
- const FloatRegister freg = in_regs[i].first()->as_FloatRegister();
- Address stackaddr(Z_SP, offset);
- if (map != NULL) {
- __ freg2mem_opt(freg, stackaddr, false);
- } else {
- __ mem2freg_opt(freg, stackaddr, false);
- }
- }
- } else if (in_regs[i].first()->is_stack() &&
- in_sig_bt[i] == T_ARRAY && map != NULL) {
- int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
- map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
- }
- }
-}
-
-// Check GCLocker::needs_gc and enter the runtime if it's true. This
-// keeps a new JNI critical region from starting until a GC has been
-// forced. Save down any oops in registers and describe them in an OopMap.
-static void check_needs_gc_for_critical_native(MacroAssembler *masm,
- const int stack_slots,
- const int total_in_args,
- const int arg_save_area,
- OopMapSet *oop_maps,
- VMRegPair *in_regs,
- BasicType *in_sig_bt) {
- __ block_comment("check GCLocker::needs_gc");
- Label cont;
-
- // Check GCLocker::_needs_gc flag.
- __ load_const_optimized(Z_R1_scratch, (long) GCLocker::needs_gc_address());
- __ z_cli(0, Z_R1_scratch, 0);
- __ z_bre(cont);
-
- // Save down any values that are live in registers and call into the
- // runtime to halt for a GC.
- OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
-
- save_or_restore_arguments(masm, stack_slots, total_in_args,
- arg_save_area, map, in_regs, in_sig_bt);
- address the_pc = __ pc();
- __ set_last_Java_frame(Z_SP, noreg);
-
- __ block_comment("block_for_jni_critical");
- __ z_lgr(Z_ARG1, Z_thread);
-
- address entry_point = CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical);
- __ call_c(entry_point);
- oop_maps->add_gc_map(__ offset(), map);
-
- __ reset_last_Java_frame();
-
- // Reload all the register arguments.
- save_or_restore_arguments(masm, stack_slots, total_in_args,
- arg_save_area, NULL, in_regs, in_sig_bt);
-
- __ bind(cont);
-
- if (StressCriticalJNINatives) {
- // Stress register saving
- OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
- save_or_restore_arguments(masm, stack_slots, total_in_args,
- arg_save_area, map, in_regs, in_sig_bt);
-
- // Destroy argument registers.
- for (int i = 0; i < total_in_args; i++) {
- if (in_regs[i].first()->is_Register()) {
- // Don't set CC.
- __ clear_reg(in_regs[i].first()->as_Register(), true, false);
- } else {
- if (in_regs[i].first()->is_FloatRegister()) {
- FloatRegister fr = in_regs[i].first()->as_FloatRegister();
- __ z_lcdbr(fr, fr);
- }
- }
- }
-
- save_or_restore_arguments(masm, stack_slots, total_in_args,
- arg_save_area, NULL, in_regs, in_sig_bt);
- }
-}
-
static void move_ptr(MacroAssembler *masm,
VMRegPair src,
VMRegPair dst,
@@ -1857,12 +1701,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
OopMapSet *oop_maps = new OopMapSet();
OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
- if (is_critical_native) {
- check_needs_gc_for_critical_native(masm, stack_slots, total_in_args,
- oop_handle_slot_offset, oop_maps, in_regs, in_sig_bt);
- }
-
-
//////////////////////////////////////////////////////////////////////
//
// The Grand Shuffle
@@ -2091,9 +1929,10 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
// Use that pc we placed in Z_R10 a while back as the current frame anchor.
__ set_last_Java_frame(Z_SP, Z_R10);
- // Transition from _thread_in_Java to _thread_in_native.
- __ set_thread_state(_thread_in_native);
-
+ if (!is_critical_native) {
+ // Transition from _thread_in_Java to _thread_in_native.
+ __ set_thread_state(_thread_in_native);
+ }
//////////////////////////////////////////////////////////////////////
// This is the JNI call.
@@ -2139,6 +1978,19 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
break;
}
+ Label after_transition;
+
+ // If this is a critical native, check for a safepoint or suspend request after the call.
+ // If a safepoint is needed, transition to native, then to native_trans to handle
+ // safepoints like the native methods that are not critical natives.
+ if (is_critical_native) {
+ Label needs_safepoint;
+ // Does this need to save_native_result and fences?
+ __ safepoint_poll(needs_safepoint, Z_R1);
+ __ load_and_test_int(Z_R0, Address(Z_thread, JavaThread::suspend_flags_offset()));
+ __ z_bre(after_transition);
+ __ bind(needs_safepoint);
+ }
// Switch thread to "native transition" state before reading the synchronization state.
// This additional state is necessary because reading and testing the synchronization
@@ -2158,7 +2010,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
// Block, if necessary, before resuming in _thread_in_Java state.
// In order for GC to work, don't clear the last_Java_sp until after blocking.
//--------------------------------------------------------------------
- Label after_transition;
{
Label no_block, sync;
@@ -2180,15 +2031,10 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
__ bind(sync);
__ z_acquire();
- address entry_point = is_critical_native ? CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)
- : CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
+ address entry_point = CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
__ call_VM_leaf(entry_point, Z_thread);
- if (is_critical_native) {
- restore_native_result(masm, ret_type, workspace_slot_offset);
- __ z_bru(after_transition); // No thread state transition here.
- }
__ bind(no_block);
restore_native_result(masm, ret_type, workspace_slot_offset);
}
@@ -2201,7 +2047,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
__ set_thread_state(_thread_in_Java);
__ bind(after_transition);
-
//--------------------------------------------------------------------
// Reguard any pages if necessary.
// Protect native result from being destroyed.
@@ -2384,10 +2229,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
in_ByteSize(lock_offset),
oop_maps);
- if (is_critical_native) {
- nm->set_lazy_critical_native(true);
- }
-
return nm;
}
diff --git a/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp b/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp
index 5d8b11332d8..e1862f11c49 100644
--- a/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp
+++ b/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp
@@ -856,7 +856,7 @@ void TemplateInterpreterGenerator::generate_stack_overflow_check(Register frame_
// Compute the beginning of the protected zone minus the requested frame size.
__ z_sgr(tmp1, tmp2);
- __ add2reg(tmp1, JavaThread::stack_guard_zone_size());
+ __ add2reg(tmp1, StackOverflow::stack_guard_zone_size());
// Add in the size of the frame (which is the same as subtracting it from the
// SP, which would take another register.
diff --git a/src/hotspot/cpu/s390/templateTable_s390.cpp b/src/hotspot/cpu/s390/templateTable_s390.cpp
index 9c372db9e78..7a4cf869c30 100644
--- a/src/hotspot/cpu/s390/templateTable_s390.cpp
+++ b/src/hotspot/cpu/s390/templateTable_s390.cpp
@@ -2007,7 +2007,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
// Out-of-line code runtime calls.
if (UseLoopCounter) {
- if (ProfileInterpreter) {
+ if (ProfileInterpreter && !TieredCompilation) {
// Out-of-line code to allocate method data oop.
__ bind(profile_method);
@@ -2377,7 +2377,7 @@ void TemplateTable::_return(TosState state) {
if (_desc->bytecode() != Bytecodes::_return_register_finalizer) {
Label no_safepoint;
- const Address poll_byte_addr(Z_thread, in_bytes(Thread::polling_page_offset()) + 7 /* Big Endian */);
+ const Address poll_byte_addr(Z_thread, in_bytes(Thread::polling_word_offset()) + 7 /* Big Endian */);
__ z_tm(poll_byte_addr, SafepointMechanism::poll_bit());
__ z_braz(no_safepoint);
__ push(state);
diff --git a/src/hotspot/cpu/s390/vm_version_s390.cpp b/src/hotspot/cpu/s390/vm_version_s390.cpp
index 3460a767fac..0a769c9401f 100644
--- a/src/hotspot/cpu/s390/vm_version_s390.cpp
+++ b/src/hotspot/cpu/s390/vm_version_s390.cpp
@@ -221,6 +221,11 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
}
+ if (UseSHA3Intrinsics) {
+ warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
+ FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
+ }
+
if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
FLAG_SET_DEFAULT(UseSHA, false);
}
@@ -831,7 +836,7 @@ void VM_Version::determine_features() {
code_end-code, cbuf_size, cbuf_size-(code_end-code));
// Use existing decode function. This enables the [MachCode] format which is needed to DecodeErrorFile.
- Disassembler::decode(&cbuf, code, code_end, tty);
+ Disassembler::decode(code, code_end, tty);
}
// Prepare for detection code execution and clear work buffer.
diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp
index ef04d33c7f4..3933bac000f 100644
--- a/src/hotspot/cpu/x86/assembler_x86.cpp
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp
@@ -984,6 +984,8 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
case 0x61: // pcmpestri r, r/a, #8
case 0x70: // pshufd r, r/a, #8
case 0x73: // psrldq r, #8
+ case 0x1f: // evpcmpd/evpcmpq
+ case 0x3f: // evpcmpb/evpcmpw
tail_size = 1; // the imm8
break;
default:
@@ -1209,6 +1211,11 @@ void Assembler::addb(Address dst, int imm8) {
emit_int8(imm8);
}
+void Assembler::addw(Register dst, Register src) {
+ (void)prefix_and_encode(dst->encoding(), src->encoding());
+ emit_arith(0x03, 0xC0, dst, src); // NOTE(review): no 0x66 operand-size prefix is emitted on this path, unlike addw(Address, int) -- confirm that is intended
+}
+
void Assembler::addw(Address dst, int imm16) {
InstructionMark im(this);
emit_int8(0x66);
@@ -1415,6 +1422,11 @@ void Assembler::vaesenclast(XMMRegister dst, XMMRegister nds, XMMRegister src, i
emit_int16((unsigned char)0xDD, (0xC0 | encode));
}
+void Assembler::andw(Register dst, Register src) {
+ (void)prefix_and_encode(dst->encoding(), src->encoding());
+ emit_arith(0x23, 0xC0, dst, src); // NOTE(review): emitted without a 0x66 operand-size prefix; presumably this encodes a 32-bit AND -- confirm callers only rely on the low 16 bits
+}
+
void Assembler::andl(Address dst, int32_t imm32) {
InstructionMark im(this);
prefix(dst);
@@ -1783,6 +1795,13 @@ void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
emit_int16((unsigned char)0xE6, (0xC0 | encode));
}
+void Assembler::vcvtdq2pd(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xE6, (0xC0 | encode));
+}
+
void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -1790,6 +1809,13 @@ void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
emit_int16(0x5B, (0xC0 | encode));
}
+void Assembler::vcvtdq2ps(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x5B, (0xC0 | encode));
+}
+
void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
@@ -1912,18 +1938,18 @@ void Assembler::pabsd(XMMRegister dst, XMMRegister src) {
}
void Assembler::vpabsb(XMMRegister dst, XMMRegister src, int vector_len) {
- assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
- vector_len == AVX_256bit? VM_Version::supports_avx2() :
- vector_len == AVX_512bit? VM_Version::supports_avx512bw() : 0, "");
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
+ vector_len == AVX_256bit ? VM_Version::supports_avx2() :
+ vector_len == AVX_512bit ? VM_Version::supports_avx512bw() : false, "not supported");
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16(0x1C, (0xC0 | encode));
}
void Assembler::vpabsw(XMMRegister dst, XMMRegister src, int vector_len) {
- assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
- vector_len == AVX_256bit? VM_Version::supports_avx2() :
- vector_len == AVX_512bit? VM_Version::supports_avx512bw() : 0, "");
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
+ vector_len == AVX_256bit ? VM_Version::supports_avx2() :
+ vector_len == AVX_512bit ? VM_Version::supports_avx512bw() : false, "");
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16(0x1D, (0xC0 | encode));
@@ -1946,6 +1972,85 @@ void Assembler::evpabsq(XMMRegister dst, XMMRegister src, int vector_len) {
emit_int16(0x1F, (0xC0 | encode));
}
+void Assembler::vcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x5A, (0xC0 | encode));
+}
+
+void Assembler::vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_rex_vex_w_reverted(); // set before the prefix is emitted; once vex_prefix_and_encode() has run, this attribute can no longer influence the encoding
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x5A, (0xC0 | encode));
+}
+
+void Assembler::evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x5B, (0xC0 | encode));
+}
+
+void Assembler::evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xE6, (0xC0 | encode));
+}
+
+void Assembler::evpmovwb(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2 && VM_Version::supports_avx512bw(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes); // src encoding is passed first and dst last, the reverse of evcvtqq2pd; presumably dst is the r/m operand of this instruction -- confirm against the Intel SDM
+ emit_int16(0x30, (0xC0 | encode));
+}
+
+void Assembler::evpmovdw(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2, "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes); // src encoding is passed first and dst last, the reverse of evcvtqq2pd; presumably dst is the r/m operand of this instruction -- confirm against the Intel SDM
+ emit_int16(0x33, (0xC0 | encode));
+}
+
+void Assembler::evpmovdb(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2, "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes); // src encoding is passed first and dst last, the reverse of evcvtqq2pd; presumably dst is the r/m operand of this instruction -- confirm against the Intel SDM
+ emit_int16(0x31, (0xC0 | encode));
+}
+
+void Assembler::evpmovqd(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2, "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes); // src encoding is passed first and dst last, the reverse of evcvtqq2pd; presumably dst is the r/m operand of this instruction -- confirm against the Intel SDM
+ emit_int16(0x35, (0xC0 | encode));
+}
+
+void Assembler::evpmovqb(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2, "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes); // src encoding is passed first and dst last, the reverse of evcvtqq2pd; presumably dst is the r/m operand of this instruction -- confirm against the Intel SDM
+ emit_int16(0x32, (0xC0 | encode));
+}
+
+void Assembler::evpmovqw(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2, "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes); // src encoding is passed first and dst last, the reverse of evcvtqq2pd; presumably dst is the r/m operand of this instruction -- confirm against the Intel SDM
+ emit_int16(0x34, (0xC0 | encode));
+}
+
void Assembler::decl(Address dst) {
// Don't use it directly. Use MacroAssembler::decrement() instead.
InstructionMark im(this);
@@ -2543,28 +2648,34 @@ void Assembler::vmovdqu(Address dst, XMMRegister src) {
}
// Move Unaligned EVEX enabled Vector (programmable : 8,16,32,64)
-void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, int vector_len) {
+void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
emit_int16(0x6F, (0xC0 | encode));
}
-void Assembler::evmovdqub(XMMRegister dst, Address src, int vector_len) {
+void Assembler::evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
emit_operand(dst, src);
}
-void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) {
+void Assembler::evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(src != xnoreg, "sanity");
InstructionMark im(this);
@@ -2572,132 +2683,234 @@ void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) {
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
emit_int8(0x7F);
emit_operand(src, dst);
}
-void Assembler::evmovdqub(XMMRegister dst, KRegister mask, Address src, int vector_len) {
+void Assembler::evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
assert(VM_Version::supports_avx512vlbw(), "");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
emit_operand(dst, src);
}
-void Assembler::evmovdquw(XMMRegister dst, Address src, int vector_len) {
+void Assembler::evmovdqu(XMMRegister dst, KRegister mask, Address src, int vector_len, int type) {
+ assert(VM_Version::supports_avx512vlbw(), "");
+ assert(type == T_BYTE || type == T_SHORT || type == T_CHAR || type == T_INT || type == T_LONG, "");
+ InstructionMark im(this);
+ bool wide = type == T_SHORT || type == T_CHAR || type == T_LONG;
+ int prefix = (type == T_BYTE || type == T_SHORT || type == T_CHAR) ? VEX_SIMD_F2 : VEX_SIMD_F3;
+ InstructionAttr attributes(vector_len, /* vex_w */ wide, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.set_is_evex_instruction();
+ vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
+ emit_int8(0x6F);
+ emit_operand(dst, src);
+}
+
+void Assembler::evmovdqu(Address dst, KRegister mask, XMMRegister src, int vector_len, int type) {
+ assert(VM_Version::supports_avx512vlbw(), "");
+ assert(src != xnoreg, "sanity");
+ assert(type == T_BYTE || type == T_SHORT || type == T_CHAR || type == T_INT || type == T_LONG, "");
+ InstructionMark im(this);
+ bool wide = type == T_SHORT || type == T_CHAR || type == T_LONG;
+ int prefix = (type == T_BYTE || type == T_SHORT || type == T_CHAR) ? VEX_SIMD_F2 : VEX_SIMD_F3;
+ InstructionAttr attributes(vector_len, /* vex_w */ wide, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+ attributes.reset_is_clear_context();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.set_is_evex_instruction();
+ vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
+ emit_int8(0x7F);
+ emit_operand(src, dst);
+}
+
+void Assembler::evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
emit_operand(dst, src);
}
-void Assembler::evmovdquw(XMMRegister dst, KRegister mask, Address src, int vector_len) {
+void Assembler::evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
assert(VM_Version::supports_avx512vlbw(), "");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
emit_operand(dst, src);
}
-void Assembler::evmovdquw(Address dst, XMMRegister src, int vector_len) {
+void Assembler::evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(src != xnoreg, "sanity");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
emit_int8(0x7F);
emit_operand(src, dst);
}
-void Assembler::evmovdquw(Address dst, KRegister mask, XMMRegister src, int vector_len) {
+void Assembler::evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_avx512vlbw(), "");
assert(src != xnoreg, "sanity");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
- attributes.reset_is_clear_context();
attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x7F);
emit_operand(src, dst);
}
void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
+ // Unmasked instruction
+ evmovdqul(dst, k0, src, /*merge*/ false, vector_len);
+}
+
+void Assembler::evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int16(0x6F, (0xC0 | encode));
}
void Assembler::evmovdqul(XMMRegister dst, Address src, int vector_len) {
+ // Unmasked instruction
+ evmovdqul(dst, k0, src, /*merge*/ false, vector_len);
+}
+
+void Assembler::evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true , /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false , /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+ attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
emit_operand(dst, src);
}
void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) {
+ // Unmasked instruction
+ evmovdqul(dst, k0, src, /*merge*/ true, vector_len);
+}
+
+void Assembler::evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(src != xnoreg, "sanity");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
- attributes.reset_is_clear_context();
+ attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x7F);
emit_operand(src, dst);
}
void Assembler::evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
+ // Unmasked instruction
+ if (dst->encoding() == src->encoding()) return;
+ evmovdquq(dst, k0, src, /*merge*/ false, vector_len);
+}
+
+void Assembler::evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
- InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int16(0x6F, (0xC0 | encode));
}
void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) {
+ // Unmasked instruction
+ evmovdquq(dst, k0, src, /*merge*/ false, vector_len);
+}
+
+void Assembler::evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+ attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
emit_operand(dst, src);
}
void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) {
+ // Unmasked instruction
+ evmovdquq(dst, k0, src, /*merge*/ true, vector_len);
+}
+
+void Assembler::evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(src != xnoreg, "sanity");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
- attributes.reset_is_clear_context();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
attributes.set_is_evex_instruction();
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x7F);
@@ -2775,6 +2988,29 @@ void Assembler::movq(Address dst, XMMRegister src) {
emit_operand(src, dst);
}
+void Assembler::movq(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_rex_vex_w_reverted();
+ int encode = simd_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xD6, (0xC0 | encode));
+}
+
+void Assembler::movq(Register dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ // swap src/dst to get correct prefix
+ int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x7E, (0xC0 | encode));
+}
+
+void Assembler::movq(XMMRegister dst, Register src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x6E, (0xC0 | encode));
+}
+
void Assembler::movsbl(Register dst, Address src) { // movsxb
InstructionMark im(this);
prefix(src, dst);
@@ -3274,6 +3510,11 @@ void Assembler::notl(Register dst) {
emit_int16((unsigned char)0xF7, (0xD0 | encode));
}
+void Assembler::orw(Register dst, Register src) {
+ (void)prefix_and_encode(dst->encoding(), src->encoding());
+ emit_arith(0x0B, 0xC0, dst, src);
+}
+
void Assembler::orl(Address dst, int32_t imm32) {
InstructionMark im(this);
prefix(dst);
@@ -3312,6 +3553,34 @@ void Assembler::orb(Address dst, int imm8) {
emit_int8(imm8);
}
+void Assembler::packsswb(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x63, (0xC0 | encode));
+}
+
+void Assembler::vpacksswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 0, "some form of AVX must be enabled");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x63, (0xC0 | encode));
+}
+
+void Assembler::packssdw(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse2(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x6B, (0xC0 | encode));
+}
+
+void Assembler::vpackssdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 0, "some form of AVX must be enabled");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x6B, (0xC0 | encode));
+}
+
void Assembler::packuswb(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
@@ -3337,21 +3606,74 @@ void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int
emit_int16(0x67, (0xC0 | encode));
}
+void Assembler::packusdw(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x2B, (0xC0 | encode));
+}
+
+void Assembler::vpackusdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 0, "some form of AVX must be enabled");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x2B, (0xC0 | encode));
+}
+
void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
assert(VM_Version::supports_avx2(), "");
+ assert(vector_len != AVX_128bit, "");
+ // VEX.256.66.0F3A.W1 00 /r ib
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int24(0x00, (0xC0 | encode), imm8);
}
void Assembler::vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
- assert(UseAVX > 2, "requires AVX512F");
+ assert(vector_len == AVX_256bit ? VM_Version::supports_avx512vl() :
+ vector_len == AVX_512bit ? VM_Version::supports_evex() : false, "not supported");
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16(0x36, (0xC0 | encode));
}
+void Assembler::vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_avx512_vbmi(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16((unsigned char)0x8D, (0xC0 | encode));
+}
+
+void Assembler::vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx512vlbw() :
+ vector_len == AVX_256bit ? VM_Version::supports_avx512vlbw() :
+ vector_len == AVX_512bit ? VM_Version::supports_avx512bw() : false, "not supported");
+ InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16((unsigned char)0x8D, (0xC0 | encode));
+}
+
+void Assembler::vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(vector_len <= AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex(), "");
+ // VEX.NDS.256.66.0F38.W0 36 /r
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x36, (0xC0 | encode));
+}
+
+void Assembler::vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+ assert(vector_len <= AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex(), "");
+ // VEX.NDS.256.66.0F38.W0 36 /r
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8(0x36);
+ emit_operand(dst, src);
+}
+
void Assembler::vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
assert(VM_Version::supports_avx2(), "");
InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
@@ -3366,6 +3688,28 @@ void Assembler::vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, in
emit_int24(0x06, (0xC0 | encode), imm8);
}
+void Assembler::vpermilps(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
+ assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int24(0x04, (0xC0 | encode), imm8);
+}
+
+void Assembler::vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
+ assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(),/* legacy_mode */ false,/* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_rex_vex_w_reverted();
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int24(0x05, (0xC0 | encode), imm8);
+}
+
+void Assembler::vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
+ assert(vector_len <= AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int24(0x01, (0xC0 | encode), imm8);
+}
+
void Assembler::evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -3374,7 +3718,6 @@ void Assembler::evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int
emit_int16(0x76, (0xC0 | encode));
}
-
void Assembler::pause() {
emit_int16((unsigned char)0xF3, (unsigned char)0x90);
}
@@ -3408,9 +3751,18 @@ void Assembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
emit_int16(0x74, (0xC0 | encode));
}
+void Assembler::vpcmpCCbwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
+ assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(cond_encoding, (0xC0 | encode));
+}
+
// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
void Assembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
- assert(VM_Version::supports_avx(), "");
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
+ assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int16(0x74, (0xC0 | encode));
@@ -3497,7 +3849,7 @@ void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vect
void Assembler::evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
assert(VM_Version::supports_avx512vlbw(), "");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_reg_mask */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.reset_is_clear_context();
attributes.set_embedded_opmask_register_specifier(mask);
@@ -3517,7 +3869,8 @@ void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
void Assembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
- assert(VM_Version::supports_avx(), "");
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
+ assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int16(0x75, (0xC0 | encode));
@@ -3554,29 +3907,32 @@ void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) {
// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
void Assembler::vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
- assert(VM_Version::supports_avx(), "");
- InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
+ assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int16(0x76, (0xC0 | encode));
}
// In this context, kdst is written the mask used to process the equal components
-void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
+void Assembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), "");
- InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_is_evex_instruction();
attributes.reset_is_clear_context();
+ attributes.set_embedded_opmask_register_specifier(mask);
int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int16(0x76, (0xC0 | encode));
}
-void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
+void Assembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
- attributes.reset_is_clear_context();
attributes.set_is_evex_instruction();
+ attributes.reset_is_clear_context();
+ attributes.set_embedded_opmask_register_specifier(mask);
int dst_enc = kdst->encoding();
vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x76);
@@ -3591,6 +3947,13 @@ void Assembler::pcmpeqq(XMMRegister dst, XMMRegister src) {
emit_int16(0x29, (0xC0 | encode));
}
+void Assembler::vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(cond_encoding, (0xC0 | encode));
+}
+
// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
void Assembler::vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), "");
@@ -3623,11 +3986,36 @@ void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vect
emit_operand(as_Register(dst_enc), src);
}
-void Assembler::pmovmskb(Register dst, XMMRegister src) {
- assert(VM_Version::supports_sse2(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
- int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
- emit_int16((unsigned char)0xD7, (0xC0 | encode));
+void Assembler::evpmovd2m(KRegister kdst, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
+ assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(kdst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x39, (0xC0 | encode));
+}
+
+void Assembler::evpmovq2m(KRegister kdst, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
+ assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(kdst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x39, (0xC0 | encode));
+}
+
+void Assembler::pcmpgtq(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x37, (0xC0 | encode));
+}
+
+void Assembler::pmovmskb(Register dst, XMMRegister src) {
+ assert(VM_Version::supports_sse2(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xD7, (0xC0 | encode));
}
void Assembler::vpmovmskb(Register dst, XMMRegister src) {
@@ -3639,14 +4027,14 @@ void Assembler::vpmovmskb(Register dst, XMMRegister src) {
+// Emits pextrd dst, src, imm8 (general-purpose register destination): opcode 0x16 in
+// the 66-prefixed 0F 3A map with rex_w clear, followed by the imm8 byte. The XMM source
+// goes in the first encoder slot and the destination is passed by raw encoding number.
void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int24(0x16, (0xC0 | encode), imm8);
}
void Assembler::pextrd(Address dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x16);
@@ -3656,14 +4044,14 @@ void Assembler::pextrd(Address dst, XMMRegister src, int imm8) {
+// Emits pextrq dst, src, imm8 (general-purpose register destination): same opcode 0x16
+// in the 66-prefixed 0F 3A map as pextrd, but with rex_w set; followed by the imm8 byte.
void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int24(0x16, (0xC0 | encode), imm8);
}
void Assembler::pextrq(Address dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x16);
@@ -3673,14 +4061,14 @@ void Assembler::pextrq(Address dst, XMMRegister src, int imm8) {
+// Emits pextrw dst, src, imm8 (general-purpose register destination): opcode 0xC5 in
+// the 66-prefixed 0F map, followed by the imm8 byte. Unlike pextrd/pextrq, the
+// destination (by raw encoding number) goes in the first encoder slot and src in the last.
void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse2(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int24((unsigned char)0xC5, (0xC0 | encode), imm8);
}
void Assembler::pextrw(Address dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x15);
@@ -3688,9 +4076,16 @@ void Assembler::pextrw(Address dst, XMMRegister src, int imm8) {
emit_int8(imm8);
}
+// Emits pextrb dst, src, imm8 (general-purpose register destination): opcode 0x14 in
+// the 66-prefixed 0F 3A map, followed by the imm8 byte. The XMM source goes in the
+// first encoder slot and the destination is passed by raw encoding number.
+void Assembler::pextrb(Register dst, XMMRegister src, int imm8) {
+ assert(VM_Version::supports_sse4_1(), "");
+ InstructionAttr attrs(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+ const int enc = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attrs);
+ emit_int24(0x14, (0xC0 | enc), imm8);
+}
+
void Assembler::pextrb(Address dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x14);
@@ -3700,14 +4095,14 @@ void Assembler::pextrb(Address dst, XMMRegister src, int imm8) {
+// Emits pinsrd dst, src, imm8 (general-purpose register source): opcode 0x22 in the
+// 66-prefixed 0F 3A map with rex_w clear, followed by the imm8 byte. dst is passed as
+// both destination and first source; src is passed by raw encoding number.
void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int24(0x22, (0xC0 | encode), imm8);
}
void Assembler::pinsrd(XMMRegister dst, Address src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x22);
@@ -3715,16 +4110,23 @@ void Assembler::pinsrd(XMMRegister dst, Address src, int imm8) {
emit_int8(imm8);
}
+// Emits vpinsrd dst, nds, src, imm8 (general-purpose register source): opcode 0x22 in
+// the 66-prefixed 0F 3A map with vex_w clear, followed by the imm8 byte.
+void Assembler::vpinsrd(XMMRegister dst, XMMRegister nds, Register src, int imm8) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionAttr attrs(AVX_128bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+ const int enc = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attrs);
+ emit_int24(0x22, (0xC0 | enc), imm8);
+}
+
+// Emits pinsrq dst, src, imm8 (general-purpose register source): same opcode 0x22 in
+// the 66-prefixed 0F 3A map as pinsrd, but with rex_w set; followed by the imm8 byte.
void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int24(0x22, (0xC0 | encode), imm8);
}
void Assembler::pinsrq(XMMRegister dst, Address src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x22);
@@ -3732,16 +4134,23 @@ void Assembler::pinsrq(XMMRegister dst, Address src, int imm8) {
emit_int8(imm8);
}
+// Emits vpinsrq dst, nds, src, imm8 (general-purpose register source): opcode 0x22 in
+// the 66-prefixed 0F 3A map with vex_w set, followed by the imm8 byte.
+void Assembler::vpinsrq(XMMRegister dst, XMMRegister nds, Register src, int imm8) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionAttr attrs(AVX_128bit, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+ const int enc = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attrs);
+ emit_int24(0x22, (0xC0 | enc), imm8);
+}
+
+// Emits pinsrw dst, src, imm8 (general-purpose register source): opcode 0xC4 in the
+// 66-prefixed 0F map, followed by the imm8 byte. dst is passed as both destination and
+// first source; src is passed by raw encoding number.
void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
assert(VM_Version::supports_sse2(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int24((unsigned char)0xC4, (0xC0 | encode), imm8);
}
void Assembler::pinsrw(XMMRegister dst, Address src, int imm8) {
assert(VM_Version::supports_sse2(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xC4);
@@ -3749,9 +4158,16 @@ void Assembler::pinsrw(XMMRegister dst, Address src, int imm8) {
emit_int8(imm8);
}
+// Emits vpinsrw dst, nds, src, imm8 (general-purpose register source): opcode 0xC4 in
+// the 66-prefixed 0F map, followed by the imm8 byte.
+void Assembler::vpinsrw(XMMRegister dst, XMMRegister nds, Register src, int imm8) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionAttr attrs(AVX_128bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+ const int enc = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attrs);
+ emit_int24((unsigned char)0xC4, (0xC0 | enc), imm8);
+}
+
void Assembler::pinsrb(XMMRegister dst, Address src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x20);
@@ -3759,6 +4175,34 @@ void Assembler::pinsrb(XMMRegister dst, Address src, int imm8) {
emit_int8(imm8);
}
+// Emits pinsrb dst, src, imm8 (general-purpose register source): opcode 0x20 in the
+// 66-prefixed 0F 3A map, followed by the imm8 byte. dst is passed as both destination
+// and first source; src is passed by raw encoding number.
+void Assembler::pinsrb(XMMRegister dst, Register src, int imm8) {
+ assert(VM_Version::supports_sse4_1(), "");
+ InstructionAttr attrs(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+ const int enc = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attrs);
+ emit_int24(0x20, (0xC0 | enc), imm8);
+}
+
+// Emits vpinsrb dst, nds, src, imm8 (general-purpose register source): opcode 0x20 in
+// the 66-prefixed 0F 3A map, followed by the imm8 byte.
+void Assembler::vpinsrb(XMMRegister dst, XMMRegister nds, Register src, int imm8) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionAttr attrs(AVX_128bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+ const int enc = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attrs);
+ emit_int24(0x20, (0xC0 | enc), imm8);
+}
+
+// Emits insertps dst, src, imm8 (register form): opcode 0x21 in the 66-prefixed 0F 3A
+// map, followed by the imm8 byte. dst is passed as both destination and first source.
+void Assembler::insertps(XMMRegister dst, XMMRegister src, int imm8) {
+ assert(VM_Version::supports_sse4_1(), "");
+ InstructionAttr attrs(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ const int enc = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attrs);
+ emit_int24(0x21, (0xC0 | enc), imm8);
+}
+
+// Emits vinsertps dst, nds, src, imm8 (register form): opcode 0x21 in the 66-prefixed
+// 0F 3A map, followed by the imm8 byte.
+void Assembler::vinsertps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionAttr attrs(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ const int enc = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attrs);
+ emit_int24(0x21, (0xC0 | enc), imm8);
+}
+
void Assembler::pmovzxbw(XMMRegister dst, Address src) {
assert(VM_Version::supports_sse4_1(), "");
InstructionMark im(this);
@@ -3783,6 +4227,41 @@ void Assembler::pmovsxbw(XMMRegister dst, XMMRegister src) {
emit_int16(0x20, (0xC0 | encode));
}
+// Emits pmovzxdq dst, src (register form): opcode 0x35 in the 66-prefixed 0F 38 map.
+void Assembler::pmovzxdq(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
+ InstructionAttr attrs(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ const int enc = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
+ emit_int16(0x35, (0xC0 | enc));
+}
+
+// Emits pmovsxbd dst, src (register form): opcode 0x21 in the 66-prefixed 0F 38 map.
+void Assembler::pmovsxbd(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
+ InstructionAttr attrs(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ const int enc = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
+ emit_int16(0x21, (0xC0 | enc));
+}
+
+// Emits pmovzxbd dst, src (register form): opcode 0x31 in the 66-prefixed 0F 38 map.
+void Assembler::pmovzxbd(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
+ InstructionAttr attrs(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ const int enc = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
+ emit_int16(0x31, (0xC0 | enc));
+}
+
+// Emits pmovsxbq dst, src (register form): opcode 0x22 in the 66-prefixed 0F 38 map.
+void Assembler::pmovsxbq(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
+ InstructionAttr attrs(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ const int enc = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
+ emit_int16(0x22, (0xC0 | enc));
+}
+
+// Emits pmovsxwd dst, src (register form): opcode 0x23 in the 66-prefixed 0F 38 map.
+void Assembler::pmovsxwd(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
+ InstructionAttr attrs(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ const int enc = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
+ emit_int16(0x23, (0xC0 | enc));
+}
+
void Assembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
assert(VM_Version::supports_avx(), "");
InstructionMark im(this);
@@ -3816,7 +4295,7 @@ void Assembler::evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vec
assert(VM_Version::supports_avx512vlbw(), "");
assert(dst != xnoreg, "sanity");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
@@ -3824,6 +4303,86 @@ void Assembler::evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vec
emit_int8(0x30);
emit_operand(dst, src);
}
+
+// Emits the opmask-predicated evpandd dst{mask}, nds, src (register form).
+// Encoding: EVEX.NDS.XXX.66.0F.W0 DB /r
+void Assembler::evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attrs.set_is_evex_instruction();
+ attrs.set_embedded_opmask_register_specifier(mask);
+ // The clear-context attribute is only reset when the caller asks for merging.
+ if (merge) attrs.reset_is_clear_context();
+ const int enc = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attrs);
+ emit_int16((unsigned char)0xDB, (0xC0 | enc));
+}
+
+// Emits vpmovzxdq dst, src (register form) for the given vector length: opcode 0x35 in
+// the 66-prefixed 0F 38 map.
+void Assembler::vpmovzxdq(XMMRegister dst, XMMRegister src, int vector_len) {
+ // Per-length feature check, same shape as the vpmovsx* emitters: 128-bit needs AVX,
+ // 256-bit needs AVX2, and 512-bit needs EVEX (AVX2 alone is not sufficient there).
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
+ vector_len == AVX_256bit ? VM_Version::supports_avx2() :
+ VM_Version::supports_evex(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x35, (0xC0 | encode));
+}
+
+// Emits vpmovzxbd dst, src (register form) for the given vector length: opcode 0x31 in
+// the 66-prefixed 0F 38 map.
+void Assembler::vpmovzxbd(XMMRegister dst, XMMRegister src, int vector_len) {
+ // Per-length feature check, same shape as the vpmovsx* emitters: 128-bit needs AVX,
+ // 256-bit needs AVX2, and 512-bit needs EVEX (AVX2 alone is not sufficient there).
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
+ vector_len == AVX_256bit ? VM_Version::supports_avx2() :
+ VM_Version::supports_evex(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x31, (0xC0 | encode));
+}
+
+// Emits vpmovzxbq dst, src (register form) for the given vector length: opcode 0x32 in
+// the 66-prefixed 0F 38 map.
+void Assembler::vpmovzxbq(XMMRegister dst, XMMRegister src, int vector_len) {
+ // Per-length feature check, same shape as the vpmovsx* emitters: 128-bit needs AVX,
+ // 256-bit needs AVX2, and 512-bit needs EVEX (AVX2 alone is not sufficient there).
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
+ vector_len == AVX_256bit ? VM_Version::supports_avx2() :
+ VM_Version::supports_evex(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x32, (0xC0 | encode));
+}
+
+// Emits vpmovsxbd dst, src (register form) for the given vector length: opcode 0x21 in
+// the 66-prefixed 0F 38 map. Required feature: AVX for 128-bit, AVX2 for 256-bit,
+// EVEX otherwise.
+void Assembler::vpmovsxbd(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
+ vector_len == AVX_256bit ? VM_Version::supports_avx2() :
+ VM_Version::supports_evex(), "");
+ InstructionAttr attrs(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ const int enc = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
+ emit_int16(0x21, (0xC0 | enc));
+}
+
+// Emits vpmovsxbq dst, src (register form) for the given vector length: opcode 0x22 in
+// the 66-prefixed 0F 38 map. Required feature: AVX for 128-bit, AVX2 for 256-bit,
+// EVEX otherwise.
+void Assembler::vpmovsxbq(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
+ vector_len == AVX_256bit ? VM_Version::supports_avx2() :
+ VM_Version::supports_evex(), "");
+ InstructionAttr attrs(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ const int enc = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
+ emit_int16(0x22, (0xC0 | enc));
+}
+
+// Emits vpmovsxwd dst, src (register form) for the given vector length: opcode 0x23 in
+// the 66-prefixed 0F 38 map. Required feature: AVX for 128-bit, AVX2 for 256-bit,
+// EVEX otherwise.
+void Assembler::vpmovsxwd(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
+ vector_len == AVX_256bit ? VM_Version::supports_avx2() :
+ VM_Version::supports_evex(), "");
+ InstructionAttr attrs(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ const int enc = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
+ emit_int16(0x23, (0xC0 | enc));
+}
+
+// Emits vpmovsxwq dst, src (register form) for the given vector length: opcode 0x24 in
+// the 66-prefixed 0F 38 map. Required feature: AVX for 128-bit, AVX2 for 256-bit,
+// EVEX otherwise.
+void Assembler::vpmovsxwq(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
+ vector_len == AVX_256bit ? VM_Version::supports_avx2() :
+ VM_Version::supports_evex(), "");
+ InstructionAttr attrs(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ const int enc = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
+ emit_int16(0x24, (0xC0 | enc));
+}
+
+// Emits vpmovsxdq dst, src (register form) for the given vector length: opcode 0x25 in
+// the 66-prefixed 0F 38 map. Required feature: AVX for 128-bit, AVX2 for 256-bit,
+// EVEX otherwise.
+void Assembler::vpmovsxdq(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
+ vector_len == AVX_256bit ? VM_Version::supports_avx2() :
+ VM_Version::supports_evex(), "");
+ InstructionAttr attrs(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ const int enc = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
+ emit_int16(0x25, (0xC0 | enc));
+}
+
void Assembler::evpmovwb(Address dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx512vlbw(), "");
assert(src != xnoreg, "sanity");
@@ -4050,6 +4609,14 @@ void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
emit_int8(mode & 0xFF);
}
+// Emits pshufhw dst, src, mode (register form): opcode 0x70 in the F3-prefixed 0F map,
+// followed by the low 8 bits of mode as the immediate.
+void Assembler::pshufhw(XMMRegister dst, XMMRegister src, int mode) {
+ assert(isByte(mode), "invalid value");
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attrs(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ const int enc = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attrs);
+ emit_int24(0x70, (0xC0 | enc), mode & 0xFF);
+}
+
void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
assert(isByte(mode), "invalid value");
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
@@ -4080,6 +4647,35 @@ void Assembler::evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, i
emit_int24(0x43, (0xC0 | encode), imm8 & 0xFF);
}
+// Emits the register form of opcode 0xC6 in the 66-prefixed 0F map, followed by the
+// low 8 bits of imm8. dst is passed as both destination and first source.
+void Assembler::pshufpd(XMMRegister dst, XMMRegister src, int imm8) {
+ assert(isByte(imm8), "invalid value");
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attrs(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ const int enc = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attrs);
+ emit_int24((unsigned char)0xC6, (0xC0 | enc), imm8 & 0xFF);
+}
+
+// Emits the three-operand register form of opcode 0xC6 in the 66-prefixed 0F map,
+// followed by the low 8 bits of imm8. vex_w is requested only when EVEX is supported,
+// and the attribute set is marked rex_vex_w_reverted.
+void Assembler::vpshufpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
+ InstructionAttr attrs(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attrs.set_rex_vex_w_reverted();
+ const int enc = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attrs);
+ emit_int24((unsigned char)0xC6, (0xC0 | enc), imm8 & 0xFF);
+}
+
+// Emits the register form of opcode 0xC6 in the unprefixed (VEX_SIMD_NONE) 0F map,
+// followed by the low 8 bits of imm8. dst is passed as both destination and first source.
+void Assembler::pshufps(XMMRegister dst, XMMRegister src, int imm8) {
+ assert(isByte(imm8), "invalid value");
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attrs(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ const int enc = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attrs);
+ emit_int24((unsigned char)0xC6, (0xC0 | enc), imm8 & 0xFF);
+}
+
+// Emits the three-operand register form of opcode 0xC6 in the unprefixed
+// (VEX_SIMD_NONE) 0F map, followed by the low 8 bits of imm8.
+void Assembler::vpshufps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
+ InstructionAttr attrs(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ const int enc = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attrs);
+ emit_int24((unsigned char)0xC6, (0xC0 | enc), imm8 & 0xFF);
+}
+
void Assembler::psrldq(XMMRegister dst, int shift) {
// Shift left 128 bit value in dst XMMRegister by shift number of bytes.
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
@@ -4151,6 +4747,13 @@ void Assembler::vptest(XMMRegister dst, XMMRegister src) {
emit_int16(0x17, (0xC0 | encode));
}
+// Emits vptest dst, src (register form) with a caller-chosen vector length: opcode
+// 0x17 in the 66-prefixed 0F 38 map, always in legacy (non-EVEX) mode.
+void Assembler::vptest(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionAttr attrs(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ const int enc = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
+ emit_int16(0x17, (0xC0 | enc));
+}
+
void Assembler::punpcklbw(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
@@ -4881,6 +5484,11 @@ void Assembler::xorb(Register dst, Address src) {
emit_operand(dst, src);
}
+// Emits xorw dst, src: 16-bit register-to-register XOR (opcode 0x33).
+void Assembler::xorw(Register dst, Register src) {
+ // The 0x66 operand-size override selects the 16-bit form; without it opcode 0x33
+ // encodes a 32-bit XOR. It is a legacy prefix, so it has to be emitted before the
+ // REX prefix that prefix_and_encode may produce.
+ // NOTE(review): callers that relied on the previous 32-bit behaviour (e.g. to clear
+ // the whole register) should use xorl instead -- confirm against call sites.
+ emit_int8(0x66);
+ (void)prefix_and_encode(dst->encoding(), src->encoding());
+ emit_arith(0x33, 0xC0, dst, src);
+}
+
// AVX 3-operands scalar float-point arithmetic instructions
void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
@@ -5794,6 +6402,13 @@ void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
emit_int16(0x40, (0xC0 | encode));
}
+// Emits pmuludq dst, src (register form): opcode 0xF4 in the 66-prefixed 0F map.
+// dst is passed as both destination and first source.
+void Assembler::pmuludq(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse2(), "");
+ InstructionAttr attrs(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ const int enc = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attrs);
+ emit_int16((unsigned char)0xF4, (0xC0 | enc));
+}
+
void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -5816,6 +6431,13 @@ void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int v
emit_int16(0x40, (0xC0 | encode));
}
+// Emits vpmuludq dst, nds, src (register form): opcode 0xF4 in the 66-prefixed 0F map.
+// vex_w is requested only when EVEX is supported.
+void Assembler::vpmuludq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ InstructionAttr attrs(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ const int enc = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attrs);
+ emit_int16((unsigned char)0xF4, (0xC0 | enc));
+}
+
void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionMark im(this);
@@ -5847,66 +6469,227 @@ void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vecto
emit_operand(dst, src);
}
-// Shift packed integers left by specified number of bits.
-void Assembler::psllw(XMMRegister dst, int shift) {
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+// Min, max
+void Assembler::pminsb(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
- // XMM6 is for /6 encoding: 66 0F 71 /6 ib
- int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
- emit_int24(0x71, (0xC0 | encode), shift & 0xFF);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x38, (0xC0 | encode));
}
-void Assembler::pslld(XMMRegister dst, int shift) {
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
- // XMM6 is for /6 encoding: 66 0F 72 /6 ib
- int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
- emit_int24(0x72, (0xC0 | encode), shift & 0xFF);
+void Assembler::vpminsb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
+ (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x38, (0xC0 | encode));
}
-void Assembler::psllq(XMMRegister dst, int shift) {
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
- // XMM6 is for /6 encoding: 66 0F 73 /6 ib
- int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
- emit_int24(0x73, (0xC0 | encode), shift & 0xFF);
+void Assembler::pminsw(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse2(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xEA, (0xC0 | encode));
}
-void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
- int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
- emit_int16((unsigned char)0xF1, (0xC0 | encode));
+void Assembler::vpminsw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
+ (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xEA, (0xC0 | encode));
}
-void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+void Assembler::pminsd(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
- int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
- emit_int16((unsigned char)0xF2, (0xC0 | encode));
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x39, (0xC0 | encode));
}
-void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
- attributes.set_rex_vex_w_reverted();
- int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
- emit_int16((unsigned char)0xF3, (0xC0 | encode));
+void Assembler::vpminsd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
+ (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex()), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x39, (0xC0 | encode));
}
-void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
- assert(UseAVX > 0, "requires some form of AVX");
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
- // XMM6 is for /6 encoding: 66 0F 71 /6 ib
- int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
- emit_int24(0x71, (0xC0 | encode), shift & 0xFF);
+void Assembler::vpminsq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2, "requires AVX512F");
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x39, (0xC0 | encode));
}
-void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
- assert(UseAVX > 0, "requires some form of AVX");
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+void Assembler::minps(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse(), "")); // feature check is only compiled in via NOT_LP64
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); // dst is supplied for both of the first two register operands
+ emit_int16(0x5D, (0xC0 | encode)); // Encoding: 0F 5D /r (no SIMD prefix)
+}
+void Assembler::vminps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(vector_len >= AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), ""); // 512-bit requires EVEX; shorter vectors only require AVX
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
- // XMM6 is for /6 encoding: 66 0F 72 /6 ib
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x5D, (0xC0 | encode)); // opcode 0x5D in the 0F map, no SIMD prefix, register-direct ModRM
+}
+
+void Assembler::minpd(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), "")); // 66-prefixed packed-double form is SSE2, so guard on SSE2 rather than SSE
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); // dst is supplied for both of the first two register operands
+ emit_int16(0x5D, (0xC0 | encode)); // Encoding: 66 0F 5D /r (register-direct ModRM)
+}
+void Assembler::vminpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(vector_len >= AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), ""); // 512-bit requires EVEX; shorter vectors only require AVX
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x5D, (0xC0 | encode)); // opcode 0x5D in the 0F map with 66 prefix, register-direct ModRM
+}
+
+void Assembler::pmaxsb(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); // dst is supplied for both of the first two register operands
+ emit_int16(0x3C, (0xC0 | encode)); // Encoding: 66 0F 38 3C /r (register-direct ModRM)
+}
+
+void Assembler::vpmaxsb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : // 128-bit: AVX
+ (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), ""); // 256-bit: AVX2; otherwise AVX512BW
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x3C, (0xC0 | encode)); // opcode 0x3C in the 0F 38 map with 66 prefix, register-direct ModRM
+}
+
+void Assembler::pmaxsw(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse2(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); // dst is supplied for both of the first two register operands
+ emit_int16((unsigned char)0xEE, (0xC0 | encode)); // Encoding: 66 0F EE /r (register-direct ModRM)
+}
+
+void Assembler::vpmaxsw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : // 128-bit: AVX
+ (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), ""); // 256-bit: AVX2; otherwise AVX512BW
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xEE, (0xC0 | encode)); // opcode 0xEE in the 0F map with 66 prefix, register-direct ModRM
+}
+
+void Assembler::pmaxsd(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); // dst is supplied for both of the first two register operands
+ emit_int16(0x3D, (0xC0 | encode)); // Encoding: 66 0F 38 3D /r (register-direct ModRM)
+}
+
+void Assembler::vpmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : // 128-bit: AVX
+ (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex()), ""); // 256-bit: AVX2; otherwise EVEX
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x3D, (0xC0 | encode)); // opcode 0x3D in the 0F 38 map with 66 prefix, register-direct ModRM
+}
+
+void Assembler::vpmaxsq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2, "requires AVX512F");
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction(); // marked EVEX explicitly, consistent with the AVX512F assert above
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x3D, (0xC0 | encode)); // same 0x3D opcode byte as the W0 form (vpmaxsd); here vex_w is true
+}
+
+void Assembler::maxps(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse(), "")); // feature check is only compiled in via NOT_LP64
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); // dst is supplied for both of the first two register operands
+ emit_int16(0x5F, (0xC0 | encode)); // Encoding: 0F 5F /r (no SIMD prefix)
+}
+
+void Assembler::vmaxps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(vector_len >= AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), ""); // 512-bit requires EVEX; shorter vectors only require AVX
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x5F, (0xC0 | encode)); // opcode 0x5F in the 0F map, no SIMD prefix, register-direct ModRM
+}
+
+void Assembler::maxpd(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), "")); // 66-prefixed packed-double form is SSE2, so guard on SSE2 rather than SSE
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); // pass dst (not xnoreg) as the middle operand so dst is also the first source, as the other two-operand emitters do
+ emit_int16(0x5F, (0xC0 | encode)); // Encoding: 66 0F 5F /r (register-direct ModRM)
+}
+
+void Assembler::vmaxpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(vector_len >= AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), ""); // 512-bit requires EVEX; shorter vectors only require AVX
+ InstructionAttr attributes(vector_len, /* vex_w */true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x5F, (0xC0 | encode)); // opcode 0x5F in the 0F map with 66 prefix, register-direct ModRM
+}
+
+// Shift packed integers left by specified number of bits.
+void Assembler::psllw(XMMRegister dst, int shift) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ // XMM6 is for /6 encoding: 66 0F 71 /6 ib
+ int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); // xmm6 stands in for the /6 opcode extension; dst fills both remaining operands
+ emit_int24(0x71, (0xC0 | encode), shift & 0xFF); // only the low 8 bits of shift are emitted as the immediate
+}
+
+void Assembler::pslld(XMMRegister dst, int shift) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ // XMM6 is for /6 encoding: 66 0F 72 /6 ib
+ int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); // xmm6 stands in for the /6 opcode extension; dst fills both remaining operands
+ emit_int24(0x72, (0xC0 | encode), shift & 0xFF); // only the low 8 bits of shift are emitted as the immediate
+}
+
+void Assembler::psllq(XMMRegister dst, int shift) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ // XMM6 is for /6 encoding: 66 0F 73 /6 ib
+ int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); // xmm6 stands in for the /6 opcode extension; dst fills both remaining operands
+ emit_int24(0x73, (0xC0 | encode), shift & 0xFF); // only the low 8 bits of shift are emitted as the immediate
+}
+
+void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); // shift count comes from a register here, not an immediate
+ emit_int16((unsigned char)0xF1, (0xC0 | encode)); // Encoding: 66 0F F1 /r (register-direct ModRM)
+}
+
+void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); // shift count comes from a register here, not an immediate
+ emit_int16((unsigned char)0xF2, (0xC0 | encode)); // Encoding: 66 0F F2 /r (register-direct ModRM)
+}
+
+void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); // W bit tracks EVEX availability
+ attributes.set_rex_vex_w_reverted(); // presumably lets W be dropped again when a non-EVEX encoding is chosen -- TODO confirm against InstructionAttr
+ int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xF3, (0xC0 | encode)); // Encoding: 66 0F F3 /r (register-direct ModRM)
+}
+
+void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ // XMM6 is for /6 encoding: 66 0F 71 /6 ib
+ int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); // xmm6 stands in for the /6 opcode extension, so dst moves to the middle operand slot
+ emit_int24(0x71, (0xC0 | encode), shift & 0xFF); // only the low 8 bits of shift are emitted as the immediate
+}
+
+void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ NOT_LP64(assert(VM_Version::supports_sse2(), "")); // additional SSE2 check, compiled in only via NOT_LP64
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ // XMM6 is for /6 encoding: 66 0F 72 /6 ib
int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int24(0x72, (0xC0 | encode), shift & 0xFF);
}
@@ -6168,13 +6951,67 @@ void Assembler::vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve
emit_int16((unsigned char)0xDB, (0xC0 | encode));
}
+// Variable shift of packed integers, logical left (shift counts come from a vector register).
+void Assembler::vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+ assert(UseAVX > 1, "requires AVX2");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); // src takes the middle operand slot, shift the last
+ emit_int16(0x47, (0xC0 | encode)); // opcode 0x47 in the 0F 38 map with 66 prefix, W0
+}
+
+void Assembler::vpsllvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+ assert(UseAVX > 1, "requires AVX2");
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); // src takes the middle operand slot, shift the last
+ emit_int16(0x47, (0xC0 | encode)); // same 0x47 opcode byte as vpsllvd; this form sets vex_w
+}
+
+// Variable shift of packed integers, logical right (shift counts come from a vector register).
+void Assembler::vpsrlvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+ assert(UseAVX > 1, "requires AVX2");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); // src takes the middle operand slot, shift the last
+ emit_int16(0x45, (0xC0 | encode)); // opcode 0x45 in the 0F 38 map with 66 prefix, W0
+}
+
+void Assembler::vpsrlvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+ assert(UseAVX > 1, "requires AVX2");
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); // src takes the middle operand slot, shift the last
+ emit_int16(0x45, (0xC0 | encode)); // same 0x45 opcode byte as vpsrlvd; this form sets vex_w
+}
+
+// Variable shift of packed integers, arithmetic right (shift counts come from a vector register).
+void Assembler::vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+ assert(UseAVX > 1, "requires AVX2");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); // src takes the middle operand slot, shift the last
+ emit_int16(0x46, (0xC0 | encode)); // opcode 0x46 in the 0F 38 map with 66 prefix, W0
+}
+
+void Assembler::evpsravw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_avx512bw(), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction(); // always EVEX, consistent with the AVX512BW assert above
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x11, (0xC0 | encode)); // opcode 0x11 in the 0F 38 map with 66 prefix, W1
+}
+
+void Assembler::evpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+ assert(UseAVX > 2, "requires AVX512");
+ assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires AVX512VL"); // anything other than 512-bit additionally needs AVX512VL
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction(); // always EVEX, consistent with the asserts above
+ int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x46, (0xC0 | encode)); // same 0x46 opcode byte as vpsravd; this form sets vex_w and is EVEX-only
+}
+
void Assembler::vpshldvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
assert(VM_Version::supports_avx512_vbmi2(), "requires vbmi2");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
- emit_int8(0x71);
- emit_int8((0xC0 | encode));
+ emit_int16(0x71, (0xC0 | encode)); // opcode 0x71 followed by the register-direct ModRM byte, emitted in one call
}
void Assembler::vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
@@ -6200,7 +7037,6 @@ void Assembler::vpandn(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve
emit_int16((unsigned char)0xDF, (0xC0 | encode));
}
-
void Assembler::por(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -6233,6 +7069,35 @@ void Assembler::vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vec
}
+void Assembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ // Encoding: EVEX.NDS.XXX.66.0F.W0 EB /r
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); // no_mask_reg is false: an opmask register is attached below
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ if (merge) { // caller asked for merge semantics
+ attributes.reset_is_clear_context(); // presumably switches from zeroing to merging of masked-off lanes -- TODO confirm against InstructionAttr
+ }
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xEB, (0xC0 | encode)); // register-source form: opcode 0xEB plus register-direct ModRM
+}
+
+void Assembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ // Encoding: EVEX.NDS.XXX.66.0F.W0 EB /r
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); // no_mask_reg is false: an opmask register is attached below
+ attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); // memory-operand attributes: full-vector tuple, no explicit input size
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ if (merge) { // caller asked for merge semantics
+ attributes.reset_is_clear_context(); // presumably switches from zeroing to merging of masked-off lanes -- TODO confirm against InstructionAttr
+ }
+ vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); // address form: the memory operand is passed first, dst last
+ emit_int8((unsigned char)0xEB);
+ emit_operand(dst, src); // memory-source form: operand bytes are produced from the Address instead of a fixed 0xC0 ModRM
+}
+
void Assembler::pxor(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -6257,13 +7122,33 @@ void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_
emit_operand(dst, src);
}
+void Assembler::vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2, "requires some form of EVEX");
+ InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); // W bit tracks EVEX availability
+ attributes.set_rex_vex_w_reverted(); // presumably lets W be dropped again when a non-EVEX encoding is chosen -- TODO confirm against InstructionAttr
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xEF, (0xC0 | encode)); // opcode 0xEF in the 0F map with 66 prefix, register-direct ModRM
+}
+
+void Assembler::evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ // Encoding: EVEX.NDS.XXX.66.0F.W0 EF /r
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); // no_mask_reg is false: an opmask register is attached below
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ if (merge) { // caller asked for merge semantics
+ attributes.reset_is_clear_context(); // presumably switches from zeroing to merging of masked-off lanes -- TODO confirm against InstructionAttr
+ }
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xEF, (0xC0 | encode)); // opcode 0xEF plus register-direct ModRM
+}
+
void Assembler::evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), "requires EVEX support");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
- emit_int8((unsigned char)0xEF);
- emit_int8((0xC0 | encode));
+ emit_int16((unsigned char)0xEF, (0xC0 | encode)); // opcode 0xEF followed by the register-direct ModRM byte, emitted in one call
}
void Assembler::evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
@@ -6960,12 +7845,67 @@ void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16(0x7C, (0xC0 | encode));
}
+
+void Assembler::vpgatherdd(XMMRegister dst, Address src, XMMRegister mask, int vector_len) {
+ assert(VM_Version::supports_avx2(), "");
+ assert(vector_len == Assembler::AVX_128bit || vector_len == Assembler::AVX_256bit, ""); // only 128- and 256-bit vectors are accepted here
+ assert(dst != xnoreg, "sanity");
+ assert(src.isxmmindex(),"expected to be xmm index");
+ assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true); // legacy_mode is true; presumably this keeps the encoding non-EVEX -- TODO confirm
+ vex_prefix(src, mask->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); // the XMM mask register is passed in the middle (nds) operand slot
+ emit_int8((unsigned char)0x90); // opcode 0x90 in the 0F 38 map with 66 prefix, W0
+ emit_operand(dst, src);
+}
+
+void Assembler::vpgatherdq(XMMRegister dst, Address src, XMMRegister mask, int vector_len) {
+ assert(VM_Version::supports_avx2(), "");
+ assert(vector_len == Assembler::AVX_128bit || vector_len == Assembler::AVX_256bit, ""); // only 128- and 256-bit vectors are accepted here
+ assert(dst != xnoreg, "sanity");
+ assert(src.isxmmindex(),"expected to be xmm index");
+ assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true); // legacy_mode is true; presumably this keeps the encoding non-EVEX -- TODO confirm
+ vex_prefix(src, mask->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); // the XMM mask register is passed in the middle (nds) operand slot
+ emit_int8((unsigned char)0x90); // same 0x90 opcode byte as vpgatherdd; this form sets vex_w
+ emit_operand(dst, src);
+}
+
+void Assembler::vgatherdpd(XMMRegister dst, Address src, XMMRegister mask, int vector_len) {
+ assert(VM_Version::supports_avx2(), "");
+ assert(vector_len == Assembler::AVX_128bit || vector_len == Assembler::AVX_256bit, ""); // only 128- and 256-bit vectors are accepted here
+ assert(dst != xnoreg, "sanity");
+ assert(src.isxmmindex(),"expected to be xmm index");
+ assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true); // legacy_mode is true; presumably this keeps the encoding non-EVEX -- TODO confirm
+ vex_prefix(src, mask->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); // the XMM mask register is passed in the middle (nds) operand slot
+ emit_int8((unsigned char)0x92); // opcode 0x92 in the 0F 38 map with 66 prefix, W1
+ emit_operand(dst, src);
+}
+
+void Assembler::vgatherdps(XMMRegister dst, Address src, XMMRegister mask, int vector_len) {
+ assert(VM_Version::supports_avx2(), "");
+ assert(vector_len == Assembler::AVX_128bit || vector_len == Assembler::AVX_256bit, ""); // only 128- and 256-bit vectors are accepted here
+ assert(dst != xnoreg, "sanity");
+ assert(src.isxmmindex(),"expected to be xmm index");
+ assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true); // no opmask register is attached here (mask is an XMM register), so no_mask_reg is true like the other XMM-masked gathers
+ vex_prefix(src, mask->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); // the XMM mask register is passed in the middle (nds) operand slot
+ emit_int8((unsigned char)0x92); // opcode 0x92 in the 0F 38 map with 66 prefix, W0
+ emit_operand(dst, src);
+}
void Assembler::evpgatherdd(XMMRegister dst, KRegister mask, Address src, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(dst != xnoreg, "sanity");
+ assert(src.isxmmindex(),"expected to be xmm index");
+ assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
+ assert(mask != k0, "instruction will #UD if mask is in k0");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
- attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
attributes.reset_is_clear_context();
attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
@@ -6974,6 +7914,116 @@ void Assembler::evpgatherdd(XMMRegister dst, KRegister mask, Address src, int ve
emit_int8((unsigned char)0x90);
emit_operand(dst, src);
}
+
+void Assembler::evpgatherdq(XMMRegister dst, KRegister mask, Address src, int vector_len) { // EVEX.66.0F38.W1 90 /r (VPGATHERDQ) under opmask 'mask'
+ assert(VM_Version::supports_evex(), "");
+ assert(dst != xnoreg, "sanity");
+ assert(src.isxmmindex(),"expected to be xmm index");
+ assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
+ assert(mask != k0, "instruction will #UD if mask is in k0");
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); // NOTE(review): vex_w is true yet the input size is EVEX_32bit; confirm 32-bit is intended for this W1 form
+ attributes.reset_is_clear_context(); // clear-context off, so evex_prefix() leaves EVEX.z unset
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.set_is_evex_instruction();
+ // swap src<->dst for encoding
+ vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); // nds encoding is 0; the opmask travels in the attributes
+ emit_int8((unsigned char)0x90);
+ emit_operand(dst, src);
+}
+
+void Assembler::evgatherdpd(XMMRegister dst, KRegister mask, Address src, int vector_len) { // EVEX.66.0F38.W1 92 /r (VGATHERDPD) under opmask 'mask'
+ assert(VM_Version::supports_evex(), "");
+ assert(dst != xnoreg, "sanity");
+ assert(src.isxmmindex(),"expected to be xmm index");
+ assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
+ assert(mask != k0, "instruction will #UD if mask is in k0");
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); // NOTE(review): vex_w is true yet the input size is EVEX_32bit; confirm 32-bit is intended for this W1 form
+ attributes.reset_is_clear_context(); // clear-context off, so evex_prefix() leaves EVEX.z unset
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.set_is_evex_instruction();
+ // swap src<->dst for encoding
+ vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); // nds encoding is 0; the opmask travels in the attributes
+ emit_int8((unsigned char)0x92);
+ emit_operand(dst, src);
+}
+
+void Assembler::evgatherdps(XMMRegister dst, KRegister mask, Address src, int vector_len) { // EVEX.66.0F38.W0 92 /r (VGATHERDPS) under opmask 'mask'
+ assert(VM_Version::supports_evex(), "");
+ assert(dst != xnoreg, "sanity");
+ assert(src.isxmmindex(),"expected to be xmm index");
+ assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
+ assert(mask != k0, "instruction will #UD if mask is in k0");
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+ attributes.reset_is_clear_context(); // clear-context off, so evex_prefix() leaves EVEX.z unset
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.set_is_evex_instruction();
+ // swap src<->dst for encoding
+ vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); // nds encoding is 0; the opmask travels in the attributes
+ emit_int8((unsigned char)0x92);
+ emit_operand(dst, src);
+}
+
+void Assembler::evpscatterdd(Address dst, KRegister mask, XMMRegister src, int vector_len) { // EVEX.66.0F38.W0 A0 /r (VPSCATTERDD): store from 'src' to 'dst' under opmask 'mask'
+ assert(VM_Version::supports_evex(), "");
+ assert(mask != k0, "instruction will #UD if mask is in k0"); // NOTE(review): unlike the gather emitters, dst.isxmmindex() is not asserted here
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+ attributes.reset_is_clear_context(); // clear-context off, so evex_prefix() leaves EVEX.z unset
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.set_is_evex_instruction();
+ vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); // memory operand first, nds encoding 0, data register last
+ emit_int8((unsigned char)0xA0);
+ emit_operand(src, dst);
+}
+
+void Assembler::evpscatterdq(Address dst, KRegister mask, XMMRegister src, int vector_len) { // EVEX.66.0F38.W1 A0 /r (VPSCATTERDQ): store from 'src' to 'dst' under opmask 'mask'
+ assert(VM_Version::supports_evex(), "");
+ assert(mask != k0, "instruction will #UD if mask is in k0"); // NOTE(review): unlike the gather emitters, dst.isxmmindex() is not asserted here
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); // NOTE(review): vex_w is true yet the input size is EVEX_32bit; confirm 32-bit is intended for this W1 form
+ attributes.reset_is_clear_context(); // clear-context off, so evex_prefix() leaves EVEX.z unset
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.set_is_evex_instruction();
+ vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); // memory operand first, nds encoding 0, data register last
+ emit_int8((unsigned char)0xA0);
+ emit_operand(src, dst);
+}
+
+void Assembler::evscatterdps(Address dst, KRegister mask, XMMRegister src, int vector_len) { // EVEX.66.0F38.W0 A2 /r (VSCATTERDPS): store from 'src' to 'dst' under opmask 'mask'
+ assert(VM_Version::supports_evex(), "");
+ assert(mask != k0, "instruction will #UD if mask is in k0"); // NOTE(review): unlike the gather emitters, dst.isxmmindex() is not asserted here
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+ attributes.reset_is_clear_context(); // clear-context off, so evex_prefix() leaves EVEX.z unset
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.set_is_evex_instruction();
+ vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); // memory operand first, nds encoding 0, data register last
+ emit_int8((unsigned char)0xA2);
+ emit_operand(src, dst);
+}
+
+void Assembler::evscatterdpd(Address dst, KRegister mask, XMMRegister src, int vector_len) { // EVEX.66.0F38.W1 A2 /r (VSCATTERDPD): store from 'src' to 'dst' under opmask 'mask'
+ assert(VM_Version::supports_evex(), "");
+ assert(mask != k0, "instruction will #UD if mask is in k0"); // NOTE(review): unlike the gather emitters, dst.isxmmindex() is not asserted here
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); // NOTE(review): vex_w is true yet the input size is EVEX_32bit; confirm 32-bit is intended for this W1 form
+ attributes.reset_is_clear_context(); // clear-context off, so evex_prefix() leaves EVEX.z unset
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.set_is_evex_instruction();
+ vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); // memory operand first, nds encoding 0, data register last
+ emit_int8((unsigned char)0xA2);
+ emit_operand(src, dst);
+}
// Carry-Less Multiplication Quadword
void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
assert(VM_Version::supports_clmul(), "");
@@ -7571,7 +8621,8 @@ void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, boo
// fourth EVEX.L'L for vector length : 0 is 128, 1 is 256, 2 is 512, currently we do not support 1024
byte4 |= ((_attributes->get_vector_len())& 0x3) << 5;
// last is EVEX.z for zero/merge actions
- if (_attributes->is_no_reg_mask() == false) {
+ if (_attributes->is_no_reg_mask() == false &&
+ _attributes->get_embedded_opmask_register_specifier() != 0) {
byte4 |= (_attributes->is_clear_context() ? EVEX_Z : 0);
}
@@ -7739,7 +8790,7 @@ void Assembler::vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
emit_int16(0x5D, (0xC0 | encode));
}
-void Assembler::cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
+void Assembler::vcmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
assert(VM_Version::supports_avx(), "");
assert(vector_len <= AVX_256bit, "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -7756,8 +8807,8 @@ void Assembler::blendvpb(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMM
emit_int24(0x4C, (0xC0 | encode), (0xF0 & src2_enc << 4));
}
-void Assembler::blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
- assert(VM_Version::supports_avx(), "");
+void Assembler::vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
+ assert(UseAVX > 0 && (vector_len == AVX_128bit || vector_len == AVX_256bit), "");
assert(vector_len <= AVX_256bit, "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@@ -7765,28 +8816,330 @@ void Assembler::blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMM
emit_int24(0x4B, (0xC0 | encode), (0xF0 & src2_enc << 4));
}
-void Assembler::cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
- assert(VM_Version::supports_avx(), "");
+void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
+ assert(VM_Version::supports_avx2(), "");
assert(vector_len <= AVX_256bit, "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
- int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
- emit_int24((unsigned char)0xC2, (0xC0 | encode), (0xF & cop));
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int24(0x02, (0xC0 | encode), (unsigned char)imm8);
}
-void Assembler::blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
+void Assembler::vcmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int comparison, int vector_len) {
assert(VM_Version::supports_avx(), "");
assert(vector_len <= AVX_256bit, "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+ emit_int24((unsigned char)0xC2, (0xC0 | encode), (unsigned char)comparison);
+}
+
+void Assembler::evcmpps(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ ComparisonPredicateFP comparison, int vector_len) { // EVEX compare of nds with src; kdst is encoded as the destination, 'mask' as the embedded opmask
+ assert(VM_Version::supports_evex(), "");
+ // Encoding: EVEX.NDS.XXX.0F.W0 C2 /r ib
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.reset_is_clear_context(); // clear-context off, so evex_prefix() leaves EVEX.z unset
+ int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+ emit_int24((unsigned char)0xC2, (0xC0 | encode), comparison); // opcode, register-direct ModRM, imm8 = FP predicate
+}
+
+void Assembler::evcmppd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ ComparisonPredicateFP comparison, int vector_len) { // EVEX compare of nds with src; kdst is encoded as the destination, 'mask' as the embedded opmask
+ assert(VM_Version::supports_evex(), "");
+ // Encoding: EVEX.NDS.XXX.66.0F.W1 C2 /r ib
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.reset_is_clear_context(); // clear-context off, so evex_prefix() leaves EVEX.z unset
+ int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int24((unsigned char)0xC2, (0xC0 | encode), comparison); // opcode, register-direct ModRM, imm8 = FP predicate
+}
+
+void Assembler::blendvps(XMMRegister dst, XMMRegister src) { // SSE4.1 two-operand form: 66 0F 38 14 /r
+ assert(VM_Version::supports_sse4_1(), "");
+ assert(UseAVX <= 0, "sse encoding is inconsistent with avx encoding"); // only valid when AVX is disabled; the VEX variant is vblendvps
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); // xnoreg: no nds register in this form
+ emit_int16(0x14, (0xC0 | encode));
+}
+
+void Assembler::blendvpd(XMMRegister dst, XMMRegister src) { // SSE4.1 two-operand form: 66 0F 38 15 /r
+ assert(VM_Version::supports_sse4_1(), "");
+ assert(UseAVX <= 0, "sse encoding is inconsistent with avx encoding"); // only valid when AVX is disabled; the VEX variant is vblendvpd
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); // xnoreg: no nds register in this form
+ emit_int16(0x15, (0xC0 | encode));
+}
+
+void Assembler::pblendvb(XMMRegister dst, XMMRegister src) { // SSE4.1 two-operand form: 66 0F 38 10 /r
+ assert(VM_Version::supports_sse4_1(), "");
+ assert(UseAVX <= 0, "sse encoding is inconsistent with avx encoding"); // only valid when AVX is disabled; the VEX variant is vpblendvb
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); // xnoreg: no nds register in this form
+ emit_int16(0x10, (0xC0 | encode));
+}
+
+void Assembler::vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
+ assert(UseAVX > 0 && (vector_len == AVX_128bit || vector_len == AVX_256bit), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
int src2_enc = src2->encoding();
emit_int24(0x4A, (0xC0 | encode), (0xF0 & src2_enc << 4));
}
-void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
- assert(VM_Version::supports_avx2(), "");
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
+void Assembler::vblendps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
- emit_int24(0x02, (0xC0 | encode), (unsigned char)imm8);
+ emit_int24(0x0C, (0xC0 | encode), imm8);
+}
+
+void Assembler::vpcmpgtb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { // VEX.66.0F 64 /r, register operands only
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), ""); // 128-bit needs AVX, anything wider needs AVX2
+ assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x64, (0xC0 | encode)); // opcode, register-direct ModRM
+}
+
+void Assembler::vpcmpgtw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { // VEX.66.0F 65 /r, register operands only
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), ""); // 128-bit needs AVX, anything wider needs AVX2
+ assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x65, (0xC0 | encode)); // opcode, register-direct ModRM
+}
+
+void Assembler::vpcmpgtd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { // VEX.66.0F 66 /r, register operands only
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), ""); // 128-bit needs AVX, anything wider needs AVX2
+ assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x66, (0xC0 | encode)); // opcode, register-direct ModRM
+}
+
+void Assembler::vpcmpgtq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { // VEX.66.0F38 37 /r, register operands only (note the 0F38 map, unlike the b/w/d variants)
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), ""); // 128-bit needs AVX, anything wider needs AVX2
+ assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x37, (0xC0 | encode)); // opcode, register-direct ModRM
+}
+
+void Assembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ int comparison, int vector_len) { // register-source form; kdst is encoded as the destination, 'mask' as the embedded opmask
+ assert(VM_Version::supports_evex(), "");
+ assert(comparison >= Assembler::eq && comparison <= Assembler::_true, ""); // predicate must lie in ComparisonPredicate's range (eq .. _true)
+ // Encoding: EVEX.NDS.XXX.66.0F3A.W0 1F /r ib
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.reset_is_clear_context(); // clear-context off, so evex_prefix() leaves EVEX.z unset
+ int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int24(0x1F, (0xC0 | encode), comparison); // opcode, register-direct ModRM, imm8 = predicate
+}
+
+void Assembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
+ int comparison, int vector_len) { // memory-source form; kdst is encoded as the destination, 'mask' as the embedded opmask
+ assert(VM_Version::supports_evex(), "");
+ assert(comparison >= Assembler::eq && comparison <= Assembler::_true, ""); // predicate must lie in ComparisonPredicate's range (eq .. _true)
+ // Encoding: EVEX.NDS.XXX.66.0F3A.W0 1F /r ib
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.reset_is_clear_context(); // clear-context off, so evex_prefix() leaves EVEX.z unset
+ int dst_enc = kdst->encoding();
+ vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int8((unsigned char)0x1F);
+ emit_operand(as_Register(dst_enc), src); // the k-register encoding is wrapped with as_Register() for emit_operand
+ emit_int8((unsigned char)comparison); // trailing imm8 = predicate, emitted after the memory operand
+}
+
+void Assembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ int comparison, int vector_len) { // register-source form; kdst is encoded as the destination, 'mask' as the embedded opmask
+ assert(VM_Version::supports_evex(), "");
+ assert(comparison >= Assembler::eq && comparison <= Assembler::_true, ""); // predicate must lie in ComparisonPredicate's range (eq .. _true)
+ // Encoding: EVEX.NDS.XXX.66.0F3A.W1 1F /r ib
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.reset_is_clear_context(); // clear-context off, so evex_prefix() leaves EVEX.z unset
+ int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int24(0x1F, (0xC0 | encode), comparison); // opcode, register-direct ModRM, imm8 = predicate
+}
+
+void Assembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
+ int comparison, int vector_len) { // memory-source form; kdst is encoded as the destination, 'mask' as the embedded opmask
+ assert(VM_Version::supports_evex(), "");
+ assert(comparison >= Assembler::eq && comparison <= Assembler::_true, ""); // predicate must lie in ComparisonPredicate's range (eq .. _true)
+ // Encoding: EVEX.NDS.XXX.66.0F3A.W1 1F /r ib
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.reset_is_clear_context(); // clear-context off, so evex_prefix() leaves EVEX.z unset
+ int dst_enc = kdst->encoding();
+ vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int8((unsigned char)0x1F);
+ emit_operand(as_Register(dst_enc), src); // the k-register encoding is wrapped with as_Register() for emit_operand
+ emit_int8((unsigned char)comparison); // trailing imm8 = predicate, emitted after the memory operand
+}
+
+void Assembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ int comparison, int vector_len) { // register-source form; kdst is encoded as the destination, 'mask' as the embedded opmask
+ assert(VM_Version::supports_evex(), "");
+ assert(VM_Version::supports_avx512bw(), "");
+ assert(comparison >= Assembler::eq && comparison <= Assembler::_true, ""); // predicate must lie in ComparisonPredicate's range (eq .. _true)
+ // Encoding: EVEX.NDS.XXX.66.0F3A.W0 3F /r ib
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.reset_is_clear_context(); // clear-context off, so evex_prefix() leaves EVEX.z unset
+ int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int24(0x3F, (0xC0 | encode), comparison); // opcode, register-direct ModRM, imm8 = predicate
+}
+
+void Assembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
+ int comparison, int vector_len) { // memory-source form; kdst is encoded as the destination, 'mask' as the embedded opmask
+ assert(VM_Version::supports_evex(), "");
+ assert(VM_Version::supports_avx512bw(), "");
+ assert(comparison >= Assembler::eq && comparison <= Assembler::_true, ""); // predicate must lie in ComparisonPredicate's range (eq .. _true)
+ // Encoding: EVEX.NDS.XXX.66.0F3A.W0 3F /r ib
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.reset_is_clear_context(); // clear-context off, so evex_prefix() leaves EVEX.z unset
+ int dst_enc = kdst->encoding();
+ vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int8((unsigned char)0x3F);
+ emit_operand(as_Register(dst_enc), src); // the k-register encoding is wrapped with as_Register() for emit_operand
+ emit_int8((unsigned char)comparison); // trailing imm8 = predicate, emitted after the memory operand
+}
+
+void Assembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ int comparison, int vector_len) { // register-source form; kdst is encoded as the destination, 'mask' as the embedded opmask
+ assert(VM_Version::supports_evex(), "");
+ assert(VM_Version::supports_avx512bw(), "");
+ assert(comparison >= Assembler::eq && comparison <= Assembler::_true, ""); // predicate must lie in ComparisonPredicate's range (eq .. _true)
+ // Encoding: EVEX.NDS.XXX.66.0F3A.W1 3F /r ib
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.reset_is_clear_context(); // clear-context off, so evex_prefix() leaves EVEX.z unset
+ int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int24(0x3F, (0xC0 | encode), comparison); // opcode, register-direct ModRM, imm8 = predicate
+}
+
+void Assembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
+ int comparison, int vector_len) { // memory-source form; kdst is encoded as the destination, 'mask' as the embedded opmask
+ assert(VM_Version::supports_evex(), "");
+ assert(VM_Version::supports_avx512bw(), "");
+ assert(comparison >= Assembler::eq && comparison <= Assembler::_true, ""); // predicate must lie in ComparisonPredicate's range (eq .. _true)
+ // Encoding: EVEX.NDS.XXX.66.0F3A.W1 3F /r ib
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.reset_is_clear_context(); // clear-context off, so evex_prefix() leaves EVEX.z unset
+ int dst_enc = kdst->encoding();
+ vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int8((unsigned char)0x3F);
+ emit_operand(as_Register(dst_enc), src); // the k-register encoding is wrapped with as_Register() for emit_operand
+ emit_int8((unsigned char)comparison); // trailing imm8 = predicate, emitted after the memory operand
+}
+
+void Assembler::vpblendvb(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len) { // VEX.66.0F3A.W0 4C /r /is4 (VPBLENDVB); 'mask' is the fourth register operand
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), ""); // 128-bit needs AVX, the 256-bit integer form needs AVX2 (same rule as the vpcmpgt* emitters)
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ int mask_enc = mask->encoding();
+ emit_int24(0x4C, (0xC0 | encode), 0xF0 & mask_enc << 4); // opcode, register-direct ModRM, then mask register number in bits 7:4 ('<<' binds tighter than '&')
+}
+
+void Assembler::evblendmpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { // opmask-controlled blend of nds and src into dst
+ assert(VM_Version::supports_evex(), "");
+ // Encoding: EVEX.NDS.XXX.66.0F38.W1 65 /r
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ if (merge) { // merge requested: turn clear-context off so evex_prefix() leaves EVEX.z unset; otherwise keep the attribute's default
+ attributes.reset_is_clear_context();
+ }
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x65, (0xC0 | encode)); // opcode, register-direct ModRM
+}
+
+void Assembler::evblendmps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { // opmask-controlled blend of nds and src into dst
+ assert(VM_Version::supports_evex(), "");
+ // Encoding: EVEX.NDS.XXX.66.0F38.W0 65 /r
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ if (merge) { // merge requested: turn clear-context off so evex_prefix() leaves EVEX.z unset; otherwise keep the attribute's default
+ attributes.reset_is_clear_context();
+ }
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x65, (0xC0 | encode)); // opcode, register-direct ModRM
+}
+
+void Assembler::evpblendmb (XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { // opmask-controlled blend of nds and src into dst
+ assert(VM_Version::supports_evex(), "");
+ assert(VM_Version::supports_avx512bw(), "");
+ // Encoding: EVEX.NDS.XXX.66.0F38.W0 66 /r (vector length is taken from vector_len)
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ if (merge) { // merge requested: turn clear-context off so evex_prefix() leaves EVEX.z unset; otherwise keep the attribute's default
+ attributes.reset_is_clear_context();
+ }
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x66, (0xC0 | encode)); // opcode, register-direct ModRM
+}
+
+void Assembler::evpblendmw (XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { // opmask-controlled blend of nds and src into dst
+ assert(VM_Version::supports_evex(), "");
+ assert(VM_Version::supports_avx512bw(), "");
+ // Encoding: EVEX.NDS.XXX.66.0F38.W1 66 /r (vector length is taken from vector_len)
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ if (merge) { // merge requested: turn clear-context off so evex_prefix() leaves EVEX.z unset; otherwise keep the attribute's default
+ attributes.reset_is_clear_context();
+ }
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x66, (0xC0 | encode)); // opcode, register-direct ModRM
+}
+
+void Assembler::evpblendmd (XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { // opmask-controlled blend of nds and src into dst
+ assert(VM_Version::supports_evex(), "");
+ // Encoding: EVEX.NDS.XXX.66.0F38.W0 64 /r (vector length is taken from vector_len)
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ if (merge) { // merge requested: turn clear-context off so evex_prefix() leaves EVEX.z unset; otherwise keep the attribute's default
+ attributes.reset_is_clear_context();
+ }
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x64, (0xC0 | encode)); // opcode, register-direct ModRM
+}
+
+void Assembler::evpblendmq (XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { // opmask-controlled blend of nds and src into dst
+ assert(VM_Version::supports_evex(), "");
+ // Encoding: EVEX.NDS.XXX.66.0F38.W1 64 /r (vector length is taken from vector_len)
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ if (merge) { // merge requested: turn clear-context off so evex_prefix() leaves EVEX.z unset; otherwise keep the attribute's default
+ attributes.reset_is_clear_context();
+ }
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x64, (0xC0 | encode)); // opcode, register-direct ModRM
}
void Assembler::shlxl(Register dst, Register src1, Register src2) {
@@ -7803,6 +9156,13 @@ void Assembler::shlxq(Register dst, Register src1, Register src2) {
emit_int16((unsigned char)0xF7, (0xC0 | encode));
}
+void Assembler::shrxq(Register dst, Register src1, Register src2) { // BMI2 SHRX, 64-bit operand size: VEX.F2.0F38.W1 F7 /r
+ assert(VM_Version::supports_bmi2(), "");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes); // src2 goes in the nds slot, src1 in the r/m slot
+ emit_int16((unsigned char)0xF7, (0xC0 | encode)); // opcode, register-direct ModRM
+}
+
#ifndef _LP64
void Assembler::incl(Register dst) {
@@ -8443,7 +9803,7 @@ void Assembler::cmpq(Register dst, int32_t imm32) {
void Assembler::cmpq(Address dst, Register src) {
InstructionMark im(this);
- emit_int16(get_prefixq(dst, src), 0x3B);
+ emit_int16(get_prefixq(dst, src), 0x39);
emit_operand(src, dst);
}
diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp
index 283285dc347..1d6eb41bd05 100644
--- a/src/hotspot/cpu/x86/assembler_x86.hpp
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp
@@ -588,6 +588,7 @@ class Assembler : public AbstractAssembler {
#endif
};
+ // Comparison predicates for integral types & FP types when using SSE
enum ComparisonPredicate {
eq = 0,
lt = 1,
@@ -599,6 +600,51 @@ class Assembler : public AbstractAssembler {
_true = 7
};
+ // Comparison predicates for FP types when using AVX; the enumerator value is the predicate byte the compare emitters write as imm8.
+ // O means ordered, U means unordered: an ordered predicate is false when an operand is NaN, an unordered one is true.
+ // S means signaling, Q means quiet (non-signaling): with a signaling predicate the instruction signals #IA on NaN.
+ enum ComparisonPredicateFP {
+ EQ_OQ = 0,
+ LT_OS = 1,
+ LE_OS = 2,
+ UNORD_Q = 3,
+ NEQ_UQ = 4,
+ NLT_US = 5,
+ NLE_US = 6,
+ ORD_Q = 7,
+ EQ_UQ = 8,
+ NGE_US = 9,
+ NGT_US = 0xA,
+ FALSE_OQ = 0XB,
+ NEQ_OQ = 0xC,
+ GE_OS = 0xD,
+ GT_OS = 0xE,
+ TRUE_UQ = 0xF,
+ EQ_OS = 0x10,
+ LT_OQ = 0x11,
+ LE_OQ = 0x12,
+ UNORD_S = 0x13,
+ NEQ_US = 0x14,
+ NLT_UQ = 0x15,
+ NLE_UQ = 0x16,
+ ORD_S = 0x17,
+ EQ_US = 0x18,
+ NGE_UQ = 0x19,
+ NGT_UQ = 0x1A,
+ FALSE_OS = 0x1B,
+ NEQ_OS = 0x1C,
+ GE_OQ = 0x1D,
+ GT_OQ = 0x1E,
+ TRUE_US =0x1F
+ };
+
+ enum Width { // NOTE(review): presumably element widths (byte, word, dword, qword); confirm against the users of this enum
+ B = 0,
+ W = 1,
+ D = 2,
+ Q = 3
+ };
+
//---< calculate length of instruction >---
// As instruction size can't be found out easily on x86/x64,
// we just use '4' for len and maxlen.
@@ -794,7 +840,6 @@ class Assembler : public AbstractAssembler {
void decl(Register dst);
void decl(Address dst);
- void decq(Register dst);
void decq(Address dst);
void incl(Register dst);
@@ -879,6 +924,7 @@ class Assembler : public AbstractAssembler {
void popa_uncached();
#endif
void vzeroupper_uncached();
+ void decq(Register dst);
void pusha();
void popa();
@@ -918,6 +964,7 @@ class Assembler : public AbstractAssembler {
void adcq(Register dst, Register src);
void addb(Address dst, int imm8);
+ void addw(Register dst, Register src);
void addw(Address dst, int imm16);
void addl(Address dst, int32_t imm32);
@@ -968,6 +1015,8 @@ class Assembler : public AbstractAssembler {
void vaesdec(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vaesdeclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void andw(Register dst, Register src);
+
void andl(Address dst, int32_t imm32);
void andl(Register dst, int32_t imm32);
void andl(Register dst, Address src);
@@ -1093,9 +1142,11 @@ class Assembler : public AbstractAssembler {
// Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value
void cvtdq2pd(XMMRegister dst, XMMRegister src);
+ void vcvtdq2pd(XMMRegister dst, XMMRegister src, int vector_len);
// Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value
void cvtdq2ps(XMMRegister dst, XMMRegister src);
+ void vcvtdq2ps(XMMRegister dst, XMMRegister src, int vector_len);
// Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
void cvtss2sd(XMMRegister dst, XMMRegister src);
@@ -1111,8 +1162,25 @@ class Assembler : public AbstractAssembler {
void cvttss2sil(Register dst, XMMRegister src);
void cvttss2siq(Register dst, XMMRegister src);
+ // Convert vector double to int
void cvttpd2dq(XMMRegister dst, XMMRegister src);
+ // Convert between vector float and vector double
+ void vcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len);
+ void vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len);
+
+ // Convert vector long to vector FP
+ void evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len);
+ void evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len);
+
+ // Evex casts with truncation
+ void evpmovwb(XMMRegister dst, XMMRegister src, int vector_len);
+ void evpmovdw(XMMRegister dst, XMMRegister src, int vector_len);
+ void evpmovdb(XMMRegister dst, XMMRegister src, int vector_len);
+ void evpmovqd(XMMRegister dst, XMMRegister src, int vector_len);
+ void evpmovqb(XMMRegister dst, XMMRegister src, int vector_len);
+ void evpmovqw(XMMRegister dst, XMMRegister src, int vector_len);
+
//Abs of packed Integer values
void pabsb(XMMRegister dst, XMMRegister src);
void pabsw(XMMRegister dst, XMMRegister src);
@@ -1472,20 +1540,30 @@ class Assembler : public AbstractAssembler {
void vmovdqu(XMMRegister dst, XMMRegister src);
// Move Unaligned 512bit Vector
- void evmovdqub(Address dst, XMMRegister src, int vector_len);
- void evmovdqub(XMMRegister dst, Address src, int vector_len);
- void evmovdqub(XMMRegister dst, XMMRegister src, int vector_len);
- void evmovdqub(XMMRegister dst, KRegister mask, Address src, int vector_len);
- void evmovdquw(Address dst, XMMRegister src, int vector_len);
- void evmovdquw(Address dst, KRegister mask, XMMRegister src, int vector_len);
- void evmovdquw(XMMRegister dst, Address src, int vector_len);
- void evmovdquw(XMMRegister dst, KRegister mask, Address src, int vector_len);
+ void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len);
+ void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len);
+ void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len);
+ void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
+ void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len);
+ void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
+ void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len);
+ void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
void evmovdqul(Address dst, XMMRegister src, int vector_len);
void evmovdqul(XMMRegister dst, Address src, int vector_len);
void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len);
+ void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
+ void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
+ void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evmovdquq(Address dst, XMMRegister src, int vector_len);
void evmovdquq(XMMRegister dst, Address src, int vector_len);
void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len);
+ void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
+ void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
+ void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
+
+ // Generic move instructions.
+ void evmovdqu(Address dst, KRegister mask, XMMRegister src, int vector_len, int type);
+ void evmovdqu(XMMRegister dst, KRegister mask, Address src, int vector_len, int type);
// Move lower 64bit to high 64bit in 128bit register
void movlhps(XMMRegister dst, XMMRegister src);
@@ -1517,6 +1595,9 @@ class Assembler : public AbstractAssembler {
// Move Quadword
void movq(Address dst, XMMRegister src);
void movq(XMMRegister dst, Address src);
+ void movq(XMMRegister dst, XMMRegister src);
+ void movq(Register dst, XMMRegister src);
+ void movq(XMMRegister dst, Register src);
void movsbl(Register dst, Address src);
void movsbl(Register dst, Register src);
@@ -1597,6 +1678,8 @@ class Assembler : public AbstractAssembler {
void btrq(Address dst, int imm8);
#endif
+ void orw(Register dst, Register src);
+
void orl(Address dst, int32_t imm32);
void orl(Register dst, int32_t imm32);
void orl(Register dst, Address src);
@@ -1610,17 +1693,32 @@ class Assembler : public AbstractAssembler {
void orq(Register dst, Address src);
void orq(Register dst, Register src);
+ // Pack with signed saturation
+ void packsswb(XMMRegister dst, XMMRegister src);
+ void vpacksswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void packssdw(XMMRegister dst, XMMRegister src);
+ void vpackssdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+
// Pack with unsigned saturation
void packuswb(XMMRegister dst, XMMRegister src);
void packuswb(XMMRegister dst, Address src);
+ void packusdw(XMMRegister dst, XMMRegister src);
void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void vpackusdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
- // Pemutation of 64bit words
+ // Permutations
void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
void vpermq(XMMRegister dst, XMMRegister src, int imm8);
void vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+ void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
+ void vpermilps(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
+ void vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
+ void vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void pause();
@@ -1633,11 +1731,14 @@ class Assembler : public AbstractAssembler {
void pcmpestri(XMMRegister xmm1, Address src, int imm8);
void pcmpeqb(XMMRegister dst, XMMRegister src);
+ void vpcmpCCbwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);
+
void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
+ void vpcmpgtb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
void evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
@@ -1650,16 +1751,22 @@ class Assembler : public AbstractAssembler {
void evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len);
+ void vpcmpgtw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+
void pcmpeqd(XMMRegister dst, XMMRegister src);
void vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
- void evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
- void evpcmpeqd(KRegister kdst, XMMRegister nds, Address src, int vector_len);
+ void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len);
+ void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
void pcmpeqq(XMMRegister dst, XMMRegister src);
+ void vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);
void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len);
+ void pcmpgtq(XMMRegister dst, XMMRegister src);
+ void vpcmpgtq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+
void pmovmskb(Register dst, XMMRegister src);
void vpmovmskb(Register dst, XMMRegister src);
@@ -1668,6 +1775,7 @@ class Assembler : public AbstractAssembler {
void pextrq(Register dst, XMMRegister src, int imm8);
void pextrd(Address dst, XMMRegister src, int imm8);
void pextrq(Address dst, XMMRegister src, int imm8);
+ void pextrb(Register dst, XMMRegister src, int imm8);
void pextrb(Address dst, XMMRegister src, int imm8);
// SSE 2 extract
void pextrw(Register dst, XMMRegister src, int imm8);
@@ -1676,21 +1784,46 @@ class Assembler : public AbstractAssembler {
// SSE 4.1 insert
void pinsrd(XMMRegister dst, Register src, int imm8);
void pinsrq(XMMRegister dst, Register src, int imm8);
+ void pinsrb(XMMRegister dst, Register src, int imm8);
void pinsrd(XMMRegister dst, Address src, int imm8);
void pinsrq(XMMRegister dst, Address src, int imm8);
void pinsrb(XMMRegister dst, Address src, int imm8);
+ void insertps(XMMRegister dst, XMMRegister src, int imm8);
// SSE 2 insert
void pinsrw(XMMRegister dst, Register src, int imm8);
void pinsrw(XMMRegister dst, Address src, int imm8);
- // SSE4.1 packed move
+ // AVX insert
+ void vpinsrd(XMMRegister dst, XMMRegister nds, Register src, int imm8);
+ void vpinsrb(XMMRegister dst, XMMRegister nds, Register src, int imm8);
+ void vpinsrq(XMMRegister dst, XMMRegister nds, Register src, int imm8);
+ void vpinsrw(XMMRegister dst, XMMRegister nds, Register src, int imm8);
+ void vinsertps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
+
+ // Zero extend moves
void pmovzxbw(XMMRegister dst, XMMRegister src);
void pmovzxbw(XMMRegister dst, Address src);
-
+ void pmovzxbd(XMMRegister dst, XMMRegister src);
void vpmovzxbw( XMMRegister dst, Address src, int vector_len);
+ void pmovzxdq(XMMRegister dst, XMMRegister src);
void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovzxdq(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovzxbd(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovzxbq(XMMRegister dst, XMMRegister src, int vector_len);
void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len);
+ // Sign extend moves
+ void pmovsxbd(XMMRegister dst, XMMRegister src);
+ void pmovsxbq(XMMRegister dst, XMMRegister src);
+ void pmovsxbw(XMMRegister dst, XMMRegister src);
+ void pmovsxwd(XMMRegister dst, XMMRegister src);
+ void vpmovsxbd(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovsxbq(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovsxwd(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovsxwq(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovsxdq(XMMRegister dst, XMMRegister src, int vector_len);
+
void evpmovwb(Address dst, XMMRegister src, int vector_len);
void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len);
@@ -1698,10 +1831,6 @@ class Assembler : public AbstractAssembler {
void evpmovdb(Address dst, XMMRegister src, int vector_len);
- // Sign extend moves
- void pmovsxbw(XMMRegister dst, XMMRegister src);
- void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len);
-
// Multiply add
void pmaddwd(XMMRegister dst, XMMRegister src);
void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
@@ -1745,10 +1874,17 @@ class Assembler : public AbstractAssembler {
void pshufd(XMMRegister dst, Address src, int mode);
void vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len);
- // Shuffle Packed Low Words
+ // Shuffle Packed High/Low Words
+ void pshufhw(XMMRegister dst, XMMRegister src, int mode);
void pshuflw(XMMRegister dst, XMMRegister src, int mode);
void pshuflw(XMMRegister dst, Address src, int mode);
+ // Shuffle packed floats and doubles
+ void pshufps(XMMRegister, XMMRegister, int);
+ void pshufpd(XMMRegister, XMMRegister, int);
+ void vpshufps(XMMRegister, XMMRegister, XMMRegister, int, int);
+ void vpshufpd(XMMRegister, XMMRegister, XMMRegister, int, int);
+
// Shuffle packed values at 128 bit granularity
void evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
@@ -1764,6 +1900,9 @@ class Assembler : public AbstractAssembler {
void vptest(XMMRegister dst, XMMRegister src);
void vptest(XMMRegister dst, Address src);
+ // Vector compare
+ void vptest(XMMRegister dst, XMMRegister src, int vector_len);
+
// Interleave Low Bytes
void punpcklbw(XMMRegister dst, XMMRegister src);
void punpcklbw(XMMRegister dst, Address src);
@@ -1837,6 +1976,7 @@ class Assembler : public AbstractAssembler {
void evalignq(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void pblendw(XMMRegister dst, XMMRegister src, int imm8);
+ void vblendps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len);
void sha1rnds4(XMMRegister dst, XMMRegister src, int imm8);
void sha1nexte(XMMRegister dst, XMMRegister src);
@@ -1955,6 +2095,7 @@ class Assembler : public AbstractAssembler {
void xorl(Register dst, Register src);
void xorb(Register dst, Address src);
+ void xorw(Register dst, Register src);
void xorq(Register dst, Address src);
void xorq(Register dst, Register src);
@@ -1989,8 +2130,12 @@ class Assembler : public AbstractAssembler {
void shlxl(Register dst, Register src1, Register src2);
void shlxq(Register dst, Register src1, Register src2);
+ void shrxq(Register dst, Register src1, Register src2);
+
//====================VECTOR ARITHMETIC=====================================
+ void evpmovd2m(KRegister kdst, XMMRegister src, int vector_len);
+ void evpmovq2m(KRegister kdst, XMMRegister src, int vector_len);
// Add Packed Floating-Point Values
void addpd(XMMRegister dst, XMMRegister src);
@@ -2100,13 +2245,41 @@ class Assembler : public AbstractAssembler {
// Multiply packed integers (only shorts and ints)
void pmullw(XMMRegister dst, XMMRegister src);
void pmulld(XMMRegister dst, XMMRegister src);
+ void pmuludq(XMMRegister dst, XMMRegister src);
void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void vpmuludq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+ // Minimum of packed integers and of packed floats/doubles
+ void pminsb(XMMRegister dst, XMMRegister src);
+ void vpminsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void pminsw(XMMRegister dst, XMMRegister src);
+ void vpminsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void pminsd(XMMRegister dst, XMMRegister src);
+ void vpminsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void vpminsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void minps(XMMRegister dst, XMMRegister src);
+ void vminps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void minpd(XMMRegister dst, XMMRegister src);
+ void vminpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+
+ // Maximum of packed integers and of packed floats/doubles
+ void pmaxsb(XMMRegister dst, XMMRegister src);
+ void vpmaxsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void pmaxsw(XMMRegister dst, XMMRegister src);
+ void vpmaxsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void pmaxsd(XMMRegister dst, XMMRegister src);
+ void vpmaxsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void vpmaxsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void maxps(XMMRegister dst, XMMRegister src);
+ void vmaxps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void maxpd(XMMRegister dst, XMMRegister src);
+ void vmaxpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+
// Shift left packed integers
void psllw(XMMRegister dst, int shift);
void pslld(XMMRegister dst, int shift);
@@ -2148,9 +2321,22 @@ class Assembler : public AbstractAssembler {
void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+ void evpsravw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+ // Variable shift left packed integers
+ void vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+ void vpsllvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+
+ // Variable shift right packed integers
+ void vpsrlvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+ void vpsrlvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+
+ // Variable shift right arithmetic packed integers
+ void vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+ void evpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+
void vpshldvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
@@ -2158,6 +2344,7 @@ class Assembler : public AbstractAssembler {
void pand(XMMRegister dst, XMMRegister src);
void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+ void evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
// Andn packed integers
@@ -2170,10 +2357,15 @@ class Assembler : public AbstractAssembler {
void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
+ void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
+
// Xor packed integers
void pxor(XMMRegister dst, XMMRegister src);
void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+ void vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
@@ -2251,7 +2443,21 @@ class Assembler : public AbstractAssembler {
void evpbroadcastd(XMMRegister dst, Register src, int vector_len);
void evpbroadcastq(XMMRegister dst, Register src, int vector_len);
- void evpgatherdd(XMMRegister dst, KRegister k1, Address src, int vector_len);
+ // Gather AVX2 and AVX3
+ void vpgatherdd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
+ void vpgatherdq(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
+ void vgatherdpd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
+ void vgatherdps(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
+ void evpgatherdd(XMMRegister dst, KRegister mask, Address src, int vector_len);
+ void evpgatherdq(XMMRegister dst, KRegister mask, Address src, int vector_len);
+ void evgatherdpd(XMMRegister dst, KRegister mask, Address src, int vector_len);
+ void evgatherdps(XMMRegister dst, KRegister mask, Address src, int vector_len);
+
+ // Scatter (AVX3 only)
+ void evpscatterdd(Address dst, KRegister mask, XMMRegister src, int vector_len);
+ void evpscatterdq(Address dst, KRegister mask, XMMRegister src, int vector_len);
+ void evscatterdps(Address dst, KRegister mask, XMMRegister src, int vector_len);
+ void evscatterdpd(Address dst, KRegister mask, XMMRegister src, int vector_len);
// Carry-Less Multiplication Quadword
void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
@@ -2264,14 +2470,56 @@ class Assembler : public AbstractAssembler {
// runtime code and native libraries.
void vzeroupper();
- // AVX support for vectorized conditional move (float/double). The following two instructions used only coupled.
+ // Vector double compares
+ void vcmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
+ void evcmppd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ ComparisonPredicateFP comparison, int vector_len);
+
+ // Vector float compares
+ void vcmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int comparison, int vector_len);
+ void evcmpps(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ ComparisonPredicateFP comparison, int vector_len);
+
+ // Vector integer compares
+ void vpcmpgtd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ int comparison, int vector_len);
+ void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
+ int comparison, int vector_len);
+
+ // Vector long compares
+ void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ int comparison, int vector_len);
+ void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
+ int comparison, int vector_len);
+
+ // Vector byte compares
+ void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ int comparison, int vector_len);
+ void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
+ int comparison, int vector_len);
+
+ // Vector short compares
+ void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ int comparison, int vector_len);
+ void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
+ int comparison, int vector_len);
+
+ // Vector blends
+ void blendvps(XMMRegister dst, XMMRegister src);
+ void blendvpd(XMMRegister dst, XMMRegister src);
+ void pblendvb(XMMRegister dst, XMMRegister src);
void blendvpb(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
- void cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
- void blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
- void cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
- void blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
+ void vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
+ void vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
+ void vpblendvb(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
-
+ void evblendmpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
+ void evblendmps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
+ void evpblendmb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
+ void evpblendmw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
+ void evpblendmd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
+ void evpblendmq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
protected:
// Next instructions require address alignment 16 bytes SSE mode.
// They should be called only from corresponding MacroAssembler instructions.
@@ -2367,7 +2615,8 @@ class InstructionAttr {
// Internal encoding data used in compressed immediate offset programming
void set_evex_encoding(int value) { _evex_encoding = value; }
- // Set the Evex.Z field to be used to clear all non directed XMM/YMM/ZMM components
+ // When the Evex.Z field is set (true), it is used to clear all non directed XMM/YMM/ZMM components.
+ // This method unsets it so that merge semantics are used instead.
void reset_is_clear_context(void) { _is_clear_context = false; }
// Map back to current asembler so that we can manage object level assocation
diff --git a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp
index 526fe5af2fc..6853953f0eb 100644
--- a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp
+++ b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp
@@ -79,6 +79,32 @@ void ConversionStub::emit_code(LIR_Assembler* ce) {
}
#endif // !_LP64
+void C1SafepointPollStub::emit_code(LIR_Assembler* ce) { // Emits this stub's out-of-line code: store the poll's pc in the thread, then jump to the polling-page return handler.
+ __ bind(_entry); // the stub's entry label resolves to this point in the emitted code
+ InternalAddress safepoint_pc(ce->masm()->pc() - ce->masm()->offset() + safepoint_offset()); // current pc minus current offset, plus the recorded safepoint offset (presumably code start + offset of the poll; confirm)
+#ifdef _LP64
+ __ lea(rscratch1, safepoint_pc);
+ __ movptr(Address(r15_thread, JavaThread::saved_exception_pc_offset()), rscratch1); // write safepoint_pc into the thread's saved_exception_pc slot
+#else
+ const Register tmp1 = rcx;
+ const Register tmp2 = rdx;
+ __ push(tmp1); // save both temporaries; they are restored below before the jump
+ __ push(tmp2);
+
+ __ lea(tmp1, safepoint_pc);
+ __ get_thread(tmp2); // here the thread pointer is fetched with get_thread rather than read from r15_thread
+ __ movptr(Address(tmp2, JavaThread::saved_exception_pc_offset()), tmp1); // same store as in the _LP64 branch
+
+ __ pop(tmp2); // pop in reverse push order
+ __ pop(tmp1);
+#endif /* _LP64 */
+ assert(SharedRuntime::polling_page_return_handler_blob() != NULL,
+ "polling page return stub not created yet");
+
+ address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point();
+ __ jump(RuntimeAddress(stub)); // continue in the shared handler blob; this stub emits nothing after the jump
+}
+
void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
Metadata *m = _method->as_constant_ptr()->as_metadata();
diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
index 8a0200a18dc..bba946ec4ad 100644
--- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
@@ -25,6 +25,7 @@
#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
+#include "c1/c1_CodeStubs.hpp"
#include "c1/c1_Compilation.hpp"
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
@@ -517,8 +518,7 @@ int LIR_Assembler::emit_deopt_handler() {
return offset;
}
-
-void LIR_Assembler::return_op(LIR_Opr result) {
+void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) {
assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == rax, "word returns are in rax,");
if (!result->is_illegal() && result->is_float_kind() && !result->is_xmm_register()) {
assert(result->fpu() == 0, "result must already be on TOS");
@@ -531,22 +531,18 @@ void LIR_Assembler::return_op(LIR_Opr result) {
__ reserved_stack_check();
}
- bool result_is_oop = result->is_valid() ? result->is_oop() : false;
-
// Note: we do not need to round double result; float result has the right precision
// the poll sets the condition code, but no data registers
#ifdef _LP64
- const Register poll_addr = rscratch1;
- __ movptr(poll_addr, Address(r15_thread, Thread::polling_page_offset()));
+ const Register thread = r15_thread;
#else
- const Register poll_addr = rbx;
- assert(FrameMap::is_caller_save_register(poll_addr), "will overwrite");
- __ get_thread(poll_addr);
- __ movptr(poll_addr, Address(poll_addr, Thread::polling_page_offset()));
+ const Register thread = rbx;
+ __ get_thread(thread);
#endif
+ code_stub->set_safepoint_offset(__ offset());
__ relocate(relocInfo::poll_return_type);
- __ testl(rax, Address(poll_addr, 0));
+ __ safepoint_poll(*code_stub->entry(), thread, true /* at_return */, true /* in_nmethod */);
__ ret(0);
}
diff --git a/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp b/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp
index 53935539a36..60347c41163 100644
--- a/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp
+++ b/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp
@@ -69,7 +69,7 @@ int StubAssembler::call_RT(Register oop_result1, Register metadata_result, addre
push(thread);
#endif // _LP64
- int call_offset;
+ int call_offset = -1;
if (!align_stack) {
set_last_Java_frame(thread, noreg, rbp, NULL);
} else {
@@ -133,6 +133,8 @@ int StubAssembler::call_RT(Register oop_result1, Register metadata_result, addre
if (metadata_result->is_valid()) {
get_vm_result_2(metadata_result, thread);
}
+
+ assert(call_offset >= 0, "Should be set");
return call_offset;
}
diff --git a/src/hotspot/cpu/x86/c1_globals_x86.hpp b/src/hotspot/cpu/x86/c1_globals_x86.hpp
index fbf538c2cec..afd2a65cb89 100644
--- a/src/hotspot/cpu/x86/c1_globals_x86.hpp
+++ b/src/hotspot/cpu/x86/c1_globals_x86.hpp
@@ -33,8 +33,6 @@
#ifndef TIERED
define_pd_global(bool, BackgroundCompilation, true );
-define_pd_global(bool, UseTLAB, true );
-define_pd_global(bool, ResizeTLAB, true );
define_pd_global(bool, InlineIntrinsics, true );
define_pd_global(bool, PreferInterpreterNativeStubs, false);
define_pd_global(bool, ProfileTraps, false);
diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
index 8940b0c3c44..3aef6446f78 100644
--- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
@@ -33,6 +33,21 @@
#include "runtime/objectMonitor.hpp"
#include "runtime/stubRoutines.hpp"
+inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vlen_in_bytes) { // Maps a vector size in bytes to its Assembler::AvxVectorLen encoding.
+ switch (vlen_in_bytes) {
+ case 4: // fall-through
+ case 8: // fall-through
+ case 16: return Assembler::AVX_128bit; // sizes of 4, 8 and 16 bytes all map to the 128-bit encoding
+ case 32: return Assembler::AVX_256bit;
+ case 64: return Assembler::AVX_512bit;
+
+ default: { // any other size is not expected here
+ ShouldNotReachHere();
+ return Assembler::AVX_NoVec; // presumably only matters if ShouldNotReachHere() returns; confirm
+ }
+ }
+}
+
void C2_MacroAssembler::setvectmask(Register dst, Register src) {
guarantee(PostLoopMultiversioning, "must be");
Assembler::movl(dst, 1);
@@ -861,6 +876,174 @@ void C2_MacroAssembler::vabsnegf(int opcode, XMMRegister dst, XMMRegister src, i
}
}
+void C2_MacroAssembler::pminmax(int opcode, BasicType elem_bt, XMMRegister dst, XMMRegister src, XMMRegister tmp) { // Emits a two-operand packed signed min/max of dst and src into dst; opcode picks min vs. max, elem_bt the lane type.
+ assert(opcode == Op_MinV || opcode == Op_MaxV, "sanity");
+ assert(tmp == xnoreg || elem_bt == T_LONG, "unused"); // a temp may only be supplied for T_LONG
+
+ if (opcode == Op_MinV) {
+ if (elem_bt == T_BYTE) {
+ pminsb(dst, src);
+ } else if (elem_bt == T_SHORT) {
+ pminsw(dst, src);
+ } else if (elem_bt == T_INT) {
+ pminsd(dst, src);
+ } else {
+ assert(elem_bt == T_LONG, "required");
+ assert(tmp == xmm0, "required"); // the sequence below hard-codes xmm0 as the blend mask
+ assert_different_registers(dst, src, tmp);
+ movdqu(xmm0, dst); // xmm0 = copy of dst
+ pcmpgtq(xmm0, src); // xmm0 = per-lane mask of (dst > src)
+ blendvpd(dst, src); // xmm0 as mask: take src in the lanes where dst > src
+ }
+ } else { // opcode == Op_MaxV
+ if (elem_bt == T_BYTE) {
+ pmaxsb(dst, src);
+ } else if (elem_bt == T_SHORT) {
+ pmaxsw(dst, src);
+ } else if (elem_bt == T_INT) {
+ pmaxsd(dst, src);
+ } else {
+ assert(elem_bt == T_LONG, "required");
+ assert(tmp == xmm0, "required"); // the sequence below hard-codes xmm0 as the blend mask
+ assert_different_registers(dst, src, tmp);
+ movdqu(xmm0, src); // xmm0 = copy of src
+ pcmpgtq(xmm0, dst); // xmm0 = per-lane mask of (src > dst)
+ blendvpd(dst, src); // xmm0 as mask: take src in the lanes where src > dst
+ }
+ }
+}
+
+void C2_MacroAssembler::vpminmax(int opcode, BasicType elem_bt,
+ XMMRegister dst, XMMRegister src1, XMMRegister src2,
+ int vlen_enc) { // Emits a three-operand packed signed min/max: dst = min/max(src1, src2) for the lane type elem_bt.
+ assert(opcode == Op_MinV || opcode == Op_MaxV, "sanity");
+
+ if (opcode == Op_MinV) {
+ if (elem_bt == T_BYTE) {
+ vpminsb(dst, src1, src2, vlen_enc);
+ } else if (elem_bt == T_SHORT) {
+ vpminsw(dst, src1, src2, vlen_enc);
+ } else if (elem_bt == T_INT) {
+ vpminsd(dst, src1, src2, vlen_enc);
+ } else {
+ assert(elem_bt == T_LONG, "required");
+ if (UseAVX > 2 && (vlen_enc == Assembler::AVX_512bit || VM_Version::supports_avx512vl())) { // vpminsq is used only for 512-bit vectors or when AVX512VL is available
+ vpminsq(dst, src1, src2, vlen_enc);
+ } else {
+ assert_different_registers(dst, src1, src2); // dst is written as a mask before the sources are read again
+ vpcmpgtq(dst, src1, src2, vlen_enc); // dst = per-lane mask of (src1 > src2)
+ vblendvpd(dst, src1, src2, dst, vlen_enc); // take src2 where src1 > src2, otherwise src1
+ }
+ }
+ } else { // opcode == Op_MaxV
+ if (elem_bt == T_BYTE) {
+ vpmaxsb(dst, src1, src2, vlen_enc);
+ } else if (elem_bt == T_SHORT) {
+ vpmaxsw(dst, src1, src2, vlen_enc);
+ } else if (elem_bt == T_INT) {
+ vpmaxsd(dst, src1, src2, vlen_enc);
+ } else {
+ assert(elem_bt == T_LONG, "required");
+ if (UseAVX > 2 && (vlen_enc == Assembler::AVX_512bit || VM_Version::supports_avx512vl())) { // vpmaxsq is used only for 512-bit vectors or when AVX512VL is available
+ vpmaxsq(dst, src1, src2, vlen_enc);
+ } else {
+ assert_different_registers(dst, src1, src2); // dst is written as a mask before the sources are read again
+ vpcmpgtq(dst, src1, src2, vlen_enc); // dst = per-lane mask of (src1 > src2)
+ vblendvpd(dst, src2, src1, dst, vlen_enc); // operands swapped vs. the min case: take src1 where src1 > src2, otherwise src2
+ }
+ }
+ }
+}
+
+// Float/Double min max
+
+void C2_MacroAssembler::vminmax_fp(int opcode, BasicType elem_bt,
+ XMMRegister dst, XMMRegister a, XMMRegister b,
+ XMMRegister tmp, XMMRegister atmp, XMMRegister btmp,
+ int vlen_enc) { // Emits an AVX float/double min or max of a and b into dst using blend/compare fix-ups; NOTE(review): presumably to match Java semantics for -0.0 and NaN — confirm.
+ assert(UseAVX > 0, "required");
+ assert(opcode == Op_MinV || opcode == Op_MinReductionV ||
+ opcode == Op_MaxV || opcode == Op_MaxReductionV, "sanity");
+ assert(elem_bt == T_FLOAT || elem_bt == T_DOUBLE, "sanity");
+ assert_different_registers(a, b, tmp, atmp, btmp); // note: dst is not part of this check
+
+ bool is_min = (opcode == Op_MinV || opcode == Op_MinReductionV); // the reduction opcodes share the element-wise sequences
+ bool is_double_word = is_double_word_type(elem_bt);
+
+ if (!is_double_word && is_min) {
+ vblendvps(atmp, a, b, a, vlen_enc); // atmp = a, but b in lanes where a's sign bit is set
+ vblendvps(btmp, b, a, a, vlen_enc); // btmp = b, but a in lanes where a's sign bit is set
+ vminps(tmp, atmp, btmp, vlen_enc);
+ vcmpps(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc); // btmp = mask of lanes where atmp is NaN (unordered with itself)
+ vblendvps(dst, tmp, atmp, btmp, vlen_enc); // dst = atmp in the NaN lanes, the vminps result elsewhere
+ } else if (!is_double_word && !is_min) {
+ vblendvps(btmp, b, a, b, vlen_enc); // btmp = b, but a in lanes where b's sign bit is set
+ vblendvps(atmp, a, b, b, vlen_enc); // atmp = a, but b in lanes where b's sign bit is set
+ vmaxps(tmp, atmp, btmp, vlen_enc);
+ vcmpps(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc); // btmp = mask of lanes where atmp is NaN
+ vblendvps(dst, tmp, atmp, btmp, vlen_enc); // dst = atmp in the NaN lanes, the vmaxps result elsewhere
+ } else if (is_double_word && is_min) {
+ vblendvpd(atmp, a, b, a, vlen_enc); // same sequence as the float min case, on 64-bit lanes
+ vblendvpd(btmp, b, a, a, vlen_enc);
+ vminpd(tmp, atmp, btmp, vlen_enc);
+ vcmppd(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
+ vblendvpd(dst, tmp, atmp, btmp, vlen_enc);
+ } else {
+ assert(is_double_word && !is_min, "sanity");
+ vblendvpd(btmp, b, a, b, vlen_enc); // same sequence as the float max case, on 64-bit lanes
+ vblendvpd(atmp, a, b, b, vlen_enc);
+ vmaxpd(tmp, atmp, btmp, vlen_enc);
+ vcmppd(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
+ vblendvpd(dst, tmp, atmp, btmp, vlen_enc);
+ }
+}
+
+void C2_MacroAssembler::evminmax_fp(int opcode, BasicType elem_bt,
+ XMMRegister dst, XMMRegister a, XMMRegister b,
+ KRegister ktmp, XMMRegister atmp, XMMRegister btmp,
+ int vlen_enc) { // AVX-512 variant of the float/double min/max sequence: uses the opmask register ktmp instead of a vector temp.
+ assert(UseAVX > 2, "required");
+ assert(opcode == Op_MinV || opcode == Op_MinReductionV ||
+ opcode == Op_MaxV || opcode == Op_MaxReductionV, "sanity");
+ assert(elem_bt == T_FLOAT || elem_bt == T_DOUBLE, "sanity");
+ assert_different_registers(dst, a, b, atmp, btmp); // here dst must differ from the inputs and temps
+
+ bool is_min = (opcode == Op_MinV || opcode == Op_MinReductionV); // the reduction opcodes share the element-wise sequences
+ bool is_double_word = is_double_word_type(elem_bt);
+ bool merge = true; // passed to every masked instruction below
+
+ if (!is_double_word && is_min) {
+ evpmovd2m(ktmp, a, vlen_enc); // ktmp = sign bits of a's 32-bit lanes
+ evblendmps(atmp, ktmp, a, b, merge, vlen_enc); // atmp = a, but b in the lanes selected by ktmp
+ evblendmps(btmp, ktmp, b, a, merge, vlen_enc); // btmp = b, but a in the lanes selected by ktmp
+ vminps(dst, atmp, btmp, vlen_enc);
+ evcmpps(ktmp, k0, atmp, atmp, Assembler::UNORD_Q, vlen_enc); // ktmp = lanes where atmp is NaN (unordered with itself)
+ evmovdqul(dst, ktmp, atmp, merge, vlen_enc); // overwrite the NaN lanes of dst with atmp
+ } else if (!is_double_word && !is_min) {
+ evpmovd2m(ktmp, b, vlen_enc); // ktmp = sign bits of b's 32-bit lanes
+ evblendmps(atmp, ktmp, a, b, merge, vlen_enc);
+ evblendmps(btmp, ktmp, b, a, merge, vlen_enc);
+ vmaxps(dst, atmp, btmp, vlen_enc);
+ evcmpps(ktmp, k0, atmp, atmp, Assembler::UNORD_Q, vlen_enc); // ktmp = lanes where atmp is NaN
+ evmovdqul(dst, ktmp, atmp, merge, vlen_enc); // overwrite the NaN lanes of dst with atmp
+ } else if (is_double_word && is_min) {
+ evpmovq2m(ktmp, a, vlen_enc); // ktmp = sign bits of a's 64-bit lanes
+ evblendmpd(atmp, ktmp, a, b, merge, vlen_enc);
+ evblendmpd(btmp, ktmp, b, a, merge, vlen_enc);
+ vminpd(dst, atmp, btmp, vlen_enc);
+ evcmppd(ktmp, k0, atmp, atmp, Assembler::UNORD_Q, vlen_enc); // ktmp = lanes where atmp is NaN
+ evmovdquq(dst, ktmp, atmp, merge, vlen_enc); // overwrite the NaN lanes of dst with atmp
+ } else {
+ assert(is_double_word && !is_min, "sanity");
+ evpmovq2m(ktmp, b, vlen_enc); // ktmp = sign bits of b's 64-bit lanes
+ evblendmpd(atmp, ktmp, a, b, merge, vlen_enc);
+ evblendmpd(btmp, ktmp, b, a, merge, vlen_enc);
+ vmaxpd(dst, atmp, btmp, vlen_enc);
+ evcmppd(ktmp, k0, atmp, atmp, Assembler::UNORD_Q, vlen_enc); // ktmp = lanes where atmp is NaN
+ evmovdquq(dst, ktmp, atmp, merge, vlen_enc); // overwrite the NaN lanes of dst with atmp
+ }
+}
+
void C2_MacroAssembler::vextendbw(bool sign, XMMRegister dst, XMMRegister src) {
if (sign) {
pmovsxbw(dst, src);
@@ -877,6 +1060,22 @@ void C2_MacroAssembler::vextendbw(bool sign, XMMRegister dst, XMMRegister src, i
}
}
+void C2_MacroAssembler::vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len) { // Widens packed bytes in src to dwords in dst, sign- or zero-extending according to 'sign'.
+ if (sign) {
+ vpmovsxbd(dst, src, vector_len); // sign-extend
+ } else {
+ vpmovzxbd(dst, src, vector_len); // zero-extend
+ }
+}
+
+void C2_MacroAssembler::vextendwd(bool sign, XMMRegister dst, XMMRegister src, int vector_len) { // Widens packed words in src to dwords in dst, sign- or zero-extending according to 'sign'.
+ if (sign) {
+ vpmovsxwd(dst, src, vector_len); // sign-extend
+ } else {
+ vpmovzxwd(dst, src, vector_len); // zero-extend
+ }
+}
+
void C2_MacroAssembler::vprotate_imm(int opcode, BasicType etype, XMMRegister dst, XMMRegister src,
int shift, int vector_len) {
if (opcode == Op_RotateLeftV) {
@@ -928,14 +1127,13 @@ void C2_MacroAssembler::vshiftd_imm(int opcode, XMMRegister dst, int shift) {
}
}
-void C2_MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister src) {
- if (opcode == Op_RShiftVI) {
- psrad(dst, src);
- } else if (opcode == Op_LShiftVI) {
- pslld(dst, src);
- } else {
- assert((opcode == Op_URShiftVI),"opcode should be Op_URShiftVI");
- psrld(dst, src);
+void C2_MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister shift) { // Two-operand dword shift: shifts dst in place by the count in 'shift'; opcode selects the shift kind.
+ switch (opcode) {
+ case Op_RShiftVI: psrad(dst, shift); break; // arithmetic right
+ case Op_LShiftVI: pslld(dst, shift); break; // left
+ case Op_URShiftVI: psrld(dst, shift); break; // logical right
+
+ default: assert(false, "%s", NodeClassNames[opcode]); // unexpected opcode: the message is its node class name
}
}
@@ -950,47 +1148,53 @@ void C2_MacroAssembler::vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds
}
}
-void C2_MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
- if (opcode == Op_RShiftVI) {
- vpsrad(dst, nds, src, vector_len);
- } else if (opcode == Op_LShiftVI) {
- vpslld(dst, nds, src, vector_len);
- } else {
- assert((opcode == Op_URShiftVI),"opcode should be Op_URShiftVI");
- vpsrld(dst, nds, src, vector_len);
+void C2_MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) { // Three-operand dword shift: dst = src shifted by the count in 'shift'; opcode selects the shift kind.
+ switch (opcode) {
+ case Op_RShiftVI: vpsrad(dst, src, shift, vlen_enc); break; // arithmetic right
+ case Op_LShiftVI: vpslld(dst, src, shift, vlen_enc); break; // left
+ case Op_URShiftVI: vpsrld(dst, src, shift, vlen_enc); break; // logical right
+
+ default: assert(false, "%s", NodeClassNames[opcode]); // unexpected opcode: the message is its node class name
}
}
-void C2_MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister src) {
- if ((opcode == Op_RShiftVS) || (opcode == Op_RShiftVB)) {
- psraw(dst, src);
- } else if ((opcode == Op_LShiftVS) || (opcode == Op_LShiftVB)) {
- psllw(dst, src);
- } else {
- assert(((opcode == Op_URShiftVS) || (opcode == Op_URShiftVB)),"opcode should be one of Op_URShiftVS or Op_URShiftVB");
- psrlw(dst, src);
+void C2_MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister shift) { // Two-operand word shift: shifts dst in place; the byte and short opcodes share the same word instructions.
+ switch (opcode) {
+ case Op_RShiftVB: // fall-through
+ case Op_RShiftVS: psraw(dst, shift); break; // arithmetic right
+
+ case Op_LShiftVB: // fall-through
+ case Op_LShiftVS: psllw(dst, shift); break; // left
+
+ case Op_URShiftVS: // fall-through
+ case Op_URShiftVB: psrlw(dst, shift); break; // logical right
+
+ default: assert(false, "%s", NodeClassNames[opcode]); // unexpected opcode: the message is its node class name
}
}
-void C2_MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
- if ((opcode == Op_RShiftVS) || (opcode == Op_RShiftVB)) {
- vpsraw(dst, nds, src, vector_len);
- } else if ((opcode == Op_LShiftVS) || (opcode == Op_LShiftVB)) {
- vpsllw(dst, nds, src, vector_len);
- } else {
- assert(((opcode == Op_URShiftVS) || (opcode == Op_URShiftVB)),"opcode should be one of Op_URShiftVS or Op_URShiftVB");
- vpsrlw(dst, nds, src, vector_len);
+void C2_MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) { // Three-operand word shift: dst = src shifted by 'shift'; the byte and short opcodes share the same word instructions.
+ switch (opcode) {
+ case Op_RShiftVB: // fall-through
+ case Op_RShiftVS: vpsraw(dst, src, shift, vlen_enc); break; // arithmetic right
+
+ case Op_LShiftVB: // fall-through
+ case Op_LShiftVS: vpsllw(dst, src, shift, vlen_enc); break; // left
+
+ case Op_URShiftVS: // fall-through
+ case Op_URShiftVB: vpsrlw(dst, src, shift, vlen_enc); break; // logical right
+
+ default: assert(false, "%s", NodeClassNames[opcode]); // unexpected opcode: the message is its node class name
}
}
-void C2_MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister src) {
- if (opcode == Op_RShiftVL) {
- psrlq(dst, src); // using srl to implement sra on pre-avs512 systems
- } else if (opcode == Op_LShiftVL) {
- psllq(dst, src);
- } else {
- assert((opcode == Op_URShiftVL),"opcode should be Op_URShiftVL");
- psrlq(dst, src);
+void C2_MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister shift) { // Two-operand qword shift: shifts dst in place by the count in 'shift'; opcode selects the shift kind.
+ switch (opcode) {
+ case Op_RShiftVL: psrlq(dst, shift); break; // using srl to implement sra on pre-avx512 systems; NOTE(review): this emits a logical shift, callers presumably fix up the sign bits — confirm
+ case Op_LShiftVL: psllq(dst, shift); break; // left
+ case Op_URShiftVL: psrlq(dst, shift); break; // logical right
+
+ default: assert(false, "%s", NodeClassNames[opcode]); // unexpected opcode: the message is its node class name
}
}
@@ -1005,14 +1209,13 @@ void C2_MacroAssembler::vshiftq_imm(int opcode, XMMRegister dst, int shift) {
}
}
-void C2_MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
- if (opcode == Op_RShiftVL) {
- evpsraq(dst, nds, src, vector_len);
- } else if (opcode == Op_LShiftVL) {
- vpsllq(dst, nds, src, vector_len);
- } else {
- assert((opcode == Op_URShiftVL),"opcode should be Op_URShiftVL");
- vpsrlq(dst, nds, src, vector_len);
+void C2_MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) { // Three-operand qword shift: dst = src shifted by the count in 'shift'; opcode selects the shift kind.
+ switch (opcode) {
+ case Op_RShiftVL: evpsraq(dst, src, shift, vlen_enc); break; // arithmetic right; NOTE(review): evpsraq is an EVEX instruction, callers presumably guarantee AVX-512 — confirm
+ case Op_LShiftVL: vpsllq(dst, src, shift, vlen_enc); break; // left
+ case Op_URShiftVL: vpsrlq(dst, src, shift, vlen_enc); break; // logical right
+
+ default: assert(false, "%s", NodeClassNames[opcode]); // unexpected opcode: the message is its node class name
}
}
@@ -1027,45 +1230,351 @@ void C2_MacroAssembler::vshiftq_imm(int opcode, XMMRegister dst, XMMRegister nds
}
}
-// Reductions for vectors of ints, longs, floats, and doubles.
+void C2_MacroAssembler::varshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) { // Per-lane variable dword shift: each lane of src is shifted by the matching lane of 'shift'.
+ switch (opcode) {
+ case Op_RShiftVB: // fall-through
+ case Op_RShiftVS: // fall-through
+ case Op_RShiftVI: vpsravd(dst, src, shift, vlen_enc); break; // arithmetic right; byte/short opcodes also map to the dword instruction
+
+ case Op_LShiftVB: // fall-through
+ case Op_LShiftVS: // fall-through
+ case Op_LShiftVI: vpsllvd(dst, src, shift, vlen_enc); break; // left
+
+ case Op_URShiftVB: // fall-through
+ case Op_URShiftVS: // fall-through
+ case Op_URShiftVI: vpsrlvd(dst, src, shift, vlen_enc); break; // logical right
+
+ default: assert(false, "%s", NodeClassNames[opcode]); // unexpected opcode: the message is its node class name
+ }
+}
+
+void C2_MacroAssembler::varshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) { // Per-lane variable word shift using the ev* (EVEX) word-shift instructions.
+ switch (opcode) {
+ case Op_RShiftVB: // fall-through
+ case Op_RShiftVS: evpsravw(dst, src, shift, vlen_enc); break; // arithmetic right
-void C2_MacroAssembler::reduce_operation_128(int opcode, XMMRegister dst, XMMRegister src) {
+ case Op_LShiftVB: // fall-through
+ case Op_LShiftVS: evpsllvw(dst, src, shift, vlen_enc); break; // left
+
+ case Op_URShiftVB: // fall-through
+ case Op_URShiftVS: evpsrlvw(dst, src, shift, vlen_enc); break; // logical right
+
+ default: assert(false, "%s", NodeClassNames[opcode]); // unexpected opcode: the message is its node class name
+ }
+}
+
+void C2_MacroAssembler::varshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc, XMMRegister tmp) { // Per-lane variable qword shift; tmp is only used for the arithmetic right shift when UseAVX == 2.
+ assert(UseAVX >= 2, "required");
+ switch (opcode) {
+ case Op_RShiftVL: {
+ if (UseAVX > 2) {
+ assert(tmp == xnoreg, "not used");
+ if (!VM_Version::supports_avx512vl()) {
+ vlen_enc = Assembler::AVX_512bit; // without AVX512VL the instruction is always emitted with the 512-bit encoding
+ }
+ evpsravq(dst, src, shift, vlen_enc);
+ } else {
+ vmovdqu(tmp, ExternalAddress(StubRoutines::x86::vector_long_sign_mask())); // tmp = constant from the vector_long_sign_mask stub
+ vpsrlvq(dst, src, shift, vlen_enc); // dst = src shifted right logically, per lane
+ vpsrlvq(tmp, tmp, shift, vlen_enc); // tmp = the constant shifted by the same per-lane counts
+ vpxor(dst, dst, tmp, vlen_enc); // dst = (dst ^ tmp) - tmp; presumably turns the logical result into an arithmetic one — confirm
+ vpsubq(dst, dst, tmp, vlen_enc);
+ }
+ break;
+ }
+ case Op_LShiftVL: {
+ assert(tmp == xnoreg, "not used");
+ vpsllvq(dst, src, shift, vlen_enc); // left
+ break;
+ }
+ case Op_URShiftVL: {
+ assert(tmp == xnoreg, "not used");
+ vpsrlvq(dst, src, shift, vlen_enc); // logical right
+ break;
+ }
+ default: assert(false, "%s", NodeClassNames[opcode]); // unexpected opcode: the message is its node class name
+ }
+}
+
+// Variable shift src by shift using vtmp and scratch as TEMPs giving word result in dst
+void C2_MacroAssembler::varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch) { // Widens bytes to dwords, shifts them per lane, then packs the results down to words.
+ assert(opcode == Op_LShiftVB ||
+ opcode == Op_RShiftVB ||
+ opcode == Op_URShiftVB, "%s", NodeClassNames[opcode]);
+ bool sign = (opcode != Op_URShiftVB); // sign-extend the source for every opcode except Op_URShiftVB
+ assert(vector_len == 0, "required"); // only vector length encoding 0 is accepted
+ vextendbd(sign, dst, src, 1); // widen the source bytes to dwords, using vector length encoding 1
+ vpmovzxbd(vtmp, shift, 1); // widen the per-lane shift counts to dwords (zero-extended)
+ varshiftd(opcode, dst, dst, vtmp, 1); // per-lane dword shift
+ vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_int_to_byte_mask()), 1, scratch); // mask each dword with the vector_int_to_byte_mask stub constant
+ vextracti128_high(vtmp, dst); // vtmp = upper 128 bits of dst
+ vpackusdw(dst, dst, vtmp, 0); // pack the dwords of both halves into words
+}
+
+// Variable shift src by shift using vtmp and scratch as TEMPs giving byte result in dst
+void C2_MacroAssembler::evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch) { // Widens bytes to words at the next larger vector length, shifts them per lane, then packs the results back to bytes.
+ assert(opcode == Op_LShiftVB ||
+ opcode == Op_RShiftVB ||
+ opcode == Op_URShiftVB, "%s", NodeClassNames[opcode]);
+ bool sign = (opcode != Op_URShiftVB); // sign-extend the source for every opcode except Op_URShiftVB
+ int ext_vector_len = vector_len + 1; // the widened words use the next vector length encoding
+ vextendbw(sign, dst, src, ext_vector_len); // widen the source bytes to words
+ vpmovzxbw(vtmp, shift, ext_vector_len); // widen the per-lane shift counts to words (zero-extended)
+ varshiftw(opcode, dst, dst, vtmp, ext_vector_len); // per-lane word shift
+ vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_short_to_byte_mask()), ext_vector_len, scratch); // mask each word with the vector_short_to_byte_mask stub constant
+ if (vector_len == 0) {
+ vextracti128_high(vtmp, dst); // vtmp = upper 128 bits of dst
+ vpackuswb(dst, dst, vtmp, vector_len); // pack the words of both halves into bytes
+ } else {
+ vextracti64x4_high(vtmp, dst); // vtmp = upper 256 bits of dst
+ vpackuswb(dst, dst, vtmp, vector_len); // pack the words of both halves into bytes
+ vpermq(dst, dst, 0xD8, vector_len); // 0xD8 reorders the qwords as 0,2,1,3; presumably undoes the per-128-bit-lane interleave of the pack — confirm
+ }
+}
+
+void C2_MacroAssembler::insert(BasicType typ, XMMRegister dst, Register val, int idx) { // Inserts the general-purpose register val into element idx of dst, choosing the pinsr* variant by element type.
+ switch(typ) {
+ case T_BYTE:
+ pinsrb(dst, val, idx);
+ break;
+ case T_SHORT:
+ pinsrw(dst, val, idx);
+ break;
+ case T_INT:
+ pinsrd(dst, val, idx);
+ break;
+ case T_LONG:
+ pinsrq(dst, val, idx);
+ break;
+ default:
+ assert(false,"Should not reach here."); // only T_BYTE, T_SHORT, T_INT and T_LONG are handled
+ break;
+ }
+}
+
+void C2_MacroAssembler::vinsert(BasicType typ, XMMRegister dst, XMMRegister src, Register val, int idx) { // Three-operand form of insert: emits the vpinsr* variant for the element type with src as the extra vector operand.
+ switch(typ) {
+ case T_BYTE:
+ vpinsrb(dst, src, val, idx);
+ break;
+ case T_SHORT:
+ vpinsrw(dst, src, val, idx);
+ break;
+ case T_INT:
+ vpinsrd(dst, src, val, idx);
+ break;
+ case T_LONG:
+ vpinsrq(dst, src, val, idx);
+ break;
+ default:
+ assert(false,"Should not reach here."); // only T_BYTE, T_SHORT, T_INT and T_LONG are handled
+ break;
+ }
+}
+
+void C2_MacroAssembler::vgather(BasicType typ, XMMRegister dst, Register base, XMMRegister idx, XMMRegister mask, int vector_len) { // Emits a gather from base + idx*scale into dst under a vector-register mask; the element type picks the instruction and scale.
+ switch(typ) {
+ case T_INT:
+ vpgatherdd(dst, Address(base, idx, Address::times_4), mask, vector_len); // index scaled by 4
+ break;
+ case T_FLOAT:
+ vgatherdps(dst, Address(base, idx, Address::times_4), mask, vector_len); // index scaled by 4
+ break;
+ case T_LONG:
+ vpgatherdq(dst, Address(base, idx, Address::times_8), mask, vector_len); // index scaled by 8
+ break;
+ case T_DOUBLE:
+ vgatherdpd(dst, Address(base, idx, Address::times_8), mask, vector_len); // index scaled by 8
+ break;
+ default:
+ assert(false,"Should not reach here."); // only T_INT, T_FLOAT, T_LONG and T_DOUBLE are handled
+ break;
+ }
+}
+
+void C2_MacroAssembler::evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len) { // Gather variant that takes a KRegister mask; the element type picks the instruction and scale.
+ switch(typ) {
+ case T_INT:
+ evpgatherdd(dst, mask, Address(base, idx, Address::times_4), vector_len); // index scaled by 4
+ break;
+ case T_FLOAT:
+ evgatherdps(dst, mask, Address(base, idx, Address::times_4), vector_len); // index scaled by 4
+ break;
+ case T_LONG:
+ evpgatherdq(dst, mask, Address(base, idx, Address::times_8), vector_len); // index scaled by 8
+ break;
+ case T_DOUBLE:
+ evgatherdpd(dst, mask, Address(base, idx, Address::times_8), vector_len); // index scaled by 8
+ break;
+ default:
+ assert(false,"Should not reach here."); // only T_INT, T_FLOAT, T_LONG and T_DOUBLE are handled
+ break;
+ }
+}
+
+void C2_MacroAssembler::evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len) { // Emits a scatter of src to base + idx*scale under a KRegister mask; the element type picks the instruction and scale.
+ switch(typ) {
+ case T_INT:
+ evpscatterdd(Address(base, idx, Address::times_4), mask, src, vector_len); // index scaled by 4
+ break;
+ case T_FLOAT:
+ evscatterdps(Address(base, idx, Address::times_4), mask, src, vector_len); // index scaled by 4
+ break;
+ case T_LONG:
+ evpscatterdq(Address(base, idx, Address::times_8), mask, src, vector_len); // index scaled by 8
+ break;
+ case T_DOUBLE:
+ evscatterdpd(Address(base, idx, Address::times_8), mask, src, vector_len); // index scaled by 8
+ break;
+ default:
+ assert(false,"Should not reach here."); // only T_INT, T_FLOAT, T_LONG and T_DOUBLE are handled
+ break;
+ }
+}
+
+void C2_MacroAssembler::load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt) { // Negates the bytes of src into dst, then sign-extends each byte to the width of elem_bt.
+ if (vlen_in_bytes <= 16) {
+ pxor (dst, dst); // dst = 0
+ psubb(dst, src); // dst = 0 - src per byte; presumably src holds 0/1 so the result is 0/-1 — confirm
+ switch (elem_bt) {
+ case T_BYTE: /* nothing to do */ break;
+ case T_SHORT: pmovsxbw(dst, dst); break; // byte -> word
+ case T_INT: pmovsxbd(dst, dst); break; // byte -> dword
+ case T_FLOAT: pmovsxbd(dst, dst); break; // same width as T_INT
+ case T_LONG: pmovsxbq(dst, dst); break; // byte -> qword
+ case T_DOUBLE: pmovsxbq(dst, dst); break; // same width as T_LONG
+
+ default: assert(false, "%s", type2name(elem_bt)); // unsupported element type: the message is its name
+ }
+ } else {
+ int vlen_enc = vector_length_encoding(vlen_in_bytes); // larger vectors use the VEX forms with an explicit length encoding
+
+ vpxor (dst, dst, dst, vlen_enc); // dst = 0
+ vpsubb(dst, dst, src, vlen_enc); // dst = 0 - src per byte
+ switch (elem_bt) {
+ case T_BYTE: /* nothing to do */ break;
+ case T_SHORT: vpmovsxbw(dst, dst, vlen_enc); break; // byte -> word
+ case T_INT: vpmovsxbd(dst, dst, vlen_enc); break; // byte -> dword
+ case T_FLOAT: vpmovsxbd(dst, dst, vlen_enc); break; // same width as T_INT
+ case T_LONG: vpmovsxbq(dst, dst, vlen_enc); break; // byte -> qword
+ case T_DOUBLE: vpmovsxbq(dst, dst, vlen_enc); break; // same width as T_LONG
+
+ default: assert(false, "%s", type2name(elem_bt)); // unsupported element type: the message is its name
+ }
+ }
+}
+
+void C2_MacroAssembler::load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes) { // Loads the constant at StubRoutines::x86::vector_iota_indices() into dst with a move sized for the vector length.
+ ExternalAddress addr(StubRoutines::x86::vector_iota_indices());
+ if (vlen_in_bytes <= 16) {
+ movdqu(dst, addr, scratch); // up to 16 bytes: 128-bit load
+ } else if (vlen_in_bytes == 32) {
+ vmovdqu(dst, addr, scratch); // 32 bytes: vmovdqu load
+ } else {
+ assert(vlen_in_bytes == 64, "%d", vlen_in_bytes); // the only remaining supported size
+ evmovdqub(dst, k0, addr, false /*merge*/, Assembler::AVX_512bit, scratch); // 64 bytes: 512-bit load with mask register k0
+ }
+}
+// Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.
+
+void C2_MacroAssembler::reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src) { // Emits one 128-bit combining step dst = dst <op> src; typ picks the lane width for opcodes that do not imply one.
int vector_len = Assembler::AVX_128bit;
switch (opcode) {
case Op_AndReductionV: pand(dst, src); break;
case Op_OrReductionV: por (dst, src); break;
case Op_XorReductionV: pxor(dst, src); break;
-
+ case Op_MinReductionV: // signed integer min, lane width from typ
+ switch (typ) {
+ case T_BYTE: pminsb(dst, src); break;
+ case T_SHORT: pminsw(dst, src); break;
+ case T_INT: pminsd(dst, src); break;
+ case T_LONG: assert(UseAVX > 2, "required"); // the qword min below is only emitted when UseAVX > 2
+ vpminsq(dst, dst, src, Assembler::AVX_128bit); break;
+ default: assert(false, "wrong type");
+ }
+ break;
+ case Op_MaxReductionV: // signed integer max, lane width from typ
+ switch (typ) {
+ case T_BYTE: pmaxsb(dst, src); break;
+ case T_SHORT: pmaxsw(dst, src); break;
+ case T_INT: pmaxsd(dst, src); break;
+ case T_LONG: assert(UseAVX > 2, "required"); // the qword max below is only emitted when UseAVX > 2
+ vpmaxsq(dst, dst, src, Assembler::AVX_128bit); break;
+ default: assert(false, "wrong type");
+ }
+ break;
case Op_AddReductionVF: addss(dst, src); break;
case Op_AddReductionVD: addsd(dst, src); break;
- case Op_AddReductionVI: paddd(dst, src); break;
+ case Op_AddReductionVI: // this opcode also covers byte and short lanes; typ picks the add width
+ switch (typ) {
+ case T_BYTE: paddb(dst, src); break;
+ case T_SHORT: paddw(dst, src); break;
+ case T_INT: paddd(dst, src); break;
+ default: assert(false, "wrong type");
+ }
+ break;
case Op_AddReductionVL: paddq(dst, src); break;
-
case Op_MulReductionVF: mulss(dst, src); break;
case Op_MulReductionVD: mulsd(dst, src); break;
- case Op_MulReductionVI: pmulld(dst, src); break;
- case Op_MulReductionVL: vpmullq(dst, dst, src, vector_len); break;
-
- default: assert(false, "wrong opcode");
+ case Op_MulReductionVI: // only short and int lanes are handled here; T_BYTE falls into the assert
+ switch (typ) {
+ case T_SHORT: pmullw(dst, src); break;
+ case T_INT: pmulld(dst, src); break;
+ default: assert(false, "wrong type");
+ }
+ break;
+ case Op_MulReductionVL: assert(UseAVX > 2, "required"); // the qword multiply below is only emitted when UseAVX > 2
+ vpmullq(dst, dst, src, vector_len); break;
+ default: assert(false, "wrong opcode");
}
}
-void C2_MacroAssembler::reduce_operation_256(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2) {
+void C2_MacroAssembler::reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2) { // Emits one 256-bit combining step dst = src1 <op> src2; typ picks the lane width for opcodes that do not imply one.
int vector_len = Assembler::AVX_256bit;
switch (opcode) {
case Op_AndReductionV: vpand(dst, src1, src2, vector_len); break;
case Op_OrReductionV: vpor (dst, src1, src2, vector_len); break;
case Op_XorReductionV: vpxor(dst, src1, src2, vector_len); break;
-
- case Op_AddReductionVI: vpaddd(dst, src1, src2, vector_len); break;
+ case Op_MinReductionV: // signed integer min, lane width from typ
+ switch (typ) {
+ case T_BYTE: vpminsb(dst, src1, src2, vector_len); break;
+ case T_SHORT: vpminsw(dst, src1, src2, vector_len); break;
+ case T_INT: vpminsd(dst, src1, src2, vector_len); break;
+ case T_LONG: assert(UseAVX > 2, "required"); // the qword min below is only emitted when UseAVX > 2
+ vpminsq(dst, src1, src2, vector_len); break;
+ default: assert(false, "wrong type");
+ }
+ break;
+ case Op_MaxReductionV: // signed integer max, lane width from typ
+ switch (typ) {
+ case T_BYTE: vpmaxsb(dst, src1, src2, vector_len); break;
+ case T_SHORT: vpmaxsw(dst, src1, src2, vector_len); break;
+ case T_INT: vpmaxsd(dst, src1, src2, vector_len); break;
+ case T_LONG: assert(UseAVX > 2, "required"); // the qword max below is only emitted when UseAVX > 2
+ vpmaxsq(dst, src1, src2, vector_len); break;
+ default: assert(false, "wrong type");
+ }
+ break;
+ case Op_AddReductionVI: // this opcode also covers byte and short lanes; typ picks the add width
+ switch (typ) {
+ case T_BYTE: vpaddb(dst, src1, src2, vector_len); break;
+ case T_SHORT: vpaddw(dst, src1, src2, vector_len); break;
+ case T_INT: vpaddd(dst, src1, src2, vector_len); break;
+ default: assert(false, "wrong type");
+ }
+ break;
case Op_AddReductionVL: vpaddq(dst, src1, src2, vector_len); break;
-
- case Op_MulReductionVI: vpmulld(dst, src1, src2, vector_len); break;
+ case Op_MulReductionVI: // only short and int lanes are handled here; T_BYTE falls into the assert
+ switch (typ) {
+ case T_SHORT: vpmullw(dst, src1, src2, vector_len); break;
+ case T_INT: vpmulld(dst, src1, src2, vector_len); break;
+ default: assert(false, "wrong type");
+ }
+ break;
case Op_MulReductionVL: vpmullq(dst, src1, src2, vector_len); break;
-
- default: assert(false, "wrong opcode");
+ default: assert(false, "wrong opcode");
}
}
@@ -1087,9 +1596,48 @@ void C2_MacroAssembler::reduce_fp(int opcode, int vlen,
}
}
+void C2_MacroAssembler::reduceB(int opcode, int vlen,
+ Register dst, Register src1, XMMRegister src2,
+ XMMRegister vtmp1, XMMRegister vtmp2) { // Dispatches a byte-vector reduction to the helper for the given element count (vlen).
+ switch (vlen) {
+ case 8: reduce8B (opcode, dst, src1, src2, vtmp1, vtmp2); break;
+ case 16: reduce16B(opcode, dst, src1, src2, vtmp1, vtmp2); break;
+ case 32: reduce32B(opcode, dst, src1, src2, vtmp1, vtmp2); break;
+ case 64: reduce64B(opcode, dst, src1, src2, vtmp1, vtmp2); break;
+
+ default: assert(false, "wrong vector length"); // only 8, 16, 32 and 64 elements are supported
+ }
+}
+
+void C2_MacroAssembler::mulreduceB(int opcode, int vlen,
+ Register dst, Register src1, XMMRegister src2,
+ XMMRegister vtmp1, XMMRegister vtmp2) { // Dispatches a byte-vector multiply reduction to the mulreduce helper for the given element count (vlen).
+ switch (vlen) {
+ case 8: mulreduce8B (opcode, dst, src1, src2, vtmp1, vtmp2); break;
+ case 16: mulreduce16B(opcode, dst, src1, src2, vtmp1, vtmp2); break;
+ case 32: mulreduce32B(opcode, dst, src1, src2, vtmp1, vtmp2); break;
+ case 64: mulreduce64B(opcode, dst, src1, src2, vtmp1, vtmp2); break;
+
+ default: assert(false, "wrong vector length"); // only 8, 16, 32 and 64 elements are supported
+ }
+}
+
+void C2_MacroAssembler::reduceS(int opcode, int vlen,
+ Register dst, Register src1, XMMRegister src2,
+ XMMRegister vtmp1, XMMRegister vtmp2) { // Dispatches a short-vector reduction to the helper for the given element count (vlen).
+ switch (vlen) {
+ case 4: reduce4S (opcode, dst, src1, src2, vtmp1, vtmp2); break;
+ case 8: reduce8S (opcode, dst, src1, src2, vtmp1, vtmp2); break;
+ case 16: reduce16S(opcode, dst, src1, src2, vtmp1, vtmp2); break;
+ case 32: reduce32S(opcode, dst, src1, src2, vtmp1, vtmp2); break;
+
+ default: assert(false, "wrong vector length"); // only 4, 8, 16 and 32 elements are supported
+ }
+}
+
void C2_MacroAssembler::reduceI(int opcode, int vlen,
- Register dst, Register src1, XMMRegister src2,
- XMMRegister vtmp1, XMMRegister vtmp2) {
+ Register dst, Register src1, XMMRegister src2,
+ XMMRegister vtmp1, XMMRegister vtmp2) {
switch (vlen) {
case 2: reduce2I (opcode, dst, src1, src2, vtmp1, vtmp2); break;
case 4: reduce4I (opcode, dst, src1, src2, vtmp1, vtmp2); break;
@@ -1102,8 +1650,8 @@ void C2_MacroAssembler::reduceI(int opcode, int vlen,
#ifdef _LP64
void C2_MacroAssembler::reduceL(int opcode, int vlen,
- Register dst, Register src1, XMMRegister src2,
- XMMRegister vtmp1, XMMRegister vtmp2) {
+ Register dst, Register src1, XMMRegister src2,
+ XMMRegister vtmp1, XMMRegister vtmp2) {
switch (vlen) {
case 2: reduce2L(opcode, dst, src1, src2, vtmp1, vtmp2); break;
case 4: reduce4L(opcode, dst, src1, src2, vtmp1, vtmp2); break;
@@ -1158,10 +1706,10 @@ void C2_MacroAssembler::reduce2I(int opcode, Register dst, Register src1, XMMReg
phaddd(vtmp1, vtmp1);
} else {
pshufd(vtmp1, src2, 0x1);
- reduce_operation_128(opcode, vtmp1, src2);
+ reduce_operation_128(T_INT, opcode, vtmp1, src2);
}
movdl(vtmp2, src1);
- reduce_operation_128(opcode, vtmp1, vtmp2);
+ reduce_operation_128(T_INT, opcode, vtmp1, vtmp2);
movdl(dst, vtmp1);
}
@@ -1174,7 +1722,7 @@ void C2_MacroAssembler::reduce4I(int opcode, Register dst, Register src1, XMMReg
reduce2I(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
} else {
pshufd(vtmp2, src2, 0xE);
- reduce_operation_128(opcode, vtmp2, src2);
+ reduce_operation_128(T_INT, opcode, vtmp2, src2);
reduce2I(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
}
}
@@ -1187,51 +1735,176 @@ void C2_MacroAssembler::reduce8I(int opcode, Register dst, Register src1, XMMReg
reduce2I(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
} else {
vextracti128_high(vtmp1, src2);
- reduce_operation_128(opcode, vtmp1, src2);
+ reduce_operation_128(T_INT, opcode, vtmp1, src2);
reduce4I(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
}
}
void C2_MacroAssembler::reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
vextracti64x4_high(vtmp2, src2);
- reduce_operation_256(opcode, vtmp2, vtmp2, src2);
+ reduce_operation_256(T_INT, opcode, vtmp2, vtmp2, src2); // element type is now passed explicitly: combine the extracted upper half with src2 as ints
reduce8I(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
}
+void C2_MacroAssembler::reduce8B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { // Reduces the low 8 bytes of src2 together with the scalar src1 into dst by repeated halving.
+ pshufd(vtmp2, src2, 0x1); // vtmp2 dword 0 = src2 dword 1 (bytes 4..7)
+ reduce_operation_128(T_BYTE, opcode, vtmp2, src2); // combine bytes 4..7 with bytes 0..3
+ movdqu(vtmp1, vtmp2);
+ psrldq(vtmp1, 2); // shift right by 2 bytes
+ reduce_operation_128(T_BYTE, opcode, vtmp1, vtmp2); // combine bytes 2..3 with bytes 0..1
+ movdqu(vtmp2, vtmp1);
+ psrldq(vtmp2, 1); // shift right by 1 byte
+ reduce_operation_128(T_BYTE, opcode, vtmp1, vtmp2); // combine byte 1 with byte 0
+ movdl(vtmp2, src1); // bring the scalar input into a vector register
+ pmovsxbd(vtmp1, vtmp1); // sign-extend the byte result to a dword
+ reduce_operation_128(T_INT, opcode, vtmp1, vtmp2); // fold in src1 at int width
+ pextrb(dst, vtmp1, 0x0); // take the low byte of the result
+ movsbl(dst, dst); // sign-extend it to a 32-bit value in dst
+}
+
+void C2_MacroAssembler::reduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { // Folds the upper 8 bytes of src2 onto the lower 8, then delegates to reduce8B.
+ pshufd(vtmp1, src2, 0xE); // vtmp1 low 64 bits = src2 upper 64 bits
+ reduce_operation_128(T_BYTE, opcode, vtmp1, src2);
+ reduce8B(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
+}
+
+void C2_MacroAssembler::reduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { // Folds the upper 128 bits of src2 onto the lower 128, then delegates to reduce16B.
+ vextracti128_high(vtmp2, src2); // vtmp2 = upper 128 bits of src2
+ reduce_operation_128(T_BYTE, opcode, vtmp2, src2);
+ reduce16B(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
+}
+
+void C2_MacroAssembler::reduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { // Folds the upper 256 bits of src2 onto the lower 256, then delegates to reduce32B.
+ vextracti64x4_high(vtmp1, src2); // vtmp1 = upper 256 bits of src2
+ reduce_operation_256(T_BYTE, opcode, vtmp1, vtmp1, src2);
+ reduce32B(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
+}
+
+void C2_MacroAssembler::mulreduce8B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { // Multiply-reduces 8 bytes by widening them to shorts and reusing reduce8S.
+ pmovsxbw(vtmp2, src2); // sign-extend the low 8 bytes of src2 to 8 shorts
+ reduce8S(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
+}
+
+void C2_MacroAssembler::mulreduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { // Multiply-reduces 16 bytes of src2 together with src1 into dst by widening the bytes to shorts first.
+ if (UseAVX > 1) {
+ int vector_len = Assembler::AVX_256bit;
+ vpmovsxbw(vtmp1, src2, vector_len); // all 16 bytes -> 16 shorts in one 256-bit register
+ reduce16S(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
+ } else {
+ pmovsxbw(vtmp2, src2); // low 8 bytes of src2 -> 8 shorts
+ reduce8S(opcode, dst, src1, vtmp2, vtmp1, vtmp2); // partial result for the low half, including src1
+ pshufd(vtmp2, src2, 0xE); // bring the upper 8 bytes of src2 into the low 64 bits (0x1 would only fetch bytes 4..7)
+ pmovsxbw(vtmp2, vtmp2); // widen the shuffled upper bytes; widening src2 again would re-use the low half and drop the upper one
+ reduce8S(opcode, dst, dst, vtmp2, vtmp1, vtmp2); // fold the upper half into the partial result already in dst
+ }
+}
+
+void C2_MacroAssembler::mulreduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { // Multiply-reduces 32 bytes: one widened 512-bit pass when AVX512BW is available, otherwise two 16-byte passes.
+ if (UseAVX > 2 && VM_Version::supports_avx512bw()) {
+ int vector_len = Assembler::AVX_512bit;
+ vpmovsxbw(vtmp1, src2, vector_len); // 32 bytes -> 32 shorts
+ reduce32S(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
+ } else {
+ assert(UseAVX >= 2,"Should not reach here.");
+ mulreduce16B(opcode, dst, src1, src2, vtmp1, vtmp2); // lower 16 bytes, seeded with src1
+ vextracti128_high(vtmp2, src2); // vtmp2 = upper 128 bits of src2
+ mulreduce16B(opcode, dst, dst, vtmp2, vtmp1, vtmp2); // upper 16 bytes, seeded with the partial result in dst
+ }
+}
+
+void C2_MacroAssembler::mulreduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { // Multiply-reduces 64 bytes as two 32-byte passes.
+ mulreduce32B(opcode, dst, src1, src2, vtmp1, vtmp2); // lower 32 bytes, seeded with src1
+ vextracti64x4_high(vtmp2, src2); // vtmp2 = upper 256 bits of src2
+ mulreduce32B(opcode, dst, dst, vtmp2, vtmp1, vtmp2); // upper 32 bytes, seeded with the partial result in dst
+}
+
+void C2_MacroAssembler::reduce4S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { // Reduces the low 4 shorts of src2 together with the scalar src1 into dst.
+ if (opcode == Op_AddReductionVI) {
+ if (vtmp1 != src2) {
+ movdqu(vtmp1, src2); // work on a copy unless the caller already passed src2 as vtmp1
+ }
+ phaddw(vtmp1, vtmp1); // horizontal add of adjacent shorts
+ phaddw(vtmp1, vtmp1); // second pass leaves the sum of the low 4 shorts in element 0
+ } else {
+ pshufd(vtmp2, src2, 0x1); // vtmp2 dword 0 = src2 dword 1 (shorts 2..3)
+ reduce_operation_128(T_SHORT, opcode, vtmp2, src2); // combine shorts 2..3 with shorts 0..1
+ movdqu(vtmp1, vtmp2);
+ psrldq(vtmp1, 2); // shift right by one short
+ reduce_operation_128(T_SHORT, opcode, vtmp1, vtmp2); // combine short 1 with short 0
+ }
+ movdl(vtmp2, src1); // bring the scalar input into a vector register
+ pmovsxwd(vtmp1, vtmp1); // sign-extend the short result to a dword
+ reduce_operation_128(T_INT, opcode, vtmp1, vtmp2); // fold in src1 at int width
+ pextrw(dst, vtmp1, 0x0); // take the low word of the result
+ movswl(dst, dst); // sign-extend it to a 32-bit value in dst
+}
+
+void C2_MacroAssembler::reduce8S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { // Halves 8 shorts of src2 down to 4, then delegates to reduce4S.
+ if (opcode == Op_AddReductionVI) {
+ if (vtmp1 != src2) {
+ movdqu(vtmp1, src2); // work on a copy unless the caller already passed src2 as vtmp1
+ }
+ phaddw(vtmp1, src2); // horizontal add: the low 4 shorts of vtmp1 become the pair sums of all 8 input shorts
+ } else {
+ pshufd(vtmp1, src2, 0xE); // vtmp1 low 64 bits = src2 upper 64 bits
+ reduce_operation_128(T_SHORT, opcode, vtmp1, src2); // combine shorts 4..7 with shorts 0..3
+ }
+ reduce4S(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
+}
+
+void C2_MacroAssembler::reduce16S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { // Halves 16 shorts of src2 down to 8, then delegates to reduce8S.
+ if (opcode == Op_AddReductionVI) {
+ int vector_len = Assembler::AVX_256bit;
+ vphaddw(vtmp2, src2, src2, vector_len); // 256-bit horizontal add of adjacent shorts
+ vpermq(vtmp2, vtmp2, 0xD8, vector_len); // 0xD8 reorders the qwords as 0,2,1,3; presumably gathers the pair sums of both 128-bit lanes into the low half — confirm
+ } else {
+ vextracti128_high(vtmp2, src2); // vtmp2 = upper 128 bits of src2
+ reduce_operation_128(T_SHORT, opcode, vtmp2, src2); // combine the upper 8 shorts with the lower 8
+ }
+ reduce8S(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
+}
+
+void C2_MacroAssembler::reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { // Folds the upper 256 bits of src2 onto the lower 256, then delegates to reduce16S.
+ // No local vector length is needed here: reduce_operation_256 uses the 256-bit encoding itself.
+ vextracti64x4_high(vtmp1, src2); // vtmp1 = upper 256 bits of src2
+ reduce_operation_256(T_SHORT, opcode, vtmp1, vtmp1, src2); // combine the upper 16 shorts with the lower 16
+ reduce16S(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
+}
+
#ifdef _LP64
void C2_MacroAssembler::reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
pshufd(vtmp2, src2, 0xE);
- reduce_operation_128(opcode, vtmp2, src2);
+ reduce_operation_128(T_LONG, opcode, vtmp2, src2);
movdq(vtmp1, src1);
- reduce_operation_128(opcode, vtmp1, vtmp2);
+ reduce_operation_128(T_LONG, opcode, vtmp1, vtmp2);
movdq(dst, vtmp1);
}
void C2_MacroAssembler::reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
vextracti128_high(vtmp1, src2);
- reduce_operation_128(opcode, vtmp1, src2);
+ reduce_operation_128(T_LONG, opcode, vtmp1, src2);
reduce2L(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
}
void C2_MacroAssembler::reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
vextracti64x4_high(vtmp2, src2);
- reduce_operation_256(opcode, vtmp2, vtmp2, src2);
+ reduce_operation_256(T_LONG, opcode, vtmp2, vtmp2, src2);
reduce4L(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
}
#endif // _LP64
void C2_MacroAssembler::reduce2F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp) {
- reduce_operation_128(opcode, dst, src);
+ reduce_operation_128(T_FLOAT, opcode, dst, src);
pshufd(vtmp, src, 0x1);
- reduce_operation_128(opcode, dst, vtmp);
+ reduce_operation_128(T_FLOAT, opcode, dst, vtmp);
}
void C2_MacroAssembler::reduce4F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp) {
reduce2F(opcode, dst, src, vtmp);
pshufd(vtmp, src, 0x2);
- reduce_operation_128(opcode, dst, vtmp);
+ reduce_operation_128(T_FLOAT, opcode, dst, vtmp);
pshufd(vtmp, src, 0x3);
- reduce_operation_128(opcode, dst, vtmp);
+ reduce_operation_128(T_FLOAT, opcode, dst, vtmp);
}
void C2_MacroAssembler::reduce8F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) {
@@ -1247,9 +1920,9 @@ void C2_MacroAssembler::reduce16F(int opcode, XMMRegister dst, XMMRegister src,
}
void C2_MacroAssembler::reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp) {
- reduce_operation_128(opcode, dst, src);
+ reduce_operation_128(T_DOUBLE, opcode, dst, src);
pshufd(vtmp, src, 0xE);
- reduce_operation_128(opcode, dst, vtmp);
+ reduce_operation_128(T_DOUBLE, opcode, dst, vtmp);
}
void C2_MacroAssembler::reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) {
@@ -1264,6 +1937,207 @@ void C2_MacroAssembler::reduce8D(int opcode, XMMRegister dst, XMMRegister src, X
reduce4D(opcode, dst, vtmp1, vtmp1, vtmp2);
}
+void C2_MacroAssembler::reduceFloatMinMax(int opcode, int vlen, bool is_dst_valid, // min/max reduction over the vlen floats of src
+ XMMRegister dst, XMMRegister src, // dst: result (and extra input when is_dst_valid); src: input vector
+ XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, // scratch registers handed straight to vminmax_fp
+ XMMRegister xmm_0, XMMRegister xmm_1) { // working registers; xmm_1 may be xnoreg
+ int permconst[] = {1, 14}; // vpermilps immediates for the last two steps: 14 brings elements 2,3 down to 0,1; 1 brings element 1 down to 0
+ XMMRegister wsrc = src; // vector consumed by the current step
+ XMMRegister wdst = xmm_0; // vector written by the current step
+ XMMRegister wtmp = (xmm_1 == xnoreg) ? xmm_0: xmm_1; // receives the half being folded in; shares xmm_0 when no second register was supplied
+
+ int vlen_enc = Assembler::AVX_128bit;
+ if (vlen == 16) {
+ vlen_enc = Assembler::AVX_256bit; // the first step of a 16-element reduction operates on 256-bit halves
+ }
+
+ for (int i = log2(vlen) - 1; i >=0; i--) { // one halving step per iteration
+ if (i == 0 && !is_dst_valid) {
+ wdst = dst; // no incoming value in dst, so the last step writes it directly
+ }
+ if (i == 3) {
+ vextracti64x4_high(wtmp, wsrc); // upper 256 bits
+ } else if (i == 2) {
+ vextracti128_high(wtmp, wsrc); // upper 128 bits
+ } else { // i = [0,1]
+ vpermilps(wtmp, wsrc, permconst[i], vlen_enc);
+ }
+ vminmax_fp(opcode, T_FLOAT, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc);
+ wsrc = wdst; // the next step consumes this step's result
+ vlen_enc = Assembler::AVX_128bit; // every step after the first uses the 128-bit encoding
+ }
+ if (is_dst_valid) {
+ vminmax_fp(opcode, T_FLOAT, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit); // combine the reduced value with the one already in dst
+ }
+}
+
+void C2_MacroAssembler::reduceDoubleMinMax(int opcode, int vlen, bool is_dst_valid, XMMRegister dst, XMMRegister src, // min/max reduction over the vlen doubles of src
+ XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, // scratch registers handed straight to vminmax_fp
+ XMMRegister xmm_0, XMMRegister xmm_1) { // working registers; xmm_1 may be xnoreg
+ XMMRegister wsrc = src; // vector consumed by the current step
+ XMMRegister wdst = xmm_0; // vector written by the current step
+ XMMRegister wtmp = (xmm_1 == xnoreg) ? xmm_0: xmm_1; // receives the half being folded in; shares xmm_0 when no second register was supplied
+ int vlen_enc = Assembler::AVX_128bit;
+ if (vlen == 8) {
+ vlen_enc = Assembler::AVX_256bit; // the first step of an 8-element reduction operates on 256-bit halves
+ }
+ for (int i = log2(vlen) - 1; i >=0; i--) { // one halving step per iteration
+ if (i == 0 && !is_dst_valid) {
+ wdst = dst; // no incoming value in dst, so the last step writes it directly
+ }
+ if (i == 1) {
+ vextracti128_high(wtmp, wsrc); // upper 128 bits
+ } else if (i == 2) {
+ vextracti64x4_high(wtmp, wsrc); // upper 256 bits
+ } else {
+ assert(i == 0, "%d", i);
+ vpermilpd(wtmp, wsrc, 1, vlen_enc); // immediate 1 places element 1 in position 0
+ }
+ vminmax_fp(opcode, T_DOUBLE, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc);
+ wsrc = wdst; // the next step consumes this step's result
+ vlen_enc = Assembler::AVX_128bit; // every step after the first uses the 128-bit encoding
+ }
+ if (is_dst_valid) {
+ vminmax_fp(opcode, T_DOUBLE, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit); // combine the reduced value with the one already in dst
+ }
+}
+
+void C2_MacroAssembler::extract(BasicType bt, Register dst, XMMRegister src, int idx) {
+  // Emits the pextr* instruction sized for bt, passing dst, src and idx through.
+  switch (bt) {
+    case T_BYTE:  pextrb(dst, src, idx); return;
+    case T_SHORT: pextrw(dst, src, idx); return;
+    case T_INT:   pextrd(dst, src, idx); return;
+    case T_LONG:  pextrq(dst, src, idx); return;
+    default:      break;
+  }
+  // Any other element type is not handled by this helper.
+  assert(false,"Should not reach here.");
+}
+
+XMMRegister C2_MacroAssembler::get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex) {
+  // Returns the register holding the 16-byte lane that contains element
+  // elemindex: src itself for lane 0, otherwise dst after extracting that lane.
+  int esize = type2aelembytes(typ);
+  int elem_per_lane = 16/esize;
+  int lane = elemindex / elem_per_lane;
+  if (lane >= 2) {
+    assert(UseAVX > 2, "required");
+    vextractf32x4(dst, src, lane & 3);
+    return dst;
+  } else if (lane > 0) {
+    assert(UseAVX > 0, "required");
+    vextractf128(dst, src, lane);
+    return dst;
+  } else {
+    return src;
+  }
+}
+
+void C2_MacroAssembler::get_elem(BasicType typ, Register dst, XMMRegister src, int elemindex) {
+  int esize = type2aelembytes(typ);
+  int elem_per_lane = 16/esize;
+  int eindex = elemindex % elem_per_lane;  // position of the element inside its 16-byte lane
+  assert(is_integral_type(typ),"required");
+  if (eindex != 0) {
+    extract(typ, dst, src, eindex);        // pextrb/pextrw zero-extend the extracted element
+  } else if (typ == T_LONG) {
+    movq(dst, src);
+  } else {
+    movdl(dst, src);                       // low 32 bits; wider than a byte/short element
+  }
+  // Sub-int elements are sign-extended whichever path produced them, so a
+  // negative byte/short reads the same at index 0 and at any other index.
+  if (typ == T_BYTE) {
+    movsbl(dst, dst);
+  } else if (typ == T_SHORT) {
+    movswl(dst, dst);
+  }
+}
+
+void C2_MacroAssembler::get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, Register tmp, XMMRegister vtmp) { // float/double variant: moves the selected element to the bottom of dst
+ int esize = type2aelembytes(typ);
+ int elem_per_lane = 16/esize;
+ int eindex = elemindex % elem_per_lane; // position of the element inside its 16-byte lane
+ assert((typ == T_FLOAT || typ == T_DOUBLE),"required");
+
+ if (eindex == 0) {
+ movq(dst, src); // element is already at the bottom; copy the low 64 bits
+ } else {
+ if (typ == T_FLOAT) {
+ if (UseAVX == 0) {
+ movdqu(dst, src); // SSE shuffle is destructive, so work on a copy
+ pshufps(dst, dst, eindex); // immediate eindex (1..3) selects element eindex into position 0
+ } else {
+ vpshufps(dst, src, src, eindex, Assembler::AVX_128bit);
+ }
+ } else {
+ if (UseAVX == 0) {
+ movdqu(dst, src); // SSE shift is destructive, so work on a copy
+ psrldq(dst, eindex*esize); // byte shift right brings the element down to bit 0
+ } else {
+ vpsrldq(dst, src, eindex*esize, Assembler::AVX_128bit);
+ }
+ movq(dst, dst); // NOTE(review): presumably clears the bits above 63 - confirm against movq(XMMRegister, XMMRegister)
+ }
+ }
+ // Zero upper bits: only floats need the explicit 32-bit mask applied below
+ if (typ == T_FLOAT) {
+ if (UseAVX == 0) {
+ assert((vtmp != xnoreg) && (tmp != noreg), "required."); // SSE path loads the mask into vtmp and passes tmp along with the external address
+ movdqu(vtmp, ExternalAddress(StubRoutines::x86::vector_32_bit_mask()), tmp);
+ pand(dst, vtmp);
+ } else {
+ assert((tmp != noreg), "required."); // AVX path ands with the mask operand directly, so only tmp is needed
+ vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_32_bit_mask()), Assembler::AVX_128bit, tmp);
+ }
+ }
+}
+
+void C2_MacroAssembler::evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch) {
+  // Emits the masked EVEX compare whose element width matches typ; every
+  // other argument is forwarded to the selected instruction unchanged.
+  // T_FLOAT is routed to the same instruction as T_INT (evpcmpd), and
+  // T_DOUBLE to the same instruction as T_LONG (evpcmpq).
+  const bool is_dword = (typ == T_INT)  || (typ == T_FLOAT);
+  const bool is_qword = (typ == T_LONG) || (typ == T_DOUBLE);
+
+  if (typ == T_BYTE) {
+    evpcmpb(kdmask, ksmask, src1, adr, comparison, vector_len, scratch);
+  } else if (typ == T_SHORT) {
+    evpcmpw(kdmask, ksmask, src1, adr, comparison, vector_len, scratch);
+  } else if (is_dword) {
+    evpcmpd(kdmask, ksmask, src1, adr, comparison, vector_len, scratch);
+  } else if (is_qword) {
+    evpcmpq(kdmask, ksmask, src1, adr, comparison, vector_len, scratch);
+  } else {
+    // No compare is emitted for any other element type.
+    assert(false,"Should not reach here.");
+  }
+}
+
+void C2_MacroAssembler::evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len) {
+  // Emits the evpblendm* form sized for typ; float/double reuse the dword/qword forms.
+  switch (typ) {
+    case T_BYTE:
+      evpblendmb(dst, kmask, src1, src2, merge, vector_len);
+      return;
+    case T_SHORT:
+      evpblendmw(dst, kmask, src1, src2, merge, vector_len);
+      return;
+    case T_INT:
+    case T_FLOAT:
+      evpblendmd(dst, kmask, src1, src2, merge, vector_len);
+      return;
+    case T_LONG:
+    case T_DOUBLE:
+      evpblendmq(dst, kmask, src1, src2, merge, vector_len);
+      return;
+    default: break;
+  }
+  assert(false,"Should not reach here.");
+}
+
//-------------------------------------------------------------------------------------------
// IndexOf for constant substrings with size >= 8 chars
@@ -1850,7 +2724,7 @@ void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, Regist
pmovmskb(tmp, vec3);
}
bsfl(ch, tmp);
- addl(result, ch);
+ addptr(result, ch);
bind(FOUND_SEQ_CHAR);
subptr(result, str1);
@@ -1859,6 +2733,99 @@ void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, Regist
bind(DONE_LABEL);
} // string_indexof_char
+void C2_MacroAssembler::stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result, // byte-element search: first position of ch in the cnt1 bytes at str1
+ XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) { // result = offset from str1, or -1; vec1..vec3 and tmp are scratch
+ ShortBranchVerifier sbv(this);
+ assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required");
+
+ int stride = 16; // bytes handled per 128-bit compare
+
+ Label FOUND_CHAR, SCAN_TO_CHAR_INIT, SCAN_TO_CHAR_LOOP,
+ SCAN_TO_16_CHAR, SCAN_TO_16_CHAR_LOOP, SCAN_TO_32_CHAR_LOOP,
+ RET_NOT_FOUND, SCAN_TO_16_CHAR_INIT,
+ FOUND_SEQ_CHAR, DONE_LABEL;
+
+ movptr(result, str1); // result doubles as the scan cursor until the very end
+ if (UseAVX >= 2) {
+ cmpl(cnt1, stride);
+ jcc(Assembler::less, SCAN_TO_CHAR_INIT); // under 16 elements: scalar loop only
+ cmpl(cnt1, stride*2);
+ jcc(Assembler::less, SCAN_TO_16_CHAR_INIT); // 16..31 elements: skip the 32-byte loop
+ movdl(vec1, ch);
+ vpbroadcastb(vec1, vec1, Assembler::AVX_256bit); // vec1 = ch replicated into every byte
+ vpxor(vec2, vec2); // vec2 = 0, used as the ptest operand
+ movl(tmp, cnt1);
+ andl(tmp, 0xFFFFFFE0); // vector count (in chars), rounded down to a multiple of 32
+ andl(cnt1,0x0000001F); // tail count (in chars)
+
+ bind(SCAN_TO_32_CHAR_LOOP);
+ vmovdqu(vec3, Address(result, 0));
+ vpcmpeqb(vec3, vec3, vec1, Assembler::AVX_256bit); // 0xFF in every byte position that equals ch
+ vptest(vec2, vec3); // with vec2 == 0, CF is set only when vec3 is all zero
+ jcc(Assembler::carryClear, FOUND_CHAR); // CF clear: at least one byte matched
+ addptr(result, 32);
+ subl(tmp, stride*2);
+ jcc(Assembler::notZero, SCAN_TO_32_CHAR_LOOP);
+ jmp(SCAN_TO_16_CHAR); // vec1/vec2 are already set up, so skip SCAN_TO_16_CHAR_INIT
+
+ bind(SCAN_TO_16_CHAR_INIT);
+ movdl(vec1, ch);
+ pxor(vec2, vec2);
+ pshufb(vec1, vec2); // all-zero shuffle control replicates byte 0 (ch) into every byte
+ }
+
+ bind(SCAN_TO_16_CHAR);
+ cmpl(cnt1, stride);
+ jcc(Assembler::less, SCAN_TO_CHAR_INIT);// fewer than 16 entries left
+ if (UseAVX < 2) { // with AVX2 the broadcast was already emitted above
+ movdl(vec1, ch);
+ pxor(vec2, vec2);
+ pshufb(vec1, vec2);
+ }
+ movl(tmp, cnt1);
+ andl(tmp, 0xFFFFFFF0); // vector count (in bytes), rounded down to a multiple of 16
+ andl(cnt1,0x0000000F); // tail count (in bytes)
+
+ bind(SCAN_TO_16_CHAR_LOOP);
+ movdqu(vec3, Address(result, 0));
+ pcmpeqb(vec3, vec1); // 0xFF in every byte position that equals ch
+ ptest(vec2, vec3); // with vec2 == 0, CF is set only when vec3 is all zero
+ jcc(Assembler::carryClear, FOUND_CHAR);
+ addptr(result, 16);
+ subl(tmp, stride);
+ jcc(Assembler::notZero, SCAN_TO_16_CHAR_LOOP);// loop until the 16-byte blocks are used up
+
+ bind(SCAN_TO_CHAR_INIT);
+ testl(cnt1, cnt1);
+ jcc(Assembler::zero, RET_NOT_FOUND); // no tail elements left to check
+ bind(SCAN_TO_CHAR_LOOP);
+ load_unsigned_byte(tmp, Address(result, 0));
+ cmpl(ch, tmp);
+ jccb(Assembler::equal, FOUND_SEQ_CHAR); // cursor already points at the match
+ addptr(result, 1);
+ subl(cnt1, 1);
+ jccb(Assembler::zero, RET_NOT_FOUND);
+ jmp(SCAN_TO_CHAR_LOOP);
+
+ bind(RET_NOT_FOUND);
+ movl(result, -1);
+ jmpb(DONE_LABEL);
+
+ bind(FOUND_CHAR);
+ if (UseAVX >= 2) {
+ vpmovmskb(tmp, vec3); // one mask bit per byte of the compare result
+ } else {
+ pmovmskb(tmp, vec3);
+ }
+ bsfl(ch, tmp); // lowest set bit = offset of the first match within the block
+ addptr(result, ch);
+
+ bind(FOUND_SEQ_CHAR);
+ subptr(result, str1); // turn the cursor address into an offset from str1
+
+ bind(DONE_LABEL);
+} // stringL_indexof_char
+
// helper function for string_compare
void C2_MacroAssembler::load_next_elements(Register elem1, Register elem2, Register str1, Register str2,
Address::ScaleFactor scale, Address::ScaleFactor scale1,
diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
index f16b193a21d..79ab55a75ad 100644
--- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
@@ -28,6 +28,8 @@
// C2_MacroAssembler contains high-level macros for C2
public:
+ Assembler::AvxVectorLen vector_length_encoding(int vlen_in_bytes);
+
// special instructions for EVEX
void setvectmask(Register dst, Register src);
void restorevectmask();
@@ -71,25 +73,69 @@
void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, Register scr);
void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
+
+ void pminmax(int opcode, BasicType elem_bt, XMMRegister dst, XMMRegister src,
+ XMMRegister tmp = xnoreg);
+ void vpminmax(int opcode, BasicType elem_bt,
+ XMMRegister dst, XMMRegister src1, XMMRegister src2,
+ int vlen_enc);
+
+ void vminmax_fp(int opcode, BasicType elem_bt,
+ XMMRegister dst, XMMRegister a, XMMRegister b,
+ XMMRegister tmp, XMMRegister atmp, XMMRegister btmp,
+ int vlen_enc);
+ void evminmax_fp(int opcode, BasicType elem_bt,
+ XMMRegister dst, XMMRegister a, XMMRegister b,
+ KRegister ktmp, XMMRegister atmp, XMMRegister btmp,
+ int vlen_enc);
+
void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
- void vshiftd(int opcode, XMMRegister dst, XMMRegister src);
+ void vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
+ void vextendwd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
+
+ void vshiftd(int opcode, XMMRegister dst, XMMRegister shift);
void vshiftd_imm(int opcode, XMMRegister dst, int shift);
- void vshiftd(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void vshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
void vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);
- void vshiftw(int opcode, XMMRegister dst, XMMRegister src);
- void vshiftw(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
- void vshiftq(int opcode, XMMRegister dst, XMMRegister src);
+ void vshiftw(int opcode, XMMRegister dst, XMMRegister shift);
+ void vshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
+ void vshiftq(int opcode, XMMRegister dst, XMMRegister shift);
void vshiftq_imm(int opcode, XMMRegister dst, int shift);
- void vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void vshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
void vshiftq_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);
void vprotate_imm(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vprotate_var(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
- // Reductions for vectors of ints, longs, floats, and doubles.
+ void varshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
+ void varshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
+ void varshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc, XMMRegister vtmp = xnoreg);
+ void varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch);
+ void evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch);
+
+ void insert(BasicType typ, XMMRegister dst, Register val, int idx);
+ void vinsert(BasicType typ, XMMRegister dst, XMMRegister src, Register val, int idx);
+ void vgather(BasicType typ, XMMRegister dst, Register base, XMMRegister idx, XMMRegister mask, int vector_len);
+ void evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len);
+ void evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len);
+
+ // extract
+ void extract(BasicType typ, Register dst, XMMRegister src, int idx);
+ XMMRegister get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex);
+ void get_elem(BasicType typ, Register dst, XMMRegister src, int elemindex);
+ void get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, Register tmp = noreg, XMMRegister vtmp = xnoreg);
- // dst = src1 + reduce(op, src2) using vtmp as temps
+ // blend
+ void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch = rscratch1);
+ void evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);
+
+ void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt);
+ void load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes);
+
+ // Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.
+
+ // dst = op(src1, reduce(op, src2)), using vtmp1/vtmp2 as temps
void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
#ifdef _LP64
void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
@@ -99,38 +145,71 @@
void reduce_fp(int opcode, int vlen,
XMMRegister dst, XMMRegister src,
XMMRegister vtmp1, XMMRegister vtmp2 = xnoreg);
+ void reduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+ void mulreduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+ void reduceS(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+ void reduceFloatMinMax(int opcode, int vlen, bool is_dst_valid,
+ XMMRegister dst, XMMRegister src,
+ XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg);
+ void reduceDoubleMinMax(int opcode, int vlen, bool is_dst_valid,
+ XMMRegister dst, XMMRegister src,
+ XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg);
private:
void reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
void reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
+ // Int Reduction
void reduce2I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce4I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce8I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+ // Byte Reduction
+ void reduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+ void reduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+ void reduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+ void reduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+ void mulreduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+ void mulreduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+ void mulreduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+ void mulreduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+
+ // Short Reduction
+ void reduce4S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+ void reduce8S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+ void reduce16S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+ void reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+
+ // Long Reduction
#ifdef _LP64
void reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
#endif // _LP64
+ // Float Reduction
void reduce2F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
void reduce4F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
void reduce8F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
+ // Double Reduction
void reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
void reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
- void reduce_operation_128(int opcode, XMMRegister dst, XMMRegister src);
- void reduce_operation_256(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
+ // Base reduction instruction
+ void reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src);
+ void reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
public:
void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);
+ void stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result,
+ XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);
+
// IndexOf strings.
// Small strings are loaded through stack if they cross page boundary.
void string_indexof(Register str1, Register str2,
diff --git a/src/hotspot/cpu/x86/c2_globals_x86.hpp b/src/hotspot/cpu/x86/c2_globals_x86.hpp
index 6513be7b53e..31e77b52568 100644
--- a/src/hotspot/cpu/x86/c2_globals_x86.hpp
+++ b/src/hotspot/cpu/x86/c2_globals_x86.hpp
@@ -31,8 +31,6 @@
// Sets the default values for platform dependent flags used by the server compiler.
// (see c2_globals.hpp). Alpha-sorted.
define_pd_global(bool, BackgroundCompilation, true);
-define_pd_global(bool, UseTLAB, true);
-define_pd_global(bool, ResizeTLAB, true);
define_pd_global(bool, CICompileOSR, true);
define_pd_global(bool, InlineIntrinsics, true);
define_pd_global(bool, PreferInterpreterNativeStubs, false);
diff --git a/src/hotspot/cpu/x86/c2_safepointPollStubTable_x86.cpp b/src/hotspot/cpu/x86/c2_safepointPollStubTable_x86.cpp
new file mode 100644
index 00000000000..c3d4850a5db
--- /dev/null
+++ b/src/hotspot/cpu/x86/c2_safepointPollStubTable_x86.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "opto/compile.hpp"
+#include "opto/node.hpp"
+#include "opto/output.hpp"
+#include "runtime/sharedRuntime.hpp"
+
+#define __ masm.
+void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointPollStub* entry) const { // emits one stub: record the safepoint pc in the thread, then jump to the return handler
+ assert(SharedRuntime::polling_page_return_handler_blob() != NULL,
+ "polling page return stub not created yet");
+ address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point();
+
+ RuntimeAddress callback_addr(stub); // jump target used at the end of the stub
+
+ __ bind(entry->_stub_label); // the stub's code starts here
+ InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset); // (pc - offset) is the start of the code being emitted; adding the recorded offset gives the safepoint's address
+#ifdef _LP64
+ __ lea(rscratch1, safepoint_pc);
+ __ movptr(Address(r15_thread, JavaThread::saved_exception_pc_offset()), rscratch1); // store the safepoint pc into the thread's saved_exception_pc slot
+#else
+ const Register tmp1 = rcx; // this path borrows two registers and restores them below
+ const Register tmp2 = rdx;
+ __ push(tmp1);
+ __ push(tmp2);
+
+ __ lea(tmp1, safepoint_pc);
+ __ get_thread(tmp2); // current thread is fetched explicitly on this path
+ __ movptr(Address(tmp2, JavaThread::saved_exception_pc_offset()), tmp1); // same store as the 64-bit path
+
+ __ pop(tmp2); // restore in reverse push order
+ __ pop(tmp1);
+#endif
+ __ jump(callback_addr); // continue in the polling page return handler
+}
+#undef __
diff --git a/src/hotspot/cpu/x86/frame_x86.cpp b/src/hotspot/cpu/x86/frame_x86.cpp
index c433eabf993..1e9bf12cd2b 100644
--- a/src/hotspot/cpu/x86/frame_x86.cpp
+++ b/src/hotspot/cpu/x86/frame_x86.cpp
@@ -36,6 +36,7 @@
#include "runtime/monitorChunk.hpp"
#include "runtime/os.inline.hpp"
#include "runtime/signature.hpp"
+#include "runtime/stackWatermarkSet.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/stubRoutines.hpp"
#include "vmreg_x86.inline.hpp"
@@ -469,8 +470,8 @@ frame frame::sender_for_compiled_frame(RegisterMap* map) const {
//------------------------------------------------------------------------------
-// frame::sender
-frame frame::sender(RegisterMap* map) const {
+// frame::sender_raw
+frame frame::sender_raw(RegisterMap* map) const {
// Default is we done have to follow them. The sender_for_xxx will
// update it accordingly
map->set_include_argument_oops(false);
@@ -487,6 +488,16 @@ frame frame::sender(RegisterMap* map) const {
return frame(sender_sp(), link(), sender_pc());
}
+frame frame::sender(RegisterMap* map) const {
+  // Compute the sending frame first; sender_raw applies no barriers itself.
+  frame caller = sender_raw(map);
+  if (map->process_frames()) {
+    // The map requests frame processing, so report the new frame to the watermark set.
+    StackWatermarkSet::on_iteration(map->thread(), caller);
+  }
+  return caller;
+}
+
bool frame::is_interpreted_frame_valid(JavaThread* thread) const {
assert(is_interpreted_frame(), "Not an interpreted frame");
// These are reasonable sanity checks
diff --git a/src/hotspot/cpu/x86/frame_x86.hpp b/src/hotspot/cpu/x86/frame_x86.hpp
index ffe5e92275d..26dbb2aa956 100644
--- a/src/hotspot/cpu/x86/frame_x86.hpp
+++ b/src/hotspot/cpu/x86/frame_x86.hpp
@@ -156,4 +156,7 @@
static jint interpreter_frame_expression_stack_direction() { return -1; }
+ // returns the sending frame, without applying any barriers
+ frame sender_raw(RegisterMap* map) const;
+
#endif // CPU_X86_FRAME_X86_HPP
diff --git a/src/hotspot/cpu/x86/gc/shenandoah/c1/shenandoahBarrierSetC1_x86.cpp b/src/hotspot/cpu/x86/gc/shenandoah/c1/shenandoahBarrierSetC1_x86.cpp
index 58dcd9ed5fb..2aac0608207 100644
--- a/src/hotspot/cpu/x86/gc/shenandoah/c1/shenandoahBarrierSetC1_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/shenandoah/c1/shenandoahBarrierSetC1_x86.cpp
@@ -111,7 +111,8 @@ LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRIt
__ xchg(access.resolved_addr(), result, result, LIR_OprFact::illegalOpr);
if (access.is_oop()) {
- result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), false);
+ ShenandoahBarrierSet::AccessKind kind = ShenandoahBarrierSet::access_kind(access.decorators(), access.type());
+ result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), kind);
LIR_Opr tmp = gen->new_register(type);
__ move(result, tmp);
result = tmp;
diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
index 5ce3cc95e93..40f16ef2731 100644
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
@@ -32,7 +32,6 @@
#include "gc/shenandoah/shenandoahThreadLocalData.hpp"
#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
#include "interpreter/interpreter.hpp"
-#include "interpreter/interp_masm.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/thread.hpp"
#include "utilities/macros.hpp"
@@ -44,8 +43,6 @@
#define __ masm->
-address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL;
-
static void save_xmm_registers(MacroAssembler* masm) {
__ subptr(rsp, 64);
__ movdbl(Address(rsp, 0), xmm0);
@@ -271,11 +268,14 @@ void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm,
__ bind(done);
}
-void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address src) {
+void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address src, ShenandoahBarrierSet::AccessKind kind) {
assert(ShenandoahLoadRefBarrier, "Should be enabled");
- Label done;
+ Label heap_stable, not_cset;
+ __ block_comment("load_reference_barrier { ");
+
+ // Check if GC is active
#ifdef _LP64
Register thread = r15_thread;
#else
@@ -289,138 +289,130 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembl
Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
__ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
- __ jccb(Assembler::zero, done);
-
- // Use rsi for src address
- const Register src_addr = rsi;
- // Setup address parameter first, if it does not clobber oop in dst
- bool need_addr_setup = (src_addr != dst);
-
- if (need_addr_setup) {
- __ push(src_addr);
- __ lea(src_addr, src);
-
- if (dst != rax) {
- // Move obj into rax and save rax
- __ push(rax);
- __ movptr(rax, dst);
- }
- } else {
- // dst == rsi
- __ push(rax);
- __ movptr(rax, dst);
-
- // we can clobber it, since it is outgoing register
- __ lea(src_addr, src);
- }
-
- save_xmm_registers(masm);
- __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb())));
- restore_xmm_registers(masm);
-
- if (need_addr_setup) {
- if (dst != rax) {
- __ movptr(dst, rax);
- __ pop(rax);
+ __ jcc(Assembler::zero, heap_stable);
+
+ Register tmp1 = noreg, tmp2 = noreg;
+ if (kind == ShenandoahBarrierSet::AccessKind::NORMAL) {
+ // Test for object in cset
+ // Allocate temporary registers
+ for (int i = 0; i < 8; i++) {
+ Register r = as_Register(i);
+ if (r != rsp && r != rbp && r != dst && r != src.base() && r != src.index()) {
+ if (tmp1 == noreg) {
+ tmp1 = r;
+ } else {
+ tmp2 = r;
+ break;
+ }
+ }
}
- __ pop(src_addr);
- } else {
- __ movptr(dst, rax);
- __ pop(rax);
+ assert(tmp1 != noreg, "tmp1 allocated");
+ assert(tmp2 != noreg, "tmp2 allocated");
+ assert_different_registers(tmp1, tmp2, src.base(), src.index());
+ assert_different_registers(tmp1, tmp2, dst);
+
+ __ push(tmp1);
+ __ push(tmp2);
+
+ // Optimized cset-test
+ __ movptr(tmp1, dst);
+ __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
+ __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
+ __ movbool(tmp1, Address(tmp1, tmp2, Address::times_1));
+ __ testbool(tmp1);
+ __ jcc(Assembler::zero, not_cset);
+ }
+
+ uint num_saved_regs = 4 + (dst != rax ? 1 : 0) LP64_ONLY(+4);
+ __ subptr(rsp, num_saved_regs * wordSize);
+ uint slot = num_saved_regs;
+ if (dst != rax) {
+ __ movptr(Address(rsp, (--slot) * wordSize), rax);
}
-
- __ bind(done);
-
-#ifndef _LP64
- __ pop(thread);
+ __ movptr(Address(rsp, (--slot) * wordSize), rcx);
+ __ movptr(Address(rsp, (--slot) * wordSize), rdx);
+ __ movptr(Address(rsp, (--slot) * wordSize), rdi);
+ __ movptr(Address(rsp, (--slot) * wordSize), rsi);
+#ifdef _LP64
+ __ movptr(Address(rsp, (--slot) * wordSize), r8);
+ __ movptr(Address(rsp, (--slot) * wordSize), r9);
+ __ movptr(Address(rsp, (--slot) * wordSize), r10);
+ __ movptr(Address(rsp, (--slot) * wordSize), r11);
+ // r12-r15 are callee saved in all calling conventions
#endif
-}
-
-void ShenandoahBarrierSetAssembler::load_reference_barrier_native(MacroAssembler* masm, Register dst, Address src) {
- if (!ShenandoahLoadRefBarrier) {
- return;
- }
-
- Label done;
- Label not_null;
- Label slow_path;
- __ block_comment("load_reference_barrier_native { ");
-
- // null check
- __ testptr(dst, dst);
- __ jcc(Assembler::notZero, not_null);
- __ jmp(done);
- __ bind(not_null);
-
+ assert(slot == 0, "must use all slots");
+ // Shuffle registers such that dst is in c_rarg0 and addr in c_rarg1.
#ifdef _LP64
- Register thread = r15_thread;
+ Register arg0 = c_rarg0, arg1 = c_rarg1;
#else
- Register thread = rcx;
- if (thread == dst) {
- thread = rbx;
- }
- __ push(thread);
- __ get_thread(thread);
-#endif
- assert_different_registers(dst, thread);
-
- Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
- __ testb(gc_state, ShenandoahHeap::EVACUATION);
-#ifndef _LP64
- __ pop(thread);
+ Register arg0 = rdi, arg1 = rsi;
#endif
- __ jccb(Assembler::notZero, slow_path);
- __ jmp(done);
- __ bind(slow_path);
-
- if (dst != rax) {
- __ push(rax);
+ if (dst == arg1) {
+ __ lea(arg0, src);
+ __ xchgptr(arg1, arg0);
+ } else {
+ __ lea(arg1, src);
+ __ movptr(arg0, dst);
}
- __ push(rcx);
- __ push(rdx);
- __ push(rdi);
- __ push(rsi);
-#ifdef _LP64
- __ push(r8);
- __ push(r9);
- __ push(r10);
- __ push(r11);
- __ push(r12);
- __ push(r13);
- __ push(r14);
- __ push(r15);
-#endif
-
- assert_different_registers(dst, rsi);
- __ lea(rsi, src);
save_xmm_registers(masm);
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native), dst, rsi);
+ switch (kind) {
+ case ShenandoahBarrierSet::AccessKind::NORMAL:
+ if (UseCompressedOops) {
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), arg0, arg1);
+ } else {
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), arg0, arg1);
+ }
+ break;
+ case ShenandoahBarrierSet::AccessKind::WEAK:
+ if (UseCompressedOops) {
+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow), arg0, arg1);
+ } else {
+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), arg0, arg1);
+ }
+ break;
+ case ShenandoahBarrierSet::AccessKind::NATIVE:
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), arg0, arg1);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
restore_xmm_registers(masm);
#ifdef _LP64
- __ pop(r15);
- __ pop(r14);
- __ pop(r13);
- __ pop(r12);
- __ pop(r11);
- __ pop(r10);
- __ pop(r9);
- __ pop(r8);
+ __ movptr(r11, Address(rsp, (slot++) * wordSize));
+ __ movptr(r10, Address(rsp, (slot++) * wordSize));
+ __ movptr(r9, Address(rsp, (slot++) * wordSize));
+ __ movptr(r8, Address(rsp, (slot++) * wordSize));
#endif
- __ pop(rsi);
- __ pop(rdi);
- __ pop(rdx);
- __ pop(rcx);
+ __ movptr(rsi, Address(rsp, (slot++) * wordSize));
+ __ movptr(rdi, Address(rsp, (slot++) * wordSize));
+ __ movptr(rdx, Address(rsp, (slot++) * wordSize));
+ __ movptr(rcx, Address(rsp, (slot++) * wordSize));
if (dst != rax) {
__ movptr(dst, rax);
- __ pop(rax);
+ __ movptr(rax, Address(rsp, (slot++) * wordSize));
}
- __ bind(done);
- __ block_comment("load_reference_barrier_native { ");
+ assert(slot == num_saved_regs, "must use all slots");
+ __ addptr(rsp, num_saved_regs * wordSize);
+
+ __ bind(not_cset);
+
+ if (kind == ShenandoahBarrierSet::AccessKind::NORMAL) {
+ __ pop(tmp2);
+ __ pop(tmp1);
+ }
+
+ __ bind(heap_stable);
+
+ __ block_comment("} load_reference_barrier");
+
+#ifndef _LP64
+ __ pop(thread);
+#endif
}
void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Register dst, Register tmp) {
@@ -464,16 +456,6 @@ void ShenandoahBarrierSetAssembler::storeval_barrier_impl(MacroAssembler* masm,
}
}
-void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address src) {
- if (ShenandoahLoadRefBarrier) {
- Label done;
- __ testptr(dst, dst);
- __ jcc(Assembler::zero, done);
- load_reference_barrier_not_null(masm, dst, src);
- __ bind(done);
- }
-}
-
//
// Arguments:
//
@@ -504,7 +486,7 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet d
// Preserve src location for LRB
if (dst == src.base() || dst == src.index()) {
- // Use tmp1 for dst if possible, as it is not used in BarrierAssembler::load_at()
+ // Use tmp1 for dst if possible, as it is not used in BarrierAssembler::load_at()
if (tmp1->is_valid() && tmp1 != src.base() && tmp1 != src.index()) {
dst = tmp1;
use_tmp1_for_dst = true;
@@ -517,11 +499,8 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet d
BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
- if (ShenandoahBarrierSet::use_load_reference_barrier_native(decorators, type)) {
- load_reference_barrier_native(masm, dst, src);
- } else {
- load_reference_barrier(masm, dst, src);
- }
+ ShenandoahBarrierSet::AccessKind kind = ShenandoahBarrierSet::access_kind(decorators, type);
+ load_reference_barrier(masm, dst, src, kind);
// Move loaded oop to final destination
if (dst != result_dst) {
@@ -638,7 +617,8 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
bool exchange, Register tmp1, Register tmp2) {
assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled");
assert(oldval == rax, "must be in rax for implicit use in cmpxchg");
- assert_different_registers(oldval, newval, tmp1, tmp2);
+ assert_different_registers(oldval, tmp1, tmp2);
+ assert_different_registers(newval, tmp1, tmp2);
Label L_success, L_failure;
@@ -870,10 +850,18 @@ void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assemble
__ bind(slow_path);
ce->store_parameter(res, 0);
ce->store_parameter(addr, 1);
- if (stub->is_native()) {
- __ call(RuntimeAddress(bs->load_reference_barrier_native_rt_code_blob()->code_begin()));
- } else {
- __ call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin()));
+ switch (stub->kind()) {
+ case ShenandoahBarrierSet::AccessKind::NORMAL:
+ __ call(RuntimeAddress(bs->load_reference_barrier_normal_rt_code_blob()->code_begin()));
+ break;
+ case ShenandoahBarrierSet::AccessKind::WEAK:
+ __ call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin()));
+ break;
+ case ShenandoahBarrierSet::AccessKind::NATIVE:
+ __ call(RuntimeAddress(bs->load_reference_barrier_native_rt_code_blob()->code_begin()));
+ break;
+ default:
+ ShouldNotReachHere();
}
__ jmp(*stub->continuation());
}
@@ -938,7 +926,7 @@ void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAss
__ epilogue();
}
-void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, bool is_native) {
+void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, ShenandoahBarrierSet::AccessKind kind) {
__ prologue("shenandoah_load_reference_barrier", false);
// arg0 : object to be resolved
@@ -947,20 +935,40 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s
#ifdef _LP64
__ load_parameter(0, c_rarg0);
__ load_parameter(1, c_rarg1);
- if (is_native) {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native), c_rarg0, c_rarg1);
- } else if (UseCompressedOops) {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), c_rarg0, c_rarg1);
- } else {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), c_rarg0, c_rarg1);
+ switch (kind) {
+ case ShenandoahBarrierSet::AccessKind::NORMAL:
+ if (UseCompressedOops) {
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), c_rarg0, c_rarg1);
+ } else {
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), c_rarg0, c_rarg1);
+ }
+ break;
+ case ShenandoahBarrierSet::AccessKind::WEAK:
+ if (UseCompressedOops) {
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow), c_rarg0, c_rarg1);
+ } else {
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), c_rarg0, c_rarg1);
+ }
+ break;
+ case ShenandoahBarrierSet::AccessKind::NATIVE:
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), c_rarg0, c_rarg1);
+ break;
+ default:
+ ShouldNotReachHere();
}
#else
__ load_parameter(0, rax);
__ load_parameter(1, rbx);
- if (is_native) {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native), rax, rbx);
- } else {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rbx);
+ switch (kind) {
+ case ShenandoahBarrierSet::AccessKind::NORMAL:
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rbx);
+ break;
+ case ShenandoahBarrierSet::AccessKind::WEAK:
+ case ShenandoahBarrierSet::AccessKind::NATIVE:
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), rax, rbx);
+ break;
+ default:
+ ShouldNotReachHere();
}
#endif
@@ -972,104 +980,3 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s
#undef __
#endif // COMPILER1
-
-address ShenandoahBarrierSetAssembler::shenandoah_lrb() {
- assert(_shenandoah_lrb != NULL, "need load reference barrier stub");
- return _shenandoah_lrb;
-}
-
-#define __ cgen->assembler()->
-
-/*
- * Incoming parameters:
- * rax: oop
- * rsi: load address
- */
-address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) {
- __ align(CodeEntryAlignment);
- StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
- address start = __ pc();
-
- Label slow_path;
-
- // We use RDI, which also serves as argument register for slow call.
- // RAX always holds the src object ptr, except after the slow call,
- // then it holds the result. R8/RBX is used as temporary register.
-
- Register tmp1 = rdi;
- Register tmp2 = LP64_ONLY(r8) NOT_LP64(rbx);
-
- __ push(tmp1);
- __ push(tmp2);
-
- // Check for object being in the collection set.
- __ mov(tmp1, rax);
- __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
- __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
- __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
- __ testbool(tmp2);
- __ jccb(Assembler::notZero, slow_path);
- __ pop(tmp2);
- __ pop(tmp1);
- __ ret(0);
-
- __ bind(slow_path);
-
- __ push(rcx);
- __ push(rdx);
- __ push(rdi);
-#ifdef _LP64
- __ push(r8);
- __ push(r9);
- __ push(r10);
- __ push(r11);
- __ push(r12);
- __ push(r13);
- __ push(r14);
- __ push(r15);
-#endif
- __ push(rbp);
- __ movptr(rbp, rsp);
- __ andptr(rsp, -StackAlignmentInBytes);
- __ push_FPU_state();
- if (UseCompressedOops) {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), rax, rsi);
- } else {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rsi);
- }
- __ pop_FPU_state();
- __ movptr(rsp, rbp);
- __ pop(rbp);
-#ifdef _LP64
- __ pop(r15);
- __ pop(r14);
- __ pop(r13);
- __ pop(r12);
- __ pop(r11);
- __ pop(r10);
- __ pop(r9);
- __ pop(r8);
-#endif
- __ pop(rdi);
- __ pop(rdx);
- __ pop(rcx);
-
- __ pop(tmp2);
- __ pop(tmp1);
- __ ret(0);
-
- return start;
-}
-
-#undef __
-
-void ShenandoahBarrierSetAssembler::barrier_stubs_init() {
- if (ShenandoahLoadRefBarrier) {
- int stub_code_size = 4096;
- ResourceMark rm;
- BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size);
- CodeBuffer buf(bb);
- StubCodeGenerator cgen(&buf);
- _shenandoah_lrb = generate_shenandoah_lrb(&cgen);
- }
-}
diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp
index 60aa3b4600d..108b5670206 100644
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp
@@ -27,6 +27,8 @@
#include "asm/macroAssembler.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
+#include "gc/shenandoah/shenandoahBarrierSet.hpp"
+
#ifdef COMPILER1
class LIR_Assembler;
class ShenandoahPreBarrierStub;
@@ -38,8 +40,6 @@ class StubCodeGenerator;
class ShenandoahBarrierSetAssembler: public BarrierSetAssembler {
private:
- static address _shenandoah_lrb;
-
void satb_write_barrier_pre(MacroAssembler* masm,
Register obj,
Register pre_val,
@@ -56,25 +56,18 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler {
bool tosca_live,
bool expand_call);
- void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address src);
-
void storeval_barrier_impl(MacroAssembler* masm, Register dst, Register tmp);
- address generate_shenandoah_lrb(StubCodeGenerator* cgen);
-
public:
- static address shenandoah_lrb();
-
void storeval_barrier(MacroAssembler* masm, Register dst, Register tmp);
#ifdef COMPILER1
void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub);
void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub);
void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
- void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, bool is_native);
+ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, ShenandoahBarrierSet::AccessKind kind);
#endif
- void load_reference_barrier(MacroAssembler* masm, Register dst, Address src);
- void load_reference_barrier_native(MacroAssembler* masm, Register dst, Address src);
+ void load_reference_barrier(MacroAssembler* masm, Register dst, Address src, ShenandoahBarrierSet::AccessKind kind);
void cmpxchg_oop(MacroAssembler* masm,
Register res, Address addr, Register oldval, Register newval,
@@ -87,8 +80,6 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler {
Address dst, Register val, Register tmp1, Register tmp2);
virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
Register obj, Register tmp, Label& slowpath);
- virtual void barrier_stubs_init();
-
};
#endif // CPU_X86_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_X86_HPP
diff --git a/src/hotspot/cpu/x86/gc/z/zGlobals_x86.hpp b/src/hotspot/cpu/x86/gc/z/zGlobals_x86.hpp
index 83c8caa6a58..db558d8cb2a 100644
--- a/src/hotspot/cpu/x86/gc/z/zGlobals_x86.hpp
+++ b/src/hotspot/cpu/x86/gc/z/zGlobals_x86.hpp
@@ -24,10 +24,9 @@
#ifndef CPU_X86_GC_Z_ZGLOBALS_X86_HPP
#define CPU_X86_GC_Z_ZGLOBALS_X86_HPP
-const size_t ZPlatformGranuleSizeShift = 21; // 2MB
-const size_t ZPlatformHeapViews = 3;
-const size_t ZPlatformNMethodDisarmedOffset = 4;
-const size_t ZPlatformCacheLineSize = 64;
+const size_t ZPlatformGranuleSizeShift = 21; // 2MB
+const size_t ZPlatformHeapViews = 3;
+const size_t ZPlatformCacheLineSize = 64;
size_t ZPlatformAddressOffsetBits();
size_t ZPlatformAddressMetadataShift();
diff --git a/src/hotspot/cpu/x86/interp_masm_x86.cpp b/src/hotspot/cpu/x86/interp_masm_x86.cpp
index 738771e800a..140dcfc2f06 100644
--- a/src/hotspot/cpu/x86/interp_masm_x86.cpp
+++ b/src/hotspot/cpu/x86/interp_masm_x86.cpp
@@ -605,6 +605,10 @@ void InterpreterMacroAssembler::push_i(Register r) {
push(r);
}
+void InterpreterMacroAssembler::push_i_or_ptr(Register r) {
+ push(r);
+}
+
void InterpreterMacroAssembler::push_f(XMMRegister r) {
subptr(rsp, wordSize);
movflt(Address(rsp, 0), r);
@@ -853,7 +857,7 @@ void InterpreterMacroAssembler::dispatch_base(TosState state,
Label no_safepoint, dispatch;
if (table != safepoint_table && generate_poll) {
NOT_PRODUCT(block_comment("Thread-local Safepoint poll"));
- testb(Address(r15_thread, Thread::polling_page_offset()), SafepointMechanism::poll_bit());
+ testb(Address(r15_thread, Thread::polling_word_offset()), SafepointMechanism::poll_bit());
jccb(Assembler::zero, no_safepoint);
lea(rscratch1, ExternalAddress((address)safepoint_table));
@@ -872,7 +876,7 @@ void InterpreterMacroAssembler::dispatch_base(TosState state,
Label no_safepoint;
const Register thread = rcx;
get_thread(thread);
- testb(Address(thread, Thread::polling_page_offset()), SafepointMechanism::poll_bit());
+ testb(Address(thread, Thread::polling_word_offset()), SafepointMechanism::poll_bit());
jccb(Assembler::zero, no_safepoint);
ArrayAddress dispatch_addr(ExternalAddress((address)safepoint_table), index);
@@ -961,6 +965,7 @@ void InterpreterMacroAssembler::narrow(Register result) {
// remove activation
//
+// Apply stack watermark barrier.
// Unlock the receiver if this is a synchronized method.
// Unlock any Java monitors from syncronized blocks.
// Remove the activation from the stack.
@@ -987,7 +992,23 @@ void InterpreterMacroAssembler::remove_activation(
const Register rmon = LP64_ONLY(c_rarg1) NOT_LP64(rcx);
// monitor pointers need different register
// because rdx may have the result in it
- NOT_LP64(get_thread(rcx);)
+ NOT_LP64(get_thread(rthread);)
+
+ // The below poll is for the stack watermark barrier. It allows fixing up frames lazily,
+ // that would normally not be safe to use. Such bad returns into unsafe territory of
+ // the stack, will call InterpreterRuntime::at_unwind.
+ Label slow_path;
+ Label fast_path;
+ safepoint_poll(slow_path, rthread, true /* at_return */, false /* in_nmethod */);
+ jmp(fast_path);
+ bind(slow_path);
+ push(state);
+ set_last_Java_frame(rthread, noreg, rbp, (address)pc());
+ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), rthread);
+ NOT_LP64(get_thread(rthread);) // call_VM clobbered it, restore
+ reset_last_Java_frame(rthread, true);
+ pop(state);
+ bind(fast_path);
// get the value of _do_not_unlock_if_synchronized into rdx
const Address do_not_unlock_if_synchronized(rthread,
@@ -1938,7 +1959,7 @@ void InterpreterMacroAssembler::profile_switch_case(Register index,
void InterpreterMacroAssembler::_interp_verify_oop(Register reg, TosState state, const char* file, int line) {
if (state == atos) {
- MacroAssembler::_verify_oop(reg, "broken oop", file, line);
+ MacroAssembler::_verify_oop_checked(reg, "broken oop", file, line);
}
}
diff --git a/src/hotspot/cpu/x86/interp_masm_x86.hpp b/src/hotspot/cpu/x86/interp_masm_x86.hpp
index 3e2e33278a1..288b1bd1dfe 100644
--- a/src/hotspot/cpu/x86/interp_masm_x86.hpp
+++ b/src/hotspot/cpu/x86/interp_masm_x86.hpp
@@ -139,9 +139,18 @@ class InterpreterMacroAssembler: public MacroAssembler {
// Expression stack
void pop_ptr(Register r = rax);
void pop_i(Register r = rax);
+
+ // On x86, pushing a ptr or an int is semantically identical, but we
+ // maintain a distinction for clarity and for making it easier to change
+ // semantics in the future
void push_ptr(Register r = rax);
void push_i(Register r = rax);
+ // push_i_or_ptr is provided for when explicitly allowing either a ptr or
+ // an int might have some advantage, while still documenting the fact that a
+ // ptr might be pushed to the stack.
+ void push_i_or_ptr(Register r = rax);
+
void push_f(XMMRegister r);
void pop_f(XMMRegister r);
void pop_d(XMMRegister r);
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp
index 8b19ddab7b8..d7fabfbbedb 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp
@@ -112,6 +112,7 @@ void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}
+
void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}
@@ -369,11 +370,6 @@ void MacroAssembler::pushptr(AddressLiteral src) {
}
}
-void MacroAssembler::set_word_if_not_zero(Register dst) {
- xorl(dst, dst);
- set_byte_if_not_zero(dst);
-}
-
static void pass_arg0(MacroAssembler* masm, Register arg) {
masm->push(arg);
}
@@ -713,8 +709,12 @@ void MacroAssembler::movptr(Register dst, ArrayAddress src) {
// src should NEVER be a real pointer. Use AddressLiteral for true pointers
void MacroAssembler::movptr(Address dst, intptr_t src) {
- mov64(rscratch1, src);
- movq(dst, rscratch1);
+ if (is_simm32(src)) {
+ movptr(dst, checked_cast<int32_t>(src));
+ } else {
+ mov64(rscratch1, src);
+ movq(dst, rscratch1);
+ }
}
// These are mostly for initializing NULL
@@ -2495,6 +2495,7 @@ void MacroAssembler::movdqu(XMMRegister dst, Address src) {
void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) {
assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
+ if (dst->encoding() == src->encoding()) return;
Assembler::movdqu(dst, src);
}
@@ -2519,6 +2520,7 @@ void MacroAssembler::vmovdqu(XMMRegister dst, Address src) {
void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) {
assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
+ if (dst->encoding() == src->encoding()) return;
Assembler::vmovdqu(dst, src);
}
@@ -2532,6 +2534,64 @@ void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scrat
}
}
+
+void MacroAssembler::kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg) {
+ if (reachable(src)) {
+ kmovwl(dst, as_Address(src));
+ } else {
+ lea(scratch_reg, src);
+ kmovwl(dst, Address(scratch_reg, 0));
+ }
+}
+
+void MacroAssembler::evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
+ int vector_len, Register scratch_reg) {
+ if (reachable(src)) {
+ if (mask == k0) {
+ Assembler::evmovdqub(dst, as_Address(src), merge, vector_len);
+ } else {
+ Assembler::evmovdqub(dst, mask, as_Address(src), merge, vector_len);
+ }
+ } else {
+ lea(scratch_reg, src);
+ if (mask == k0) {
+ Assembler::evmovdqub(dst, Address(scratch_reg, 0), merge, vector_len);
+ } else {
+ Assembler::evmovdqub(dst, mask, Address(scratch_reg, 0), merge, vector_len);
+ }
+ }
+}
+
+void MacroAssembler::evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
+ int vector_len, Register scratch_reg) {
+ if (reachable(src)) {
+ Assembler::evmovdquw(dst, mask, as_Address(src), merge, vector_len);
+ } else {
+ lea(scratch_reg, src);
+ Assembler::evmovdquw(dst, mask, Address(scratch_reg, 0), merge, vector_len);
+ }
+}
+
+void MacroAssembler::evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
+ int vector_len, Register scratch_reg) {
+ if (reachable(src)) {
+ Assembler::evmovdqul(dst, mask, as_Address(src), merge, vector_len);
+ } else {
+ lea(scratch_reg, src);
+ Assembler::evmovdqul(dst, mask, Address(scratch_reg, 0), merge, vector_len);
+ }
+}
+
+void MacroAssembler::evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
+ int vector_len, Register scratch_reg) {
+ if (reachable(src)) {
+ Assembler::evmovdquq(dst, mask, as_Address(src), merge, vector_len);
+ } else {
+ lea(scratch_reg, src);
+ Assembler::evmovdquq(dst, mask, Address(scratch_reg, 0), merge, vector_len);
+ }
+}
+
void MacroAssembler::evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
if (reachable(src)) {
Assembler::evmovdquq(dst, as_Address(src), vector_len);
@@ -2699,16 +2759,15 @@ void MacroAssembler::save_rax(Register tmp) {
else if (tmp != rax) mov(tmp, rax);
}
-void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg, Register temp_reg) {
-#ifdef _LP64
- assert(thread_reg == r15_thread, "should be");
-#else
- if (thread_reg == noreg) {
- thread_reg = temp_reg;
- get_thread(thread_reg);
+void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg, bool at_return, bool in_nmethod) {
+ if (at_return) {
+ // Note that when in_nmethod is set, the stack pointer is incremented before the poll. Therefore,
+ // we may safely use rsp instead to perform the stack watermark check.
+ cmpptr(in_nmethod ? rsp : rbp, Address(thread_reg, Thread::polling_word_offset()));
+ jcc(Assembler::above, slow_path);
+ return;
}
-#endif
- testb(Address(thread_reg, Thread::polling_page_offset()), SafepointMechanism::poll_bit());
+ testb(Address(thread_reg, Thread::polling_word_offset()), SafepointMechanism::poll_bit());
jcc(Assembler::notZero, slow_path); // handshake bit set implies poll
}
@@ -3018,6 +3077,98 @@ void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src,
Assembler::vpcmpeqw(dst, nds, src, vector_len);
}
+void MacroAssembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds,
+ AddressLiteral src, int vector_len, Register scratch_reg) {
+ if (reachable(src)) {
+ Assembler::evpcmpeqd(kdst, mask, nds, as_Address(src), vector_len);
+ } else {
+ lea(scratch_reg, src);
+ Assembler::evpcmpeqd(kdst, mask, nds, Address(scratch_reg, 0), vector_len);
+ }
+}
+
+void MacroAssembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
+ int comparison, int vector_len, Register scratch_reg) {
+ if (reachable(src)) {
+ Assembler::evpcmpd(kdst, mask, nds, as_Address(src), comparison, vector_len);
+ } else {
+ lea(scratch_reg, src);
+ Assembler::evpcmpd(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
+ }
+}
+
+void MacroAssembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
+ int comparison, int vector_len, Register scratch_reg) {
+ if (reachable(src)) {
+ Assembler::evpcmpq(kdst, mask, nds, as_Address(src), comparison, vector_len);
+ } else {
+ lea(scratch_reg, src);
+ Assembler::evpcmpq(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
+ }
+}
+
+void MacroAssembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
+ int comparison, int vector_len, Register scratch_reg) {
+ if (reachable(src)) {
+ Assembler::evpcmpb(kdst, mask, nds, as_Address(src), comparison, vector_len);
+ } else {
+ lea(scratch_reg, src);
+ Assembler::evpcmpb(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
+ }
+}
+
+void MacroAssembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
+ int comparison, int vector_len, Register scratch_reg) {
+ if (reachable(src)) {
+ Assembler::evpcmpw(kdst, mask, nds, as_Address(src), comparison, vector_len);
+ } else {
+ lea(scratch_reg, src);
+ Assembler::evpcmpw(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
+ }
+}
+
+void MacroAssembler::vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len) {
+ if (width == Assembler::Q) {
+ Assembler::vpcmpCCq(dst, nds, src, cond_encoding, vector_len);
+ } else {
+ Assembler::vpcmpCCbwd(dst, nds, src, cond_encoding, vector_len);
+ }
+}
+
+void MacroAssembler::vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg) {
+ int eq_cond_enc = 0x29;
+ int gt_cond_enc = 0x37;
+ if (width != Assembler::Q) {
+ eq_cond_enc = 0x74 + width;
+ gt_cond_enc = 0x64 + width;
+ }
+ switch (cond) {
+ case eq:
+ vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len);
+ break;
+ case neq:
+ vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len);
+ vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
+ break;
+ case le:
+ vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len);
+ vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
+ break;
+ case nlt:
+ vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len);
+ vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
+ break;
+ case lt:
+ vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len);
+ break;
+ case nle:
+ vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len);
+ break;
+ default:
+ assert(false, "Should not reach here");
+ }
+}
+
void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
Assembler::vpmovzxbw(dst, src, vector_len);
@@ -3142,6 +3293,16 @@ void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src
}
}
+void MacroAssembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src,
+ bool merge, int vector_len, Register scratch_reg) {
+ if (reachable(src)) {
+ Assembler::evpord(dst, mask, nds, as_Address(src), merge, vector_len);
+ } else {
+ lea(scratch_reg, src);
+ Assembler::evpord(dst, mask, nds, Address(scratch_reg, 0), merge, vector_len);
+ }
+}
+
void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
if (reachable(src)) {
vdivsd(dst, nds, as_Address(src));
@@ -3238,7 +3399,14 @@ void MacroAssembler::vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src,
}
}
-//-------------------------------------------------------------------------------------------
+void MacroAssembler::vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
+ if (reachable(src)) {
+ Assembler::vpermd(dst, nds, as_Address(src), vector_len);
+ } else {
+ lea(scratch_reg, src);
+ Assembler::vpermd(dst, nds, Address(scratch_reg, 0), vector_len);
+ }
+}
void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
@@ -3761,44 +3929,6 @@ void MacroAssembler::vallones(XMMRegister dst, int vector_len) {
}
}
-RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
- Register tmp,
- int offset) {
- intptr_t value = *delayed_value_addr;
- if (value != 0)
- return RegisterOrConstant(value + offset);
-
- // load indirectly to solve generation ordering problem
- movptr(tmp, ExternalAddress((address) delayed_value_addr));
-
-#ifdef ASSERT
- { Label L;
- testptr(tmp, tmp);
- if (WizardMode) {
- const char* buf = NULL;
- {
- ResourceMark rm;
- stringStream ss;
- ss.print("DelayedValue=" INTPTR_FORMAT, delayed_value_addr[1]);
- buf = code_string(ss.as_string());
- }
- jcc(Assembler::notZero, L);
- STOP(buf);
- } else {
- jccb(Assembler::notZero, L);
- hlt();
- }
- bind(L);
- }
-#endif
-
- if (offset != 0)
- addptr(tmp, offset);
-
- return RegisterOrConstant(tmp);
-}
-
-
Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
int extra_slot_offset) {
// cf. TemplateTable::prepare_invoke(), if (load_receiver).
@@ -3820,7 +3950,6 @@ Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
return Address(rsp, scale_reg, scale_factor, offset);
}
-
void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
if (!VerifyOops) return;
@@ -3913,6 +4042,9 @@ class ControlWord {
case 1: rc = "round down"; break;
case 2: rc = "round up "; break;
case 3: rc = "chop "; break;
+ default:
+ rc = NULL; // silence compiler warnings
+ fatal("Unknown rounding control: %d", rounding_control());
};
// precision control
const char* pc;
@@ -3921,6 +4053,9 @@ class ControlWord {
case 1: pc = "reserved"; break;
case 2: pc = "53 bits "; break;
case 3: pc = "64 bits "; break;
+ default:
+ pc = NULL; // silence compiler warnings
+ fatal("Unknown precision control: %d", precision_control());
};
// flags
char f[9];
@@ -5764,7 +5899,7 @@ void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register
bind(VECTOR64_LOOP);
// AVX512 code to compare 64 byte vectors.
- evmovdqub(rymm0, Address(obja, result), Assembler::AVX_512bit);
+ evmovdqub(rymm0, Address(obja, result), false, Assembler::AVX_512bit);
evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit);
kortestql(k7, k7);
jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL); // mismatch
@@ -5783,7 +5918,7 @@ void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register
notq(tmp2);
kmovql(k3, tmp2);
- evmovdqub(rymm0, k3, Address(obja, result), Assembler::AVX_512bit);
+ evmovdqub(rymm0, k3, Address(obja, result), false, Assembler::AVX_512bit);
evpcmpeqb(k7, k3, rymm0, Address(objb, result), Assembler::AVX_512bit);
ktestql(k7, k3);
@@ -7578,7 +7713,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
notl(result);
kmovdl(k3, result);
- evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit);
+ evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
ktestd(k2, k3);
jcc(Assembler::carryClear, return_zero);
@@ -7603,7 +7738,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
negptr(len);
bind(copy_32_loop);
- evmovdquw(tmp1Reg, Address(src, len, Address::times_2), Assembler::AVX_512bit);
+ evmovdquw(tmp1Reg, Address(src, len, Address::times_2), /*merge*/ false, Assembler::AVX_512bit);
evpcmpuw(k2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
kortestdl(k2, k2);
jcc(Assembler::carryClear, return_zero);
@@ -7628,7 +7763,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
kmovdl(k3, result);
- evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit);
+ evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
ktestd(k2, k3);
jcc(Assembler::carryClear, return_zero);
@@ -7773,7 +7908,7 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len
// inflate 32 chars per iter
bind(copy_32_loop);
vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_512bit);
- evmovdquw(Address(dst, len, Address::times_2), tmp1, Assembler::AVX_512bit);
+ evmovdquw(Address(dst, len, Address::times_2), tmp1, /*merge*/ false, Assembler::AVX_512bit);
addptr(len, 32);
jcc(Assembler::notZero, copy_32_loop);
@@ -7788,7 +7923,7 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len
notl(tmp3_aliased);
kmovdl(k2, tmp3_aliased);
evpmovzxbw(tmp1, k2, Address(src, 0), Assembler::AVX_512bit);
- evmovdquw(Address(dst, 0), k2, tmp1, Assembler::AVX_512bit);
+ evmovdquw(Address(dst, 0), k2, tmp1, /*merge*/ true, Assembler::AVX_512bit);
jmp(done);
bind(avx3_threshold);
@@ -7963,6 +8098,7 @@ void MacroAssembler::cache_wbsync(bool is_pre)
sfence();
}
}
+
#endif // _LP64
Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
index 3d009d69945..e7419fc916b 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
@@ -583,22 +583,30 @@ class MacroAssembler: public Assembler {
// method handles (JSR 292)
Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
- //----
- void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0
-
// Debugging
// only if +VerifyOops
void _verify_oop(Register reg, const char* s, const char* file, int line);
void _verify_oop_addr(Address addr, const char* s, const char* file, int line);
+ void _verify_oop_checked(Register reg, const char* s, const char* file, int line) {
+ if (VerifyOops) {
+ _verify_oop(reg, s, file, line);
+ }
+ }
+ void _verify_oop_addr_checked(Address reg, const char* s, const char* file, int line) {
+ if (VerifyOops) {
+ _verify_oop_addr(reg, s, file, line);
+ }
+ }
+
// TODO: verify method and klass metadata (compare against vptr?)
void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}
void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){}
-#define verify_oop(reg) _verify_oop(reg, "broken oop " #reg, __FILE__, __LINE__)
-#define verify_oop_msg(reg, msg) _verify_oop(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__)
-#define verify_oop_addr(addr) _verify_oop_addr(addr, "broken oop addr " #addr, __FILE__, __LINE__)
+#define verify_oop(reg) _verify_oop_checked(reg, "broken oop " #reg, __FILE__, __LINE__)
+#define verify_oop_msg(reg, msg) _verify_oop_checked(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__)
+#define verify_oop_addr(addr) _verify_oop_addr_checked(addr, "broken oop addr " #addr, __FILE__, __LINE__)
#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
@@ -643,13 +651,7 @@ class MacroAssembler: public Assembler {
// Check for reserved stack access in method being exited (for JIT)
void reserved_stack_check();
- virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
- Register tmp,
- int offset);
-
- // If thread_reg is != noreg the code assumes the register passed contains
- // the thread (required on 64 bit).
- void safepoint_poll(Label& slow_path, Register thread_reg, Register temp_reg);
+ void safepoint_poll(Label& slow_path, Register thread_reg, bool at_return, bool in_nmethod);
void verify_tlab();
@@ -1078,15 +1080,59 @@ class MacroAssembler: public Assembler {
void movdqu(XMMRegister dst, Address src);
void movdqu(XMMRegister dst, XMMRegister src);
void movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg = rscratch1);
+
+ void kmovwl(KRegister dst, Register src) { Assembler::kmovwl(dst, src); }
+ void kmovwl(Register dst, KRegister src) { Assembler::kmovwl(dst, src); }
+ void kmovwl(KRegister dst, Address src) { Assembler::kmovwl(dst, src); }
+ void kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
+
// AVX Unaligned forms
void vmovdqu(Address dst, XMMRegister src);
void vmovdqu(XMMRegister dst, Address src);
void vmovdqu(XMMRegister dst, XMMRegister src);
void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
+
+ // AVX512 Unaligned
+ void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
+ void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
+ void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
+ void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); }
+ void evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
+
+ void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, src, merge, vector_len); }
+ void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); }
+ void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len) { Assembler::evmovdquw(dst, src, merge, vector_len); }
+ void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); }
+ void evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
+
+ void evmovdqul(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); }
+ void evmovdqul(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); }
+ void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
+ if (dst->encoding() == src->encoding()) return;
+ Assembler::evmovdqul(dst, src, vector_len);
+ }
+ void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); }
+ void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); }
+ void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
+ if (dst->encoding() == src->encoding() && mask == k0) return;
+ Assembler::evmovdqul(dst, mask, src, merge, vector_len);
+ }
+ void evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
+
void evmovdquq(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
- void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
void evmovdquq(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
void evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch);
+ void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
+ if (dst->encoding() == src->encoding()) return;
+ Assembler::evmovdquq(dst, src, vector_len);
+ }
+ void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
+ void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
+ void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
+ if (dst->encoding() == src->encoding() && mask == k0) return;
+ Assembler::evmovdquq(dst, mask, src, merge, vector_len);
+ }
+ void evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
// Move Aligned Double Quadword
void movdqa(XMMRegister dst, Address src) { Assembler::movdqa(dst, src); }
@@ -1208,6 +1254,30 @@ class MacroAssembler: public Assembler {
void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg);
+
+ // Vector compares
+ void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ int comparison, int vector_len) { Assembler::evpcmpd(kdst, mask, nds, src, comparison, vector_len); }
+ void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
+ int comparison, int vector_len, Register scratch_reg);
+ void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ int comparison, int vector_len) { Assembler::evpcmpq(kdst, mask, nds, src, comparison, vector_len); }
+ void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
+ int comparison, int vector_len, Register scratch_reg);
+ void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ int comparison, int vector_len) { Assembler::evpcmpb(kdst, mask, nds, src, comparison, vector_len); }
+ void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
+ int comparison, int vector_len, Register scratch_reg);
+ void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ int comparison, int vector_len) { Assembler::evpcmpw(kdst, mask, nds, src, comparison, vector_len); }
+ void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
+ int comparison, int vector_len, Register scratch_reg);
+
+
+ // Emit comparison instruction for the specified comparison predicate.
+ void vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg);
+ void vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len);
void vpmovzxbw(XMMRegister dst, Address src, int vector_len);
void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vpmovzxbw(dst, src, vector_len); }
@@ -1236,6 +1306,7 @@ class MacroAssembler: public Assembler {
void vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
void vptest(XMMRegister dst, XMMRegister src);
+ void vptest(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vptest(dst, src, vector_len); }
void punpcklbw(XMMRegister dst, XMMRegister src);
void punpcklbw(XMMRegister dst, Address src) { Assembler::punpcklbw(dst, src); }
@@ -1254,6 +1325,8 @@ class MacroAssembler: public Assembler {
void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); }
void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
+ void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
+
void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); }
void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
@@ -1309,6 +1382,9 @@ class MacroAssembler: public Assembler {
void vpxor(XMMRegister dst, XMMRegister src) { Assembler::vpxor(dst, dst, src, true); }
void vpxor(XMMRegister dst, Address src) { Assembler::vpxor(dst, dst, src, true); }
+ void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpermd(dst, nds, src, vector_len); }
+ void vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg);
+
void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
Assembler::vinserti32x4(dst, dst, src, imm8);
@@ -1727,6 +1803,35 @@ class MacroAssembler: public Assembler {
void cache_wb(Address line);
void cache_wbsync(bool is_pre);
+
+#if COMPILER2_OR_JVMCI
+ void arraycopy_avx3_special_cases(XMMRegister xmm, KRegister mask, Register from,
+ Register to, Register count, int shift,
+ Register index, Register temp,
+ bool use64byteVector, Label& L_entry, Label& L_exit);
+
+ void arraycopy_avx3_special_cases_conjoint(XMMRegister xmm, KRegister mask, Register from,
+ Register to, Register start_index, Register end_index,
+ Register count, int shift, Register temp,
+ bool use64byteVector, Label& L_entry, Label& L_exit);
+
+ void copy64_masked_avx(Register dst, Register src, XMMRegister xmm,
+ KRegister mask, Register length, Register index,
+ Register temp, int shift = Address::times_1, int offset = 0,
+ bool use64byteVector = false);
+
+ void copy32_masked_avx(Register dst, Register src, XMMRegister xmm,
+ KRegister mask, Register length, Register index,
+ Register temp, int shift = Address::times_1, int offset = 0);
+
+ void copy32_avx(Register dst, Register src, Register index, XMMRegister xmm,
+ int shift = Address::times_1, int offset = 0);
+
+ void copy64_avx(Register dst, Register src, Register index, XMMRegister xmm,
+ bool conjoint, int shift = Address::times_1, int offset = 0,
+ bool use64byteVector = false);
+#endif // COMPILER2_OR_JVMCI
+
#endif // _LP64
void vallones(XMMRegister dst, int vector_len);
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp
new file mode 100644
index 00000000000..4368dee7329
--- /dev/null
+++ b/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp
@@ -0,0 +1,253 @@
+/*
+* Copyright (c) 2020, Intel Corporation.
+*
+* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+*
+* This code is free software; you can redistribute it and/or modify it
+* under the terms of the GNU General Public License version 2 only, as
+* published by the Free Software Foundation.
+*
+* This code is distributed in the hope that it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+* version 2 for more details (a copy is included in the LICENSE file that
+* accompanied this code).
+*
+* You should have received a copy of the GNU General Public License version
+* 2 along with this work; if not, write to the Free Software Foundation,
+* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+*
+* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+* or visit www.oracle.com if you need additional information or have any
+* questions.
+*
+*/
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) /* nothing */
+#else
+#define BLOCK_COMMENT(str) block_comment(str)
+#endif
+
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+
+#ifdef _LP64
+
+#if COMPILER2_OR_JVMCI
+
+void MacroAssembler::arraycopy_avx3_special_cases(XMMRegister xmm, KRegister mask, Register from,
+ Register to, Register count, int shift,
+ Register index, Register temp,
+ bool use64byteVector, Label& L_entry, Label& L_exit) {
+ Label L_entry_64, L_entry_96, L_entry_128;
+ Label L_entry_160, L_entry_192;
+
+ int size_mat[][6] = {
+ /* T_BYTE */ {32 , 64, 96 , 128 , 160 , 192 },
+ /* T_SHORT*/ {16 , 32, 48 , 64 , 80 , 96 },
+ /* T_INT */ {8 , 16, 24 , 32 , 40 , 48 },
+ /* T_LONG */ {4 , 8, 12 , 16 , 20 , 24 }
+ };
+
+ // Case A) Special case for length less than or equal to 32 bytes.
+ cmpq(count, size_mat[shift][0]);
+ jccb(Assembler::greater, L_entry_64);
+ copy32_masked_avx(to, from, xmm, mask, count, index, temp, shift);
+ jmp(L_exit);
+
+ // Case B) Special case for length less than or equal to 64 bytes.
+ BIND(L_entry_64);
+ cmpq(count, size_mat[shift][1]);
+ jccb(Assembler::greater, L_entry_96);
+ copy64_masked_avx(to, from, xmm, mask, count, index, temp, shift, 0, use64byteVector);
+ jmp(L_exit);
+
+ // Case C) Special case for length less than or equal to 96 bytes.
+ BIND(L_entry_96);
+ cmpq(count, size_mat[shift][2]);
+ jccb(Assembler::greater, L_entry_128);
+ copy64_avx(to, from, index, xmm, false, shift, 0, use64byteVector);
+ subq(count, 64 >> shift);
+ copy32_masked_avx(to, from, xmm, mask, count, index, temp, shift, 64);
+ jmp(L_exit);
+
+ // Case D) Special case for length less than or equal to 128 bytes.
+ BIND(L_entry_128);
+ cmpq(count, size_mat[shift][3]);
+ jccb(Assembler::greater, L_entry_160);
+ copy64_avx(to, from, index, xmm, false, shift, 0, use64byteVector);
+ copy32_avx(to, from, index, xmm, shift, 64);
+ subq(count, 96 >> shift);
+ copy32_masked_avx(to, from, xmm, mask, count, index, temp, shift, 96);
+ jmp(L_exit);
+
+ // Case E) Special case for length less than or equal to 160 bytes.
+ BIND(L_entry_160);
+ cmpq(count, size_mat[shift][4]);
+ jccb(Assembler::greater, L_entry_192);
+ copy64_avx(to, from, index, xmm, false, shift, 0, use64byteVector);
+ copy64_avx(to, from, index, xmm, false, shift, 64, use64byteVector);
+ subq(count, 128 >> shift);
+ copy32_masked_avx(to, from, xmm, mask, count, index, temp, shift, 128);
+ jmp(L_exit);
+
+ // Case F) Special case for length less than or equal to 192 bytes.
+ BIND(L_entry_192);
+ cmpq(count, size_mat[shift][5]);
+ jcc(Assembler::greater, L_entry);
+ copy64_avx(to, from, index, xmm, false, shift, 0, use64byteVector);
+ copy64_avx(to, from, index, xmm, false, shift, 64, use64byteVector);
+ copy32_avx(to, from, index, xmm, shift, 128);
+ subq(count, 160 >> shift);
+ copy32_masked_avx(to, from, xmm, mask, count, index, temp, shift, 160);
+ jmp(L_exit);
+}
+
+void MacroAssembler::arraycopy_avx3_special_cases_conjoint(XMMRegister xmm, KRegister mask, Register from,
+ Register to, Register start_index, Register end_index,
+ Register count, int shift, Register temp,
+ bool use64byteVector, Label& L_entry, Label& L_exit) {
+ Label L_entry_64, L_entry_96, L_entry_128;
+ Label L_entry_160, L_entry_192;
+ bool avx3 = MaxVectorSize > 32 && AVX3Threshold == 0;
+
+ int size_mat[][6] = {
+ /* T_BYTE */ {32 , 64, 96 , 128 , 160 , 192 },
+ /* T_SHORT*/ {16 , 32, 48 , 64 , 80 , 96 },
+ /* T_INT */ {8 , 16, 24 , 32 , 40 , 48 },
+ /* T_LONG */ {4 , 8, 12 , 16 , 20 , 24 }
+ };
+
+ // Case A) Special case for length less than or equal to 32 bytes.
+ cmpq(count, size_mat[shift][0]);
+ jccb(Assembler::greater, L_entry_64);
+ copy32_masked_avx(to, from, xmm, mask, count, start_index, temp, shift);
+ jmp(L_exit);
+
+ // Case B) Special case for length less than or equal to 64 bytes.
+ BIND(L_entry_64);
+ cmpq(count, size_mat[shift][1]);
+ jccb(Assembler::greater, L_entry_96);
+ if (avx3) {
+ copy64_masked_avx(to, from, xmm, mask, count, start_index, temp, shift, 0, true);
+ } else {
+ copy32_avx(to, from, end_index, xmm, shift, -32);
+ subq(count, 32 >> shift);
+ copy32_masked_avx(to, from, xmm, mask, count, start_index, temp, shift);
+ }
+ jmp(L_exit);
+
+ // Case C) Special case for length less than or equal to 96 bytes.
+ BIND(L_entry_96);
+ cmpq(count, size_mat[shift][2]);
+ jccb(Assembler::greater, L_entry_128);
+ copy64_avx(to, from, end_index, xmm, true, shift, -64, use64byteVector);
+ subq(count, 64 >> shift);
+ copy32_masked_avx(to, from, xmm, mask, count, start_index, temp, shift);
+ jmp(L_exit);
+
+ // Case D) Special case for length less than or equal to 128 bytes.
+ BIND(L_entry_128);
+ cmpq(count, size_mat[shift][3]);
+ jccb(Assembler::greater, L_entry_160);
+ copy64_avx(to, from, end_index, xmm, true, shift, -64, use64byteVector);
+ copy32_avx(to, from, end_index, xmm, shift, -96);
+ subq(count, 96 >> shift);
+ copy32_masked_avx(to, from, xmm, mask, count, start_index, temp, shift);
+ jmp(L_exit);
+
+ // Case E) Special case for length less than or equal to 160 bytes.
+ BIND(L_entry_160);
+ cmpq(count, size_mat[shift][4]);
+ jccb(Assembler::greater, L_entry_192);
+ copy64_avx(to, from, end_index, xmm, true, shift, -64, use64byteVector);
+ copy64_avx(to, from, end_index, xmm, true, shift, -128, use64byteVector);
+ subq(count, 128 >> shift);
+ copy32_masked_avx(to, from, xmm, mask, count, start_index, temp, shift);
+ jmp(L_exit);
+
+ // Case F) Special case for length less than or equal to 192 bytes.
+ BIND(L_entry_192);
+ cmpq(count, size_mat[shift][5]);
+ jcc(Assembler::greater, L_entry);
+ copy64_avx(to, from, end_index, xmm, true, shift, -64, use64byteVector);
+ copy64_avx(to, from, end_index, xmm, true, shift, -128, use64byteVector);
+ copy32_avx(to, from, end_index, xmm, shift, -160);
+ subq(count, 160 >> shift);
+ copy32_masked_avx(to, from, xmm, mask, count, start_index, temp, shift);
+ jmp(L_exit);
+}
+
+void MacroAssembler::copy64_masked_avx(Register dst, Register src, XMMRegister xmm,
+ KRegister mask, Register length, Register index,
+ Register temp, int shift, int offset,
+ bool use64byteVector) {
+ BasicType type[] = { T_BYTE, T_SHORT, T_INT, T_LONG};
+ assert(MaxVectorSize >= 32, "vector length should be >= 32");
+ if (!use64byteVector) {
+ copy32_avx(dst, src, index, xmm, shift, offset);
+ subptr(length, 32 >> shift);
+ copy32_masked_avx(dst, src, xmm, mask, length, index, temp, shift, offset+32);
+ } else {
+ Address::ScaleFactor scale = (Address::ScaleFactor)(shift);
+ assert(MaxVectorSize == 64, "vector length != 64");
+ negptr(length);
+ addq(length, 64);
+ mov64(temp, -1);
+ shrxq(temp, temp, length);
+ kmovql(mask, temp);
+ evmovdqu(xmm, mask, Address(src, index, scale, offset), Assembler::AVX_512bit, type[shift]);
+ evmovdqu(Address(dst, index, scale, offset), mask, xmm, Assembler::AVX_512bit, type[shift]);
+ }
+}
+
+
+void MacroAssembler::copy32_masked_avx(Register dst, Register src, XMMRegister xmm,
+ KRegister mask, Register length, Register index,
+ Register temp, int shift, int offset) {
+ assert(MaxVectorSize >= 32, "vector length should be >= 32");
+ BasicType type[] = { T_BYTE, T_SHORT, T_INT, T_LONG};
+ Address::ScaleFactor scale = (Address::ScaleFactor)(shift);
+ mov64(temp, 1);
+ shlxq(temp, temp, length);
+ decq(temp);
+ kmovql(mask, temp);
+ evmovdqu(xmm, mask, Address(src, index, scale, offset), Assembler::AVX_256bit, type[shift]);
+ evmovdqu(Address(dst, index, scale, offset), mask, xmm, Assembler::AVX_256bit, type[shift]);
+}
+
+
+void MacroAssembler::copy32_avx(Register dst, Register src, Register index, XMMRegister xmm,
+ int shift, int offset) {
+ assert(MaxVectorSize >= 32, "vector length should be >= 32");
+ Address::ScaleFactor scale = (Address::ScaleFactor)(shift);
+ vmovdqu(xmm, Address(src, index, scale, offset));
+ vmovdqu(Address(dst, index, scale, offset), xmm);
+}
+
+
+void MacroAssembler::copy64_avx(Register dst, Register src, Register index, XMMRegister xmm,
+ bool conjoint, int shift, int offset, bool use64byteVector) {
+ assert(MaxVectorSize == 64 || MaxVectorSize == 32, "vector length mismatch");
+ if (!use64byteVector) {
+ if (conjoint) {
+ copy32_avx(dst, src, index, xmm, shift, offset+32);
+ copy32_avx(dst, src, index, xmm, shift, offset);
+ } else {
+ copy32_avx(dst, src, index, xmm, shift, offset);
+ copy32_avx(dst, src, index, xmm, shift, offset+32);
+ }
+ } else {
+ Address::ScaleFactor scale = (Address::ScaleFactor)(shift);
+ evmovdquq(xmm, Address(src, index, scale, offset), Assembler::AVX_512bit);
+ evmovdquq(Address(dst, index, scale, offset), xmm, Assembler::AVX_512bit);
+ }
+}
+
+#endif // COMPILER2_OR_JVMCI
+
+#endif
diff --git a/src/hotspot/cpu/x86/methodHandles_x86.hpp b/src/hotspot/cpu/x86/methodHandles_x86.hpp
index bb333781a62..444d0495666 100644
--- a/src/hotspot/cpu/x86/methodHandles_x86.hpp
+++ b/src/hotspot/cpu/x86/methodHandles_x86.hpp
@@ -27,7 +27,7 @@
// Adapters
enum /* platform_dependent_constants */ {
- adapter_code_size = NOT_LP64(16000 DEBUG_ONLY(+ 25000)) LP64_ONLY(32000 DEBUG_ONLY(+ 150000))
+ adapter_code_size = 4000 DEBUG_ONLY(+ 6000)
};
// Additional helper methods for MethodHandles code generation:
diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp
index 066d1ae98cb..3e2b3a118c7 100644
--- a/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp
+++ b/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp
@@ -37,6 +37,7 @@
#include "memory/resourceArea.hpp"
#include "oops/compiledICHolder.hpp"
#include "oops/klass.inline.hpp"
+#include "prims/methodHandles.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/vframeArray.hpp"
@@ -1213,265 +1214,6 @@ void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_ty
}
}
-
-static void save_or_restore_arguments(MacroAssembler* masm,
- const int stack_slots,
- const int total_in_args,
- const int arg_save_area,
- OopMap* map,
- VMRegPair* in_regs,
- BasicType* in_sig_bt) {
- // if map is non-NULL then the code should store the values,
- // otherwise it should load them.
- int handle_index = 0;
- // Save down double word first
- for ( int i = 0; i < total_in_args; i++) {
- if (in_regs[i].first()->is_XMMRegister() && in_sig_bt[i] == T_DOUBLE) {
- int slot = handle_index * VMRegImpl::slots_per_word + arg_save_area;
- int offset = slot * VMRegImpl::stack_slot_size;
- handle_index += 2;
- assert(handle_index <= stack_slots, "overflow");
- if (map != NULL) {
- __ movdbl(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
- } else {
- __ movdbl(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
- }
- }
- if (in_regs[i].first()->is_Register() && in_sig_bt[i] == T_LONG) {
- int slot = handle_index * VMRegImpl::slots_per_word + arg_save_area;
- int offset = slot * VMRegImpl::stack_slot_size;
- handle_index += 2;
- assert(handle_index <= stack_slots, "overflow");
- if (map != NULL) {
- __ movl(Address(rsp, offset), in_regs[i].first()->as_Register());
- if (in_regs[i].second()->is_Register()) {
- __ movl(Address(rsp, offset + 4), in_regs[i].second()->as_Register());
- }
- } else {
- __ movl(in_regs[i].first()->as_Register(), Address(rsp, offset));
- if (in_regs[i].second()->is_Register()) {
- __ movl(in_regs[i].second()->as_Register(), Address(rsp, offset + 4));
- }
- }
- }
- }
- // Save or restore single word registers
- for ( int i = 0; i < total_in_args; i++) {
- if (in_regs[i].first()->is_Register()) {
- int slot = handle_index++ * VMRegImpl::slots_per_word + arg_save_area;
- int offset = slot * VMRegImpl::stack_slot_size;
- assert(handle_index <= stack_slots, "overflow");
- if (in_sig_bt[i] == T_ARRAY && map != NULL) {
- map->set_oop(VMRegImpl::stack2reg(slot));;
- }
-
- // Value is in an input register pass we must flush it to the stack
- const Register reg = in_regs[i].first()->as_Register();
- switch (in_sig_bt[i]) {
- case T_ARRAY:
- if (map != NULL) {
- __ movptr(Address(rsp, offset), reg);
- } else {
- __ movptr(reg, Address(rsp, offset));
- }
- break;
- case T_BOOLEAN:
- case T_CHAR:
- case T_BYTE:
- case T_SHORT:
- case T_INT:
- if (map != NULL) {
- __ movl(Address(rsp, offset), reg);
- } else {
- __ movl(reg, Address(rsp, offset));
- }
- break;
- case T_OBJECT:
- default: ShouldNotReachHere();
- }
- } else if (in_regs[i].first()->is_XMMRegister()) {
- if (in_sig_bt[i] == T_FLOAT) {
- int slot = handle_index++ * VMRegImpl::slots_per_word + arg_save_area;
- int offset = slot * VMRegImpl::stack_slot_size;
- assert(handle_index <= stack_slots, "overflow");
- if (map != NULL) {
- __ movflt(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
- } else {
- __ movflt(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
- }
- }
- } else if (in_regs[i].first()->is_stack()) {
- if (in_sig_bt[i] == T_ARRAY && map != NULL) {
- int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
- map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
- }
- }
- }
-}
-
-// Registers need to be saved for runtime call
-static Register caller_saved_registers[] = {
- rcx, rdx, rsi, rdi
-};
-
-// Save caller saved registers except r1 and r2
-static void save_registers_except(MacroAssembler* masm, Register r1, Register r2) {
- int reg_len = (int)(sizeof(caller_saved_registers) / sizeof(Register));
- for (int index = 0; index < reg_len; index ++) {
- Register this_reg = caller_saved_registers[index];
- if (this_reg != r1 && this_reg != r2) {
- __ push(this_reg);
- }
- }
-}
-
-// Restore caller saved registers except r1 and r2
-static void restore_registers_except(MacroAssembler* masm, Register r1, Register r2) {
- int reg_len = (int)(sizeof(caller_saved_registers) / sizeof(Register));
- for (int index = reg_len - 1; index >= 0; index --) {
- Register this_reg = caller_saved_registers[index];
- if (this_reg != r1 && this_reg != r2) {
- __ pop(this_reg);
- }
- }
-}
-
-// Pin object, return pinned object or null in rax
-static void gen_pin_object(MacroAssembler* masm,
- Register thread, VMRegPair reg) {
- __ block_comment("gen_pin_object {");
-
- Label is_null;
- Register tmp_reg = rax;
- VMRegPair tmp(tmp_reg->as_VMReg());
- if (reg.first()->is_stack()) {
- // Load the arg up from the stack
- simple_move32(masm, reg, tmp);
- reg = tmp;
- } else {
- __ movl(tmp_reg, reg.first()->as_Register());
- }
- __ testptr(reg.first()->as_Register(), reg.first()->as_Register());
- __ jccb(Assembler::equal, is_null);
-
- // Save registers that may be used by runtime call
- Register arg = reg.first()->is_Register() ? reg.first()->as_Register() : noreg;
- save_registers_except(masm, arg, thread);
-
- __ call_VM_leaf(
- CAST_FROM_FN_PTR(address, SharedRuntime::pin_object),
- thread, reg.first()->as_Register());
-
- // Restore saved registers
- restore_registers_except(masm, arg, thread);
-
- __ bind(is_null);
- __ block_comment("} gen_pin_object");
-}
-
-// Unpin object
-static void gen_unpin_object(MacroAssembler* masm,
- Register thread, VMRegPair reg) {
- __ block_comment("gen_unpin_object {");
- Label is_null;
-
- // temp register
- __ push(rax);
- Register tmp_reg = rax;
- VMRegPair tmp(tmp_reg->as_VMReg());
-
- simple_move32(masm, reg, tmp);
-
- __ testptr(rax, rax);
- __ jccb(Assembler::equal, is_null);
-
- // Save registers that may be used by runtime call
- Register arg = reg.first()->is_Register() ? reg.first()->as_Register() : noreg;
- save_registers_except(masm, arg, thread);
-
- __ call_VM_leaf(
- CAST_FROM_FN_PTR(address, SharedRuntime::unpin_object),
- thread, rax);
-
- // Restore saved registers
- restore_registers_except(masm, arg, thread);
- __ bind(is_null);
- __ pop(rax);
- __ block_comment("} gen_unpin_object");
-}
-
-// Check GCLocker::needs_gc and enter the runtime if it's true. This
-// keeps a new JNI critical region from starting until a GC has been
-// forced. Save down any oops in registers and describe them in an
-// OopMap.
-static void check_needs_gc_for_critical_native(MacroAssembler* masm,
- Register thread,
- int stack_slots,
- int total_c_args,
- int total_in_args,
- int arg_save_area,
- OopMapSet* oop_maps,
- VMRegPair* in_regs,
- BasicType* in_sig_bt) {
- __ block_comment("check GCLocker::needs_gc");
- Label cont;
- __ cmp8(ExternalAddress((address)GCLocker::needs_gc_address()), false);
- __ jcc(Assembler::equal, cont);
-
- // Save down any incoming oops and call into the runtime to halt for a GC
-
- OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
-
- save_or_restore_arguments(masm, stack_slots, total_in_args,
- arg_save_area, map, in_regs, in_sig_bt);
-
- address the_pc = __ pc();
- oop_maps->add_gc_map( __ offset(), map);
- __ set_last_Java_frame(thread, rsp, noreg, the_pc);
-
- __ block_comment("block_for_jni_critical");
- __ push(thread);
- __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical)));
- __ increment(rsp, wordSize);
-
- __ get_thread(thread);
- __ reset_last_Java_frame(thread, false);
-
- save_or_restore_arguments(masm, stack_slots, total_in_args,
- arg_save_area, NULL, in_regs, in_sig_bt);
-
- __ bind(cont);
-#ifdef ASSERT
- if (StressCriticalJNINatives) {
- // Stress register saving
- OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
- save_or_restore_arguments(masm, stack_slots, total_in_args,
- arg_save_area, map, in_regs, in_sig_bt);
- // Destroy argument registers
- for (int i = 0; i < total_in_args - 1; i++) {
- if (in_regs[i].first()->is_Register()) {
- const Register reg = in_regs[i].first()->as_Register();
- __ xorptr(reg, reg);
- } else if (in_regs[i].first()->is_XMMRegister()) {
- __ xorpd(in_regs[i].first()->as_XMMRegister(), in_regs[i].first()->as_XMMRegister());
- } else if (in_regs[i].first()->is_FloatRegister()) {
- ShouldNotReachHere();
- } else if (in_regs[i].first()->is_stack()) {
- // Nothing to do
- } else {
- ShouldNotReachHere();
- }
- if (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_DOUBLE) {
- i++;
- }
- }
-
- save_or_restore_arguments(masm, stack_slots, total_in_args,
- arg_save_area, NULL, in_regs, in_sig_bt);
- }
-#endif
-}
-
// Unpack an array argument into a pointer to the body and the length
// if the array is non-null, otherwise pass 0 for both.
static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) {
@@ -1596,24 +1338,12 @@ static void gen_special_dispatch(MacroAssembler* masm,
// Critical native functions are a shorthand for the use of
// GetPrimtiveArrayCritical and disallow the use of any other JNI
// functions. The wrapper is expected to unpack the arguments before
-// passing them to the callee and perform checks before and after the
-// native call to ensure that they GCLocker
-// lock_critical/unlock_critical semantics are followed. Some other
-// parts of JNI setup are skipped like the tear down of the JNI handle
+// passing them to the callee. Critical native functions leave the state _in_Java,
+// since they cannot stop for GC.
+// Some other parts of JNI setup are skipped like the tear down of the JNI handle
// block and the check for pending exceptions it's impossible for them
// to be thrown.
//
-// They are roughly structured like this:
-// if (GCLocker::needs_gc())
-// SharedRuntime::block_for_jni_critical();
-// tranistion to thread_in_native
-// unpack arrray arguments and call native entry point
-// check for safepoint in progress
-// check if any thread suspend flags are set
-// call into JVM and possible unlock the JNI critical
-// if a GC was suppressed while in the critical native.
-// transition back to thread_in_Java
-// return to caller
//
nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
const methodHandle& method,
@@ -1925,11 +1655,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
__ get_thread(thread);
- if (is_critical_native && !Universe::heap()->supports_object_pinning()) {
- check_needs_gc_for_critical_native(masm, thread, stack_slots, total_c_args, total_in_args,
- oop_handle_offset, oop_maps, in_regs, in_sig_bt);
- }
-
//
// We immediately shuffle the arguments so that any vm call we have to
// make from here on out (sync slow path, jvmti, etc.) we will have
@@ -1963,11 +1688,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
//
OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
- // Inbound arguments that need to be pinned for critical natives
- GrowableArray pinned_args(total_in_args);
- // Current stack slot for storing register based array argument
- int pinned_slot = oop_handle_offset;
-
// Mark location of rbp,
// map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, rbp->as_VMReg());
@@ -1980,26 +1700,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
case T_ARRAY:
if (is_critical_native) {
VMRegPair in_arg = in_regs[i];
- if (Universe::heap()->supports_object_pinning()) {
- // gen_pin_object handles save and restore
- // of any clobbered registers
- gen_pin_object(masm, thread, in_arg);
- pinned_args.append(i);
-
- // rax has pinned array
- VMRegPair result_reg(rax->as_VMReg());
- if (!in_arg.first()->is_stack()) {
- assert(pinned_slot <= stack_slots, "overflow");
- simple_move32(masm, result_reg, VMRegImpl::stack2reg(pinned_slot));
- pinned_slot += VMRegImpl::slots_per_word;
- } else {
- // Write back pinned value, it will be used to unpin this argument
- __ movptr(Address(rbp, reg2offset_in(in_arg.first())), result_reg.first()->as_Register());
- }
- // We have the array in register, use it
- in_arg = result_reg;
- }
-
unpack_array_argument(masm, in_arg, in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
c_arg++;
break;
@@ -2154,15 +1854,14 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
// Finally just about ready to make the JNI call
-
// get JNIEnv* which is first argument to native
if (!is_critical_native) {
__ lea(rdx, Address(thread, in_bytes(JavaThread::jni_environment_offset())));
__ movptr(Address(rsp, 0), rdx);
- }
- // Now set thread in native
- __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native);
+ // Now set thread in native
+ __ movl(Address(thread, JavaThread::thread_state_offset()), _thread_in_native);
+ }
__ call(RuntimeAddress(native_func));
@@ -2193,24 +1892,17 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
default : ShouldNotReachHere();
}
- // unpin pinned arguments
- pinned_slot = oop_handle_offset;
- if (pinned_args.length() > 0) {
- // save return value that may be overwritten otherwise.
- save_native_result(masm, ret_type, stack_slots);
- for (int index = 0; index < pinned_args.length(); index ++) {
- int i = pinned_args.at(index);
- assert(pinned_slot <= stack_slots, "overflow");
- if (!in_regs[i].first()->is_stack()) {
- int offset = pinned_slot * VMRegImpl::stack_slot_size;
- __ movl(in_regs[i].first()->as_Register(), Address(rsp, offset));
- pinned_slot += VMRegImpl::slots_per_word;
- }
- // gen_pin_object handles save and restore
- // of any other clobbered registers
- gen_unpin_object(masm, thread, in_regs[i]);
- }
- restore_native_result(masm, ret_type, stack_slots);
+ Label after_transition;
+
+ // If this is a critical native, check for a safepoint or suspend request after the call.
+ // If neither is pending, jump straight to after_transition; otherwise fall through to the
+ // native_trans handling below, as for the native methods that are not critical natives.
+ if (is_critical_native) {
+ Label needs_safepoint;
+ __ safepoint_poll(needs_safepoint, thread, false /* at_return */, false /* in_nmethod */);
+ __ cmpl(Address(thread, JavaThread::suspend_flags_offset()), 0);
+ __ jcc(Assembler::equal, after_transition);
+ __ bind(needs_safepoint);
}
// Switch thread to "native transition" state before reading the synchronization state.
@@ -2232,12 +1924,10 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
__ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
}
- Label after_transition;
-
// check for safepoint operation in progress and/or pending suspend requests
{ Label Continue, slow_path;
- __ safepoint_poll(slow_path, thread, noreg);
+ __ safepoint_poll(slow_path, thread, true /* at_return */, false /* in_nmethod */);
__ cmpl(Address(thread, JavaThread::suspend_flags_offset()), 0);
__ jcc(Assembler::equal, Continue);
@@ -2253,23 +1943,11 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
save_native_result(masm, ret_type, stack_slots);
__ push(thread);
- if (!is_critical_native) {
- __ call(RuntimeAddress(CAST_FROM_FN_PTR(address,
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address,
JavaThread::check_special_condition_for_native_trans)));
- } else {
- __ call(RuntimeAddress(CAST_FROM_FN_PTR(address,
- JavaThread::check_special_condition_for_native_trans_and_transition)));
- }
__ increment(rsp, wordSize);
// Restore any method result value
restore_native_result(masm, ret_type, stack_slots);
-
- if (is_critical_native) {
- // The call above performed the transition to thread_in_Java so
- // skip the transition logic below.
- __ jmpb(after_transition);
- }
-
__ bind(Continue);
}
@@ -2510,10 +2188,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
oop_maps);
- if (is_critical_native) {
- nm->set_lazy_critical_native(true);
- }
-
return nm;
}
diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp
index 1f96dc6ecfe..b238b0e0d35 100644
--- a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp
+++ b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp
@@ -42,6 +42,7 @@
#include "memory/universe.hpp"
#include "oops/compiledICHolder.hpp"
#include "oops/klass.inline.hpp"
+#include "prims/methodHandles.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/vframeArray.hpp"
@@ -1377,222 +1378,6 @@ static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMR
}
}
-
-static void save_or_restore_arguments(MacroAssembler* masm,
- const int stack_slots,
- const int total_in_args,
- const int arg_save_area,
- OopMap* map,
- VMRegPair* in_regs,
- BasicType* in_sig_bt) {
- // if map is non-NULL then the code should store the values,
- // otherwise it should load them.
- int slot = arg_save_area;
- // Save down double word first
- for ( int i = 0; i < total_in_args; i++) {
- if (in_regs[i].first()->is_XMMRegister() && in_sig_bt[i] == T_DOUBLE) {
- int offset = slot * VMRegImpl::stack_slot_size;
- slot += VMRegImpl::slots_per_word;
- assert(slot <= stack_slots, "overflow");
- if (map != NULL) {
- __ movdbl(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
- } else {
- __ movdbl(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
- }
- }
- if (in_regs[i].first()->is_Register() &&
- (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) {
- int offset = slot * VMRegImpl::stack_slot_size;
- if (map != NULL) {
- __ movq(Address(rsp, offset), in_regs[i].first()->as_Register());
- if (in_sig_bt[i] == T_ARRAY) {
- map->set_oop(VMRegImpl::stack2reg(slot));;
- }
- } else {
- __ movq(in_regs[i].first()->as_Register(), Address(rsp, offset));
- }
- slot += VMRegImpl::slots_per_word;
- }
- }
- // Save or restore single word registers
- for ( int i = 0; i < total_in_args; i++) {
- if (in_regs[i].first()->is_Register()) {
- int offset = slot * VMRegImpl::stack_slot_size;
- slot++;
- assert(slot <= stack_slots, "overflow");
-
- // Value is in an input register pass we must flush it to the stack
- const Register reg = in_regs[i].first()->as_Register();
- switch (in_sig_bt[i]) {
- case T_BOOLEAN:
- case T_CHAR:
- case T_BYTE:
- case T_SHORT:
- case T_INT:
- if (map != NULL) {
- __ movl(Address(rsp, offset), reg);
- } else {
- __ movl(reg, Address(rsp, offset));
- }
- break;
- case T_ARRAY:
- case T_LONG:
- // handled above
- break;
- case T_OBJECT:
- default: ShouldNotReachHere();
- }
- } else if (in_regs[i].first()->is_XMMRegister()) {
- if (in_sig_bt[i] == T_FLOAT) {
- int offset = slot * VMRegImpl::stack_slot_size;
- slot++;
- assert(slot <= stack_slots, "overflow");
- if (map != NULL) {
- __ movflt(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
- } else {
- __ movflt(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
- }
- }
- } else if (in_regs[i].first()->is_stack()) {
- if (in_sig_bt[i] == T_ARRAY && map != NULL) {
- int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
- map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
- }
- }
- }
-}
-
-// Pin object, return pinned object or null in rax
-static void gen_pin_object(MacroAssembler* masm,
- VMRegPair reg) {
- __ block_comment("gen_pin_object {");
-
- // rax always contains oop, either incoming or
- // pinned.
- Register tmp_reg = rax;
-
- Label is_null;
- VMRegPair tmp;
- VMRegPair in_reg = reg;
-
- tmp.set_ptr(tmp_reg->as_VMReg());
- if (reg.first()->is_stack()) {
- // Load the arg up from the stack
- move_ptr(masm, reg, tmp);
- reg = tmp;
- } else {
- __ movptr(rax, reg.first()->as_Register());
- }
- __ testptr(reg.first()->as_Register(), reg.first()->as_Register());
- __ jccb(Assembler::equal, is_null);
-
- if (reg.first()->as_Register() != c_rarg1) {
- __ movptr(c_rarg1, reg.first()->as_Register());
- }
-
- __ call_VM_leaf(
- CAST_FROM_FN_PTR(address, SharedRuntime::pin_object),
- r15_thread, c_rarg1);
-
- __ bind(is_null);
- __ block_comment("} gen_pin_object");
-}
-
-// Unpin object
-static void gen_unpin_object(MacroAssembler* masm,
- VMRegPair reg) {
- __ block_comment("gen_unpin_object {");
- Label is_null;
-
- if (reg.first()->is_stack()) {
- __ movptr(c_rarg1, Address(rbp, reg2offset_in(reg.first())));
- } else if (reg.first()->as_Register() != c_rarg1) {
- __ movptr(c_rarg1, reg.first()->as_Register());
- }
-
- __ testptr(c_rarg1, c_rarg1);
- __ jccb(Assembler::equal, is_null);
-
- __ call_VM_leaf(
- CAST_FROM_FN_PTR(address, SharedRuntime::unpin_object),
- r15_thread, c_rarg1);
-
- __ bind(is_null);
- __ block_comment("} gen_unpin_object");
-}
-
-// Check GCLocker::needs_gc and enter the runtime if it's true. This
-// keeps a new JNI critical region from starting until a GC has been
-// forced. Save down any oops in registers and describe them in an
-// OopMap.
-static void check_needs_gc_for_critical_native(MacroAssembler* masm,
- int stack_slots,
- int total_c_args,
- int total_in_args,
- int arg_save_area,
- OopMapSet* oop_maps,
- VMRegPair* in_regs,
- BasicType* in_sig_bt) {
- __ block_comment("check GCLocker::needs_gc");
- Label cont;
- __ cmp8(ExternalAddress((address)GCLocker::needs_gc_address()), false);
- __ jcc(Assembler::equal, cont);
-
- // Save down any incoming oops and call into the runtime to halt for a GC
-
- OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
- save_or_restore_arguments(masm, stack_slots, total_in_args,
- arg_save_area, map, in_regs, in_sig_bt);
-
- address the_pc = __ pc();
- oop_maps->add_gc_map( __ offset(), map);
- __ set_last_Java_frame(rsp, noreg, the_pc);
-
- __ block_comment("block_for_jni_critical");
- __ movptr(c_rarg0, r15_thread);
- __ mov(r12, rsp); // remember sp
- __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
- __ andptr(rsp, -16); // align stack as required by ABI
- __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical)));
- __ mov(rsp, r12); // restore sp
- __ reinit_heapbase();
-
- __ reset_last_Java_frame(false);
-
- save_or_restore_arguments(masm, stack_slots, total_in_args,
- arg_save_area, NULL, in_regs, in_sig_bt);
- __ bind(cont);
-#ifdef ASSERT
- if (StressCriticalJNINatives) {
- // Stress register saving
- OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
- save_or_restore_arguments(masm, stack_slots, total_in_args,
- arg_save_area, map, in_regs, in_sig_bt);
- // Destroy argument registers
- for (int i = 0; i < total_in_args - 1; i++) {
- if (in_regs[i].first()->is_Register()) {
- const Register reg = in_regs[i].first()->as_Register();
- __ xorptr(reg, reg);
- } else if (in_regs[i].first()->is_XMMRegister()) {
- __ xorpd(in_regs[i].first()->as_XMMRegister(), in_regs[i].first()->as_XMMRegister());
- } else if (in_regs[i].first()->is_FloatRegister()) {
- ShouldNotReachHere();
- } else if (in_regs[i].first()->is_stack()) {
- // Nothing to do
- } else {
- ShouldNotReachHere();
- }
- if (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_DOUBLE) {
- i++;
- }
- }
-
- save_or_restore_arguments(masm, stack_slots, total_in_args,
- arg_save_area, NULL, in_regs, in_sig_bt);
- }
-#endif
-}
-
// Unpack an array argument into a pointer to the body and the length
// if the array is non-null, otherwise pass 0 for both.
static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) {
@@ -1897,25 +1682,12 @@ static void gen_special_dispatch(MacroAssembler* masm,
// Critical native functions are a shorthand for the use of
// GetPrimtiveArrayCritical and disallow the use of any other JNI
// functions. The wrapper is expected to unpack the arguments before
-// passing them to the callee and perform checks before and after the
-// native call to ensure that they GCLocker
-// lock_critical/unlock_critical semantics are followed. Some other
-// parts of JNI setup are skipped like the tear down of the JNI handle
+// passing them to the callee. Critical native functions leave the state _in_Java,
+// since they cannot stop for GC.
+// Some other parts of JNI setup are skipped like the tear down of the JNI handle
// block and the check for pending exceptions it's impossible for them
// to be thrown.
//
-// They are roughly structured like this:
-// if (GCLocker::needs_gc())
-// SharedRuntime::block_for_jni_critical();
-// tranistion to thread_in_native
-// unpack arrray arguments and call native entry point
-// check for safepoint in progress
-// check if any thread suspend flags are set
-// call into JVM and possible unlock the JNI critical
-// if a GC was suppressed while in the critical native.
-// transition back to thread_in_Java
-// return to caller
-//
nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
const methodHandle& method,
int compile_id,
@@ -2216,11 +1988,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
const Register oop_handle_reg = r14;
- if (is_critical_native && !Universe::heap()->supports_object_pinning()) {
- check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args,
- oop_handle_offset, oop_maps, in_regs, in_sig_bt);
- }
-
//
// We immediately shuffle the arguments so that any vm call we have to
// make from here on out (sync slow path, jvmti, etc.) we will have
@@ -2273,10 +2040,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
// the incoming and outgoing registers are offset upwards and for
// critical natives they are offset down.
GrowableArray arg_order(2 * total_in_args);
- // Inbound arguments that need to be pinned for critical natives
- GrowableArray pinned_args(total_in_args);
- // Current stack slot for storing register based array argument
- int pinned_slot = oop_handle_offset;
VMRegPair tmp_vmreg;
tmp_vmreg.set2(rbx->as_VMReg());
@@ -2325,23 +2088,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
switch (in_sig_bt[i]) {
case T_ARRAY:
if (is_critical_native) {
- // pin before unpack
- if (Universe::heap()->supports_object_pinning()) {
- save_args(masm, total_c_args, 0, out_regs);
- gen_pin_object(masm, in_regs[i]);
- pinned_args.append(i);
- restore_args(masm, total_c_args, 0, out_regs);
-
- // rax has pinned array
- VMRegPair result_reg;
- result_reg.set_ptr(rax->as_VMReg());
- move_ptr(masm, result_reg, in_regs[i]);
- if (!in_regs[i].first()->is_stack()) {
- assert(pinned_slot <= stack_slots, "overflow");
- move_ptr(masm, result_reg, VMRegImpl::stack2reg(pinned_slot));
- pinned_slot += VMRegImpl::slots_per_word;
- }
- }
unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]);
c_arg++;
#ifdef ASSERT
@@ -2520,17 +2266,15 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
__ bind(lock_done);
}
-
// Finally just about ready to make the JNI call
-
// get JNIEnv* which is first argument to native
if (!is_critical_native) {
__ lea(c_rarg0, Address(r15_thread, in_bytes(JavaThread::jni_environment_offset())));
- }
- // Now set thread in native
- __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native);
+ // Now set thread in native
+ __ movl(Address(r15_thread, JavaThread::thread_state_offset()), _thread_in_native);
+ }
__ call(RuntimeAddress(native_func));
@@ -2556,22 +2300,17 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
default : ShouldNotReachHere();
}
- // unpin pinned arguments
- pinned_slot = oop_handle_offset;
- if (pinned_args.length() > 0) {
- // save return value that may be overwritten otherwise.
- save_native_result(masm, ret_type, stack_slots);
- for (int index = 0; index < pinned_args.length(); index ++) {
- int i = pinned_args.at(index);
- assert(pinned_slot <= stack_slots, "overflow");
- if (!in_regs[i].first()->is_stack()) {
- int offset = pinned_slot * VMRegImpl::stack_slot_size;
- __ movq(in_regs[i].first()->as_Register(), Address(rsp, offset));
- pinned_slot += VMRegImpl::slots_per_word;
- }
- gen_unpin_object(masm, in_regs[i]);
- }
- restore_native_result(masm, ret_type, stack_slots);
+ Label after_transition;
+
+ // If this is a critical native, check for a safepoint or suspend request after the call.
+ // If neither is pending, jump straight to after_transition; otherwise fall through to the
+ // native_trans handling below, as for the native methods that are not critical natives.
+ if (is_critical_native) {
+ Label needs_safepoint;
+ __ safepoint_poll(needs_safepoint, r15_thread, false /* at_return */, false /* in_nmethod */);
+ __ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0);
+ __ jcc(Assembler::equal, after_transition);
+ __ bind(needs_safepoint);
}
// Switch thread to "native transition" state before reading the synchronization state.
@@ -2588,14 +2327,12 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
Assembler::LoadLoad | Assembler::LoadStore |
Assembler::StoreLoad | Assembler::StoreStore));
- Label after_transition;
-
// check for safepoint operation in progress and/or pending suspend requests
{
Label Continue;
Label slow_path;
- __ safepoint_poll(slow_path, r15_thread, rscratch1);
+ __ safepoint_poll(slow_path, r15_thread, true /* at_return */, false /* in_nmethod */);
__ cmpl(Address(r15_thread, JavaThread::suspend_flags_offset()), 0);
__ jcc(Assembler::equal, Continue);
@@ -2613,22 +2350,11 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
__ mov(r12, rsp); // remember sp
__ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
__ andptr(rsp, -16); // align stack as required by ABI
- if (!is_critical_native) {
- __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
- } else {
- __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)));
- }
+ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
__ mov(rsp, r12); // restore sp
__ reinit_heapbase();
// Restore any method result value
restore_native_result(masm, ret_type, stack_slots);
-
- if (is_critical_native) {
- // The call above performed the transition to thread_in_Java so
- // skip the transition logic below.
- __ jmpb(after_transition);
- }
-
__ bind(Continue);
}
@@ -2852,12 +2578,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
oop_maps);
- if (is_critical_native) {
- nm->set_lazy_critical_native(true);
- }
-
return nm;
-
}
// this function returns the adjust size (in number of words) to a c2i adapter
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp
index c5d0effae0f..4bc3b0340b5 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_32.cpp
@@ -587,6 +587,29 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+  // Emits sixteen 32-bit words whose bytes, in little-endian order, count
+  // up from 0x00 to 0x3F, and returns the address of the first word.
+  //
+  // stub_name - name passed to the StubCodeMark for this stub.
+  address generate_iota_indices(const char *stub_name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", stub_name);
+    address table_base = __ pc();
+    // Each word packs four consecutive byte values with the smallest in
+    // its least significant byte: word 0 is 0x03020100, word 15 is
+    // 0x3F3E3D3C.
+    const int word_count = 16;
+    for (int w = 0; w < word_count; w++) {
+      const int first = w * 4;  // value stored in the low byte of this word
+      const int word = first |
+                       ((first + 1) << 8) |
+                       ((first + 2) << 16) |
+                       ((first + 3) << 24);
+      __ emit_data(word, relocInfo::none, 0);
+    }
+    return table_base;
+  }
+
address generate_vector_mask_long_double(const char *stub_name, int32_t maskhi, int32_t masklo) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", stub_name);
@@ -627,6 +650,40 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+  // Emits a table of 32-bit constants and returns its start address.
+  // val0..val3 are always emitted; val4..val7 follow when len is at least
+  // AVX_256bit, and val8..val15 follow those when len is also at least
+  // AVX_512bit. Values the caller omits default to 0.
+  //
+  // stub_name - name passed to the StubCodeMark for this stub.
+  // len       - vector length selecting how many values are emitted; must
+  //             not be AVX_NoVec (asserted).
+  address generate_vector_custom_i32(const char *stub_name, Assembler::AvxVectorLen len,
+                                     int32_t val0, int32_t val1, int32_t val2, int32_t val3,
+                                     int32_t val4 = 0, int32_t val5 = 0, int32_t val6 = 0, int32_t val7 = 0,
+                                     int32_t val8 = 0, int32_t val9 = 0, int32_t val10 = 0, int32_t val11 = 0,
+                                     int32_t val12 = 0, int32_t val13 = 0, int32_t val14 = 0, int32_t val15 = 0) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", stub_name);
+    address table_base = __ pc();
+
+    assert(len != Assembler::AVX_NoVec, "vector len must be specified");
+    const int32_t lanes[16] = { val0,  val1,  val2,  val3,  val4,  val5,  val6,  val7,
+                                val8,  val9,  val10, val11, val12, val13, val14, val15 };
+
+    // The 512-bit check only applies once the 256-bit check has passed,
+    // so the emitted count is 4, 8 or 16.
+    int lane_count = 4;
+    if (len >= Assembler::AVX_256bit) {
+      lane_count = (len >= Assembler::AVX_512bit) ? 16 : 8;
+    }
+    for (int i = 0; i < lane_count; i++) {
+      __ emit_data(lanes[i], relocInfo::none, 0);
+    }
+
+    return table_base;
+  }
+
//----------------------------------------------------------------------------------------------------
// Non-destructive plausibility checks for oops
@@ -3612,7 +3669,7 @@ class StubGenerator: public StubCodeGenerator {
__ pusha();
// xmm0 and xmm1 may be used for passing float/double arguments
- const int xmm_size = wordSize * 2;
+ const int xmm_size = wordSize * 4;
const int xmm_spill_size = xmm_size * 2;
__ subptr(rsp, xmm_spill_size);
__ movdqu(Address(rsp, xmm_size * 1), xmm1);
@@ -3902,8 +3959,19 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::x86::_vector_double_sign_mask = generate_vector_mask_long_double("vector_double_sign_mask", 0x7FFFFFFF, 0xFFFFFFFF);
StubRoutines::x86::_vector_double_sign_flip = generate_vector_mask_long_double("vector_double_sign_flip", 0x80000000, 0x00000000);
StubRoutines::x86::_vector_short_to_byte_mask = generate_vector_mask("vector_short_to_byte_mask", 0x00ff00ff);
+ StubRoutines::x86::_vector_int_to_byte_mask = generate_vector_mask("vector_int_to_byte_mask", 0x000000ff);
+ StubRoutines::x86::_vector_int_to_short_mask = generate_vector_mask("vector_int_to_short_mask", 0x0000ffff);
+ StubRoutines::x86::_vector_32_bit_mask = generate_vector_custom_i32("vector_32_bit_mask", Assembler::AVX_512bit,
+ 0xFFFFFFFF, 0, 0, 0);
+ StubRoutines::x86::_vector_64_bit_mask = generate_vector_custom_i32("vector_64_bit_mask", Assembler::AVX_512bit,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0, 0);
+ StubRoutines::x86::_vector_int_shuffle_mask = generate_vector_mask("vector_int_shuffle_mask", 0x03020100);
+ StubRoutines::x86::_vector_short_shuffle_mask = generate_vector_mask("vector_short_shuffle_mask", 0x01000100);
+ StubRoutines::x86::_vector_long_shuffle_mask = generate_vector_mask_long_double("vector_long_shuffle_mask", 0x00000001, 0x0);
StubRoutines::x86::_vector_byte_perm_mask = generate_vector_byte_perm_mask("vector_byte_perm_mask");
StubRoutines::x86::_vector_long_sign_mask = generate_vector_mask_long_double("vector_long_sign_mask", 0x80000000, 0x00000000);
+ StubRoutines::x86::_vector_all_bits_set = generate_vector_mask("vector_all_bits_set", 0xFFFFFFFF);
+ StubRoutines::x86::_vector_iota_indices = generate_iota_indices("iota_indices");
// support for verify_oop (must happen after universe_init)
StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
diff --git a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
index 3d2c7671304..b028e6a9c9b 100644
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp
@@ -809,6 +809,21 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+  // Emits bytes 0x00 .. 0x3F as eight little-endian 64-bit words; returns the table start.
+  address generate_iota_indices(const char *stub_name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", stub_name);
+    address table_base = __ pc();
+    for (int first = 0; first < 64; first += 8) {
+      int64_t qword = 0;
+      for (int b = 7; b >= 0; b--) {  // value first + b lands at byte position b
+        qword = (qword << 8) | (int64_t)(first + b);
+      }
+      __ emit_data64(qword, relocInfo::none);
+    }
+    return table_base;
+  }
+
address generate_fp_mask(const char *stub_name, int64_t mask) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", stub_name);
@@ -854,6 +869,57 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+  // Emits eight copies of the 64-bit constant 'mask' (64 bytes in total)
+  // and returns the address of the first copy.
+  //
+  // stub_name - name passed to the StubCodeMark for this stub.
+  // mask      - 64-bit pattern replicated into every slot of the table.
+  address generate_vector_fp_mask(const char *stub_name, int64_t mask) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", stub_name);
+    address table_base = __ pc();
+
+    for (int copies_left = 8; copies_left > 0; copies_left--) {
+      __ emit_data64(mask, relocInfo::none);
+    }
+
+    return table_base;
+  }
+
+  // Emits a table of 32-bit constants and returns its start address.
+  // val0..val3 are always emitted; val4..val7 follow when len is at least
+  // AVX_256bit, and val8..val15 follow those when len is also at least
+  // AVX_512bit. Values the caller omits default to 0.
+  //
+  // stub_name - name passed to the StubCodeMark for this stub.
+  // len       - vector length selecting how many values are emitted; must
+  //             not be AVX_NoVec (asserted).
+  address generate_vector_custom_i32(const char *stub_name, Assembler::AvxVectorLen len,
+                                     int32_t val0, int32_t val1, int32_t val2, int32_t val3,
+                                     int32_t val4 = 0, int32_t val5 = 0, int32_t val6 = 0, int32_t val7 = 0,
+                                     int32_t val8 = 0, int32_t val9 = 0, int32_t val10 = 0, int32_t val11 = 0,
+                                     int32_t val12 = 0, int32_t val13 = 0, int32_t val14 = 0, int32_t val15 = 0) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", stub_name);
+    address table_base = __ pc();
+
+    assert(len != Assembler::AVX_NoVec, "vector len must be specified");
+    const int32_t lanes[16] = { val0,  val1,  val2,  val3,  val4,  val5,  val6,  val7,
+                                val8,  val9,  val10, val11, val12, val13, val14, val15 };
+
+    // The 512-bit check only applies once the 256-bit check has passed,
+    // so the emitted count is 4, 8 or 16.
+    int lane_count = 4;
+    if (len >= Assembler::AVX_256bit) {
+      lane_count = (len >= Assembler::AVX_512bit) ? 16 : 8;
+    }
+    for (int i = 0; i < lane_count; i++) {
+      __ emit_data(lanes[i], relocInfo::none, 0);
+    }
+
+    return table_base;
+  }
+
// Non-destructive plausibility checks for oops
//
// Arguments:
@@ -1124,59 +1190,28 @@ class StubGenerator: public StubCodeGenerator {
__ align(OptoLoopAlignment);
if (UseUnalignedLoadStores) {
Label L_end;
- // Copy 64-bytes per iteration
- if (UseAVX > 2) {
- Label L_loop_avx512, L_loop_avx2, L_32_byte_head, L_above_threshold, L_below_threshold;
-
- __ BIND(L_copy_bytes);
- __ cmpptr(qword_count, (-1 * AVX3Threshold / 8));
- __ jccb(Assembler::less, L_above_threshold);
- __ jmpb(L_below_threshold);
-
- __ bind(L_loop_avx512);
- __ evmovdqul(xmm0, Address(end_from, qword_count, Address::times_8, -56), Assembler::AVX_512bit);
- __ evmovdqul(Address(end_to, qword_count, Address::times_8, -56), xmm0, Assembler::AVX_512bit);
- __ bind(L_above_threshold);
- __ addptr(qword_count, 8);
- __ jcc(Assembler::lessEqual, L_loop_avx512);
- __ jmpb(L_32_byte_head);
-
- __ bind(L_loop_avx2);
+ __ BIND(L_loop);
+ if (UseAVX >= 2) {
__ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
__ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
__ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24));
__ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1);
- __ bind(L_below_threshold);
- __ addptr(qword_count, 8);
- __ jcc(Assembler::lessEqual, L_loop_avx2);
-
- __ bind(L_32_byte_head);
- __ subptr(qword_count, 4); // sub(8) and add(4)
- __ jccb(Assembler::greater, L_end);
} else {
- __ BIND(L_loop);
- if (UseAVX == 2) {
- __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
- __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
- __ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24));
- __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1);
- } else {
- __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
- __ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
- __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40));
- __ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1);
- __ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24));
- __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2);
- __ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8));
- __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3);
- }
-
- __ BIND(L_copy_bytes);
- __ addptr(qword_count, 8);
- __ jcc(Assembler::lessEqual, L_loop);
- __ subptr(qword_count, 4); // sub(8) and add(4)
- __ jccb(Assembler::greater, L_end);
+ __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56));
+ __ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0);
+ __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40));
+ __ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1);
+ __ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24));
+ __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2);
+ __ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8));
+ __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3);
}
+
+ __ BIND(L_copy_bytes);
+ __ addptr(qword_count, 8);
+ __ jcc(Assembler::lessEqual, L_loop);
+ __ subptr(qword_count, 4); // sub(8) and add(4)
+ __ jccb(Assembler::greater, L_end);
// Copy trailing 32 bytes
if (UseAVX >= 2) {
__ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24));
@@ -1232,60 +1267,29 @@ class StubGenerator: public StubCodeGenerator {
__ align(OptoLoopAlignment);
if (UseUnalignedLoadStores) {
Label L_end;
- // Copy 64-bytes per iteration
- if (UseAVX > 2) {
- Label L_loop_avx512, L_loop_avx2, L_32_byte_head, L_above_threshold, L_below_threshold;
-
- __ BIND(L_copy_bytes);
- __ cmpptr(qword_count, (AVX3Threshold / 8));
- __ jccb(Assembler::greater, L_above_threshold);
- __ jmpb(L_below_threshold);
-
- __ BIND(L_loop_avx512);
- __ evmovdqul(xmm0, Address(from, qword_count, Address::times_8, 0), Assembler::AVX_512bit);
- __ evmovdqul(Address(dest, qword_count, Address::times_8, 0), xmm0, Assembler::AVX_512bit);
- __ bind(L_above_threshold);
- __ subptr(qword_count, 8);
- __ jcc(Assembler::greaterEqual, L_loop_avx512);
- __ jmpb(L_32_byte_head);
-
- __ bind(L_loop_avx2);
+ __ BIND(L_loop);
+ if (UseAVX >= 2) {
__ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32));
__ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0);
- __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
- __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
- __ bind(L_below_threshold);
- __ subptr(qword_count, 8);
- __ jcc(Assembler::greaterEqual, L_loop_avx2);
-
- __ bind(L_32_byte_head);
- __ addptr(qword_count, 4); // add(8) and sub(4)
- __ jccb(Assembler::less, L_end);
+ __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
+ __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
} else {
- __ BIND(L_loop);
- if (UseAVX == 2) {
- __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32));
- __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0);
- __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8, 0));
- __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm1);
- } else {
- __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 48));
- __ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0);
- __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32));
- __ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1);
- __ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16));
- __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2);
- __ movdqu(xmm3, Address(from, qword_count, Address::times_8, 0));
- __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm3);
- }
+ __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 48));
+ __ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0);
+ __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32));
+ __ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1);
+ __ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16));
+ __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2);
+ __ movdqu(xmm3, Address(from, qword_count, Address::times_8, 0));
+ __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm3);
+ }
- __ BIND(L_copy_bytes);
- __ subptr(qword_count, 8);
- __ jcc(Assembler::greaterEqual, L_loop);
+ __ BIND(L_copy_bytes);
+ __ subptr(qword_count, 8);
+ __ jcc(Assembler::greaterEqual, L_loop);
- __ addptr(qword_count, 4); // add(8) and sub(4)
- __ jccb(Assembler::less, L_end);
- }
+ __ addptr(qword_count, 4); // add(8) and sub(4)
+ __ jccb(Assembler::less, L_end);
// Copy trailing 32 bytes
if (UseAVX >= 2) {
__ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 0));
@@ -1323,6 +1327,444 @@ class StubGenerator: public StubCodeGenerator {
__ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords
}
+#ifndef PRODUCT
+ int& get_profile_ctr(int shift) {
+   // 'shift' encodes the element type: byte(0), short(1), int(2);
+   // every other value selects the jlong counter.
+   switch (shift) {
+     case 0:  return SharedRuntime::_jbyte_array_copy_ctr;
+     case 1:  return SharedRuntime::_jshort_array_copy_ctr;
+     case 2:  return SharedRuntime::_jint_array_copy_ctr;
+     default: return SharedRuntime::_jlong_array_copy_ctr;
+   }
+ }
+#endif
+
+ void setup_argument_regs(BasicType type) {
+   // Either helper leaves: from => rdi, to => rsi, count => rdx
+   const bool byte_or_short = (type == T_BYTE || type == T_SHORT);
+   if (!byte_or_short) {
+     setup_arg_regs_using_thread();  // r9 is used to save r15_thread
+     return;
+   }
+   setup_arg_regs();                 // r9 and r10 may be used to save non-volatile registers
+ }
+
+ void restore_argument_regs(BasicType type) {
+   if (type != T_BYTE && type != T_SHORT) {
+     restore_arg_regs_using_thread();  // counterpart of setup_arg_regs_using_thread()
+     return;
+   }
+   restore_arg_regs();                 // counterpart of setup_arg_regs()
+ }
+
+#if COMPILER2_OR_JVMCI
+ // Note: The following rules apply to AVX3 optimized arraycopy stubs:
+ // - If the target supports AVX3 features (BW+VL+F) then the implementation uses 32 byte vectors (YMMs)
+ // for both special cases (various small block sizes) and the aligned copy loop. This is the
+ // default configuration.
+ // - If the copy length is above AVX3Threshold, then the implementation uses 64 byte vectors (ZMMs)
+ // for the main copy loop (and subsequent tail) since the bulk of the cycles will be consumed in it.
+ // - If the user forces MaxVectorSize=32 then above 4096 bytes REP MOVS is seen to show
+ // better performance for disjoint copies. For conjoint/backward copies a vector based
+ // copy performs better.
+ // - If the user sets AVX3Threshold=0, then special cases for small block sizes operate over
+ // 64 byte vector registers (ZMMs).
+
+ // Inputs:
+ // c_rarg0 - source array address
+ // c_rarg1 - destination array address
+ // c_rarg2 - element count, treated as ssize_t, can be zero
+ //
+ //
+ // Side Effects:
+ // *entry (when 'entry' is non-NULL) is set to the no-overlap entry point
+ // used by generate_conjoint_[byte/int/short/long]_copy().
+ //
+
+ address generate_disjoint_copy_avx3_masked(address* entry, const char *name, int shift,
+ bool aligned, bool is_oop, bool dest_uninitialized) {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", name);
+ address start = __ pc();
+ // Emits a forward-copy stub; 'shift' selects the element type: byte(0), short(1), int(2), long(3).
+ bool use64byteVector = MaxVectorSize > 32 && AVX3Threshold == 0; // passed to the special-case helper below
+ Label L_main_loop, L_main_loop_64bytes, L_tail, L_tail64, L_exit, L_entry;
+ Label L_repmovs, L_main_pre_loop, L_main_pre_loop_64bytes, L_pre_main_post_64;
+ const Register from = rdi; // source array address
+ const Register to = rsi; // destination array address
+ const Register count = rdx; // elements count
+ const Register temp1 = r8; // remaining element count
+ const Register temp2 = r11; // scratch
+ const Register temp3 = rax; // scratch
+ const Register temp4 = rcx; // running element count (offset index); also loaded with the REP MOVS count
+ // NOTE(review): no end_from/end_to pointers are used in this stub; offsets are
+ // formed from 'from'/'to' plus the running count in temp4 (temp4 * scale).
+
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+ assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
+
+ if (entry != NULL) {
+ *entry = __ pc();
+ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+ BLOCK_COMMENT("Entry:");
+ }
+
+ BasicType type_vec[] = { T_BYTE, T_SHORT, T_INT, T_LONG};
+ BasicType type = is_oop ? T_OBJECT : type_vec[shift];
+
+ setup_argument_regs(type);
+
+ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
+ if (dest_uninitialized) {
+ decorators |= IS_DEST_UNINITIALIZED;
+ }
+ if (aligned) {
+ decorators |= ARRAYCOPY_ALIGNED;
+ }
+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+ bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
+
+ {
+ // Type(shift) byte(0), short(1), int(2), long(3)
+ int loop_size[] = { 192, 96, 48, 24}; // elements per main-loop iteration (192 bytes each)
+ int threshold[] = { 4096, 2048, 1024, 512}; // element counts that all equal 4096 bytes
+
+ // UnsafeCopyMemory page error: continue after ucm
+ UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
+ // 'from', 'to' and 'count' are now valid
+
+ // temp1 holds remaining count and temp4 holds running count used to compute
+ // next address offset for start of to/from addresses (temp4 * scale).
+ __ mov64(temp4, 0);
+ __ movq(temp1, count);
+
+ // Zero length check.
+ __ BIND(L_tail);
+ __ cmpq(temp1, 0);
+ __ jcc(Assembler::lessEqual, L_exit);
+
+ // Special cases using 32 byte [masked] vector copy operations.
+ __ arraycopy_avx3_special_cases(xmm1, k2, from, to, temp1, shift,
+ temp4, temp3, use64byteVector, L_entry, L_exit);
+
+ // PRE-MAIN-POST loop for aligned copy.
+ __ BIND(L_entry);
+
+ if (AVX3Threshold != 0) {
+ __ cmpq(count, threshold[shift]);
+ if (MaxVectorSize == 64) {
+ // Copy using 64 byte vectors.
+ __ jcc(Assembler::greaterEqual, L_pre_main_post_64);
+ } else {
+ assert(MaxVectorSize < 64, "vector size should be < 64 bytes");
+ // REP MOVS offer a faster copy path.
+ __ jcc(Assembler::greaterEqual, L_repmovs);
+ }
+ }
+
+ if (MaxVectorSize < 64 || AVX3Threshold != 0) {
+ // Partial copy to make dst address 32 byte aligned.
+ __ movq(temp2, to);
+ __ andq(temp2, 31);
+ __ jcc(Assembler::equal, L_main_pre_loop);
+
+ __ negptr(temp2);
+ __ addq(temp2, 32); // temp2 = 32 - (to & 31): bytes up to the next 32 byte boundary
+ if (shift) {
+ __ shrq(temp2, shift); // bytes -> elements
+ }
+ __ movq(temp3, temp2);
+ __ copy32_masked_avx(to, from, xmm1, k2, temp3, temp4, temp1, shift);
+ __ movq(temp4, temp2);
+ __ movq(temp1, count);
+ __ subq(temp1, temp2); // remaining = count - elements copied by the alignment step
+
+ __ cmpq(temp1, loop_size[shift]);
+ __ jcc(Assembler::less, L_tail);
+
+ __ BIND(L_main_pre_loop);
+ __ subq(temp1, loop_size[shift]);
+
+ // Main loop with aligned copy block size of 192 bytes at 32 byte granularity.
+ __ BIND(L_main_loop);
+ __ copy64_avx(to, from, temp4, xmm1, false, shift, 0);
+ __ copy64_avx(to, from, temp4, xmm1, false, shift, 64);
+ __ copy64_avx(to, from, temp4, xmm1, false, shift, 128);
+ __ addptr(temp4, loop_size[shift]);
+ __ subq(temp1, loop_size[shift]);
+ __ jcc(Assembler::greater, L_main_loop);
+
+ __ addq(temp1, loop_size[shift]); // undo the pre-loop bias so temp1 is the true remainder
+
+ // Tail loop.
+ __ jmp(L_tail);
+
+ __ BIND(L_repmovs);
+ __ movq(temp2, temp1);
+ // Swap to(RSI) and from(RDI) addresses to comply with REP MOVs semantics.
+ __ movq(temp3, to);
+ __ movq(to, from);
+ __ movq(from, temp3);
+ // Save to/from for restoration post rep_mov.
+ __ movq(temp1, to);
+ __ movq(temp3, from);
+ if(shift < 3) {
+ __ shrq(temp2, 3-shift); // quad word count
+ }
+ __ movq(temp4 , temp2); // move quad word count into temp4(RCX).
+ __ rep_mov();
+ __ shlq(temp2, 3); // convert quad words into byte count.
+ if(shift) {
+ __ shrq(temp2, shift); // type specific count.
+ }
+ // Restore original addresses in to/from.
+ __ movq(to, temp3);
+ __ movq(from, temp1);
+ __ movq(temp4, temp2);
+ __ movq(temp1, count);
+ __ subq(temp1, temp2); // trailing part (less than a quad word size).
+ __ jmp(L_tail);
+ }
+
+ if (MaxVectorSize > 32) {
+ __ BIND(L_pre_main_post_64);
+ // Partial copy to make dst address 64 byte aligned.
+ __ movq(temp2, to);
+ __ andq(temp2, 63);
+ __ jcc(Assembler::equal, L_main_pre_loop_64bytes);
+
+ __ negptr(temp2);
+ __ addq(temp2, 64); // temp2 = 64 - (to & 63): bytes up to the next 64 byte boundary
+ if (shift) {
+ __ shrq(temp2, shift); // bytes -> elements
+ }
+ __ movq(temp3, temp2);
+ __ copy64_masked_avx(to, from, xmm1, k2, temp3, temp4, temp1, shift, 0 , true);
+ __ movq(temp4, temp2);
+ __ movq(temp1, count);
+ __ subq(temp1, temp2);
+
+ __ cmpq(temp1, loop_size[shift]);
+ __ jcc(Assembler::less, L_tail64);
+
+ __ BIND(L_main_pre_loop_64bytes);
+ __ subq(temp1, loop_size[shift]);
+
+ // Main loop with aligned copy block size of 192 bytes at
+ // 64 byte copy granularity.
+ __ BIND(L_main_loop_64bytes);
+ __ copy64_avx(to, from, temp4, xmm1, false, shift, 0 , true);
+ __ copy64_avx(to, from, temp4, xmm1, false, shift, 64, true);
+ __ copy64_avx(to, from, temp4, xmm1, false, shift, 128, true);
+ __ addptr(temp4, loop_size[shift]);
+ __ subq(temp1, loop_size[shift]);
+ __ jcc(Assembler::greater, L_main_loop_64bytes);
+
+ __ addq(temp1, loop_size[shift]);
+ // Zero length check.
+ __ jcc(Assembler::lessEqual, L_exit);
+
+ __ BIND(L_tail64);
+
+ // Tail handling using 64 byte [masked] vector copy operations.
+ use64byteVector = true;
+ __ arraycopy_avx3_special_cases(xmm1, k2, from, to, temp1, shift,
+ temp4, temp3, use64byteVector, L_entry, L_exit);
+ }
+ __ BIND(L_exit);
+ }
+
+ address ucme_exit_pc = __ pc(); // NOTE(review): not referenced again within this function
+ // When called from generic_arraycopy r11 contains specific values
+ // used during arraycopy epilogue, re-initializing r11.
+ if (is_oop) {
+ __ movq(r11, shift == 3 ? count : to);
+ }
+ bs->arraycopy_epilogue(_masm, decorators, type, from, to, count);
+ restore_argument_regs(type);
+ inc_counter_np(get_profile_ctr(shift)); // Update counter after rscratch1 is free
+ __ xorptr(rax, rax); // return 0
+ __ vzeroupper();
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+ return start;
+ }
+
+ // Inputs:
+ // c_rarg0 - source array address
+ // c_rarg1 - destination array address
+ // c_rarg2 - element count, treated as ssize_t, can be zero
+ //
+ //
+ address generate_conjoint_copy_avx3_masked(address* entry, const char *name, int shift,
+ address nooverlap_target, bool aligned, bool is_oop,
+ bool dest_uninitialized) {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", name);
+ address start = __ pc();
+ // Emits a backward-copy stub; 'shift' selects the element type: byte(0), short(1), int(2), long(3).
+ bool use64byteVector = MaxVectorSize > 32 && AVX3Threshold == 0; // passed to the special-case helper below
+
+ Label L_main_pre_loop, L_main_pre_loop_64bytes, L_pre_main_post_64;
+ Label L_main_loop, L_main_loop_64bytes, L_tail, L_tail64, L_exit, L_entry;
+ const Register from = rdi; // source array address
+ const Register to = rsi; // destination array address
+ const Register count = rdx; // elements count
+ const Register temp1 = r8; // remaining element count; also the index used by the main loops
+ const Register temp2 = rcx; // scratch
+ const Register temp3 = r11; // scratch
+ const Register temp4 = rax; // scratch
+ // NOTE(review): no end_from/end_to pointers are used in this stub; the main loops
+ // address blocks at negative displacements (-64/-128/-192) relative to index temp1.
+
+ __ enter(); // required for proper stackwalking of RuntimeStub frame
+ assert_clean_int(c_rarg2, rax); // Make sure 'count' is clean int.
+
+ if (entry != NULL) {
+ *entry = __ pc();
+ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+ BLOCK_COMMENT("Entry:");
+ }
+
+ array_overlap_test(nooverlap_target, (Address::ScaleFactor)(shift)); // presumably branches to nooverlap_target when a forward copy is safe - TODO confirm
+
+ BasicType type_vec[] = { T_BYTE, T_SHORT, T_INT, T_LONG};
+ BasicType type = is_oop ? T_OBJECT : type_vec[shift];
+
+ setup_argument_regs(type);
+
+ DecoratorSet decorators = IN_HEAP | IS_ARRAY;
+ if (dest_uninitialized) {
+ decorators |= IS_DEST_UNINITIALIZED;
+ }
+ if (aligned) {
+ decorators |= ARRAYCOPY_ALIGNED;
+ }
+ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+ bs->arraycopy_prologue(_masm, decorators, type, from, to, count);
+ {
+ // Type(shift) byte(0), short(1), int(2), long(3)
+ int loop_size[] = { 192, 96, 48, 24}; // elements per main-loop iteration (192 bytes each)
+ int threshold[] = { 4096, 2048, 1024, 512}; // element counts that all equal 4096 bytes
+
+ // UnsafeCopyMemory page error: continue after ucm
+ UnsafeCopyMemoryMark ucmm(this, !is_oop && !aligned, true);
+ // 'from', 'to' and 'count' are now valid
+
+ // temp1 holds remaining count.
+ __ movq(temp1, count);
+
+ // Zero length check.
+ __ BIND(L_tail);
+ __ cmpq(temp1, 0);
+ __ jcc(Assembler::lessEqual, L_exit);
+
+ __ mov64(temp2, 0);
+ __ movq(temp3, temp1);
+ // Special cases using 32 byte [masked] vector copy operations.
+ __ arraycopy_avx3_special_cases_conjoint(xmm1, k2, from, to, temp2, temp3, temp1, shift,
+ temp4, use64byteVector, L_entry, L_exit);
+
+ // PRE-MAIN-POST loop for aligned copy.
+ __ BIND(L_entry);
+
+ if (MaxVectorSize > 32 && AVX3Threshold != 0) {
+ __ cmpq(temp1, threshold[shift]);
+ __ jcc(Assembler::greaterEqual, L_pre_main_post_64);
+ }
+
+ if (MaxVectorSize < 64 || AVX3Threshold != 0) {
+ // Partial copy to make dst address 32 byte aligned.
+ __ leaq(temp2, Address(to, temp1, (Address::ScaleFactor)(shift), 0)); // temp2 = to + temp1 * element size
+ __ andq(temp2, 31); // bytes by which that address exceeds a 32 byte boundary
+ __ jcc(Assembler::equal, L_main_pre_loop);
+
+ if (shift) {
+ __ shrq(temp2, shift); // bytes -> elements
+ }
+ __ subq(temp1, temp2);
+ __ copy32_masked_avx(to, from, xmm1, k2, temp2, temp1, temp3, shift);
+
+ __ cmpq(temp1, loop_size[shift]);
+ __ jcc(Assembler::less, L_tail);
+
+ __ BIND(L_main_pre_loop);
+
+ // Main loop with aligned copy block size of 192 bytes at 32 byte granularity.
+ __ BIND(L_main_loop);
+ __ copy64_avx(to, from, temp1, xmm1, true, shift, -64);
+ __ copy64_avx(to, from, temp1, xmm1, true, shift, -128);
+ __ copy64_avx(to, from, temp1, xmm1, true, shift, -192);
+ __ subptr(temp1, loop_size[shift]);
+ __ cmpq(temp1, loop_size[shift]);
+ __ jcc(Assembler::greater, L_main_loop);
+
+ // Tail loop.
+ __ jmp(L_tail);
+ }
+
+ if (MaxVectorSize > 32) {
+ __ BIND(L_pre_main_post_64);
+ // Partial copy to make dst address 64 byte aligned.
+ __ leaq(temp2, Address(to, temp1, (Address::ScaleFactor)(shift), 0)); // temp2 = to + temp1 * element size
+ __ andq(temp2, 63); // bytes by which that address exceeds a 64 byte boundary
+ __ jcc(Assembler::equal, L_main_pre_loop_64bytes);
+
+ if (shift) {
+ __ shrq(temp2, shift); // bytes -> elements
+ }
+ __ subq(temp1, temp2);
+ __ copy64_masked_avx(to, from, xmm1, k2, temp2, temp1, temp3, shift, 0 , true);
+
+ __ cmpq(temp1, loop_size[shift]);
+ __ jcc(Assembler::less, L_tail64);
+
+ __ BIND(L_main_pre_loop_64bytes);
+
+ // Main loop with aligned copy block size of 192 bytes at
+ // 64 byte copy granularity.
+ __ BIND(L_main_loop_64bytes);
+ __ copy64_avx(to, from, temp1, xmm1, true, shift, -64 , true);
+ __ copy64_avx(to, from, temp1, xmm1, true, shift, -128, true);
+ __ copy64_avx(to, from, temp1, xmm1, true, shift, -192, true);
+ __ subq(temp1, loop_size[shift]);
+ __ cmpq(temp1, loop_size[shift]);
+ __ jcc(Assembler::greater, L_main_loop_64bytes);
+
+ // Zero length check.
+ __ cmpq(temp1, 0);
+ __ jcc(Assembler::lessEqual, L_exit);
+
+ __ BIND(L_tail64);
+
+ // Tail handling using 64 byte [masked] vector copy operations.
+ use64byteVector = true;
+ __ mov64(temp2, 0);
+ __ movq(temp3, temp1);
+ __ arraycopy_avx3_special_cases_conjoint(xmm1, k2, from, to, temp2, temp3, temp1, shift,
+ temp4, use64byteVector, L_entry, L_exit);
+ }
+ __ BIND(L_exit);
+ }
+ address ucme_exit_pc = __ pc(); // NOTE(review): not referenced again within this function
+ // When called from generic_arraycopy r11 contains specific values
+ // used during arraycopy epilogue, re-initializing r11.
+ if(is_oop) {
+ __ movq(r11, count);
+ }
+ bs->arraycopy_epilogue(_masm, decorators, type, from, to, count);
+ restore_argument_regs(type);
+ inc_counter_np(get_profile_ctr(shift)); // Update counter after rscratch1 is free
+ __ xorptr(rax, rax); // return 0
+ __ vzeroupper();
+ __ leave(); // required for proper stackwalking of RuntimeStub frame
+ __ ret(0);
+ return start;
+ }
+#endif // COMPILER2_OR_JVMCI
+
+
// Arguments:
// aligned - true => Input and output aligned on a HeapWord == 8-byte boundary
// ignored
@@ -1343,6 +1785,12 @@ class StubGenerator: public StubCodeGenerator {
// used by generate_conjoint_byte_copy().
//
address generate_disjoint_byte_copy(bool aligned, address* entry, const char *name) {
+#if COMPILER2_OR_JVMCI
+ if (VM_Version::supports_avx512vlbw() && MaxVectorSize >= 32) {
+ return generate_disjoint_copy_avx3_masked(entry, "jbyte_disjoint_arraycopy_avx3", 0,
+ aligned, false, false);
+ }
+#endif
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
@@ -1453,6 +1901,12 @@ class StubGenerator: public StubCodeGenerator {
//
address generate_conjoint_byte_copy(bool aligned, address nooverlap_target,
address* entry, const char *name) {
+#if COMPILER2_OR_JVMCI
+ if (VM_Version::supports_avx512vlbw() && MaxVectorSize >= 32) {
+ return generate_conjoint_copy_avx3_masked(entry, "jbyte_conjoint_arraycopy_avx3", 0,
+ nooverlap_target, aligned, false, false);
+ }
+#endif
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
@@ -1558,6 +2012,13 @@ class StubGenerator: public StubCodeGenerator {
// used by generate_conjoint_short_copy().
//
address generate_disjoint_short_copy(bool aligned, address *entry, const char *name) {
+#if COMPILER2_OR_JVMCI
+ if (VM_Version::supports_avx512vlbw() && MaxVectorSize >= 32) {
+ return generate_disjoint_copy_avx3_masked(entry, "jshort_disjoint_arraycopy_avx3", 1,
+ aligned, false, false);
+ }
+#endif
+
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
@@ -1682,6 +2143,12 @@ class StubGenerator: public StubCodeGenerator {
//
address generate_conjoint_short_copy(bool aligned, address nooverlap_target,
address *entry, const char *name) {
+#if COMPILER2_OR_JVMCI
+ if (VM_Version::supports_avx512vlbw() && MaxVectorSize >= 32) {
+ return generate_conjoint_copy_avx3_masked(entry, "jshort_conjoint_arraycopy_avx3", 1,
+ nooverlap_target, aligned, false, false);
+ }
+#endif
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
@@ -1780,6 +2247,13 @@ class StubGenerator: public StubCodeGenerator {
//
address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry,
const char *name, bool dest_uninitialized = false) {
+#if COMPILER2_OR_JVMCI
+ if (VM_Version::supports_avx512vlbw() && MaxVectorSize >= 32) {
+ return generate_disjoint_copy_avx3_masked(entry, "jint_disjoint_arraycopy_avx3", 2,
+ aligned, is_oop, dest_uninitialized);
+ }
+#endif
+
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
@@ -1884,6 +2358,12 @@ class StubGenerator: public StubCodeGenerator {
address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target,
address *entry, const char *name,
bool dest_uninitialized = false) {
+#if COMPILER2_OR_JVMCI
+ if (VM_Version::supports_avx512vlbw() && MaxVectorSize >= 32) {
+ return generate_conjoint_copy_avx3_masked(entry, "jint_conjoint_arraycopy_avx3", 2,
+ nooverlap_target, aligned, is_oop, dest_uninitialized);
+ }
+#endif
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
@@ -1991,6 +2471,12 @@ class StubGenerator: public StubCodeGenerator {
//
address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address *entry,
const char *name, bool dest_uninitialized = false) {
+#if COMPILER2_OR_JVMCI
+ if (VM_Version::supports_avx512vlbw() && MaxVectorSize >= 32) {
+ return generate_disjoint_copy_avx3_masked(entry, "jlong_disjoint_arraycopy_avx3", 3,
+ aligned, is_oop, dest_uninitialized);
+ }
+#endif
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
@@ -2095,6 +2581,12 @@ class StubGenerator: public StubCodeGenerator {
address generate_conjoint_long_oop_copy(bool aligned, bool is_oop,
address nooverlap_target, address *entry,
const char *name, bool dest_uninitialized = false) {
+#if COMPILER2_OR_JVMCI
+ if (VM_Version::supports_avx512vlbw() && MaxVectorSize >= 32) {
+ return generate_conjoint_copy_avx3_masked(entry, "jlong_conjoint_arraycopy_avx3", 3,
+ nooverlap_target, aligned, is_oop, dest_uninitialized);
+ }
+#endif
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ pc();
@@ -2513,7 +3005,7 @@ class StubGenerator: public StubCodeGenerator {
address long_copy_entry, address checkcast_copy_entry) {
Label L_failed, L_failed_0, L_objArray;
- Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs;
+ Label L_copy_shorts, L_copy_ints, L_copy_longs;
// Input registers
const Register src = c_rarg0; // source array oop
@@ -2524,7 +3016,7 @@ class StubGenerator: public StubCodeGenerator {
const Register length = c_rarg4;
const Register rklass_tmp = r9; // load_klass
#else
- const Address length(rsp, 6 * wordSize); // elements count is on stack on Win64
+ const Address length(rsp, 7 * wordSize); // elements count is on stack on Win64
const Register rklass_tmp = rdi; // load_klass
#endif
@@ -2546,6 +3038,10 @@ class StubGenerator: public StubCodeGenerator {
__ enter(); // required for proper stackwalking of RuntimeStub frame
+#ifdef _WIN64
+ __ push(rklass_tmp); // rdi is callee-save on Windows
+#endif
+
// bump this on entry, not on exit:
inc_counter_np(SharedRuntime::_generic_array_copy_ctr);
@@ -2676,6 +3172,10 @@ class StubGenerator: public StubCodeGenerator {
BLOCK_COMMENT("choose copy loop based on element size");
__ andl(rax_lh, Klass::_lh_log2_element_size_mask); // rax_lh -> rax_elsize
+#ifdef _WIN64
+ __ pop(rklass_tmp); // Restore callee-save rdi
+#endif
+
// next registers should be set before the jump to corresponding stub
const Register from = c_rarg0; // source array address
const Register to = c_rarg1; // destination array address
@@ -2684,7 +3184,6 @@ class StubGenerator: public StubCodeGenerator {
// 'from', 'to', 'count' registers should be set in such order
// since they are the same as 'src', 'src_pos', 'dst'.
- __ BIND(L_copy_bytes);
__ cmpl(rax_elsize, 0);
__ jccb(Assembler::notEqual, L_copy_shorts);
__ lea(from, Address(src, src_pos, Address::times_1, 0));// src_addr
@@ -2745,6 +3244,9 @@ class StubGenerator: public StubCodeGenerator {
arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr
__ movl2ptr(count, r11_length); // length
__ BIND(L_plain_copy);
+#ifdef _WIN64
+ __ pop(rklass_tmp); // Restore callee-save rdi
+#endif
__ jump(RuntimeAddress(oop_copy_entry));
__ BIND(L_checkcast_copy);
@@ -2784,6 +3286,10 @@ class StubGenerator: public StubCodeGenerator {
__ movl( sco_temp, Address(r11_dst_klass, sco_offset));
assert_clean_int(sco_temp, rax);
+#ifdef _WIN64
+ __ pop(rklass_tmp); // Restore callee-save rdi
+#endif
+
// the checkcast_copy loop needs two extra arguments:
assert(c_rarg3 == sco_temp, "#3 already in place");
// Set up arguments for checkcast_copy_entry.
@@ -2793,6 +3299,9 @@ class StubGenerator: public StubCodeGenerator {
}
__ BIND(L_failed);
+#ifdef _WIN64
+ __ pop(rklass_tmp); // Restore callee-save rdi
+#endif
__ xorptr(rax, rax);
__ notptr(rax); // return -1
__ leave(); // required for proper stackwalking of RuntimeStub frame
@@ -6326,12 +6835,25 @@ address generate_avx_ghash_processBlocks() {
StubRoutines::x86::_vector_float_sign_flip = generate_vector_mask("vector_float_sign_flip", 0x8000000080000000);
StubRoutines::x86::_vector_double_sign_mask = generate_vector_mask("vector_double_sign_mask", 0x7FFFFFFFFFFFFFFF);
StubRoutines::x86::_vector_double_sign_flip = generate_vector_mask("vector_double_sign_flip", 0x8000000000000000);
+ StubRoutines::x86::_vector_all_bits_set = generate_vector_mask("vector_all_bits_set", 0xFFFFFFFFFFFFFFFF);
StubRoutines::x86::_vector_short_to_byte_mask = generate_vector_mask("vector_short_to_byte_mask", 0x00ff00ff00ff00ff);
StubRoutines::x86::_vector_byte_perm_mask = generate_vector_byte_perm_mask("vector_byte_perm_mask");
+ StubRoutines::x86::_vector_int_to_byte_mask = generate_vector_mask("vector_int_to_byte_mask", 0x000000ff000000ff);
+ StubRoutines::x86::_vector_int_to_short_mask = generate_vector_mask("vector_int_to_short_mask", 0x0000ffff0000ffff);
+ StubRoutines::x86::_vector_32_bit_mask = generate_vector_custom_i32("vector_32_bit_mask", Assembler::AVX_512bit,
+ 0xFFFFFFFF, 0, 0, 0);
+ StubRoutines::x86::_vector_64_bit_mask = generate_vector_custom_i32("vector_64_bit_mask", Assembler::AVX_512bit,
+ 0xFFFFFFFF, 0xFFFFFFFF, 0, 0);
+ StubRoutines::x86::_vector_int_shuffle_mask = generate_vector_mask("vector_int_shuffle_mask", 0x0302010003020100);
+ StubRoutines::x86::_vector_short_shuffle_mask = generate_vector_mask("vector_short_shuffle_mask", 0x0100010001000100);
+ StubRoutines::x86::_vector_long_shuffle_mask = generate_vector_mask("vector_long_shuffle_mask", 0x0000000100000000);
StubRoutines::x86::_vector_long_sign_mask = generate_vector_mask("vector_long_sign_mask", 0x8000000000000000);
+ StubRoutines::x86::_vector_iota_indices = generate_iota_indices("iota_indices");
// support for verify_oop (must happen after universe_init)
- StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
+ if (VerifyOops) {
+ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
+ }
// data cache line writeback
StubRoutines::_data_cache_writeback = generate_data_cache_writeback();
diff --git a/src/hotspot/cpu/x86/stubRoutines_x86.cpp b/src/hotspot/cpu/x86/stubRoutines_x86.cpp
index 5d93d118e7b..45762902db2 100644
--- a/src/hotspot/cpu/x86/stubRoutines_x86.cpp
+++ b/src/hotspot/cpu/x86/stubRoutines_x86.cpp
@@ -44,12 +44,21 @@ address StubRoutines::x86::_upper_word_mask_addr = NULL;
address StubRoutines::x86::_shuffle_byte_flip_mask_addr = NULL;
address StubRoutines::x86::_k256_adr = NULL;
address StubRoutines::x86::_vector_short_to_byte_mask = NULL;
+address StubRoutines::x86::_vector_int_to_byte_mask = NULL;
+address StubRoutines::x86::_vector_int_to_short_mask = NULL;
+address StubRoutines::x86::_vector_all_bits_set = NULL;
+address StubRoutines::x86::_vector_short_shuffle_mask = NULL;
+address StubRoutines::x86::_vector_int_shuffle_mask = NULL;
+address StubRoutines::x86::_vector_long_shuffle_mask = NULL;
address StubRoutines::x86::_vector_float_sign_mask = NULL;
address StubRoutines::x86::_vector_float_sign_flip = NULL;
address StubRoutines::x86::_vector_double_sign_mask = NULL;
address StubRoutines::x86::_vector_double_sign_flip = NULL;
address StubRoutines::x86::_vector_byte_perm_mask = NULL;
address StubRoutines::x86::_vector_long_sign_mask = NULL;
+address StubRoutines::x86::_vector_iota_indices = NULL;
+address StubRoutines::x86::_vector_32_bit_mask = NULL;
+address StubRoutines::x86::_vector_64_bit_mask = NULL;
#ifdef _LP64
address StubRoutines::x86::_k256_W_adr = NULL;
address StubRoutines::x86::_k512_W_addr = NULL;
diff --git a/src/hotspot/cpu/x86/stubRoutines_x86.hpp b/src/hotspot/cpu/x86/stubRoutines_x86.hpp
index a23ee3666a6..fa4c34016a5 100644
--- a/src/hotspot/cpu/x86/stubRoutines_x86.hpp
+++ b/src/hotspot/cpu/x86/stubRoutines_x86.hpp
@@ -33,7 +33,7 @@ static bool returns_to_call_stub(address return_pc) { return return_pc == _call_
enum platform_dependent_constants {
code_size1 = 20000 LP64_ONLY(+10000), // simply increase if too small (assembler will crash if too small)
- code_size2 = 35300 LP64_ONLY(+11400) // simply increase if too small (assembler will crash if too small)
+ code_size2 = 35300 LP64_ONLY(+25000) // simply increase if too small (assembler will crash if too small)
};
class x86 {
@@ -146,8 +146,17 @@ class x86 {
static address _vector_float_sign_flip;
static address _vector_double_sign_mask;
static address _vector_double_sign_flip;
- static address _vector_byte_perm_mask;
static address _vector_long_sign_mask;
+ static address _vector_all_bits_set;
+ static address _vector_byte_perm_mask;
+ static address _vector_int_to_byte_mask;
+ static address _vector_int_to_short_mask;
+ static address _vector_32_bit_mask;
+ static address _vector_64_bit_mask;
+ static address _vector_int_shuffle_mask;
+ static address _vector_short_shuffle_mask;
+ static address _vector_long_shuffle_mask;
+ static address _vector_iota_indices;
#ifdef _LP64
static juint _k256_W[];
static address _k256_W_adr;
@@ -248,13 +257,50 @@ class x86 {
return _vector_double_sign_flip;
}
+ static address vector_all_bits_set() { // address held in _vector_all_bits_set (NULL until stub generation assigns it)
+ return _vector_all_bits_set;
+ }
+
static address vector_byte_perm_mask() {
return _vector_byte_perm_mask;
}
+ static address vector_int_to_byte_mask() { // address held in _vector_int_to_byte_mask (NULL until stub generation assigns it)
+ return _vector_int_to_byte_mask;
+ }
+
+ static address vector_int_to_short_mask() { // address held in _vector_int_to_short_mask (NULL until stub generation assigns it)
+ return _vector_int_to_short_mask;
+ }
+
+ static address vector_32_bit_mask() { // address held in _vector_32_bit_mask (NULL until stub generation assigns it)
+ return _vector_32_bit_mask;
+ }
+
+ static address vector_64_bit_mask() { // address held in _vector_64_bit_mask (NULL until stub generation assigns it)
+ return _vector_64_bit_mask;
+ }
+
+ static address vector_int_shuffle_mask() { // address held in _vector_int_shuffle_mask (NULL until stub generation assigns it)
+ return _vector_int_shuffle_mask;
+ }
+
+ static address vector_short_shuffle_mask() { // address held in _vector_short_shuffle_mask (NULL until stub generation assigns it)
+ return _vector_short_shuffle_mask;
+ }
+
+ static address vector_long_shuffle_mask() { // address held in _vector_long_shuffle_mask (NULL until stub generation assigns it)
+ return _vector_long_shuffle_mask;
+ }
+
static address vector_long_sign_mask() {
return _vector_long_sign_mask;
}
+
+ static address vector_iota_indices() { // address held in _vector_iota_indices (NULL until stub generation assigns it)
+ return _vector_iota_indices;
+ }
+
#ifdef _LP64
static address k256_W_addr() { return _k256_W_adr; }
static address k512_W_addr() { return _k512_W_addr; }
diff --git a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp
index e6b4b3c699b..072b3d144fa 100644
--- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp
+++ b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86.cpp
@@ -1106,11 +1106,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
Label Continue;
Label slow_path;
-#ifndef _LP64
- __ safepoint_poll(slow_path, thread, noreg);
-#else
- __ safepoint_poll(slow_path, r15_thread, rscratch1);
-#endif
+ __ safepoint_poll(slow_path, thread, true /* at_return */, false /* in_nmethod */);
__ cmpl(Address(thread, JavaThread::suspend_flags_offset()), 0);
__ jcc(Assembler::equal, Continue);
@@ -1765,9 +1761,6 @@ void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t,
address& vep) {
assert(t->is_valid() && t->tos_in() == vtos, "illegal template");
Label L;
- aep = __ pc(); // atos entry point
- __ push_ptr();
- __ jmp(L);
#ifndef _LP64
fep = __ pc(); // ftos entry point
__ push(ftos);
@@ -1786,8 +1779,8 @@ void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t,
lep = __ pc(); // ltos entry point
__ push_l();
__ jmp(L);
- bep = cep = sep = iep = __ pc(); // [bcsi]tos entry point
- __ push_i();
+ aep = bep = cep = sep = iep = __ pc(); // [abcsi]tos entry point
+ __ push_i_or_ptr();
vep = __ pc(); // vtos entry point
__ bind(L);
generate_and_dispatch(t);
diff --git a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp
index 0fc994422fa..ad76dd4f8b6 100644
--- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp
+++ b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_32.cpp
@@ -62,7 +62,8 @@ address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
Label slow_path;
// If we need a safepoint check, generate full interpreter entry.
- __ safepoint_poll(slow_path, noreg, rdi);
+ __ get_thread(rdi);
+ __ safepoint_poll(slow_path, rdi, false /* at_return */, false /* in_nmethod */);
// We don't generate local frame and don't align stack because
// we call stub code and there is no safepoint on this path.
@@ -111,7 +112,8 @@ address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractI
Label slow_path;
// If we need a safepoint check, generate full interpreter entry.
- __ safepoint_poll(slow_path, noreg, rdi);
+ __ get_thread(rdi);
+ __ safepoint_poll(slow_path, rdi, false /* at_return */, false /* in_nmethod */);
// We don't generate local frame and don't align stack because
// we call stub code and there is no safepoint on this path.
diff --git a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp
index 03def319620..664bf7bfa1c 100644
--- a/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp
+++ b/src/hotspot/cpu/x86/templateInterpreterGenerator_x86_64.cpp
@@ -191,7 +191,7 @@ address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
// c_rarg1: scratch (rsi on non-Win64, rdx on Win64)
Label slow_path;
- __ safepoint_poll(slow_path, r15_thread, rscratch1);
+ __ safepoint_poll(slow_path, r15_thread, true /* at_return */, false /* in_nmethod */);
// We don't generate local frame and don't align stack because
// we call stub code and there is no safepoint on this path.
@@ -237,7 +237,7 @@ address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractI
// r13: senderSP must preserved for slow path, set SP to it on fast path
Label slow_path;
- __ safepoint_poll(slow_path, r15_thread, rscratch1);
+ __ safepoint_poll(slow_path, r15_thread, false /* at_return */, false /* in_nmethod */);
// We don't generate local frame and don't align stack because
// we call stub code and there is no safepoint on this path.
diff --git a/src/hotspot/cpu/x86/templateTable_x86.cpp b/src/hotspot/cpu/x86/templateTable_x86.cpp
index 2f1cda2d5f9..9f0e5a8694d 100644
--- a/src/hotspot/cpu/x86/templateTable_x86.cpp
+++ b/src/hotspot/cpu/x86/templateTable_x86.cpp
@@ -2296,7 +2296,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
__ dispatch_only(vtos, true);
if (UseLoopCounter) {
- if (ProfileInterpreter) {
+ if (ProfileInterpreter && !TieredCompilation) {
// Out-of-line code to allocate method data oop.
__ bind(profile_method);
__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
@@ -2658,16 +2658,16 @@ void TemplateTable::_return(TosState state) {
Label no_safepoint;
NOT_PRODUCT(__ block_comment("Thread-local Safepoint poll"));
#ifdef _LP64
- __ testb(Address(r15_thread, Thread::polling_page_offset()), SafepointMechanism::poll_bit());
+ __ testb(Address(r15_thread, Thread::polling_word_offset()), SafepointMechanism::poll_bit());
#else
const Register thread = rdi;
__ get_thread(thread);
- __ testb(Address(thread, Thread::polling_page_offset()), SafepointMechanism::poll_bit());
+ __ testb(Address(thread, Thread::polling_word_offset()), SafepointMechanism::poll_bit());
#endif
__ jcc(Assembler::zero, no_safepoint);
__ push(state);
__ call_VM(noreg, CAST_FROM_FN_PTR(address,
- InterpreterRuntime::at_safepoint));
+ InterpreterRuntime::at_safepoint));
__ pop(state);
__ bind(no_safepoint);
}
diff --git a/src/hotspot/cpu/x86/vm_version_ext_x86.cpp b/src/hotspot/cpu/x86/vm_version_ext_x86.cpp
index 8042a8f1c69..35d07d71e46 100644
--- a/src/hotspot/cpu/x86/vm_version_ext_x86.cpp
+++ b/src/hotspot/cpu/x86/vm_version_ext_x86.cpp
@@ -256,7 +256,7 @@ const size_t VM_Version_Ext::CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
const size_t VM_Version_Ext::CPU_TYPE_DESC_BUF_SIZE = 256;
const size_t VM_Version_Ext::CPU_DETAILED_DESC_BUF_SIZE = 4096;
char* VM_Version_Ext::_cpu_brand_string = NULL;
-jlong VM_Version_Ext::_max_qualified_cpu_frequency = 0;
+int64_t VM_Version_Ext::_max_qualified_cpu_frequency = 0;
int VM_Version_Ext::_no_of_threads = 0;
int VM_Version_Ext::_no_of_cores = 0;
@@ -644,56 +644,50 @@ const char* VM_Version_Ext::cpu_description(void) {
}
/**
- * See Intel Application note 485 (chapter 10) for details
- * on frequency extraction from cpu brand string.
- * http://www.intel.com/content/dam/www/public/us/en/documents/application-notes/processor-identification-cpuid-instruction-note.pdf
+ * For information about extracting the frequency from the cpu brand string, please see:
*
+ * Intel Processor Identification and the CPUID Instruction
+ * Application Note 485
+ * May 2012
+ *
+ * The return value is the frequency in Hz.
*/
-jlong VM_Version_Ext::max_qualified_cpu_freq_from_brand_string(void) {
- // get brand string
+int64_t VM_Version_Ext::max_qualified_cpu_freq_from_brand_string(void) {
const char* const brand_string = cpu_brand_string();
if (brand_string == NULL) {
return 0;
}
-
- const u8 MEGA = 1000000;
- u8 multiplier = 0;
- jlong frequency = 0;
-
- // the frequency information in the cpu brand string
- // is given in either of two formats "x.xxyHz" or "xxxxyHz",
- // where y=M,G,T and x is digits
- const char* Hz_location = strchr(brand_string, 'H');
-
- if (Hz_location != NULL) {
- if (*(Hz_location + 1) == 'z') {
- // switch on y in "yHz"
- switch(*(Hz_location - 1)) {
- case 'M' :
- // Set multiplier to frequency is in Hz
- multiplier = MEGA;
- break;
- case 'G' :
- multiplier = MEGA * 1000;
- break;
- case 'T' :
- multiplier = MEGA * 1000 * 1000;
- break;
+ const int64_t MEGA = 1000000;
+ int64_t multiplier = 0;
+ int64_t frequency = 0;
+ uint8_t idx = 4; // Digits are read from up to 4 chars before y, so never match y any earlier.
+ // The brand string buffer is at most 48 bytes.
+ // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
+ for (; idx < 48-2; ++idx) {
+ // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
+ // Search brand string for "yHz" where y is M, G, or T.
+ if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
+ if (brand_string[idx] == 'M') {
+ multiplier = MEGA;
+ } else if (brand_string[idx] == 'G') {
+ multiplier = MEGA * 1000;
+ } else if (brand_string[idx] == 'T') {
+ multiplier = MEGA * MEGA;
}
+ break;
}
}
-
if (multiplier > 0) {
- // compute frequency (in Hz) from brand string
- if (*(Hz_location - 4) == '.') { // if format is "x.xx"
- frequency = (jlong)(*(Hz_location - 5) - '0') * (multiplier);
- frequency += (jlong)(*(Hz_location - 3) - '0') * (multiplier / 10);
- frequency += (jlong)(*(Hz_location - 2) - '0') * (multiplier / 100);
+ // Compute frequency (in Hz) from brand string.
+ if (brand_string[idx-3] == '.') { // if format is "x.xx"
+ frequency = (brand_string[idx-4] - '0') * multiplier;
+ frequency += (brand_string[idx-2] - '0') * multiplier / 10;
+ frequency += (brand_string[idx-1] - '0') * multiplier / 100;
} else { // format is "xxxx"
- frequency = (jlong)(*(Hz_location - 5) - '0') * 1000;
- frequency += (jlong)(*(Hz_location - 4) - '0') * 100;
- frequency += (jlong)(*(Hz_location - 3) - '0') * 10;
- frequency += (jlong)(*(Hz_location - 2) - '0');
+ frequency = (brand_string[idx-4] - '0') * 1000;
+ frequency += (brand_string[idx-3] - '0') * 100;
+ frequency += (brand_string[idx-2] - '0') * 10;
+ frequency += (brand_string[idx-1] - '0');
frequency *= multiplier;
}
}
@@ -701,7 +695,7 @@ jlong VM_Version_Ext::max_qualified_cpu_freq_from_brand_string(void) {
}
-jlong VM_Version_Ext::maximum_qualified_cpu_frequency(void) {
+int64_t VM_Version_Ext::maximum_qualified_cpu_frequency(void) {
if (_max_qualified_cpu_frequency == 0) {
_max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
}
diff --git a/src/hotspot/cpu/x86/vm_version_ext_x86.hpp b/src/hotspot/cpu/x86/vm_version_ext_x86.hpp
index 1a2134d8561..2d318dd390e 100644
--- a/src/hotspot/cpu/x86/vm_version_ext_x86.hpp
+++ b/src/hotspot/cpu/x86/vm_version_ext_x86.hpp
@@ -55,7 +55,7 @@ class VM_Version_Ext : public VM_Version {
static int _no_of_cores;
static int _no_of_packages;
static char* _cpu_brand_string;
- static jlong _max_qualified_cpu_frequency;
+ static int64_t _max_qualified_cpu_frequency;
static const char* cpu_family_description(void);
static const char* cpu_model_description(void);
@@ -72,7 +72,7 @@ class VM_Version_Ext : public VM_Version {
// Returns bytes written excluding termninating null byte.
static size_t cpu_write_support_string(char* const buf, size_t buf_len);
static void resolve_cpu_information_details(void);
- static jlong max_qualified_cpu_freq_from_brand_string(void);
+ static int64_t max_qualified_cpu_freq_from_brand_string(void);
public:
// Offsets for cpuid asm stub brand string
@@ -93,7 +93,7 @@ class VM_Version_Ext : public VM_Version {
static int number_of_cores(void);
static int number_of_sockets(void);
- static jlong maximum_qualified_cpu_frequency(void);
+ static int64_t maximum_qualified_cpu_frequency(void);
static bool supports_tscinv_ext(void);
diff --git a/src/hotspot/cpu/x86/vm_version_x86.cpp b/src/hotspot/cpu/x86/vm_version_x86.cpp
index 089d720e88e..d44560438a0 100644
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp
+++ b/src/hotspot/cpu/x86/vm_version_x86.cpp
@@ -763,6 +763,8 @@ void VM_Version::get_processor_features() {
if (is_intel()) { // Intel cpus specific settings
if (is_knights_family()) {
_features &= ~CPU_VZEROUPPER;
+ _features &= ~CPU_AVX512BW;
+ _features &= ~CPU_AVX512VL;
}
}
@@ -979,6 +981,11 @@ void VM_Version::get_processor_features() {
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
}
+ if (UseSHA3Intrinsics) {
+ warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
+ FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
+ }
+
if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
FLAG_SET_DEFAULT(UseSHA, false);
}
diff --git a/src/hotspot/cpu/x86/vm_version_x86.hpp b/src/hotspot/cpu/x86/vm_version_x86.hpp
index e2cf0e08489..5d91280e616 100644
--- a/src/hotspot/cpu/x86/vm_version_x86.hpp
+++ b/src/hotspot/cpu/x86/vm_version_x86.hpp
@@ -27,6 +27,7 @@
#include "memory/universe.hpp"
#include "runtime/abstract_vm_version.hpp"
+#include "utilities/macros.hpp"
class VM_Version : public Abstract_VM_Version {
friend class VMStructs;
@@ -1021,6 +1022,10 @@ enum Extended_Family {
return LP64_ONLY(true) NOT_LP64(false); // not implemented on x86_32
}
+ constexpr static bool supports_stack_watermark_barrier() {
+ return true;
+ }
+
// there are several insns to force cache line sync to memory which
// we can use to ensure mapped non-volatile memory is up to date with
// pending in-cache changes.
diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad
index 89ca2468a21..7551dfaa0fc 100644
--- a/src/hotspot/cpu/x86/x86.ad
+++ b/src/hotspot/cpu/x86/x86.ad
@@ -1097,6 +1097,7 @@ reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0
reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
+reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
%}
@@ -1165,6 +1166,64 @@ class HandlerImpl {
#endif
};
+
+inline uint vector_length(const Node* n) {
+ const TypeVect* vt = n->bottom_type()->is_vect();
+ return vt->length();
+}
+
+inline uint vector_length(const MachNode* use, MachOper* opnd) {
+ uint def_idx = use->operand_index(opnd);
+ Node* def = use->in(def_idx);
+ return def->bottom_type()->is_vect()->length();
+}
+
+inline uint vector_length_in_bytes(const Node* n) {
+ const TypeVect* vt = n->bottom_type()->is_vect();
+ return vt->length_in_bytes();
+}
+
+inline uint vector_length_in_bytes(const MachNode* use, MachOper* opnd) {
+ uint def_idx = use->operand_index(opnd);
+ Node* def = use->in(def_idx);
+ return def->bottom_type()->is_vect()->length_in_bytes();
+}
+
+inline BasicType vector_element_basic_type(const Node *n) {
+ return n->bottom_type()->is_vect()->element_basic_type();
+}
+
+inline BasicType vector_element_basic_type(const MachNode *use, MachOper* opnd) {
+ uint def_idx = use->operand_index(opnd);
+ Node* def = use->in(def_idx);
+ return def->bottom_type()->is_vect()->element_basic_type();
+}
+
+// Maps a vector size in bytes to its AVX length encoding; 4, 8 and 16 bytes share the 128-bit encoding.
+inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
+  if (bytes == 4 || bytes == 8 || bytes == 16) {
+    return Assembler::AVX_128bit;
+  }
+  if (bytes == 32) {
+    return Assembler::AVX_256bit;
+  }
+  if (bytes == 64) {
+    return Assembler::AVX_512bit;
+  }
+  ShouldNotReachHere(); // every other size is rejected
+  return Assembler::AVX_NoVec;
+}
+
+static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
+ return vector_length_encoding(vector_length_in_bytes(n));
+}
+
+static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
+ uint def_idx = use->operand_index(opnd);
+ Node* def = use->in(def_idx);
+ return vector_length_encoding(def);
+}
+
class Node::PD {
public:
enum NodeFlags {
@@ -1262,6 +1321,18 @@ int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
return offset;
}
+// Returns the assembler operand width (B, W, D or Q) for an element type.
+// Anything other than T_BYTE, T_SHORT or T_INT is asserted to be T_LONG.
+Assembler::Width widthForType(BasicType bt) {
+  switch (bt) {
+    case T_BYTE:  return Assembler::B;
+    case T_SHORT: return Assembler::W;
+    case T_INT:   return Assembler::D;
+    default:
+      assert(bt == T_LONG, "not a long: %s", type2name(bt));
+      return Assembler::Q;
+  }
+}
//=============================================================================
@@ -1278,8 +1349,16 @@ int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
static address double_signflip() { return (address)double_signflip_pool; }
#endif
static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
+ static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
+ static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
+ static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
+ static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
+ static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
+ static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
+ static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
+ static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
//=============================================================================
const bool Matcher::match_rule_supported(int opcode) {
@@ -1288,6 +1367,7 @@ const bool Matcher::match_rule_supported(int opcode) {
}
switch (opcode) {
case Op_AbsVL:
+ case Op_StoreVectorScatter:
if (UseAVX < 3) {
return false;
}
@@ -1309,11 +1389,20 @@ const bool Matcher::match_rule_supported(int opcode) {
}
break;
case Op_MulVL:
+ if (UseSSE < 4) { // only with SSE4_1 or AVX
+ return false;
+ }
+ break;
case Op_MulReductionVL:
if (VM_Version::supports_avx512dq() == false) {
return false;
}
break;
+ case Op_AddReductionVL:
+ if (UseSSE < 2) { // requires at least SSE2
+ return false;
+ }
+ break;
case Op_AbsVB:
case Op_AbsVS:
case Op_AbsVI:
@@ -1325,6 +1414,8 @@ const bool Matcher::match_rule_supported(int opcode) {
return false;
}
break;
+ case Op_VectorLoadShuffle:
+ case Op_VectorRearrange:
case Op_MulReductionVI:
if (UseSSE < 4) { // requires at least SSE4
return false;
@@ -1332,6 +1423,13 @@ const bool Matcher::match_rule_supported(int opcode) {
break;
case Op_SqrtVD:
case Op_SqrtVF:
+ case Op_VectorMaskCmp:
+ case Op_VectorCastB2X:
+ case Op_VectorCastS2X:
+ case Op_VectorCastI2X:
+ case Op_VectorCastL2X:
+ case Op_VectorCastF2X:
+ case Op_VectorCastD2X:
if (UseAVX < 1) { // enabled for AVX only
return false;
}
@@ -1346,7 +1444,7 @@ const bool Matcher::match_rule_supported(int opcode) {
break;
case Op_CMoveVF:
case Op_CMoveVD:
- if (UseAVX < 1 || UseAVX > 2) {
+ if (UseAVX < 1) { // enabled for AVX only
return false;
}
break;
@@ -1369,6 +1467,10 @@ const bool Matcher::match_rule_supported(int opcode) {
case Op_LShiftVB:
case Op_RShiftVB:
case Op_URShiftVB:
+ case Op_VectorInsert:
+ case Op_VectorLoadMask:
+ case Op_VectorStoreMask:
+ case Op_VectorBlend:
if (UseSSE < 4) {
return false;
}
@@ -1390,6 +1492,9 @@ const bool Matcher::match_rule_supported(int opcode) {
return false;
}
break;
+ case Op_ExtractB:
+ case Op_ExtractL:
+ case Op_ExtractI:
case Op_RoundDoubleMode:
if (UseSSE < 4) {
return false;
@@ -1400,6 +1505,17 @@ const bool Matcher::match_rule_supported(int opcode) {
return false; // 128bit vroundpd is not available
}
break;
+ case Op_LoadVectorGather:
+ if (UseAVX < 2) {
+ return false;
+ }
+ break;
+ case Op_FmaVD:
+ case Op_FmaVF:
+ if (!UseFMA) {
+ return false;
+ }
+ break;
case Op_MacroLogicV:
if (UseAVX < 3 || !UseVectorMacroLogic) {
return false;
@@ -1460,8 +1576,9 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
break;
case Op_AbsVD:
case Op_NegVD:
+ case Op_MulVL:
if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
- return false; // 512bit vandpd and vxorpd are not available
+ return false; // 512bit vpmullq, vandpd and vxorpd are not available
}
break;
case Op_CMoveVF:
@@ -1482,6 +1599,142 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
return false; // implementation limitation (only vcmov4D_reg is present)
}
break;
+ case Op_MaxV:
+ case Op_MinV:
+ if (UseSSE < 4 && is_integral_type(bt)) {
+ return false;
+ }
+ if ((bt == T_FLOAT || bt == T_DOUBLE)) {
+ // Float/Double intrinsics are enabled for AVX family currently.
+ if (UseAVX == 0) {
+ return false;
+ }
+ if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
+ return false;
+ }
+ }
+ break;
+ case Op_AddReductionVI:
+ if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
+ return false;
+ }
+ // fallthrough
+ case Op_AndReductionV:
+ case Op_OrReductionV:
+ case Op_XorReductionV:
+ if (is_subword_type(bt) && (UseSSE < 4)) {
+ return false;
+ }
+#ifndef _LP64
+ if (bt == T_BYTE || bt == T_LONG) {
+ return false;
+ }
+#endif
+ break;
+#ifndef _LP64
+ case Op_VectorInsert:
+ if (bt == T_LONG || bt == T_DOUBLE) {
+ return false;
+ }
+ break;
+#endif
+ case Op_MinReductionV:
+ case Op_MaxReductionV:
+ if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
+ return false;
+ } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
+ return false;
+ }
+ // Float/Double intrinsics enabled for AVX family.
+ if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
+ return false;
+ }
+ if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
+ return false;
+ }
+#ifndef _LP64
+ if (bt == T_BYTE || bt == T_LONG) {
+ return false;
+ }
+#endif
+ break;
+ case Op_VectorTest:
+ if (UseSSE < 4) {
+ return false; // Implementation limitation
+ } else if (size_in_bits < 128) {
+ return false; // Implementation limitation
+ } else if (size_in_bits == 512 && (VM_Version::supports_avx512bw() == false)) {
+ return false; // Implementation limitation
+ }
+ break;
+ case Op_VectorLoadShuffle:
+ case Op_VectorRearrange:
+ if(vlen == 2) {
+ return false; // Implementation limitation due to how shuffle is loaded
+ } else if (size_in_bits == 256 && UseAVX < 2) {
+ return false; // Implementation limitation
+ } else if (bt == T_BYTE && size_in_bits >= 256 && !VM_Version::supports_avx512_vbmi()) {
+ return false; // Implementation limitation
+ } else if (bt == T_SHORT && size_in_bits >= 256 && !VM_Version::supports_avx512bw()) {
+ return false; // Implementation limitation
+ }
+ break;
+ case Op_VectorLoadMask:
+ if (size_in_bits == 256 && UseAVX < 2) {
+ return false; // Implementation limitation
+ }
+ // fallthrough
+ case Op_VectorStoreMask:
+ if (vlen == 2) {
+ return false; // Implementation limitation
+ }
+ break;
+ case Op_VectorCastB2X:
+ if (size_in_bits == 256 && UseAVX < 2) {
+ return false; // Implementation limitation
+ }
+ break;
+ case Op_VectorCastS2X:
+ if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
+ return false;
+ }
+ break;
+ case Op_VectorCastI2X:
+ if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
+ return false;
+ }
+ break;
+ case Op_VectorCastL2X:
+ if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
+ return false;
+ } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
+ return false;
+ }
+ break;
+ case Op_VectorCastF2X:
+ case Op_VectorCastD2X:
+ if (is_integral_type(bt)) {
+ // Casts from FP to integral types require special fixup logic not easily
+ // implementable with vectors.
+ return false; // Implementation limitation
+ } // fallthrough: there is no break here, so non-integral casts also run the MulReductionVI check below
+ case Op_MulReductionVI:
+ if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
+ return false;
+ }
+ break;
+ case Op_StoreVectorScatter:
+ if(bt == T_BYTE || bt == T_SHORT) {
+ return false;
+ } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
+ return false;
+ }
+ // fallthrough
+ case Op_LoadVectorGather:
+ if (size_in_bits == 64 ) {
+ return false;
+ }
+ break;
}
return true; // Per default match rules are supported.
}
@@ -1540,6 +1793,10 @@ bool Matcher::is_generic_vector(MachOper* opnd) {
//------------------------------------------------------------------------
+bool Matcher::supports_vector_variable_shifts(void) {
+ return (UseAVX >= 2);
+}
+
const bool Matcher::has_predicated_vectors(void) {
bool ret_value = false;
if (UseAVX > 2) {
@@ -1831,40 +2088,28 @@ bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack,
void Compile::reshape_address(AddPNode* addp) {
}
-static inline uint vector_length(const MachNode* n) {
- const TypeVect* vt = n->bottom_type()->is_vect();
- return vt->length();
-}
-
-static inline uint vector_length(const MachNode* use, MachOper* opnd) {
- uint def_idx = use->operand_index(opnd);
- Node* def = use->in(def_idx);
- return def->bottom_type()->is_vect()->length();
-}
-
-static inline uint vector_length_in_bytes(const MachNode* n) {
- const TypeVect* vt = n->bottom_type()->is_vect();
- return vt->length_in_bytes();
-}
-
-static inline uint vector_length_in_bytes(const MachNode* use, MachOper* opnd) {
- uint def_idx = use->operand_index(opnd);
- Node* def = use->in(def_idx);
- return def->bottom_type()->is_vect()->length_in_bytes();
+static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
+ switch (bt) {
+ case BoolTest::eq: return Assembler::eq;
+ case BoolTest::ne: return Assembler::neq;
+ case BoolTest::le: return Assembler::le;
+ case BoolTest::ge: return Assembler::nlt;
+ case BoolTest::lt: return Assembler::lt;
+ case BoolTest::gt: return Assembler::nle;
+ default : ShouldNotReachHere(); return Assembler::_false;
+ }
}
-static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* n) {
- switch(vector_length_in_bytes(n)) {
- case 4: // fall-through
- case 8: // fall-through
- case 16: return Assembler::AVX_128bit;
- case 32: return Assembler::AVX_256bit;
- case 64: return Assembler::AVX_512bit;
-
- default: {
- ShouldNotReachHere();
- return Assembler::AVX_NoVec;
- }
+static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
+ switch (bt) {
+ case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling
+ // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
+ case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
+ case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling
+ case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling
+ case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling
+ case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling
+ default: ShouldNotReachHere(); return Assembler::FALSE_OS;
}
}
@@ -2191,6 +2436,13 @@ encode %{
%}
+// Operands for bound floating pointer register arguments
+operand rxmm0() %{
+ constraint(ALLOC_IN_RC(xmm0_reg));
+ match(VecX);
+ format%{%}
+ interface(REG_INTER);
+%}
//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
@@ -2957,9 +3209,9 @@ instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
ins_cost(150);
format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
ins_encode %{
- int vector_len = 0;
+ int vlen_enc = Assembler::AVX_128bit;
__ vandps($dst$$XMMRegister, $src$$XMMRegister,
- ExternalAddress(float_signmask()), vector_len);
+ ExternalAddress(float_signmask()), vlen_enc);
%}
ins_pipe(pipe_slow);
%}
@@ -2983,9 +3235,9 @@ instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
"# abs double by sign masking" %}
ins_encode %{
- int vector_len = 0;
+ int vlen_enc = Assembler::AVX_128bit;
__ vandpd($dst$$XMMRegister, $src$$XMMRegister,
- ExternalAddress(double_signmask()), vector_len);
+ ExternalAddress(double_signmask()), vlen_enc);
%}
ins_pipe(pipe_slow);
%}
@@ -3109,6 +3361,93 @@ instruct sqrtD_imm(regD dst, immD con) %{
ins_pipe(pipe_slow);
%}
+// ---------------------------------------- VectorReinterpret ------------------------------------
+
+instruct reinterpret(vec dst) %{
+ predicate(vector_length_in_bytes(n) == vector_length_in_bytes(n->in(1))); // dst == src
+ match(Set dst (VectorReinterpret dst));
+ ins_cost(125);
+ format %{ "vector_reinterpret $dst\t!" %}
+ ins_encode %{
+ // empty
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct reinterpret_expand(vec dst, vec src, rRegP scratch) %{
+ predicate(UseAVX == 0 &&
+ (vector_length_in_bytes(n->in(1)) < vector_length_in_bytes(n))); // src < dst
+ match(Set dst (VectorReinterpret src));
+ ins_cost(125);
+ effect(TEMP dst, TEMP scratch);
+ format %{ "vector_reinterpret_expand $dst,$src\t! using $scratch as TEMP" %}
+ ins_encode %{
+ assert(vector_length_in_bytes(this) <= 16, "required");
+ assert(vector_length_in_bytes(this, $src) <= 8, "required");
+
+ int src_vlen_in_bytes = vector_length_in_bytes(this, $src);
+ if (src_vlen_in_bytes == 4) {
+ __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), $scratch$$Register);
+ } else {
+ assert(src_vlen_in_bytes == 8, "");
+ __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), $scratch$$Register);
+ }
+ __ pand($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vreinterpret_expand4(legVec dst, vec src, rRegP scratch) %{
+ predicate(UseAVX > 0 &&
+ (vector_length_in_bytes(n->in(1)) == 4) && // src
+ (vector_length_in_bytes(n->in(1)) < vector_length_in_bytes(n))); // src < dst
+ match(Set dst (VectorReinterpret src));
+ ins_cost(125);
+ effect(TEMP scratch);
+ format %{ "vector_reinterpret_expand $dst,$src\t! using $scratch as TEMP" %}
+ ins_encode %{
+ __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, $scratch$$Register);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+
+instruct vreinterpret_expand(legVec dst, vec src) %{
+ predicate(UseAVX > 0 &&
+ (vector_length_in_bytes(n->in(1)) > 4) && // src
+ (vector_length_in_bytes(n->in(1)) < vector_length_in_bytes(n))); // src < dst
+ match(Set dst (VectorReinterpret src));
+ ins_cost(125);
+ format %{ "vector_reinterpret_expand $dst,$src\t!" %}
+ ins_encode %{
+ switch (vector_length_in_bytes(this, $src)) {
+ case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
+ case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
+ case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
+ default: ShouldNotReachHere();
+ }
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct reinterpret_shrink(vec dst, legVec src) %{
+ predicate(vector_length_in_bytes(n->in(1)) > vector_length_in_bytes(n)); // src > dst
+ match(Set dst (VectorReinterpret src));
+ ins_cost(125);
+ format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
+ ins_encode %{
+ switch (vector_length_in_bytes(this)) {
+ case 4: __ movflt ($dst$$XMMRegister, $src$$XMMRegister); break;
+ case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break;
+ case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
+ case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
+ default: ShouldNotReachHere();
+ }
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// ----------------------------------------------------------------------------------------------------
#ifdef _LP64
instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
@@ -3146,19 +3485,19 @@ instruct roundD_imm(legRegD dst, immD con, immU8 rmode, rRegI scratch_reg) %{
%}
instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
- predicate(n->as_Vector()->length() < 8);
+ predicate(vector_length(n) < 8);
match(Set dst (RoundDoubleModeV src rmode));
format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
ins_encode %{
assert(UseAVX > 0, "required");
- int vector_len = vector_length_encoding(this);
- __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
- predicate(n->as_Vector()->length() == 8);
+ predicate(vector_length(n) == 8);
match(Set dst (RoundDoubleModeV src rmode));
format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
ins_encode %{
@@ -3169,19 +3508,19 @@ instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
%}
instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
- predicate(n->as_Vector()->length() < 8);
+ predicate(vector_length(n) < 8);
match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
ins_encode %{
assert(UseAVX > 0, "required");
- int vector_len = vector_length_encoding(this);
- __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
- predicate(n->as_Vector()->length() == 8);
+ predicate(vector_length(n) == 8);
match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
ins_encode %{
@@ -3253,7 +3592,7 @@ instruct MoveLeg2Vec(vec dst, legVec src) %{
// ============================================================================
-// Load vectors
+// Load vectors generic operand pattern
instruct loadV(vec dst, memory mem) %{
match(Set dst (LoadVector mem));
ins_cost(125);
@@ -3289,6 +3628,81 @@ instruct storeV(memory mem, vec src) %{
ins_pipe( pipe_slow );
%}
+// ---------------------------------------- Gather ------------------------------------
+
+// Gather INT, LONG, FLOAT, DOUBLE
+
+instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
+  predicate(vector_length_in_bytes(n) <= 32); // result vectors of at most 32 bytes; evgather takes the 64-byte case
+  match(Set dst (LoadVectorGather mem idx));
+  effect(TEMP dst, TEMP tmp, TEMP mask);
+  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
+  ins_encode %{
+    assert(UseAVX >= 2, "sanity");
+    // Length encoding and element type are both derived from this node.
+    int vlen_enc = vector_length_encoding(this);
+    BasicType elem_bt = vector_element_basic_type(this);
+
+    assert(vector_length_in_bytes(this) >= 16, "sanity");
+    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
+    // Fill $mask from the vector_all_bits_set() constant; presumably this enables every lane -- confirm against vgather.
+    if (vlen_enc == Assembler::AVX_128bit) {
+      __ movdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set()));
+    } else {
+      __ vmovdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set()));
+    }
+    __ lea($tmp$$Register, $mem$$Address); // vgather is handed the base address in a register
+    __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct evgather(vec dst, memory mem, vec idx, rRegP tmp) %{
+  predicate(vector_length_in_bytes(n) == 64); // 64-byte result vectors only; gather takes the smaller sizes
+  match(Set dst (LoadVectorGather mem idx));
+  effect(TEMP dst, TEMP tmp);
+  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and k2 as TEMP" %}
+  ins_encode %{
+    assert(UseAVX > 2, "sanity");
+
+    int vlen_enc = vector_length_encoding(this);
+    BasicType elem_bt = vector_element_basic_type(this);
+
+    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
+    // k2 is hard-wired as the mask register (the format string above names it too).
+    KRegister ktmp = k2;
+    __ kmovwl(ktmp, ExternalAddress(vector_all_bits_set()), $tmp$$Register); // load via ktmp so the mask register cannot diverge from the one given to evgather
+    __ lea($tmp$$Register, $mem$$Address); // $tmp was also passed to kmovwl above; from here on it holds the base address
+    __ evgather(elem_bt, $dst$$XMMRegister, ktmp, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// ====================Scatter=======================================
+
+// Scatter INT, LONG, FLOAT, DOUBLE
+
+instruct scatter(memory mem, vec src, vec idx, rRegP tmp) %{
+  match(Set mem (StoreVectorScatter mem (Binary src idx)));
+  effect(TEMP tmp);
+  format %{ "store_vector_scatter $mem, $idx, $src\t! using k2 and $tmp as TEMP" %}
+  ins_encode %{
+    assert(UseAVX > 2, "sanity");
+    // A store node carries no result vector, so length and element type are taken from $src.
+    int vlen_enc = vector_length_encoding(this, $src);
+    BasicType elem_bt = vector_element_basic_type(this, $src);
+
+    assert(vector_length_in_bytes(this, $src) >= 16, "sanity");
+    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
+    // k2 is hard-wired as the mask register (the format string above names it too).
+    KRegister ktmp = k2;
+    __ kmovwl(ktmp, ExternalAddress(vector_all_bits_set()), $tmp$$Register); // load via ktmp so the mask register cannot diverge from the one given to evscatter
+    __ lea($tmp$$Register, $mem$$Address); // $tmp was also passed to kmovwl above; from here on it holds the base address
+    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, ktmp, $src$$XMMRegister, vlen_enc);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
// ====================REPLICATE=======================================
// Replicate byte scalar to be vector
@@ -3322,8 +3736,8 @@ instruct ReplB_mem(vec dst, memory mem) %{
match(Set dst (ReplicateB (LoadB mem)));
format %{ "replicateB $dst,$mem" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -3353,7 +3767,7 @@ instruct ReplB_imm(vec dst, immI con) %{
%}
// Replicate byte scalar zero to be vector
-instruct ReplB_zero(vec dst, immI0 zero) %{
+instruct ReplB_zero(vec dst, immI_0 zero) %{
match(Set dst (ReplicateB zero));
format %{ "replicateB $dst,$zero" %}
ins_encode %{
@@ -3430,7 +3844,7 @@ instruct ReplS_imm(vec dst, immI con) %{
ins_pipe( fpu_reg_reg );
%}
-instruct ReplS_zero(vec dst, immI0 zero) %{
+instruct ReplS_zero(vec dst, immI_0 zero) %{
match(Set dst (ReplicateS zero));
format %{ "replicateS $dst,$zero" %}
ins_encode %{
@@ -3477,8 +3891,8 @@ instruct ReplI_mem(vec dst, memory mem) %{
__ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
} else {
assert(VM_Version::supports_avx2(), "sanity");
- int vector_len = vector_length_encoding(this);
- __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
}
%}
ins_pipe( pipe_slow );
@@ -3497,16 +3911,16 @@ instruct ReplI_imm(vec dst, immI con) %{
}
} else {
assert(VM_Version::supports_avx2(), "sanity");
- int vector_len = vector_length_encoding(this);
+ int vlen_enc = vector_length_encoding(this);
__ movq($dst$$XMMRegister, const_addr);
- __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
}
%}
ins_pipe( pipe_slow );
%}
// Replicate integer (4 byte) scalar zero to be vector
-instruct ReplI_zero(vec dst, immI0 zero) %{
+instruct ReplI_zero(vec dst, immI_0 zero) %{
match(Set dst (ReplicateI zero));
format %{ "replicateI $dst,$zero" %}
ins_encode %{
@@ -3562,7 +3976,7 @@ instruct ReplL_reg(vec dst, rRegL src) %{
#else // _LP64
// Replicate long (8 byte) scalar to be vector
instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{
- predicate(n->as_Vector()->length() <= 4);
+ predicate(vector_length(n) <= 4);
match(Set dst (ReplicateL src));
effect(TEMP dst, USE src, TEMP tmp);
format %{ "replicateL $dst,$src" %}
@@ -3574,11 +3988,11 @@ instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
} else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
- int vector_len = Assembler::AVX_256bit;
+ int vlen_enc = Assembler::AVX_256bit;
__ movdl($dst$$XMMRegister, $src$$Register);
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
- __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
} else {
__ movdl($dst$$XMMRegister, $src$$Register);
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
@@ -3591,7 +4005,7 @@ instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{
%}
instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{
- predicate(n->as_Vector()->length() == 8);
+ predicate(vector_length(n) == 8);
match(Set dst (ReplicateL src));
effect(TEMP dst, USE src, TEMP tmp);
format %{ "replicateL $dst,$src" %}
@@ -3604,11 +4018,11 @@ instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
__ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
} else {
- int vector_len = Assembler::AVX_512bit;
+ int vlen_enc = Assembler::AVX_512bit;
__ movdl($dst$$XMMRegister, $src$$Register);
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
- __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
}
%}
ins_pipe( pipe_slow );
@@ -3689,8 +4103,8 @@ instruct ReplF_reg(vec dst, vlRegF src) %{
if (vlen <= 4) {
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
} else if (VM_Version::supports_avx2()) {
- int vector_len = vector_length_encoding(this);
- __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); // reg-to-reg variant requires AVX2
+ int vlen_enc = vector_length_encoding(this);
+ __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
} else {
assert(vlen == 8, "sanity");
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
@@ -3710,8 +4124,8 @@ instruct ReplF_mem(vec dst, memory mem) %{
__ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
} else {
assert(VM_Version::supports_avx(), "sanity");
- int vector_len = vector_length_encoding(this);
- __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
}
%}
ins_pipe( pipe_slow );
@@ -3743,8 +4157,8 @@ instruct ReplD_reg(vec dst, vlRegD src) %{
if (vlen == 2) {
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
} else if (VM_Version::supports_avx2()) {
- int vector_len = vector_length_encoding(this);
- __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); // reg-to-reg variant requires AVX2
+ int vlen_enc = vector_length_encoding(this);
+ __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
} else {
assert(vlen == 4, "sanity");
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
@@ -3764,8 +4178,8 @@ instruct ReplD_mem(vec dst, memory mem) %{
__ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x44);
} else {
assert(VM_Version::supports_avx(), "sanity");
- int vector_len = vector_length_encoding(this);
- __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
}
%}
ins_pipe( pipe_slow );
@@ -3786,35 +4200,239 @@ instruct ReplD_zero(vec dst, immD0 zero) %{
ins_pipe( fpu_reg_reg );
%}
-// ====================REDUCTION ARITHMETIC=======================================
-// =======================Int Reduction==========================================
+// ====================VECTOR INSERT=======================================
-instruct reductionI(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT &&
- n->in(2)->bottom_type()->is_vect()->length() < 16);
- match(Set dst (AddReductionVI src1 src2));
- match(Set dst (MulReductionVI src1 src2));
- match(Set dst (AndReductionV src1 src2));
- match(Set dst ( OrReductionV src1 src2));
- match(Set dst (XorReductionV src1 src2));
- effect(TEMP vtmp1, TEMP vtmp2);
- format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
+instruct insert(vec dst, rRegI val, immU8 idx) %{
+  predicate(vector_length_in_bytes(n) < 32); // results narrower than 32 bytes; insert32/insert64 take the wider ones
+  match(Set dst (VectorInsert (Binary dst val) idx));
+  format %{ "vector_insert $dst,$val,$idx" %}
ins_encode %{
- int opcode = this->ideal_Opcode();
- int vlen = vector_length(this, $src2);
- __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
+    assert(UseSSE >= 4, "required");
+    assert(vector_length_in_bytes(this) >= 8, "required");
+
+    BasicType elem_bt = vector_element_basic_type(this);
+
+    assert(is_integral_type(elem_bt), "");
+    assert($idx$$constant < (int)vector_length(this), "out of bounds");
+    // $dst is both input and output (see the match rule), so the element is replaced in place.
+    __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
+  predicate(vector_length_in_bytes(n) == 32); // 32-byte result vectors
+  match(Set dst (VectorInsert (Binary src val) idx));
+  effect(TEMP vtmp);
+  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
+  ins_encode %{
+    // elem_per_lane: how many elements of this type fit in one 16-byte lane; log2epr is its log2.
+    BasicType elem_bt = vector_element_basic_type(this);
+    int elem_per_lane = 16/type2aelembytes(elem_bt);
+    int log2epr = log2(elem_per_lane);
+
+    assert(is_integral_type(elem_bt), "sanity");
+    assert($idx$$constant < (int)vector_length(this), "out of bounds");
+    // x_idx: element position inside a 16-byte lane; y_idx: which of the two lanes.
+    uint x_idx = $idx$$constant & right_n_bits(log2epr);
+    uint y_idx = ($idx$$constant >> log2epr) & 1;
+    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); // copy the selected lane of $src into $vtmp
+    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); // overwrite one element in the copy
+    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); // $dst = $src with that lane replaced
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
+  predicate(vector_length_in_bytes(n) == 64); // 64-byte result vectors
+  match(Set dst (VectorInsert (Binary src val) idx));
+  effect(TEMP vtmp);
+  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
+  ins_encode %{
+    assert(UseAVX > 2, "sanity");
+    // elem_per_lane: how many elements of this type fit in one 16-byte lane; log2epr is its log2.
+    BasicType elem_bt = vector_element_basic_type(this);
+    int elem_per_lane = 16/type2aelembytes(elem_bt);
+    int log2epr = log2(elem_per_lane);
+
+    assert(is_integral_type(elem_bt), "");
+    assert($idx$$constant < (int)vector_length(this), "out of bounds");
+    // x_idx: element position inside a 16-byte lane; y_idx: which of the four lanes.
+    uint x_idx = $idx$$constant & right_n_bits(log2epr);
+    uint y_idx = ($idx$$constant >> log2epr) & 3;
+    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); // copy the selected lane of $src into $vtmp
+    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); // overwrite one element in the copy
+    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); // $dst = $src with that lane replaced
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+#ifdef _LP64
+instruct insert2L(vec dst, rRegL val, immU8 idx) %{
+  predicate(vector_length(n) == 2); // two-element results; the rRegL operand restricts this rule to long values
+  match(Set dst (VectorInsert (Binary dst val) idx));
+  format %{ "vector_insert $dst,$val,$idx" %}
+  ins_encode %{
+    assert(UseSSE >= 4, "required");
+    assert(vector_element_basic_type(this) == T_LONG, "");
+    assert($idx$$constant < (int)vector_length(this), "out of bounds");
+    // $dst is both input and output (see the match rule), so the long is replaced in place.
+    __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
+  predicate(vector_length(n) == 4); // four-element result vectors
+  match(Set dst (VectorInsert (Binary src val) idx));
+  effect(TEMP vtmp);
+  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
+  ins_encode %{
+    assert(vector_element_basic_type(this) == T_LONG, "");
+    assert($idx$$constant < (int)vector_length(this), "out of bounds");
+
+    // Two longs per 16-byte lane: bit 0 of idx is the slot in the lane, bit 1 is the lane.
+    uint x_idx = $idx$$constant & right_n_bits(1);
+    uint y_idx = ($idx$$constant >> 1) & 1;
+    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); // copy the selected lane of $src into $vtmp
+    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); // overwrite one long in the copy
+    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); // $dst = $src with that lane replaced
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
+  predicate(vector_length(n) == 8); // eight-element result vectors
+  match(Set dst (VectorInsert (Binary src val) idx));
+  effect(TEMP vtmp);
+  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
+  ins_encode %{
+    assert(vector_element_basic_type(this) == T_LONG, "sanity");
+    assert($idx$$constant < (int)vector_length(this), "out of bounds");
+    // Two longs per 16-byte lane: bit 0 of idx is the slot in the lane, bits 1-2 are the lane.
+    uint x_idx = $idx$$constant & right_n_bits(1);
+    uint y_idx = ($idx$$constant >> 1) & 3;
+    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); // copy the selected lane of $src into $vtmp
+    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); // overwrite one long in the copy
+    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); // $dst = $src with that lane replaced
+  %}
+  ins_pipe( pipe_slow );
+%}
+#endif
+
+instruct insertF(vec dst, regF val, immU8 idx) %{
+  predicate(vector_length(n) < 8); // results with fewer than 8 elements; vinsertF takes the longer ones
+  match(Set dst (VectorInsert (Binary dst val) idx));
+  format %{ "vector_insert $dst,$val,$idx" %}
+  ins_encode %{
+    assert(UseSSE >= 4, "sanity");
+
+    assert(vector_element_basic_type(this) == T_FLOAT, "sanity");
+    assert($idx$$constant < (int)vector_length(this), "out of bounds");
+    // INSERTPS imm8 layout: [7:6] source slot, [5:4] destination slot, [3:0] zero mask -- the lane index belongs in [5:4].
+    __ insertps($dst$$XMMRegister, $val$$XMMRegister, ($idx$$constant & right_n_bits(2)) << 4);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
+  predicate(vector_length(n) >= 8); // 8- or 16-element results; insertF takes the shorter ones
+  match(Set dst (VectorInsert (Binary src val) idx));
+  effect(TEMP vtmp);
+  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
+  ins_encode %{
+    assert(vector_element_basic_type(this) == T_FLOAT, "sanity");
+    assert($idx$$constant < (int)vector_length(this), "out of bounds");
+    // Four floats per 16-byte lane: x_idx is the slot inside the lane, y_idx (below) is the lane.
+    int vlen = vector_length(this);
+    uint x_idx = $idx$$constant & right_n_bits(2);
+    if (vlen == 8) {
+      uint y_idx = ($idx$$constant >> 2) & 1;
+      // (V)INSERTPS takes the destination slot in imm8[5:4]; imm8[3:0] is a zero mask, hence the shift by 4.
+      __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
+      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
+      __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
+    } else {
+      assert(vlen == 16, "sanity");
+      uint y_idx = ($idx$$constant >> 2) & 3;
+      __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
+      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
+      __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
+    }
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+#ifdef _LP64
+instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
+  predicate(vector_length(n) == 2); // two-element results; the regD operand restricts this rule to double values
+  match(Set dst (VectorInsert (Binary dst val) idx));
+  effect(TEMP tmp);
+  format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
+  ins_encode %{
+    assert(UseSSE >= 4, "sanity");
+    assert(vector_element_basic_type(this) == T_DOUBLE, "sanity");
+    assert($idx$$constant < (int)vector_length(this), "out of bounds");
+    // pinsrq takes its value from a general register, so the double is first moved into $tmp.
+    __ movq($tmp$$Register, $val$$XMMRegister);
+    __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
+  predicate(vector_length(n) == 4); // four-element result vectors
+  match(Set dst (VectorInsert (Binary src val) idx));
+  effect(TEMP vtmp, TEMP tmp);
+  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
+  ins_encode %{
+    assert(vector_element_basic_type(this) == T_DOUBLE, "sanity");
+    assert($idx$$constant < (int)vector_length(this), "out of bounds");
+
+    // Two doubles per 16-byte lane: bit 0 of idx is the slot in the lane, bit 1 is the lane.
+    uint x_idx = $idx$$constant & right_n_bits(1);
+    uint y_idx = ($idx$$constant >> 1) & 1;
+    __ movq($tmp$$Register, $val$$XMMRegister); // vpinsrq takes its value from a general register
+    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); // copy the selected lane of $src into $vtmp
+    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); // overwrite one slot in the copy
+    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); // $dst = $src with that lane replaced
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct insert8D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, legVec vtmp) %{
+  predicate(vector_length(n) == 8); // eight-element result vectors
+  match(Set dst (VectorInsert (Binary src val) idx));
+  effect(TEMP tmp, TEMP vtmp);
+  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
+  ins_encode %{
+    assert(vector_element_basic_type(this) == T_DOUBLE, "sanity");
+    assert($idx$$constant < (int)vector_length(this), "out of bounds");
+    // Two doubles per 16-byte lane: bit 0 of idx is the slot in the lane, bits 1-2 are the lane.
+    uint x_idx = $idx$$constant & right_n_bits(1);
+    uint y_idx = ($idx$$constant >> 1) & 3;
+    __ movq($tmp$$Register, $val$$XMMRegister); // vpinsrq takes its value from a general register
+    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); // copy the selected lane of $src into $vtmp
+    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); // overwrite one slot in the copy
+    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); // $dst = $src with that lane replaced
%}
ins_pipe( pipe_slow );
%}
+#endif
-instruct reduction16I(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT &&
- n->in(2)->bottom_type()->is_vect()->length() == 16);
+// ====================REDUCTION ARITHMETIC=======================================
+
+// =======================Int Reduction==========================================
+
+instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
+ predicate(vector_element_basic_type(n->in(2)) == T_INT); // src2
match(Set dst (AddReductionVI src1 src2));
match(Set dst (MulReductionVI src1 src2));
match(Set dst (AndReductionV src1 src2));
match(Set dst ( OrReductionV src1 src2));
match(Set dst (XorReductionV src1 src2));
+ match(Set dst (MinReductionV src1 src2));
+ match(Set dst (MaxReductionV src1 src2));
effect(TEMP vtmp1, TEMP vtmp2);
format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
ins_encode %{
@@ -3828,14 +4446,15 @@ instruct reduction16I(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec v
// =======================Long Reduction==========================================
#ifdef _LP64
-instruct reductionL(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
- n->in(2)->bottom_type()->is_vect()->length() < 8);
+instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
+ predicate(vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
match(Set dst (AddReductionVL src1 src2));
match(Set dst (MulReductionVL src1 src2));
match(Set dst (AndReductionV src1 src2));
match(Set dst ( OrReductionV src1 src2));
match(Set dst (XorReductionV src1 src2));
+ match(Set dst (MinReductionV src1 src2));
+ match(Set dst (MaxReductionV src1 src2));
effect(TEMP vtmp1, TEMP vtmp2);
format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
ins_encode %{
@@ -3846,14 +4465,15 @@ instruct reductionL(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
ins_pipe( pipe_slow );
%}
-instruct reduction8L(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
- predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
- n->in(2)->bottom_type()->is_vect()->length() == 8);
+instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
+ predicate(vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
match(Set dst (AddReductionVL src1 src2));
match(Set dst (MulReductionVL src1 src2));
match(Set dst (AndReductionV src1 src2));
match(Set dst ( OrReductionV src1 src2));
match(Set dst (XorReductionV src1 src2));
+ match(Set dst (MinReductionV src1 src2));
+ match(Set dst (MaxReductionV src1 src2));
effect(TEMP vtmp1, TEMP vtmp2);
format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
ins_encode %{
@@ -3868,11 +4488,11 @@ instruct reduction8L(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vt
// =======================Float Reduction==========================================
instruct reductionF128(regF dst, vec src, vec vtmp) %{
- predicate(n->in(2)->bottom_type()->is_vect()->length() <= 4);
+ predicate(vector_length(n->in(2)) <= 4); // src
match(Set dst (AddReductionVF dst src));
match(Set dst (MulReductionVF dst src));
effect(TEMP dst, TEMP vtmp);
- format %{ "vector_reduction_fp $dst,$src ; using $vtmp as TEMP" %}
+ format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
ins_encode %{
int opcode = this->ideal_Opcode();
int vlen = vector_length(this, $src);
@@ -3882,7 +4502,7 @@ instruct reductionF128(regF dst, vec src, vec vtmp) %{
%}
instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
- predicate(n->in(2)->bottom_type()->is_vect()->length() == 8);
+ predicate(vector_length(n->in(2)) == 8); // src
match(Set dst (AddReductionVF dst src));
match(Set dst (MulReductionVF dst src));
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
@@ -3896,7 +4516,7 @@ instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
%}
instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
- predicate(n->in(2)->bottom_type()->is_vect()->length() == 16);
+ predicate(vector_length(n->in(2)) == 16); // src
match(Set dst (AddReductionVF dst src));
match(Set dst (MulReductionVF dst src));
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
@@ -3912,7 +4532,7 @@ instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
// =======================Double Reduction==========================================
instruct reduction2D(regD dst, vec src, vec vtmp) %{
- predicate(n->in(2)->bottom_type()->is_vect()->length() == 2);
+ predicate(vector_length(n->in(2)) == 2); // src
match(Set dst (AddReductionVD dst src));
match(Set dst (MulReductionVD dst src));
effect(TEMP dst, TEMP vtmp);
@@ -3921,12 +4541,12 @@ instruct reduction2D(regD dst, vec src, vec vtmp) %{
int opcode = this->ideal_Opcode();
int vlen = vector_length(this, $src);
__ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
- %}
+%}
ins_pipe( pipe_slow );
%}
instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
- predicate(n->in(2)->bottom_type()->is_vect()->length() == 4);
+ predicate(vector_length(n->in(2)) == 4); // src
match(Set dst (AddReductionVD dst src));
match(Set dst (MulReductionVD dst src));
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
@@ -3940,7 +4560,7 @@ instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
%}
instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
- predicate(n->in(2)->bottom_type()->is_vect()->length() == 8);
+ predicate(vector_length(n->in(2)) == 8); // src
match(Set dst (AddReductionVD dst src));
match(Set dst (MulReductionVD dst src));
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
@@ -3953,6 +4573,267 @@ instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
ins_pipe( pipe_slow );
%}
+// =======================Byte Reduction==========================================
+
+#ifdef _LP64
+instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
+  predicate(vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw()); // byte vectors (src2) without AVX512BW; legVec operands
+  match(Set dst (AddReductionVI src1 src2));
+  match(Set dst (AndReductionV  src1 src2));
+  match(Set dst ( OrReductionV  src1 src2));
+  match(Set dst (XorReductionV  src1 src2));
+  match(Set dst (MinReductionV  src1 src2));
+  match(Set dst (MaxReductionV  src1 src2)); // MulReductionVI is not matched here; see mul_reductionB / mul_reduction64B
+  effect(TEMP vtmp1, TEMP vtmp2);
+  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
+  ins_encode %{
+    int opcode = this->ideal_Opcode(); // forwarded to reduceB, one rule serving all matched ops
+    int vlen = vector_length(this, $src2); // element count of the vector input, not of this (scalar) node
+    __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
+  predicate(vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw()); // byte vectors (src2) with AVX512BW; vec operands instead of legVec
+  match(Set dst (AddReductionVI src1 src2));
+  match(Set dst (AndReductionV  src1 src2));
+  match(Set dst ( OrReductionV  src1 src2));
+  match(Set dst (XorReductionV  src1 src2));
+  match(Set dst (MinReductionV  src1 src2));
+  match(Set dst (MaxReductionV  src1 src2)); // MulReductionVI is not matched here; see mul_reductionB / mul_reduction64B
+  effect(TEMP vtmp1, TEMP vtmp2);
+  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
+  ins_encode %{
+    int opcode = this->ideal_Opcode(); // forwarded to reduceB, one rule serving all matched ops
+    int vlen = vector_length(this, $src2); // element count of the vector input, not of this (scalar) node
+    __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+#endif
+
+// =======================Short Reduction==========================================
+
+instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
+  predicate(vector_element_basic_type(n->in(2)) == T_SHORT); // src2
+  match(Set dst (AddReductionVI src1 src2));
+  match(Set dst (MulReductionVI src1 src2));
+  match(Set dst (AndReductionV  src1 src2));
+  match(Set dst ( OrReductionV  src1 src2));
+  match(Set dst (XorReductionV  src1 src2));
+  match(Set dst (MinReductionV  src1 src2));
+  match(Set dst (MaxReductionV  src1 src2));
+  effect(TEMP vtmp1, TEMP vtmp2);
+  format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
+  ins_encode %{
+    int opcode = this->ideal_Opcode(); // forwarded to reduceS, one rule serving all matched ops
+    int vlen = vector_length(this, $src2); // element count of the vector input, not of this (scalar) node
+    __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// =======================Byte Mul Reduction=====================================
+
+instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
+  predicate(vector_element_basic_type(n->in(2)) == T_BYTE &&
+            vector_length(n->in(2)) <= 32); // src2; the 64-element case is mul_reduction64B
+  match(Set dst (MulReductionVI src1 src2));
+  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
+  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
+  ins_encode %{
+    int opcode = this->ideal_Opcode(); // forwarded to mulreduceB; only MulReductionVI is matched by this rule
+    int vlen = vector_length(this, $src2); // element count of the vector input, not of this (scalar) node
+    __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
+  predicate(vector_element_basic_type(n->in(2)) == T_BYTE &&
+            vector_length(n->in(2)) == 64); // src2; shorter byte vectors go to mul_reductionB (vec operands)
+  match(Set dst (MulReductionVI src1 src2));
+  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
+  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
+  ins_encode %{
+    int opcode = this->ideal_Opcode(); // forwarded to mulreduceB; only MulReductionVI is matched by this rule
+    int vlen = vector_length(this, $src2); // element count of the vector input, not of this (scalar) node
+    __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+//--------------------Min/Max Float Reduction --------------------
+// Float Min/Max Reduction
+instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp,
+                            legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{
+  predicate(vector_element_basic_type(n->in(2)) == T_FLOAT &&
+            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || // Min seeded with +Inf
+             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && // Max seeded with -Inf
+            vector_length(n->in(2)) == 2); // two-element float vectors only
+  match(Set dst (MinReductionV src1 src2));
+  match(Set dst (MaxReductionV src1 src2));
+  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
+  format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
+  ins_encode %{
+    assert(UseAVX > 0, "sanity");
+    // $src1 is not passed on: the predicate pins it to +Inf for Min / -Inf for Max, which cannot change the result.
+    int opcode = this->ideal_Opcode();
+    int vlen = vector_length(this, $src2);
+    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, // 'false': presumably "$dst holds no incoming value" -- confirm against reduceFloatMinMax
+                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
+                           legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
+  predicate(vector_element_basic_type(n->in(2)) == T_FLOAT &&
+            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || // Min seeded with +Inf
+             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && // Max seeded with -Inf
+            vector_length(n->in(2)) >= 4); // float vectors of four or more elements
+  match(Set dst (MinReductionV src1 src2));
+  match(Set dst (MaxReductionV src1 src2));
+  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
+  format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
+  ins_encode %{
+    assert(UseAVX > 0, "sanity");
+    // $src1 is not passed on: the predicate pins it to +Inf for Min / -Inf for Max, which cannot change the result.
+    int opcode = this->ideal_Opcode();
+    int vlen = vector_length(this, $src2);
+    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, // 'false': presumably "$dst holds no incoming value" -- confirm against reduceFloatMinMax
+                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp,
+                               legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{
+  predicate(vector_element_basic_type(n->in(2)) == T_FLOAT &&
+            vector_length(n->in(2)) == 2); // two-element float vectors; no constraint on the scalar input
+  match(Set dst (MinReductionV dst src)); // the scalar input is $dst itself
+  match(Set dst (MaxReductionV dst src));
+  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
+  format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
+  ins_encode %{
+    assert(UseAVX > 0, "sanity");
+    // Differs from minmax_reduction2F in passing 'true' below and in taking the scalar input from $dst.
+    int opcode = this->ideal_Opcode();
+    int vlen = vector_length(this, $src);
+    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, // 'true': presumably "$dst carries a valid incoming value" -- confirm against reduceFloatMinMax
+                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+
+instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp,
+                              legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
+  predicate(vector_element_basic_type(n->in(2)) == T_FLOAT &&
+            vector_length(n->in(2)) >= 4); // float vectors of four or more elements; no constraint on the scalar input
+  match(Set dst (MinReductionV dst src)); // the scalar input is $dst itself
+  match(Set dst (MaxReductionV dst src));
+  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
+  format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
+  ins_encode %{
+    assert(UseAVX > 0, "sanity");
+    // Differs from minmax_reductionF in passing 'true' below and in taking the scalar input from $dst.
+    int opcode = this->ideal_Opcode();
+    int vlen = vector_length(this, $src);
+    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, // 'true': presumably "$dst carries a valid incoming value" -- confirm against reduceFloatMinMax
+                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+
+//--------------------Min/Max Double Reduction --------------------
+instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, // 2-element double Min/Max reduction with an immediate scalar input
+ legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs
+ rFlagsReg cr) %{
+ predicate(vector_element_basic_type(n->in(2)) == T_DOUBLE &&
+ ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
+ (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
+ vector_length(n->in(2)) == 2); // double vectors of exactly 2 elements; scalar input must be +Inf for Min, -Inf for Max
+ match(Set dst (MinReductionV src1 src2));
+ match(Set dst (MaxReductionV src1 src2));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
+ format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
+ ins_encode %{
+ assert(UseAVX > 0, "sanity");
+ // src1 is not passed to the helper; the predicate has already pinned it to +Inf (Min) or -Inf (Max).
+ int opcode = this->ideal_Opcode(); // lets the helper tell the two matched operations apart
+ int vlen = vector_length(this, $src2); // length of the vector operand
+ __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, // third argument is false here and true in the *_av rules; presumably "dst is also an input" -- TODO confirm in reduceDoubleMinMax
+ $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, // double Min/Max reduction (4 or more elements) with an immediate scalar input
+ legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs
+ rFlagsReg cr) %{
+ predicate(vector_element_basic_type(n->in(2)) == T_DOUBLE &&
+ ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
+ (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
+ vector_length(n->in(2)) >= 4); // double vectors of 4 or more elements; scalar input must be +Inf for Min, -Inf for Max
+ match(Set dst (MinReductionV src1 src2));
+ match(Set dst (MaxReductionV src1 src2));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
+ format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
+ ins_encode %{
+ assert(UseAVX > 0, "sanity");
+ // src1 is not passed to the helper; one more temp (tmp5) is supplied than in the 2-element rule.
+ int opcode = this->ideal_Opcode(); // lets the helper tell the two matched operations apart
+ int vlen = vector_length(this, $src2); // length of the vector operand
+ __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, // third argument is false here and true in the *_av rules; presumably "dst is also an input" -- TODO confirm in reduceDoubleMinMax
+ $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+
+instruct minmax_reduction2D_av(legRegD dst, legVec src, // 2-element double Min/Max reduction; dst is both the scalar input and the result
+ legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs
+ rFlagsReg cr) %{
+ predicate(vector_element_basic_type(n->in(2)) == T_DOUBLE &&
+ vector_length(n->in(2)) == 2); // double vectors of exactly 2 elements
+ match(Set dst (MinReductionV dst src));
+ match(Set dst (MaxReductionV dst src));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
+ format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
+ ins_encode %{
+ assert(UseAVX > 0, "sanity");
+ // Four temps are handed to the helper; minmax_reductionD_av supplies a fifth.
+ int opcode = this->ideal_Opcode(); // lets the helper tell the two matched operations apart
+ int vlen = vector_length(this, $src); // length of the vector operand
+ __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, // third argument is true here and false in the immediate-input rules; presumably "dst is also an input" -- TODO confirm in reduceDoubleMinMax
+ $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct minmax_reductionD_av(legRegD dst, legVec src, // double Min/Max reduction (4 or more elements); dst is both the scalar input and the result
+ legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs
+ rFlagsReg cr) %{
+ predicate(vector_element_basic_type(n->in(2)) == T_DOUBLE &&
+ vector_length(n->in(2)) >= 4); // double vectors of 4 or more elements
+ match(Set dst (MinReductionV dst src));
+ match(Set dst (MaxReductionV dst src));
+ effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
+ format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
+ ins_encode %{
+ assert(UseAVX > 0, "sanity");
+ // Same helper call as minmax_reduction2D_av, with the extra tmp5 temp supplied.
+ int opcode = this->ideal_Opcode(); // lets the helper tell the two matched operations apart
+ int vlen = vector_length(this, $src); // length of the vector operand
+ __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, // third argument is true here and false in the immediate-input rules; presumably "dst is also an input" -- TODO confirm in reduceDoubleMinMax
+ $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
// ====================VECTOR ARITHMETIC=======================================
// --------------------------------- ADD --------------------------------------
@@ -3973,8 +4854,8 @@ instruct vaddB_reg(vec dst, vec src1, vec src2) %{
match(Set dst (AddVB src1 src2));
format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -3984,8 +4865,8 @@ instruct vaddB_mem(vec dst, vec src, memory mem) %{
match(Set dst (AddVB src (LoadVector mem)));
format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4006,8 +4887,8 @@ instruct vaddS_reg(vec dst, vec src1, vec src2) %{
match(Set dst (AddVS src1 src2));
format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4017,8 +4898,8 @@ instruct vaddS_mem(vec dst, vec src, memory mem) %{
match(Set dst (AddVS src (LoadVector mem)));
format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4039,8 +4920,8 @@ instruct vaddI_reg(vec dst, vec src1, vec src2) %{
match(Set dst (AddVI src1 src2));
format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4051,8 +4932,8 @@ instruct vaddI_mem(vec dst, vec src, memory mem) %{
match(Set dst (AddVI src (LoadVector mem)));
format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4073,8 +4954,8 @@ instruct vaddL_reg(vec dst, vec src1, vec src2) %{
match(Set dst (AddVL src1 src2));
format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4084,8 +4965,8 @@ instruct vaddL_mem(vec dst, vec src, memory mem) %{
match(Set dst (AddVL src (LoadVector mem)));
format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4106,8 +4987,8 @@ instruct vaddF_reg(vec dst, vec src1, vec src2) %{
match(Set dst (AddVF src1 src2));
format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4117,8 +4998,8 @@ instruct vaddF_mem(vec dst, vec src, memory mem) %{
match(Set dst (AddVF src (LoadVector mem)));
format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4139,8 +5020,8 @@ instruct vaddD_reg(vec dst, vec src1, vec src2) %{
match(Set dst (AddVD src1 src2));
format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4150,8 +5031,8 @@ instruct vaddD_mem(vec dst, vec src, memory mem) %{
match(Set dst (AddVD src (LoadVector mem)));
format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4174,8 +5055,8 @@ instruct vsubB_reg(vec dst, vec src1, vec src2) %{
match(Set dst (SubVB src1 src2));
format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4185,8 +5066,8 @@ instruct vsubB_mem(vec dst, vec src, memory mem) %{
match(Set dst (SubVB src (LoadVector mem)));
format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4208,8 +5089,8 @@ instruct vsubS_reg(vec dst, vec src1, vec src2) %{
match(Set dst (SubVS src1 src2));
format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4219,8 +5100,8 @@ instruct vsubS_mem(vec dst, vec src, memory mem) %{
match(Set dst (SubVS src (LoadVector mem)));
format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4241,8 +5122,8 @@ instruct vsubI_reg(vec dst, vec src1, vec src2) %{
match(Set dst (SubVI src1 src2));
format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4252,8 +5133,8 @@ instruct vsubI_mem(vec dst, vec src, memory mem) %{
match(Set dst (SubVI src (LoadVector mem)));
format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4274,8 +5155,8 @@ instruct vsubL_reg(vec dst, vec src1, vec src2) %{
match(Set dst (SubVL src1 src2));
format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4286,8 +5167,8 @@ instruct vsubL_mem(vec dst, vec src, memory mem) %{
match(Set dst (SubVL src (LoadVector mem)));
format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4308,8 +5189,8 @@ instruct vsubF_reg(vec dst, vec src1, vec src2) %{
match(Set dst (SubVF src1 src2));
format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4319,8 +5200,8 @@ instruct vsubF_mem(vec dst, vec src, memory mem) %{
match(Set dst (SubVF src (LoadVector mem)));
format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4341,8 +5222,8 @@ instruct vsubD_reg(vec dst, vec src1, vec src2) %{
match(Set dst (SubVD src1 src2));
format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4352,8 +5233,8 @@ instruct vsubD_mem(vec dst, vec src, memory mem) %{
match(Set dst (SubVD src (LoadVector mem)));
format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4362,8 +5243,8 @@ instruct vsubD_mem(vec dst, vec src, memory mem) %{
// Byte vector mul
instruct mulB_reg(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{
- predicate(n->as_Vector()->length() == 4 ||
- n->as_Vector()->length() == 8);
+ predicate(vector_length(n) == 4 ||
+ vector_length(n) == 8);
match(Set dst (MulVB src1 src2));
effect(TEMP dst, TEMP tmp, TEMP scratch);
format %{"vector_mulB $dst,$src1,$src2" %}
@@ -4380,7 +5261,7 @@ instruct mulB_reg(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{
%}
instruct mul16B_reg(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{
- predicate(n->as_Vector()->length() == 16 && UseAVX <= 1);
+ predicate(vector_length(n) == 16 && UseAVX <= 1);
match(Set dst (MulVB src1 src2));
effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
format %{"vector_mulB $dst,$src1,$src2" %}
@@ -4403,17 +5284,17 @@ instruct mul16B_reg(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scrat
%}
instruct vmul16B_reg_avx(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{
- predicate(n->as_Vector()->length() == 16 && UseAVX > 1);
+ predicate(vector_length(n) == 16 && UseAVX > 1);
match(Set dst (MulVB src1 src2));
effect(TEMP dst, TEMP tmp, TEMP scratch);
format %{"vector_mulB $dst,$src1,$src2" %}
ins_encode %{
- int vector_len = Assembler::AVX_256bit;
- __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vector_len);
- __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len);
- __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, vector_len);
+ int vlen_enc = Assembler::AVX_256bit;
+ __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
+ __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc);
+ __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
__ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
- __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+ __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
__ vextracti128_high($tmp$$XMMRegister, $dst$$XMMRegister);
__ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, 0);
%}
@@ -4421,54 +5302,54 @@ instruct vmul16B_reg_avx(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{
%}
instruct vmul32B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{
- predicate(n->as_Vector()->length() == 32);
+ predicate(vector_length(n) == 32); // 32-element byte multiply (MulVB), done by widening to 16-bit words
match(Set dst (MulVB src1 src2));
effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
format %{"vector_mulB $dst,$src1,$src2" %}
ins_encode %{
assert(UseAVX > 1, "required");
- int vector_len = Assembler::AVX_256bit;
+ int vlen_enc = Assembler::AVX_256bit; // every length-taking call below uses the 256-bit encoding
__ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister);
__ vextracti128_high($dst$$XMMRegister, $src2$$XMMRegister);
- __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
- __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
- __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
- __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len);
- __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len);
- __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); // upper 16 bytes of src1, sign-extended to words
+ __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // upper 16 bytes of src2, sign-extended to words
+ __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); // tmp1 = word products of the upper halves
+ __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vlen_enc); // lower 16 bytes of src1, sign-extended to words
+ __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc); // lower 16 bytes of src2, sign-extended to words
+ __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); // tmp2 = word products of the lower halves
__ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
- __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
- __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
- __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len);
- __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp1$$XMMRegister, vector_len);
- __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len);
+ __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // replicate the first dword of the loaded mask across dst
+ __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); // mask the upper-half products (presumably keeps the low byte of each word -- confirm mask contents)
+ __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); // dst = masked lower-half products
+ __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); // narrow words to bytes; packing works per 128-bit lane
+ __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); // 0xD8 selects qwords 0,2,1,3 to undo the per-lane interleave
%}
ins_pipe( pipe_slow );
%}
instruct vmul64B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{
- predicate(n->as_Vector()->length() == 64);
+ predicate(vector_length(n) == 64); // 64-element byte multiply (MulVB), done by widening to 16-bit words
match(Set dst (MulVB src1 src2));
effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
format %{"vector_mulB $dst,$src1,$src2\n\t" %}
ins_encode %{
assert(UseAVX > 2, "required");
- int vector_len = Assembler::AVX_512bit;
+ int vlen_enc = Assembler::AVX_512bit; // every length-taking call below uses the 512-bit encoding
__ vextracti64x4_high($tmp1$$XMMRegister, $src1$$XMMRegister);
__ vextracti64x4_high($dst$$XMMRegister, $src2$$XMMRegister);
- __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
- __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
- __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
- __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len);
- __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len);
- __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); // upper 32 bytes of src1, sign-extended to words
+ __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // upper 32 bytes of src2, sign-extended to words
+ __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); // tmp1 = word products of the upper halves
+ __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vlen_enc); // lower 32 bytes of src1, sign-extended to words
+ __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc); // lower 32 bytes of src2, sign-extended to words
+ __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); // tmp2 = word products of the lower halves
__ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
- __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
- __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
- __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
- __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len);
- __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register);
- __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // replicate the first dword of the loaded mask across dst
+ __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); // mask the upper-half products (presumably keeps the low byte of each word -- confirm mask contents)
+ __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); // mask the lower-half products the same way
+ __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); // narrow words to bytes; packing works per 128-bit lane
+ __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, $scratch$$Register); // load the permutation table into tmp2 (tmp2 is free after the pack)
+ __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); // reorder dst's qwords using tmp2; presumably restores element order after the per-lane pack -- confirm table contents
%}
ins_pipe( pipe_slow );
%}
@@ -4489,8 +5370,8 @@ instruct vmulS_reg(vec dst, vec src1, vec src2) %{
match(Set dst (MulVS src1 src2));
format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4500,8 +5381,8 @@ instruct vmulS_mem(vec dst, vec src, memory mem) %{
match(Set dst (MulVS src (LoadVector mem)));
format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4523,8 +5404,8 @@ instruct vmulI_reg(vec dst, vec src1, vec src2) %{
match(Set dst (MulVI src1 src2));
format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4534,31 +5415,84 @@ instruct vmulI_mem(vec dst, vec src, memory mem) %{
match(Set dst (MulVI src (LoadVector mem)));
format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
// Longs vector mul
instruct vmulL_reg(vec dst, vec src1, vec src2) %{
+ predicate(VM_Version::supports_avx512dq()); // only when AVX512DQ is reported; mul2L_reg and vmul4L_reg_avx cover lengths 2 and 4 otherwise
match(Set dst (MulVL src1 src2));
format %{ "vpmullq $dst,$src1,$src2\t! mul packedL" %}
ins_encode %{
assert(UseAVX > 2, "required");
- int vector_len = vector_length_encoding(this);
- __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this); // encoding is derived from this node rather than hard-coded
+ __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); // single-instruction packed 64-bit multiply
%}
ins_pipe( pipe_slow );
%}
instruct vmulL_mem(vec dst, vec src, memory mem) %{
+ predicate(VM_Version::supports_avx512dq()); // only when AVX512DQ is reported, same guard as vmulL_reg
match(Set dst (MulVL src (LoadVector mem)));
format %{ "vpmullq $dst,$src,$mem\t! mul packedL" %}
ins_encode %{
assert(UseAVX > 2, "required");
- int vector_len = vector_length_encoding(this);
- __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vlen_enc = vector_length_encoding(this); // encoding is derived from this node rather than hard-coded
+ __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); // second multiplicand is read directly from memory
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct mul2L_reg(vec dst, vec src2, legVec tmp) %{ // 2 x 64-bit multiply assembled from 32-bit SSE multiplies; dst is also the first multiplicand
+ predicate(vector_length(n) == 2 && !VM_Version::supports_avx512dq()); // length 2 only, and only when the vpmullq rules (vmulL_reg/vmulL_mem) are excluded
+ match(Set dst (MulVL dst src2));
+ effect(TEMP dst, TEMP tmp);
+ format %{ "pshufd $tmp,$src2, 177\n\t"
+ "pmulld $tmp,$dst\n\t"
+ "phaddd $tmp,$tmp\n\t"
+ "pmovzxdq $tmp,$tmp\n\t"
+ "psllq $tmp, 32\n\t"
+ "pmuludq $dst,$src2\n\t"
+ "paddq $dst,$tmp\n\t! mul packed2L" %}
+
+ ins_encode %{
+ assert(VM_Version::supports_sse4_1(), "required");
+ // Per 64-bit lane: a*b mod 2^64 = lo(a)*lo(b) + ((lo(a)*hi(b) + hi(a)*lo(b)) << 32). No vector-length argument: these are fixed 128-bit SSE forms.
+ __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177); // 177 = 0xB1: swap the two dwords inside each qword of src2
+ __ pmulld($tmp$$XMMRegister, $dst$$XMMRegister); // cross products hi*lo and lo*hi, low 32 bits of each
+ __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); // add the two cross products belonging to each lane
+ __ pmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister); // zero-extend the two sums into 64-bit lanes
+ __ psllq($tmp$$XMMRegister, 32); // shift the sums into the upper 32 bits
+ __ pmuludq($dst$$XMMRegister, $src2$$XMMRegister); // full 64-bit lo*lo product per lane
+ __ paddq($dst$$XMMRegister, $tmp$$XMMRegister); // lo*lo + shifted cross terms = low 64 bits of the product
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vmul4L_reg_avx(vec dst, vec src1, vec src2, legVec tmp, legVec tmp1) %{ // 4 x 64-bit multiply assembled from 32-bit AVX multiplies
+ predicate(vector_length(n) == 4 && !VM_Version::supports_avx512dq()); // length 4 only, and only when the vpmullq rules (vmulL_reg/vmulL_mem) are excluded
+ match(Set dst (MulVL src1 src2));
+ effect(TEMP tmp1, TEMP tmp);
+ format %{ "vpshufd $tmp,$src2, 177\n\t"
+ "vpmulld $tmp,$src1,$tmp\n\t"
+ "vextracti128_high $tmp1,$tmp\n\tvphaddd $tmp,$tmp,$tmp1\n\t"
+ "vpmovzxdq $tmp,$tmp\n\t"
+ "vpsllq $tmp,$tmp, 32\n\t"
+ "vpmuludq $tmp1,$src1,$src2\n\t"
+ "vpaddq $dst,$tmp,$tmp1\t! mul packed4L" %}
+ ins_encode %{
+ int vlen_enc = Assembler::AVX_256bit; // four 64-bit lanes
+ __ vpshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177, vlen_enc); // 177 = 0xB1: swap the two dwords inside each qword of src2
+ __ vpmulld($tmp$$XMMRegister, $src1$$XMMRegister, $tmp$$XMMRegister, vlen_enc); // cross products lo*hi and hi*lo, low 32 bits of each
+ __ vextracti128_high($tmp1$$XMMRegister, $tmp$$XMMRegister); // bring the upper two lanes' cross products down for the horizontal add
+ __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); // low 128 bits now hold the four per-lane cross sums in order
+ __ vpmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc); // zero-extend the four sums into 64-bit lanes
+ __ vpsllq($tmp$$XMMRegister, $tmp$$XMMRegister, 32, vlen_enc); // shift the sums into the upper 32 bits
+ __ vpmuludq($tmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); // full 64-bit lo*lo product per lane
+ __ vpaddq($dst$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); // dst is written only here, after all reads of src1/src2
%}
ins_pipe( pipe_slow );
%}
@@ -4579,8 +5513,8 @@ instruct vmulF_reg(vec dst, vec src1, vec src2) %{
match(Set dst (MulVF src1 src2));
format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4590,8 +5524,8 @@ instruct vmulF_mem(vec dst, vec src, memory mem) %{
match(Set dst (MulVF src (LoadVector mem)));
format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4612,8 +5546,8 @@ instruct vmulD_reg(vec dst, vec src1, vec src2) %{
match(Set dst (MulVD src1 src2));
format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4623,40 +5557,44 @@ instruct vmulD_mem(vec dst, vec src, memory mem) %{
match(Set dst (MulVD src (LoadVector mem)));
format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vcmov8F_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+ predicate(vector_length(n) == 8); // length 8 only; the UseAVX requirement is asserted inside ins_encode instead
match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2)));
effect(TEMP dst, USE src1, USE src2);
format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t"
"blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t"
%}
ins_encode %{
- int vector_len = 1;
+ assert(UseAVX > 0, "required");
+ // NOTE(review): the format string above still prints cmpps/blendvps, but vcmpps/vblendvps are emitted below.
+ int vlen_enc = Assembler::AVX_256bit; // named constant replacing the literal 1
int cond = (Assembler::Condition)($copnd$$cmpcode);
- __ cmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
- __ blendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vlen_enc); // compare result is written to dst
+ __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vlen_enc); // dst is passed again as the fourth register; presumably the blend mask -- confirm in the assembler
%}
ins_pipe( pipe_slow );
%}
instruct vcmov4D_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{
- predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+ predicate(vector_length(n) == 4); // length 4 only; the UseAVX requirement is asserted inside ins_encode instead
match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
effect(TEMP dst, USE src1, USE src2);
format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t"
- "blendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
+ "vblendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
%}
ins_encode %{
- int vector_len = 1;
+ assert(UseAVX > 0, "required");
+ // NOTE(review): the first format line still prints cmppd, but vcmppd is emitted below.
+ int vlen_enc = Assembler::AVX_256bit; // named constant replacing the literal 1
int cond = (Assembler::Condition)($copnd$$cmpcode);
- __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
- __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vlen_enc); // compare result is written to dst
+ __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vlen_enc); // dst is passed again as the fourth register; presumably the blend mask -- confirm in the assembler
%}
ins_pipe( pipe_slow );
%}
@@ -4679,8 +5617,8 @@ instruct vdivF_reg(vec dst, vec src1, vec src2) %{
match(Set dst (DivVF src1 src2));
format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4690,8 +5628,8 @@ instruct vdivF_mem(vec dst, vec src, memory mem) %{
match(Set dst (DivVF src (LoadVector mem)));
format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4712,8 +5650,8 @@ instruct vdivD_reg(vec dst, vec src1, vec src2) %{
match(Set dst (DivVD src1 src2));
format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4723,8 +5661,145 @@ instruct vdivD_mem(vec dst, vec src, memory mem) %{
match(Set dst (DivVD src (LoadVector mem)));
format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// ------------------------------ MinMax ---------------------------------------
+
+// Byte, Short, Int vector Min/Max
+instruct minmax_reg_sse(vec dst, vec src) %{
+ predicate(is_integral_type(vector_element_basic_type(n)) && vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
+ UseAVX == 0); // non-AVX rule; the UseAVX > 0 case is matched by vminmax_reg
+ match(Set dst (MinV dst src));
+ match(Set dst (MaxV dst src));
+ format %{ "vector_minmax $dst,$src\t! " %}
+ ins_encode %{
+ assert(UseSSE >= 4, "required");
+ // Two-operand form: dst is the first input and the result. The ideal opcode (MinV or MaxV) is passed through to pminmax.
+ int opcode = this->ideal_Opcode();
+ BasicType elem_bt = vector_element_basic_type(this);
+ __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vminmax_reg(vec dst, vec src1, vec src2) %{
+ predicate(is_integral_type(vector_element_basic_type(n)) && vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
+ UseAVX > 0); // AVX counterpart of minmax_reg_sse (which requires UseAVX == 0)
+ match(Set dst (MinV src1 src2));
+ match(Set dst (MaxV src1 src2));
+ format %{ "vector_minmax $dst,$src1,$src2\t! " %}
+ ins_encode %{
+ int opcode = this->ideal_Opcode();
+ int vlen_enc = vector_length_encoding(this);
+ BasicType elem_bt = vector_element_basic_type(this);
+ // Three-operand form: dst is separate from src1/src2; opcode is MinV or MaxV per the match rules above.
+ __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Long vector Min/Max
+instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
+ predicate(vector_length_in_bytes(n) == 16 && vector_element_basic_type(n) == T_LONG &&
+ UseAVX == 0); // 16-byte long vectors on the non-AVX path
+ match(Set dst (MinV dst src));
+ match(Set dst (MaxV src dst)); // NOTE(review): operands are (src dst) here but (dst src) for MinV - confirm this is intentional
+ effect(TEMP dst, TEMP tmp);
+ format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
+ ins_encode %{
+ assert(UseSSE >= 4, "required");
+ // tmp is declared with operand class rxmm0 - presumably pinned to xmm0 for the helper; verify against pminmax.
+ int opcode = this->ideal_Opcode();
+ BasicType elem_bt = vector_element_basic_type(this);
+ assert(elem_bt == T_LONG, "sanity");
+ // The long variant passes the extra tmp register to pminmax.
+ __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
+ predicate(vector_length_in_bytes(n) <= 32 && vector_element_basic_type(n) == T_LONG &&
+ UseAVX > 0 && !VM_Version::supports_avx512vl()); // AVX path without AVX512VL; vminmaxL_reg_evex takes 64-byte vectors and AVX512VL
+ match(Set dst (MinV src1 src2));
+ match(Set dst (MaxV src1 src2));
+ effect(TEMP dst);
+ format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
+ ins_encode %{
+ int vlen_enc = vector_length_encoding(this);
+ int opcode = this->ideal_Opcode();
+ BasicType elem_bt = vector_element_basic_type(this);
+ assert(elem_bt == T_LONG, "sanity");
+ // Operands use the legVec class here (the EVEX rule uses vec); opcode is MinV or MaxV per the match rules.
+ __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
+ predicate((vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && // 64-byte vectors, or any length when AVX512VL is available
+ vector_element_basic_type(n) == T_LONG);
+ match(Set dst (MinV src1 src2));
+ match(Set dst (MaxV src1 src2));
+ format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
+ ins_encode %{
+ assert(UseAVX > 2, "required");
+ // One rule serves both MinV and MaxV; the ideal opcode is forwarded so vpminmax can tell them apart.
+ int vlen_enc = vector_length_encoding(this);
+ int opcode = this->ideal_Opcode();
+ BasicType elem_bt = vector_element_basic_type(this);
+ assert(elem_bt == T_LONG, "sanity");
+
+ __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Float/Double vector Min/Max
+instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
+ predicate(vector_length_in_bytes(n) <= 32 &&
+ is_floating_point_type(vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
+ UseAVX > 0); // vectors up to 32 bytes; 64-byte vectors are matched by evminmaxFP_reg_eavx
+ match(Set dst (MinV a b));
+ match(Set dst (MaxV a b));
+ effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
+ format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
+ ins_encode %{
+ assert(UseAVX > 0, "required");
+ // opcode is MinV or MaxV; elem_bt (float or double per the predicate) is forwarded to vminmax_fp.
+ int opcode = this->ideal_Opcode();
+ int vlen_enc = vector_length_encoding(this);
+ BasicType elem_bt = vector_element_basic_type(this);
+ // The three scratch vectors (tmp, atmp, btmp) are handed to the helper alongside the inputs.
+ __ vminmax_fp(opcode, elem_bt,
+ $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
+ $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp) %{
+ predicate(vector_length_in_bytes(n) == 64 &&
+ is_floating_point_type(vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
+ match(Set dst (MinV a b));
+ match(Set dst (MaxV a b));
+ effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp);
+ format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
+ ins_encode %{
+ assert(UseAVX > 2, "required");
+ // 64-byte float/double Min/Max; opcode is MinV or MaxV per the match rules.
+ int opcode = this->ideal_Opcode();
+ int vlen_enc = vector_length_encoding(this);
+ BasicType elem_bt = vector_element_basic_type(this);
+ // k1 is used directly as the mask temporary; it is not declared as an operand or listed in effect().
+ KRegister ktmp = k1;
+ __ evminmax_fp(opcode, elem_bt,
+ $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
+ ktmp, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4736,8 +5811,8 @@ instruct vsqrtF_reg(vec dst, vec src) %{
format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
ins_encode %{
assert(UseAVX > 0, "required");
- int vector_len = vector_length_encoding(this);
- __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4747,8 +5822,8 @@ instruct vsqrtF_mem(vec dst, memory mem) %{
format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
ins_encode %{
assert(UseAVX > 0, "required");
- int vector_len = vector_length_encoding(this);
- __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4759,8 +5834,8 @@ instruct vsqrtD_reg(vec dst, vec src) %{
format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
ins_encode %{
assert(UseAVX > 0, "required");
- int vector_len = vector_length_encoding(this);
- __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4770,8 +5845,8 @@ instruct vsqrtD_mem(vec dst, memory mem) %{
format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
ins_encode %{
assert(UseAVX > 0, "required");
- int vector_len = vector_length_encoding(this);
- __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4792,16 +5867,17 @@ instruct vshiftcnt(vec dst, rRegI cnt) %{
// Byte vector shift
instruct vshiftB(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
- predicate(n->as_Vector()->length() <= 8);
- match(Set dst (LShiftVB src shift));
- match(Set dst (RShiftVB src shift));
+ predicate(vector_length(n) <= 8 && VectorNode::is_vshift_cnt(n->in(2)));
+ match(Set dst ( LShiftVB src shift));
+ match(Set dst ( RShiftVB src shift));
match(Set dst (URShiftVB src shift));
effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch);
format %{"vector_byte_shift $dst,$src,$shift" %}
ins_encode %{
assert(UseSSE > 3, "required");
int opcode = this->ideal_Opcode();
- __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister);
+ bool sign = (opcode != Op_URShiftVB);
+ __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
__ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
__ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
__ pand($dst$$XMMRegister, $tmp$$XMMRegister);
@@ -4811,20 +5887,21 @@ instruct vshiftB(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
%}
instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{
- predicate(n->as_Vector()->length() == 16 && UseAVX <= 1);
- match(Set dst (LShiftVB src shift));
- match(Set dst (RShiftVB src shift));
+ predicate(vector_length(n) == 16 && VectorNode::is_vshift_cnt(n->in(2)) &&
+ UseAVX <= 1);
+ match(Set dst ( LShiftVB src shift));
+ match(Set dst ( RShiftVB src shift));
match(Set dst (URShiftVB src shift));
effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2, TEMP scratch);
format %{"vector_byte_shift $dst,$src,$shift" %}
ins_encode %{
assert(UseSSE > 3, "required");
int opcode = this->ideal_Opcode();
-
- __ vextendbw(opcode, $tmp1$$XMMRegister, $src$$XMMRegister);
+ bool sign = (opcode != Op_URShiftVB);
+ __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
__ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
__ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
- __ vextendbw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
+ __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
__ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
__ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
__ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
@@ -4835,18 +5912,20 @@ instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratc
%}
instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
- predicate(n->as_Vector()->length() == 16 && UseAVX > 1);
- match(Set dst (LShiftVB src shift));
- match(Set dst (RShiftVB src shift));
+ predicate(vector_length(n) == 16 && VectorNode::is_vshift_cnt(n->in(2)) &&
+ UseAVX > 1);
+ match(Set dst ( LShiftVB src shift));
+ match(Set dst ( RShiftVB src shift));
match(Set dst (URShiftVB src shift));
effect(TEMP dst, TEMP tmp, TEMP scratch);
format %{"vector_byte_shift $dst,$src,$shift" %}
ins_encode %{
int opcode = this->ideal_Opcode();
- int vector_len = Assembler::AVX_256bit;
- __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister, vector_len);
- __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
- __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
+ bool sign = (opcode != Op_URShiftVB);
+ int vlen_enc = Assembler::AVX_256bit;
+ __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
+ __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
+ __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register);
__ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
__ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
%}
@@ -4854,52 +5933,54 @@ instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
%}
instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
- predicate(n->as_Vector()->length() == 32);
- match(Set dst (LShiftVB src shift));
- match(Set dst (RShiftVB src shift));
+ predicate(vector_length(n) == 32 && VectorNode::is_vshift_cnt(n->in(2)));
+ match(Set dst ( LShiftVB src shift));
+ match(Set dst ( RShiftVB src shift));
match(Set dst (URShiftVB src shift));
effect(TEMP dst, TEMP tmp, TEMP scratch);
format %{"vector_byte_shift $dst,$src,$shift" %}
ins_encode %{
assert(UseAVX > 1, "required");
int opcode = this->ideal_Opcode();
- int vector_len = Assembler::AVX_256bit;
+ bool sign = (opcode != Op_URShiftVB);
+ int vlen_enc = Assembler::AVX_256bit;
__ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
- __ vextendbw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
- __ vextendbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len);
- __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
- __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vector_len);
- __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
- __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
- __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
- __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len);
+ __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
+ __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
+ __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
+ __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
+ __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register);
+ __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register);
+ __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
+ __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{
- predicate(n->as_Vector()->length() == 64);
- match(Set dst (LShiftVB src shift));
- match(Set dst (RShiftVB src shift));
+ predicate(vector_length(n) == 64 && VectorNode::is_vshift_cnt(n->in(2)));
+ match(Set dst ( LShiftVB src shift));
+ match(Set dst (RShiftVB src shift));
match(Set dst (URShiftVB src shift));
effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
format %{"vector_byte_shift $dst,$src,$shift" %}
ins_encode %{
assert(UseAVX > 2, "required");
int opcode = this->ideal_Opcode();
- int vector_len = Assembler::AVX_512bit;
+ bool sign = (opcode != Op_URShiftVB);
+ int vlen_enc = Assembler::AVX_512bit;
__ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
- __ vextendbw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
- __ vextendbw(opcode, $tmp2$$XMMRegister, $src$$XMMRegister, vector_len);
- __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vector_len);
- __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vector_len);
+ __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
+ __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
+ __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
+ __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
__ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
- __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
- __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
- __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
- __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len);
- __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register);
- __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
+ __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
+ __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
+ __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
+ __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
+ __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, $scratch$$Register);
+ __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -4910,8 +5991,9 @@ instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI sc
// unsigned values.
// Shorts/Chars vector left shift
instruct vshiftS(vec dst, vec src, vec shift) %{
- match(Set dst (LShiftVS src shift));
- match(Set dst (RShiftVS src shift));
+ predicate(VectorNode::is_vshift_cnt(n->in(2)));
+ match(Set dst ( LShiftVS src shift));
+ match(Set dst ( RShiftVS src shift));
match(Set dst (URShiftVS src shift));
effect(TEMP dst, USE src, USE shift);
format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
@@ -4940,16 +6022,17 @@ instruct vshiftS(vec dst, vec src, vec shift) %{
// Integers vector left shift
instruct vshiftI(vec dst, vec src, vec shift) %{
- match(Set dst (LShiftVI src shift));
- match(Set dst (RShiftVI src shift));
+ predicate(VectorNode::is_vshift_cnt(n->in(2)));
+ match(Set dst ( LShiftVI src shift));
+ match(Set dst ( RShiftVI src shift));
match(Set dst (URShiftVI src shift));
effect(TEMP dst, USE src, USE shift);
format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
ins_encode %{
int opcode = this->ideal_Opcode();
if (UseAVX > 0) {
- int vector_len = vector_length_encoding(this);
- __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
} else {
int vlen = vector_length(this);
if (vlen == 2) {
@@ -4994,15 +6077,16 @@ instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
// Longs vector shift
instruct vshiftL(vec dst, vec src, vec shift) %{
- match(Set dst (LShiftVL src shift));
+ predicate(VectorNode::is_vshift_cnt(n->in(2)));
+ match(Set dst ( LShiftVL src shift));
match(Set dst (URShiftVL src shift));
effect(TEMP dst, USE src, USE shift);
format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
ins_encode %{
int opcode = this->ideal_Opcode();
if (UseAVX > 0) {
- int vector_len = vector_length_encoding(this);
- __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
} else {
assert(vector_length(this) == 2, "");
__ movdqu($dst$$XMMRegister, $src$$XMMRegister);
@@ -5035,7 +6119,7 @@ instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
// -------------------ArithmeticRightShift -----------------------------------
// Long vector arithmetic right shift
instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
- predicate(UseAVX <= 2);
+ predicate(VectorNode::is_vshift_cnt(n->in(2)) && UseAVX <= 2);
match(Set dst (RShiftVL src shift));
effect(TEMP dst, TEMP tmp, TEMP scratch);
format %{ "vshiftq $dst,$src,$shift" %}
@@ -5052,24 +6136,297 @@ instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp, rRegI scratch)
} else {
assert(vlen == 4, "sanity");
assert(UseAVX > 1, "required");
- int vector_len = Assembler::AVX_256bit;
- __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ int vlen_enc = Assembler::AVX_256bit;
+ __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
__ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register);
- __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
- __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
- __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
+ __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
+ __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
+ __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
}
%}
ins_pipe( pipe_slow );
%}
instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
- predicate(UseAVX > 2);
+ predicate(VectorNode::is_vshift_cnt(n->in(2)) && UseAVX > 2);
match(Set dst (RShiftVL src shift));
format %{ "vshiftq $dst,$src,$shift" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// ------------------- Variable Shift -----------------------------
+// Byte variable shift
+instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{
+ predicate(vector_length(n) <= 8 &&
+ !VectorNode::is_vshift_cnt(n->in(2)) && // variable-shift rule: in(2) is not a vshift-count node
+ !VM_Version::supports_avx512bw());
+ match(Set dst ( LShiftVB src shift));
+ match(Set dst ( RShiftVB src shift));
+ match(Set dst (URShiftVB src shift));
+ effect(TEMP dst, TEMP vtmp, TEMP scratch);
+ format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp, $scratch as TEMP" %}
+ ins_encode %{
+ assert(UseAVX >= 2, "required");
+ // Shift via varshiftbw, then vpackuswb packs dst with itself (NOTE(review): trailing 0 is presumably the 128-bit length encoding).
+ int opcode = this->ideal_Opcode();
+ int vlen_enc = Assembler::AVX_128bit;
+ __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister, $scratch$$Register);
+ __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{
+ predicate(vector_length(n) == 16 &&
+ !VectorNode::is_vshift_cnt(n->in(2)) && // variable-shift rule: in(2) is not a vshift-count node
+ !VM_Version::supports_avx512bw());
+ match(Set dst ( LShiftVB src shift));
+ match(Set dst ( RShiftVB src shift));
+ match(Set dst (URShiftVB src shift));
+ effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch);
+ format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 and $scratch as TEMP" %}
+ ins_encode %{
+ assert(UseAVX >= 2, "required");
+ // The 16 bytes are handled as two halves, each run through varshiftbw, then merged by vpackuswb.
+ int opcode = this->ideal_Opcode();
+ int vlen_enc = Assembler::AVX_128bit;
+ // Shift lower half and get word result in dst
+ __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register);
+
+ // Shift upper half and get word result in vtmp1
+ __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
+ __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
+ __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register);
+
+ // Merge and down convert the two word results to byte in dst
+ __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4, rRegP scratch) %{
+ predicate(vector_length(n) == 32 &&
+ !VectorNode::is_vshift_cnt(n->in(2)) && // variable-shift rule: in(2) is not a vshift-count node
+ !VM_Version::supports_avx512bw());
+ match(Set dst ( LShiftVB src shift));
+ match(Set dst ( RShiftVB src shift));
+ match(Set dst (URShiftVB src shift));
+ effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP scratch);
+ format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 and $scratch as TEMP" %}
+ ins_encode %{
+ assert(UseAVX >= 2, "required");
+ // Each 128-bit lane of src/shift is processed as two halves through varshiftbw; the lanes are rejoined by vinserti128.
+ int opcode = this->ideal_Opcode();
+ int vlen_enc = Assembler::AVX_128bit;
+ // Process lower 128 bits and get result in dst
+ __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register);
+ __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
+ __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
+ __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register);
+ __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
+
+ // Process higher 128 bits and get result in vtmp3
+ __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
+ __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
+ __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister, $scratch$$Register);
+ __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
+ __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
+ __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register);
+ __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);
+
+ // Merge the two results in dst
+ __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{
+ predicate(vector_length(n) <= 32 &&
+ !VectorNode::is_vshift_cnt(n->in(2)) && // variable-shift rule: in(2) is not a vshift-count node
+ VM_Version::supports_avx512bw());
+ match(Set dst ( LShiftVB src shift));
+ match(Set dst ( RShiftVB src shift));
+ match(Set dst (URShiftVB src shift));
+ effect(TEMP dst, TEMP vtmp, TEMP scratch);
+ format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp, $scratch as TEMP" %}
+ ins_encode %{
+ assert(UseAVX > 2, "required");
+ // Vectors of up to 32 bytes are handled by a single evarshiftb call at the node's own length encoding.
+ int opcode = this->ideal_Opcode();
+ int vlen_enc = vector_length_encoding(this);
+ __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister, $scratch$$Register);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{
+ predicate(vector_length(n) == 64 &&
+ !VectorNode::is_vshift_cnt(n->in(2)) && // variable-shift rule: in(2) is not a vshift-count node
+ VM_Version::supports_avx512bw());
+ match(Set dst ( LShiftVB src shift));
+ match(Set dst ( RShiftVB src shift));
+ match(Set dst (URShiftVB src shift));
+ effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch);
+ format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 and $scratch as TEMP" %}
+ ins_encode %{
+ assert(UseAVX > 2, "required");
+ // The 64-byte vector is shifted as two halves at the 256-bit encoding; the high-half result is put back with vinserti64x4.
+ int opcode = this->ideal_Opcode();
+ int vlen_enc = Assembler::AVX_256bit;
+ __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register);
+ __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
+ __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
+ __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register);
+ __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Short variable shift
+instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{
+ predicate(vector_length(n) <= 8 &&
+ !VectorNode::is_vshift_cnt(n->in(2)) && // variable-shift rule: in(2) is not a vshift-count node
+ !VM_Version::supports_avx512bw());
+ match(Set dst ( LShiftVS src shift));
+ match(Set dst ( RShiftVS src shift));
+ match(Set dst (URShiftVS src shift));
+ effect(TEMP dst, TEMP vtmp, TEMP scratch);
+ format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp, $scratch as TEMP" %}
+ ins_encode %{
+ assert(UseAVX >= 2, "required");
+ // Words are extended to dwords, shifted with varshiftd, masked with vector_int_to_short_mask and packed back.
+ int opcode = this->ideal_Opcode();
+ bool sign = (opcode != Op_URShiftVS); // false only for the unsigned right shift
+ int vlen_enc = Assembler::AVX_256bit;
+ __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
+ __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
+ __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
+ __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register);
+ __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
+ __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{
+ predicate(vector_length(n) == 16 &&
+ !VectorNode::is_vshift_cnt(n->in(2)) && // variable-shift rule: in(2) is not a vshift-count node
+ !VM_Version::supports_avx512bw());
+ match(Set dst ( LShiftVS src shift));
+ match(Set dst ( RShiftVS src shift));
+ match(Set dst (URShiftVS src shift));
+ effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch);
+ format %{ "vector_varshift_short $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 and $scratch as TEMP" %}
+ ins_encode %{
+ assert(UseAVX >= 2, "required");
+ // Each half is extended word->dword, shifted with varshiftd and masked; the halves are then packed and permuted into dst.
+ int opcode = this->ideal_Opcode();
+ bool sign = (opcode != Op_URShiftVS); // false only for the unsigned right shift
+ int vlen_enc = Assembler::AVX_256bit;
+ // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
+ __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
+ __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
+ __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
+ __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register);
+
+ // Shift upper half, with result in dst using vtmp1 as TEMP
+ __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
+ __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
+ __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
+ __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
+ __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
+ __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register);
+
+ // Merge lower and upper half result into dst
+ __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
+ __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
+ predicate(!VectorNode::is_vshift_cnt(n->in(2)) && // NOTE(review): no vector-length term here despite "16S" in the rule name
+ VM_Version::supports_avx512bw());
+ match(Set dst ( LShiftVS src shift));
+ match(Set dst ( RShiftVS src shift));
+ match(Set dst (URShiftVS src shift));
+ format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
+ ins_encode %{
+ assert(UseAVX > 2, "required");
+ // Without AVX512VL the encoding is forced to 512-bit regardless of the node's own vector length.
+ int opcode = this->ideal_Opcode();
+ int vlen_enc = vector_length_encoding(this);
+ if (!VM_Version::supports_avx512vl()) {
+ vlen_enc = Assembler::AVX_512bit;
+ }
+ __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+//Integer variable shift
+instruct vshiftI_var(vec dst, vec src, vec shift) %{
+ predicate(!VectorNode::is_vshift_cnt(n->in(2))); // variable-shift rule: in(2) is not a vshift-count node
+ match(Set dst ( LShiftVI src shift));
+ match(Set dst ( RShiftVI src shift));
+ match(Set dst (URShiftVI src shift));
+ format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
+ ins_encode %{
+ assert(UseAVX >= 2, "required");
+ // All three int shift kinds share this rule; the ideal opcode is forwarded to varshiftd.
+ int opcode = this->ideal_Opcode();
+ int vlen_enc = vector_length_encoding(this);
+ __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+//Long variable shift
+instruct vshiftL_var(vec dst, vec src, vec shift) %{
+ predicate(!VectorNode::is_vshift_cnt(n->in(2))); // variable-shift rule: in(2) is not a vshift-count node
+ match(Set dst ( LShiftVL src shift));
+ match(Set dst (URShiftVL src shift));
+ format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
+ ins_encode %{
+ assert(UseAVX >= 2, "required");
+ // Only LShiftVL and URShiftVL are matched here; RShiftVL is matched by the vshiftL_arith_var* rules.
+ int opcode = this->ideal_Opcode();
+ int vlen_enc = vector_length_encoding(this);
+ __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+//Long variable right shift arithmetic
+instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
+ predicate(vector_length(n) <= 4 &&
+ !VectorNode::is_vshift_cnt(n->in(2)) && // variable-shift rule: in(2) is not a vshift-count node
+ UseAVX == 2); // UseAVX > 2 is matched by vshiftL_arith_var_evex
+ match(Set dst (RShiftVL src shift));
+ effect(TEMP dst, TEMP vtmp);
+ format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
+ ins_encode %{
+ int opcode = this->ideal_Opcode(); // RShiftVL is the only opcode this rule matches
+ int vlen_enc = vector_length_encoding(this);
+ __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
+ $vtmp$$XMMRegister); // this call passes an extra vector temp, unlike the call in the EVEX rule
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
+ predicate(!VectorNode::is_vshift_cnt(n->in(2)) && // variable-shift rule: in(2) is not a vshift-count node
+ UseAVX > 2); // UseAVX == 2 is matched by vshiftL_arith_var
+ match(Set dst (RShiftVL src shift));
+ format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
+ ins_encode %{
+ int opcode = this->ideal_Opcode(); // RShiftVL is the only opcode this rule matches
+ int vlen_enc = vector_length_encoding(this);
+ __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -5091,8 +6448,8 @@ instruct vand_reg(vec dst, vec src1, vec src2) %{
match(Set dst (AndV src1 src2));
format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -5102,8 +6459,8 @@ instruct vand_mem(vec dst, vec src, memory mem) %{
match(Set dst (AndV src (LoadVector mem)));
format %{ "vpand $dst,$src,$mem\t! and vectors" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -5125,8 +6482,8 @@ instruct vor_reg(vec dst, vec src1, vec src2) %{
match(Set dst (OrV src1 src2));
format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -5136,8 +6493,8 @@ instruct vor_mem(vec dst, vec src, memory mem) %{
match(Set dst (OrV src (LoadVector mem)));
format %{ "vpor $dst,$src,$mem\t! or vectors" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -5159,8 +6516,8 @@ instruct vxor_reg(vec dst, vec src1, vec src2) %{
match(Set dst (XorV src1 src2));
format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -5170,19 +6527,617 @@ instruct vxor_mem(vec dst, vec src, memory mem) %{
match(Set dst (XorV src (LoadVector mem)));
format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
-// --------------------------------- ABS --------------------------------------
-// a = |a|
-instruct vabsB_reg(vec dst, vec src) %{
- match(Set dst (AbsVB src));
- format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
+// --------------------------------- VectorCast --------------------------------------
+
+instruct vcastBtoX(vec dst, vec src) %{ // VectorCastB2X: widen a byte vector to the element type of the result node
+ match(Set dst (VectorCastB2X src));
+ format %{ "vector_cast_b2x $dst,$src\t!" %}
ins_encode %{
- uint vlen = vector_length(this);
+ assert(UseAVX > 0, "required");
+
+ BasicType to_elem_bt = vector_element_basic_type(this); // element type of the result vector
+ int vlen_enc = vector_length_encoding(this); // every instruction below is encoded at the result vector length
+ switch (to_elem_bt) {
+ case T_SHORT:
+ __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // sign-extend byte to word
+ break;
+ case T_INT:
+ __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // sign-extend byte to dword
+ break;
+ case T_FLOAT:
+ __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // two steps: byte to int into dst ...
+ __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // ... then int to float in place
+ break;
+ case T_LONG:
+ __ vpmovsxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // sign-extend byte to qword
+ break;
+ case T_DOUBLE:
+ __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // two steps: byte to int into dst ...
+ __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // ... then int to double in place
+ break;
+
+ default: assert(false, "%s", type2name(to_elem_bt));
+ }
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct castStoX(vec dst, vec src, rRegP scratch) %{ // VectorCastS2X to T_BYTE for sources of at most 8 shorts when UseAVX <= 2
+ predicate(UseAVX <= 2 &&
+ vector_length(n->in(1)) <= 8 && // src
+ vector_element_basic_type(n) == T_BYTE);
+ effect(TEMP scratch);
+ match(Set dst (VectorCastS2X src));
+ format %{ "vector_cast_s2x $dst,$src\t! using $scratch as TEMP" %}
+ ins_encode %{
+ assert(UseAVX > 0, "required");
+
+ __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, $scratch$$Register); // AND with the short-to-byte mask; scratch is handed to vpand for the external address
+ __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); // pack words to bytes; literal 0 is the length encoding, presumably AVX_128bit - TODO confirm
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vcastStoX(vec dst, vec src, vec vtmp, rRegP scratch) %{ // VectorCastS2X to T_BYTE for exactly 16 source shorts when UseAVX <= 2
+ predicate(UseAVX <= 2 &&
+ vector_length(n->in(1)) == 16 && // src
+ vector_element_basic_type(n) == T_BYTE);
+ effect(TEMP dst, TEMP vtmp, TEMP scratch);
+ match(Set dst (VectorCastS2X src));
+ format %{ "vector_cast_s2x $dst,$src\t! using $vtmp, $scratch as TEMP" %}
+ ins_encode %{
+ assert(UseAVX > 0, "required");
+
+ int vlen_enc = vector_length_encoding(vector_length_in_bytes(this, $src)); // encoding of the source vector, not of the result
+ __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register); // mask the whole source into dst
+ __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); // immediate 1: copy the upper 128 bits of dst into vtmp
+ __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); // pack dst and vtmp words into bytes; literal 0 is the length encoding, presumably AVX_128bit - TODO confirm
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vcastStoX_evex(vec dst, vec src) %{ // VectorCastS2X: any widening cast, plus the narrowing cast to T_BYTE when UseAVX > 2
+ predicate(UseAVX > 2 ||
+ (vector_length_in_bytes(n) >= vector_length_in_bytes(n->in(1)))); // dst >= src
+ match(Set dst (VectorCastS2X src));
+ format %{ "vector_cast_s2x $dst,$src\t!" %}
+ ins_encode %{
+ BasicType to_elem_bt = vector_element_basic_type(this); // element type of the result vector
+ int src_vlen_enc = vector_length_encoding(this, $src); // used by the narrowing T_BYTE case
+ int vlen_enc = vector_length_encoding(this); // used by the widening cases
+ switch (to_elem_bt) {
+ case T_BYTE:
+ if (!VM_Version::supports_avx512vl()) {
+ src_vlen_enc = Assembler::AVX_512bit; // fix: widen the encoding evpmovwb actually receives, as vcastItoX_evex does
+ }
+ __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
+ break;
+ case T_INT:
+ __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // sign-extend word to dword
+ break;
+ case T_FLOAT:
+ __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // two steps: word to int into dst ...
+ __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // ... then int to float in place
+ break;
+ case T_LONG:
+ __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // sign-extend word to qword
+ break;
+ case T_DOUBLE:
+ __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // two steps: word to int into dst ...
+ __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // ... then int to double in place
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct castItoX(vec dst, vec src, rRegP scratch) %{ // narrowing VectorCastI2X (to byte or short) for sources of at most 16 bytes when UseAVX <= 2
+ predicate(UseAVX <= 2 &&
+ (vector_length_in_bytes(n->in(1)) <= 16) &&
+ (vector_length_in_bytes(n) < vector_length_in_bytes(n->in(1)))); // dst < src
+ match(Set dst (VectorCastI2X src));
+ format %{ "vector_cast_i2x $dst,$src\t! using $scratch as TEMP" %}
+ effect(TEMP scratch);
+ ins_encode %{
+ assert(UseAVX > 0, "required");
+
+ BasicType to_elem_bt = vector_element_basic_type(this); // element type of the result vector
+ int vlen_enc = vector_length_encoding(this, $src); // encoding of the source vector
+
+ if (to_elem_bt == T_BYTE) {
+ __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, $scratch$$Register); // AND with the int-to-byte mask
+ __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // pack dwords to words
+ __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // pack words to bytes
+ } else {
+ assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
+ __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); // AND with the int-to-short mask
+ __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // a single pack step is enough for shorts
+ }
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vcastItoX(vec dst, vec src, vec vtmp, rRegP scratch) %{ // narrowing VectorCastI2X (to byte or short) for 32-byte sources when UseAVX <= 2
+ predicate(UseAVX <= 2 &&
+ (vector_length_in_bytes(n->in(1)) == 32) &&
+ (vector_length_in_bytes(n) < vector_length_in_bytes(n->in(1)))); // dst < src
+ match(Set dst (VectorCastI2X src));
+ format %{ "vector_cast_i2x $dst,$src\t! using $vtmp and $scratch as TEMP" %}
+ effect(TEMP dst, TEMP vtmp, TEMP scratch);
+ ins_encode %{
+ assert(UseAVX > 0, "required");
+
+ BasicType to_elem_bt = vector_element_basic_type(this); // element type of the result vector
+ int vlen_enc = vector_length_encoding(this, $src); // encoding of the source vector
+
+ if (to_elem_bt == T_BYTE) {
+ __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, $scratch$$Register); // masked source goes to vtmp
+ __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); // immediate 1: upper 128 bits of vtmp into dst
+ __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); // pack dwords of vtmp and dst to words
+ __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); // final word-to-byte pack is done at 128 bits
+ } else {
+ assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
+ __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); // masked source goes to vtmp
+ __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); // immediate 1: upper 128 bits of vtmp into dst
+ __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); // pack dwords of vtmp and dst to words
+ }
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vcastItoX_evex(vec dst, vec src) %{ // VectorCastI2X: any non-narrowing cast, plus the narrowing casts when UseAVX > 2
+ predicate(UseAVX > 2 ||
+ (vector_length_in_bytes(n) >= vector_length_in_bytes(n->in(1)))); // dst >= src
+ match(Set dst (VectorCastI2X src));
+ format %{ "vector_cast_i2x $dst,$src\t!" %}
+ ins_encode %{
+ assert(UseAVX > 0, "required");
+
+ BasicType dst_elem_bt = vector_element_basic_type(this); // element type of the result vector
+ int src_vlen_enc = vector_length_encoding(this, $src); // used by the narrowing cases
+ int dst_vlen_enc = vector_length_encoding(this); // used by the remaining cases
+ switch (dst_elem_bt) {
+ case T_BYTE:
+ if (!VM_Version::supports_avx512vl()) {
+ src_vlen_enc = Assembler::AVX_512bit; // without AVX512VL fall back to the 512-bit encoding
+ }
+ __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
+ break;
+ case T_SHORT:
+ if (!VM_Version::supports_avx512vl()) {
+ src_vlen_enc = Assembler::AVX_512bit; // without AVX512VL fall back to the 512-bit encoding
+ }
+ __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
+ break;
+ case T_FLOAT:
+ __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); // fix: convert from src; nothing has written dst yet in this case
+ break;
+ case T_LONG:
+ __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); // sign-extend dword to qword
+ break;
+ case T_DOUBLE:
+ __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); // fix: convert from src; nothing has written dst yet in this case
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vcastLtoBS(vec dst, vec src, rRegP scratch) %{ // VectorCastL2X to T_BYTE or T_SHORT when UseAVX <= 2
+ predicate((vector_element_basic_type(n) == T_BYTE || vector_element_basic_type(n) == T_SHORT) &&
+ UseAVX <= 2);
+ match(Set dst (VectorCastL2X src));
+ effect(TEMP scratch);
+ format %{ "vector_cast_l2x $dst,$src\t! using $scratch as TEMP" %}
+ ins_encode %{
+ assert(UseAVX > 0, "required");
+
+ int vlen = vector_length_in_bytes(this, $src); // size of the source vector in bytes
+ BasicType to_elem_bt = vector_element_basic_type(this);
+ AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask()) // int-based masks are used because longs are reduced to dwords first
+ : ExternalAddress(vector_int_to_short_mask());
+ if (vlen <= 16) {
+ __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit); // shuffle immediate 8 moves dwords 0 and 2, the low half of each long, to the bottom
+ __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, $scratch$$Register);
+ __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); // pack dwords to words
+ } else {
+ assert(vlen <= 32, "required");
+ __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit); // same dword selection, applied within each 128-bit lane
+ __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit); // immediate 8 brings qwords 0 and 2 together in the low 128 bits
+ __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, $scratch$$Register); // from here on only 128 bits are processed
+ __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); // pack dwords to words
+ }
+ if (to_elem_bt == T_BYTE) {
+ __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); // bytes need one more pack step, words to bytes
+ }
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vcastLtoX_evex(vec dst, vec src) %{ // VectorCastL2X: every target type when UseAVX > 2, otherwise int, float and double targets
+ predicate(UseAVX > 2 ||
+ (vector_element_basic_type(n) == T_INT ||
+ vector_element_basic_type(n) == T_FLOAT ||
+ vector_element_basic_type(n) == T_DOUBLE));
+ match(Set dst (VectorCastL2X src));
+ format %{ "vector_cast_l2x $dst,$src\t!" %}
+ ins_encode %{
+ BasicType to_elem_bt = vector_element_basic_type(this); // element type of the result vector
+ int vlen = vector_length_in_bytes(this, $src); // size of the source vector in bytes
+ int vlen_enc = vector_length_encoding(this, $src); // encoding of the source vector
+ switch (to_elem_bt) {
+ case T_BYTE:
+ if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
+ vlen_enc = Assembler::AVX_512bit; // without AVX512VL fall back to the 512-bit encoding
+ }
+ __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
+ break;
+ case T_SHORT:
+ if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
+ vlen_enc = Assembler::AVX_512bit; // without AVX512VL fall back to the 512-bit encoding
+ }
+ __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
+ break;
+ case T_INT:
+ if (vlen == 8) { // the T_INT strategy is picked by source size in bytes
+ if ($dst$$XMMRegister != $src$$XMMRegister) {
+ __ movflt($dst$$XMMRegister, $src$$XMMRegister); // single long: a register copy is emitted only when dst and src differ
+ }
+ } else if (vlen == 16) {
+ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); // shuffle immediate 8 moves dwords 0 and 2 to the bottom
+ } else if (vlen == 32) {
+ if (UseAVX > 2) {
+ if (!VM_Version::supports_avx512vl()) {
+ vlen_enc = Assembler::AVX_512bit; // without AVX512VL fall back to the 512-bit encoding
+ }
+ __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
+ } else {
+ __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc); // dwords 0 and 2 within each 128-bit lane
+ __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); // then qwords 0 and 2 into the low 128 bits
+ }
+ } else { // vlen == 64
+ __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
+ }
+ break;
+ case T_FLOAT:
+ assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); // NOTE(review): the predicate also admits T_FLOAT when UseAVX <= 2; presumably such nodes are never created - confirm
+ __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
+ break;
+ case T_DOUBLE:
+ assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); // NOTE(review): same predicate versus assert gap as the T_FLOAT case - confirm
+ __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
+ break;
+
+ default: assert(false, "%s", type2name(to_elem_bt));
+ }
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vcastFtoD_reg(vec dst, vec src) %{ // VectorCastF2X restricted to a T_DOUBLE result
+ predicate(vector_element_basic_type(n) == T_DOUBLE);
+ match(Set dst (VectorCastF2X src));
+ format %{ "vector_cast_f2x $dst,$src\t!" %}
+ ins_encode %{
+ int vlen_enc = vector_length_encoding(this); // widening cast: encoded at the length of the result vector
+ __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vcastDtoF_reg(vec dst, vec src) %{ // VectorCastD2X restricted to a T_FLOAT result
+ predicate(vector_element_basic_type(n) == T_FLOAT);
+ match(Set dst (VectorCastD2X src));
+ format %{ "vector_cast_d2x $dst,$src\t!" %}
+ ins_encode %{
+ int vlen_enc = vector_length_encoding(this, $src); // narrowing cast: encoded at the length of the source vector
+ __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// --------------------------------- VectorMaskCmp --------------------------------------
+
+instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ // VectorMaskCmp on float or double inputs of 8 to 32 bytes
+ predicate(vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
+ vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
+ is_floating_point_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
+ ins_encode %{
+ int vlen_enc = vector_length_encoding(this, $src1); // length and element type are taken from the first input
+ Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); // translate the condition constant into an FP comparison predicate
+ if (vector_element_basic_type(this, $src1) == T_FLOAT) {
+ __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); // float inputs
+ } else {
+ __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); // otherwise double, the only other type the predicate admits
+ }
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct evcmpFD(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch) %{ // VectorMaskCmp on 64-byte float or double inputs, via a mask register
+ predicate(vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
+ is_floating_point_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ effect(TEMP scratch);
+ format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %}
+ ins_encode %{
+ int vlen_enc = Assembler::AVX_512bit; // fixed: the predicate only admits 64-byte inputs
+ Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); // translate the condition constant into an FP comparison predicate
+ KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
+ KRegister mask = k0; // The comparison itself is not being masked.
+ if (vector_element_basic_type(this, $src1) == T_FLOAT) {
+ __ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); // comparison result lands in ktmp
+ __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register); // masked load of all-ones under ktmp, merge flag false
+ } else {
+ __ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); // comparison result lands in ktmp
+ __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register); // same as above with the qword-granular move
+ }
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vcmp(legVec dst, legVec src1, legVec src2, immI8 cond, rRegP scratch) %{ // VectorMaskCmp on integral inputs of 8 to 32 bytes
+ predicate(vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
+ vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
+ is_integral_type(vector_element_basic_type(n->in(1)->in(1)))); // src1
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ effect(TEMP scratch);
+ format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %}
+ ins_encode %{
+ int vlen_enc = vector_length_encoding(this, $src1); // length is taken from the first input
+ Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); // translate the condition constant into an integer comparison predicate
+ Assembler::Width ww = widthForType(vector_element_basic_type(this, $src1)); // element width of the first input
+ __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, ww, vlen_enc, $scratch$$Register); // one helper call covers every predicate and width
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct evcmp(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch) %{ // VectorMaskCmp on 64-byte integral inputs, via a mask register
+ predicate(vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
+ is_integral_type(vector_element_basic_type(n->in(1)->in(1)))); // src1
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ effect(TEMP scratch);
+ format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %}
+ ins_encode %{
+ assert(UseAVX > 2, "required");
+
+ int vlen_enc = Assembler::AVX_512bit; // fixed: the predicate only admits 64-byte inputs
+ Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); // translate the condition constant into an integer comparison predicate
+ KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
+ KRegister mask = k0; // The comparison itself is not being masked.
+ bool merge = false; // passed to every masked move below
+ BasicType src1_elem_bt = vector_element_basic_type(this, $src1);
+
+ switch (src1_elem_bt) { // each case: compare into ktmp, then masked load of all-ones under ktmp
+ case T_BYTE: {
+ __ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
+ __ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
+ break;
+ }
+ case T_SHORT: {
+ __ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
+ __ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
+ break;
+ }
+ case T_INT: {
+ __ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
+ __ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
+ break;
+ }
+ case T_LONG: {
+ __ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
+ __ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
+ break;
+ }
+
+ default: assert(false, "%s", type2name(src1_elem_bt));
+ }
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// Extract
+
+instruct extractI(rRegI dst, legVec src, immU8 idx) %{ // ExtractI / ExtractS (and ExtractB on LP64) from a source of at most 16 bytes
+ predicate(vector_length_in_bytes(n->in(1)) <= 16); // src
+ match(Set dst (ExtractI src idx));
+ match(Set dst (ExtractS src idx));
+#ifdef _LP64
+ match(Set dst (ExtractB src idx));
+#endif
+ format %{ "extractI $dst,$src,$idx\t!" %}
+ ins_encode %{
+ assert($idx$$constant < (int)vector_length(this, $src), "out of bounds");
+
+ BasicType elem_bt = vector_element_basic_type(this, $src); // element type comes from the source vector, since one rule serves several widths
+ __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant); // reads straight from src, no lane selection step
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{ // ExtractI / ExtractS (and ExtractB on LP64) from 32- or 64-byte sources; NOTE(review): idx is immI here but immU8 in the sibling extract rules - confirm intent
+ predicate(vector_length_in_bytes(n->in(1)) == 32 || // src
+ vector_length_in_bytes(n->in(1)) == 64); // src
+ match(Set dst (ExtractI src idx));
+ match(Set dst (ExtractS src idx));
+#ifdef _LP64
+ match(Set dst (ExtractB src idx));
+#endif
+ effect(TEMP vtmp);
+ format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
+ ins_encode %{
+ assert($idx$$constant < (int)vector_length(this, $src), "out of bounds");
+
+ BasicType elem_bt = vector_element_basic_type(this, $src); // element type comes from the source vector
+ XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); // get_lane is given vtmp and src and returns the register to read from
+ __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant); // the unmodified index is passed again; presumably get_elem reduces it to an in-lane position - TODO confirm
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+#ifdef _LP64
+instruct extractL(rRegL dst, legVec src, immU8 idx) %{ // ExtractL from a source of at most 2 longs
+ predicate(vector_length(n->in(1)) <= 2); // src
+ match(Set dst (ExtractL src idx));
+ format %{ "extractL $dst,$src,$idx\t!" %}
+ ins_encode %{
+ assert(UseSSE >= 4, "required");
+ assert($idx$$constant < (int)vector_length(this, $src), "out of bounds");
+
+ __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant); // reads straight from src, no lane selection step
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{ // ExtractL from a source of 4 or 8 longs
+ predicate(vector_length(n->in(1)) == 4 || // src
+ vector_length(n->in(1)) == 8); // src
+ match(Set dst (ExtractL src idx));
+ effect(TEMP vtmp);
+ format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
+ ins_encode %{
+ assert($idx$$constant < (int)vector_length(this, $src), "out of bounds");
+
+ XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); // get_lane is given vtmp and src and returns the register to read from
+ __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant); // element is then read from the returned register
+ %}
+ ins_pipe( pipe_slow );
+%}
+#endif
+
+instruct extractF(legRegF dst, legVec src, immU8 idx, rRegI tmp, legVec vtmp) %{ // ExtractF from a source of at most 4 floats
+ predicate(vector_length(n->in(1)) <= 4);
+ match(Set dst (ExtractF src idx));
+ effect(TEMP dst, TEMP tmp, TEMP vtmp); // dst is declared TEMP here, unlike in vextractF
+ format %{ "extractF $dst,$src,$idx\t! using $tmp, $vtmp as TEMP" %}
+ ins_encode %{
+ assert($idx$$constant < (int)vector_length(this, $src), "out of bounds");
+
+ __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $tmp$$Register, $vtmp$$XMMRegister); // both the integer and the vector temp are handed to get_elem
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vextractF(legRegF dst, legVec src, immU8 idx, rRegI tmp, legVec vtmp) %{ // ExtractF from a source of 8 or 16 floats
+ predicate(vector_length(n->in(1)/*src*/) == 8 ||
+ vector_length(n->in(1)/*src*/) == 16);
+ match(Set dst (ExtractF src idx));
+ effect(TEMP tmp, TEMP vtmp);
+ format %{ "vextractF $dst,$src,$idx\t! using $tmp, $vtmp as TEMP" %}
+ ins_encode %{
+ assert($idx$$constant < (int)vector_length(this, $src), "out of bounds");
+
+ XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); // get_lane is given vtmp and src and returns the register to read from
+ __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant, $tmp$$Register); // only the integer temp is passed here; vtmp was already used by get_lane
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct extractD(legRegD dst, legVec src, immU8 idx) %{ // ExtractD from a source of exactly 2 doubles
+ predicate(vector_length(n->in(1)) == 2); // src
+ match(Set dst (ExtractD src idx));
+ format %{ "extractD $dst,$src,$idx\t!" %}
+ ins_encode %{
+ assert($idx$$constant < (int)vector_length(this, $src), "out of bounds");
+
+ __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant); // reads straight from src, no lane selection step
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{ // ExtractD from a source of 4 or 8 doubles
+ predicate(vector_length(n->in(1)) == 4 || // src
+ vector_length(n->in(1)) == 8); // src
+ match(Set dst (ExtractD src idx));
+ effect(TEMP vtmp);
+ format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
+ ins_encode %{
+ assert($idx$$constant < (int)vector_length(this, $src), "out of bounds");
+
+ XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); // get_lane is given vtmp and src and returns the register to read from
+ __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant); // element is then read from the returned register
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// --------------------------------- Vector Blend --------------------------------------
+
+instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{ // VectorBlend for UseAVX == 0; dst is both the first input and the result
+ predicate(UseAVX == 0);
+ match(Set dst (VectorBlend (Binary dst src) mask));
+ format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
+ effect(TEMP tmp);
+ ins_encode %{
+ assert(UseSSE >= 4, "required");
+
+ if ($mask$$XMMRegister != $tmp$$XMMRegister) { // tmp has operand class rxmm0; skip the copy when the mask is already there
+ __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
+ }
+ __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ // VectorBlend of integral elements, up to 32 bytes, when UseAVX > 0
+ predicate(UseAVX > 0 &&
+ vector_length_in_bytes(n) <= 32 &&
+ is_integral_type(vector_element_basic_type(n)));
+ match(Set dst (VectorBlend (Binary src1 src2) mask));
+ format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
+ ins_encode %{
+ int vlen_enc = vector_length_encoding(this); // encoding derived from this node, i.e. the result vector
+ __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); // the byte blend is used for every integral element type
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ // VectorBlend of non-integral elements, up to 32 bytes, when UseAVX > 0
+ predicate(UseAVX > 0 &&
+ vector_length_in_bytes(n) <= 32 &&
+ !is_integral_type(vector_element_basic_type(n)));
+ match(Set dst (VectorBlend (Binary src1 src2) mask));
+ format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
+ ins_encode %{
+ int vlen_enc = vector_length_encoding(this); // encoding derived from this node, i.e. the result vector
+ __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); // the single-precision blend is emitted for every type this rule admits
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, rRegP scratch) %{ // VectorBlend for 64-byte vectors, via mask register k2
+ predicate(vector_length_in_bytes(n) == 64);
+ match(Set dst (VectorBlend (Binary src1 src2) mask));
+ format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $scratch and k2 as TEMP" %}
+ effect(TEMP scratch);
+ ins_encode %{
+ int vlen_enc = Assembler::AVX_512bit; // fixed: the predicate only admits 64-byte vectors
+ BasicType elem_bt = vector_element_basic_type(this); // selects the element width inside evpcmp and evpblend
+ KRegister ktmp = k2; // hard-coded mask register, as the format string advertises
+ __ evpcmp(elem_bt, ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, $scratch$$Register); // ktmp marks lanes where the vector mask equals all-ones
+ __ evpblend(elem_bt, $dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); // blend under ktmp with the merge flag set
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// --------------------------------- ABS --------------------------------------
+// a = |a|
+instruct vabsB_reg(vec dst, vec src) %{
+ match(Set dst (AbsVB src));
+ format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
+ ins_encode %{
+ uint vlen = vector_length(this);
if (vlen <= 16) {
__ pabsb($dst$$XMMRegister, $src$$XMMRegister);
} else {
@@ -5228,8 +7183,11 @@ instruct vabsL_reg(vec dst, vec src) %{
format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
ins_encode %{
assert(UseAVX > 2, "required");
- int vector_len = vector_length_encoding(this);
- __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ if (!VM_Version::supports_avx512vl()) {
+ vlen_enc = Assembler::AVX_512bit;
+ }
+ __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -5237,7 +7195,7 @@ instruct vabsL_reg(vec dst, vec src) %{
// --------------------------------- ABSNEG --------------------------------------
instruct vabsnegF(vec dst, vec src, rRegI scratch) %{
- predicate(n->as_Vector()->length() != 4); // handled by 1-operand instruction vabsneg4F
+ predicate(vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
match(Set dst (AbsVF src));
match(Set dst (NegVF src));
effect(TEMP scratch);
@@ -5258,7 +7216,7 @@ instruct vabsnegF(vec dst, vec src, rRegI scratch) %{
%}
instruct vabsneg4F(vec dst, rRegI scratch) %{
- predicate(n->as_Vector()->length() == 4);
+ predicate(vector_length(n) == 4);
match(Set dst (AbsVF dst));
match(Set dst (NegVF dst));
effect(TEMP scratch);
@@ -5290,6 +7248,504 @@ instruct vabsnegD(vec dst, vec src, rRegI scratch) %{
ins_pipe( pipe_slow );
%}
+//------------------------------------- VectorTest --------------------------------------------
+
+#ifdef _LP64
+instruct vptest_alltrue(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{
+ predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); // this rule handles the BoolTest::overflow form of VectorTest
+ match(Set dst (VectorTest src1 src2 ));
+ effect(KILL cr); // the test instructions below overwrite the flags
+ format %{ "vector_test $dst,$src1, $src2\t! using $cr as TEMP" %}
+ ins_encode %{
+ int vlen = vector_length_in_bytes(this, $src1); // size of the tested input vector, not of the int result
+ int vlen_enc = vector_length_encoding(vlen);
+ if (vlen <= 32) {
+ if (UseAVX == 0) {
+ assert(vlen <= 16, "required"); // the SSE form is only expected for vectors of at most 16 bytes
+ __ ptest($src1$$XMMRegister, $src2$$XMMRegister);
+ } else {
+ __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
+ }
+ } else {
+ KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
+ __ evpcmpeqb(ktmp, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); // byte-wise equality of src1/src2 into the mask register
+ __ kortestql(ktmp, ktmp); // derive the flags from the mask
+ }
+ __ setb(Assembler::carrySet, $dst$$Register); // dst byte = 1 if the carry flag is set, else 0
+ __ movzbl($dst$$Register, $dst$$Register); // zero-extend the byte result to 32 bits
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vptest_anytrue(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{
+ predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); // this rule handles the BoolTest::ne form of VectorTest
+ match(Set dst (VectorTest src1 src2 ));
+ effect(KILL cr); // the test instructions below overwrite the flags
+ format %{ "vector_test_any_true $dst,$src1,$src2\t! using $cr as TEMP" %}
+ ins_encode %{
+ int vlen = vector_length_in_bytes(this, $src1); // size of the tested input vector, not of the int result
+ int vlen_enc = vector_length_encoding(vlen);
+ if (vlen <= 32) {
+ if (UseAVX == 0) {
+ assert(vlen <= 16, "required"); // the SSE form is only expected for vectors of at most 16 bytes
+ __ ptest($src1$$XMMRegister, $src2$$XMMRegister);
+ } else {
+ __ vptest($src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
+ }
+ } else {
+ KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
+ __ evpcmpeqb(ktmp, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); // byte-wise equality of src1/src2 into the mask register
+ __ ktestql(ktmp, ktmp); // derive the flags from the mask
+ }
+ __ setb(Assembler::notZero, $dst$$Register); // dst byte = 1 if the zero flag is clear, else 0
+ __ movzbl($dst$$Register, $dst$$Register); // zero-extend the byte result to 32 bits
+ %}
+ ins_pipe( pipe_slow );
+%}
+#endif
+
+//------------------------------------- LoadMask --------------------------------------------
+
+instruct loadMask(vec dst, vec src) %{
+ match(Set dst (VectorLoadMask src));
+ effect(TEMP dst); // dst is additionally declared TEMP
+ format %{ "vector_loadmask_byte $dst,$src\n\t" %}
+ ins_encode %{
+ int vlen_in_bytes = vector_length_in_bytes(this); // byte size of this node's vector
+ BasicType elem_bt = vector_element_basic_type(this); // element type of this node's vector
+
+ __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt); // all expansion work is delegated to the load_vector_mask helper
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+//------------------------------------- StoreMask --------------------------------------------
+
+instruct storeMask1B(vec dst, vec src, immI_1 size) %{
+ predicate(vector_length(n) < 64 || VM_Version::supports_avx512vlbw()); // 64-element vectors are only accepted with AVX512VL+BW
+ match(Set dst (VectorStoreMask src size)); // size is constrained to the constant 1 by its immI_1 operand type
+ format %{ "vector_store_mask $dst,$src\t!" %}
+ ins_encode %{
+ assert(UseSSE >= 3, "required");
+ if (vector_length_in_bytes(this) <= 16) { // result is at most 16 bytes: use the SSE encoding
+ __ pabsb($dst$$XMMRegister, $src$$XMMRegister); // byte-wise absolute value; presumably maps -1 (true) lanes to 1 - confirm mask encoding
+ } else {
+ assert(UseAVX >= 2, "required");
+ int src_vlen_enc = vector_length_encoding(this, $src); // encoding derived from the src operand
+ __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
+ }
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct storeMask2B(vec dst, vec src, immI_2 size) %{
+ predicate(vector_length(n) <= 8); // at most 8 elements
+ match(Set dst (VectorStoreMask src size)); // size is constrained to the constant 2 by its immI_2 operand type
+ format %{ "vector_store_mask $dst,$src\n\t" %}
+ ins_encode %{
+ assert(UseSSE >= 3, "required");
+ __ pabsw($dst$$XMMRegister, $src$$XMMRegister); // word-wise absolute value of src into dst
+ __ packsswb($dst$$XMMRegister, $dst$$XMMRegister); // narrow the words to bytes (signed saturation)
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vstoreMask2B(vec dst, vec src, immI_2 size) %{
+ predicate(vector_length(n) == 16 && !VM_Version::supports_avx512bw()); // 16 elements, non-AVX512BW path
+ match(Set dst (VectorStoreMask src size));
+ effect(TEMP dst); // dst is used as scratch for the upper half before the final result is written
+ format %{ "vector_store_mask $dst,$src\t!" %}
+ ins_encode %{
+ int vlen_enc = Assembler::AVX_128bit;
+ __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); // dst = upper 128 bits of src
+ __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister,vlen_enc); // narrow words to bytes: lower half from src, upper half from dst
+ __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // byte-wise absolute value
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vstoreMask2B_evex(vec dst, vec src, immI_2 size) %{
+ predicate(VM_Version::supports_avx512bw()); // AVX512BW path, any vector length
+ match(Set dst (VectorStoreMask src size));
+ format %{ "vector_store_mask $dst,$src\t!" %}
+ ins_encode %{
+ int src_vlen_enc = vector_length_encoding(this, $src); // encoding for the (wider) source vector
+ int dst_vlen_enc = vector_length_encoding(this); // encoding for the result vector
+ __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); // narrow words to bytes
+ __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); // byte-wise absolute value
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct storeMask4B(vec dst, vec src, immI_4 size) %{
+ predicate (vector_length(n) <= 4 && UseAVX <= 2); // at most 4 elements, non-AVX512 path
+ match(Set dst (VectorStoreMask src size));
+ format %{ "vector_store_mask $dst,$src\t!" %}
+ ins_encode %{
+ assert(UseSSE >= 3, "required");
+ __ pabsd($dst$$XMMRegister, $src$$XMMRegister); // dword-wise absolute value of src into dst
+ __ packssdw($dst$$XMMRegister, $dst$$XMMRegister); // narrow dwords to words (signed saturation)
+ __ packsswb($dst$$XMMRegister, $dst$$XMMRegister); // narrow words to bytes (signed saturation)
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vstoreMask4B(vec dst, vec src, immI_4 size) %{
+ predicate(vector_length(n) == 8 && UseAVX <= 2); // 8 elements, non-AVX512 path
+ match(Set dst (VectorStoreMask src size));
+ format %{ "vector_store_mask $dst,$src\t!" %}
+ effect(TEMP dst); // dst is used as scratch for the upper half before the final result is written
+ ins_encode %{
+ int vlen_enc = Assembler::AVX_128bit;
+ __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); // dst = upper 128 bits of src
+ __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); // narrow dwords to words: lower half from src, upper half from dst
+ __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // narrow words to bytes
+ __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // byte-wise absolute value
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vstoreMask4B_evex(vec dst, vec src, immI_4 size) %{
+ predicate(UseAVX > 2); // AVX512 path, any vector length
+ match(Set dst (VectorStoreMask src size));
+ format %{ "vector_store_mask $dst,$src\t!" %}
+ ins_encode %{
+ int src_vlen_enc = vector_length_encoding(this, $src); // encoding for the (wider) source vector
+ int dst_vlen_enc = vector_length_encoding(this); // encoding for the result vector
+ if (!VM_Version::supports_avx512vl()) {
+ src_vlen_enc = Assembler::AVX_512bit; // without AVX512VL the narrowing move is emitted with the 512-bit encoding
+ }
+ __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); // narrow dwords to bytes
+ __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); // byte-wise absolute value
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct storeMask8B(vec dst, vec src, immI_8 size) %{
+ predicate(vector_length(n) == 2 && UseAVX <= 2); // exactly 2 elements, non-AVX512 path
+ match(Set dst (VectorStoreMask src size));
+ format %{ "vector_store_mask $dst,$src\t!" %}
+ ins_encode %{
+ assert(UseSSE >= 3, "required");
+ __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8); // imm 0x8: dwords 0 and 2 of src go to dwords 0 and 1 of dst
+ __ packssdw($dst$$XMMRegister, $dst$$XMMRegister); // narrow dwords to words (signed saturation)
+ __ packsswb($dst$$XMMRegister, $dst$$XMMRegister); // narrow words to bytes (signed saturation)
+ __ pabsb($dst$$XMMRegister, $dst$$XMMRegister); // byte-wise absolute value
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct storeMask8B_avx(vec dst, vec src, immI_8 size, legVec vtmp) %{
+ predicate(vector_length(n) == 4 && UseAVX <= 2); // exactly 4 elements, non-AVX512 path
+ match(Set dst (VectorStoreMask src size));
+ format %{ "vector_store_mask $dst,$src\t! using $vtmp as TEMP" %}
+ effect(TEMP dst, TEMP vtmp);
+ ins_encode %{
+ int vlen_enc = Assembler::AVX_128bit; // everything after the first shuffle works on 128 bits
+ __ vpshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit); // imm 0x88: in each 128-bit lane select dwords 0 and 2 of src
+ __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); // vtmp = upper 128 bits of dst
+ __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc); // imm 0xC: take dwords 2 and 3 from vtmp, keep dwords 0 and 1 of dst
+ __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // narrow dwords to words
+ __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // narrow words to bytes
+ __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // byte-wise absolute value
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct vstoreMask8B_evex(vec dst, vec src, immI_8 size) %{
+ predicate(UseAVX > 2); // AVX512 path, any vector length
+ match(Set dst (VectorStoreMask src size));
+ format %{ "vector_store_mask $dst,$src\t!" %}
+ ins_encode %{
+ int src_vlen_enc = vector_length_encoding(this, $src); // encoding for the (wider) source vector
+ int dst_vlen_enc = vector_length_encoding(this); // encoding for the result vector
+ if (!VM_Version::supports_avx512vl()) {
+ src_vlen_enc = Assembler::AVX_512bit; // without AVX512VL the narrowing move is emitted with the 512-bit encoding
+ }
+ __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); // narrow qwords to bytes
+ __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); // byte-wise absolute value
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+//-------------------------------- Load Iota Indices ----------------------------------
+
+instruct loadIotaIndices(vec dst, immI_0 src, rRegP scratch) %{
+ predicate(vector_element_basic_type(n) == T_BYTE); // only byte vectors are handled by this rule
+ match(Set dst (VectorLoadConst src)); // src is constrained to the constant 0 by its immI_0 operand type
+ effect(TEMP scratch);
+ format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
+ ins_encode %{
+ int vlen_in_bytes = vector_length_in_bytes(this); // byte size of the result vector
+ __ load_iota_indices($dst$$XMMRegister, $scratch$$Register, vlen_in_bytes); // the load itself is delegated to the load_iota_indices helper
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+//-------------------------------- Rearrange ----------------------------------
+
+// LoadShuffle/Rearrange for Byte
+
+instruct loadShuffleB(vec dst) %{
+ predicate(vector_element_basic_type(n) == T_BYTE); // byte shuffles only
+ match(Set dst (VectorLoadShuffle dst)); // input and output are the same operand
+ format %{ "vector_load_shuffle $dst, $dst" %}
+ ins_encode %{
+ // empty: no instruction is emitted, dst is left as it is
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rearrangeB(vec dst, vec shuffle) %{
+ predicate(vector_element_basic_type(n) == T_BYTE &&
+ vector_length(n) < 32); // byte vectors shorter than 32 elements
+ match(Set dst (VectorRearrange dst shuffle)); // two-operand form: dst is both the data input and the result
+ format %{ "vector_rearrange $dst, $shuffle, $dst" %}
+ ins_encode %{
+ assert(UseSSE >= 4, "required");
+ __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); // shuffle the bytes of dst in place using the indices in shuffle
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rearrangeB_avx(vec dst, vec src, vec shuffle) %{
+ predicate(vector_element_basic_type(n) == T_BYTE &&
+ vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi()); // 32-byte vectors when AVX512_VBMI is unavailable
+ match(Set dst (VectorRearrange src shuffle));
+ format %{ "vector_rearrange $dst, $shuffle, $src" %}
+ ins_encode %{
+ __ vpshufb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, Assembler::AVX_256bit); // NOTE(review): 256-bit vpshufb shuffles within each 128-bit lane, and shuffle/src order differs from the pshufb rules - confirm cross-lane indices and operand order
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rearrangeB_evex(vec dst, vec src, vec shuffle) %{
+ predicate(vector_element_basic_type(n) == T_BYTE &&
+ vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi()); // byte vectors of 32+ elements when AVX512_VBMI is available
+ match(Set dst (VectorRearrange src shuffle));
+ format %{ "vector_rearrange $dst, $shuffle, $src" %}
+ ins_encode %{
+ int vlen_enc = vector_length_encoding(this); // encoding derived from the result vector
+ __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); // byte permute of src driven by shuffle
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// LoadShuffle/Rearrange for Short
+
+instruct loadShuffleS(vec dst, vec src, vec vtmp, rRegP scratch) %{
+ predicate(vector_element_basic_type(n) == T_SHORT &&
+ vector_length(n) <= 8 && !VM_Version::supports_avx512bw()); // NB! aligned with rearrangeS
+ match(Set dst (VectorLoadShuffle src));
+ effect(TEMP dst, TEMP vtmp, TEMP scratch);
+ format %{ "vector_load_shuffle $dst, $src\t! using $vtmp and $scratch as TEMP" %}
+ ins_encode %{
+ // Create a byte shuffle mask from short shuffle mask
+ // only byte shuffle instruction available on these platforms
+
+ // Multiply each shuffle by two to get byte index
+ __ pmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister); // zero-extend each byte index of src to a word
+ __ psllw($vtmp$$XMMRegister, 1); // word << 1, i.e. index * 2
+
+ // Duplicate to create 2 copies of byte index
+ __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
+ __ psllw($dst$$XMMRegister, 8); // move the copy into the high byte of each word
+ __ por($dst$$XMMRegister, $vtmp$$XMMRegister); // both bytes of each word now hold the byte index
+
+ // Add one to get alternate byte index
+ __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), $scratch$$Register); // load the constant; scratch is passed for addressing it
+ __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); // byte-wise add of the constant
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rearrangeS(vec dst, vec shuffle) %{
+ predicate(vector_element_basic_type(n) == T_SHORT &&
+ vector_length(n) <= 8 && !VM_Version::supports_avx512bw()); // same condition as loadShuffleS, which builds the byte shuffle
+ match(Set dst (VectorRearrange dst shuffle)); // two-operand form: dst is both the data input and the result
+ format %{ "vector_rearrange $dst, $shuffle, $dst" %}
+ ins_encode %{
+ assert(UseSSE >= 4, "required");
+ __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); // byte shuffle of dst in place
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct loadShuffleS_evex(vec dst, vec src) %{
+ predicate(vector_element_basic_type(n) == T_SHORT &&
+ VM_Version::supports_avx512bw()); // short shuffles when AVX512BW is available, any length
+ match(Set dst (VectorLoadShuffle src));
+ format %{ "vector_load_shuffle $dst, $src" %}
+ ins_encode %{
+ int vlen_enc = vector_length_encoding(this);
+ if (!VM_Version::supports_avx512vl()) {
+ vlen_enc = Assembler::AVX_512bit; // without AVX512VL always use the 512-bit encoding
+ }
+ __ vpmovzxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // zero-extend byte indices to word indices
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
+ predicate(vector_element_basic_type(n) == T_SHORT &&
+ VM_Version::supports_avx512bw()); // same condition as loadShuffleS_evex
+ match(Set dst (VectorRearrange src shuffle));
+ format %{ "vector_rearrange $dst, $shuffle, $src" %}
+ ins_encode %{
+ int vlen_enc = vector_length_encoding(this);
+ if (!VM_Version::supports_avx512vl()) {
+ vlen_enc = Assembler::AVX_512bit; // without AVX512VL always use the 512-bit encoding
+ }
+ __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); // word permute of src driven by shuffle
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// LoadShuffle/Rearrange for Integer and Float
+
+instruct loadShuffleI(vec dst, vec src, vec vtmp, rRegP scratch) %{
+ predicate((vector_element_basic_type(n) == T_INT || vector_element_basic_type(n) == T_FLOAT) &&
+ vector_length(n) == 4 && UseAVX < 2); // 4-element int/float shuffles below AVX2
+ match(Set dst (VectorLoadShuffle src));
+ effect(TEMP dst, TEMP vtmp, TEMP scratch);
+ format %{ "vector_load_shuffle $dst, $src\t! using $vtmp and $scratch as TEMP" %}
+ ins_encode %{
+ assert(UseSSE >= 4, "required");
+
+ // Create a byte shuffle mask from int shuffle mask
+ // only byte shuffle instruction available on these platforms
+
+ // Duplicate and multiply each shuffle by 4
+ __ pmovzxbd($vtmp$$XMMRegister, $src$$XMMRegister); // zero-extend each byte index of src to a dword
+ __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); // imm 0xA0: copy words 0,0,2,2 within the low 64 bits
+ __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); // same word duplication within the high 64 bits
+ __ psllw($vtmp$$XMMRegister, 2); // word << 2, i.e. index * 4
+
+ // Duplicate again to create 4 copies of byte index
+ __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
+ __ psllw($dst$$XMMRegister, 8); // move the copy into the high byte of each word
+ __ por($vtmp$$XMMRegister, $dst$$XMMRegister); // vtmp now holds the byte index in every byte of each dword
+
+ // Add 3,2,1,0 to get alternate byte index
+ __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), $scratch$$Register); // load the constant into dst; scratch is passed for addressing it
+ __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); // byte-wise add; the result ends up in dst
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rearrangeI(vec dst, vec shuffle) %{
+ predicate((vector_element_basic_type(n) == T_INT || vector_element_basic_type(n) == T_FLOAT) &&
+ vector_length(n) == 4 && UseAVX < 2); // same condition as loadShuffleI, which builds the byte shuffle
+ match(Set dst (VectorRearrange dst shuffle)); // two-operand form: dst is both the data input and the result
+ format %{ "vector_rearrange $dst, $shuffle, $dst" %}
+ ins_encode %{
+ assert(UseSSE >= 4, "required");
+ __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); // byte shuffle of dst in place
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct loadShuffleI_avx(vec dst, vec src) %{
+ predicate((vector_element_basic_type(n) == T_INT || vector_element_basic_type(n) == T_FLOAT) &&
+ UseAVX >= 2); // int/float shuffles on AVX2 and above, any length
+ match(Set dst (VectorLoadShuffle src));
+ format %{ "vector_load_shuffle $dst, $src" %}
+ ins_encode %{
+ int vlen_enc = vector_length_encoding(this); // encoding derived from the result vector
+ __ vpmovzxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // zero-extend byte indices to dword indices
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
+ predicate((vector_element_basic_type(n) == T_INT || vector_element_basic_type(n) == T_FLOAT) &&
+ UseAVX >= 2); // same condition as loadShuffleI_avx
+ match(Set dst (VectorRearrange src shuffle));
+ format %{ "vector_rearrange $dst, $shuffle, $src" %}
+ ins_encode %{
+ int vlen_enc = vector_length_encoding(this);
+ if (vlen_enc == Assembler::AVX_128bit) {
+ vlen_enc = Assembler::AVX_256bit; // vpermd is never emitted with the 128-bit encoding here
+ }
+ __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); // dword permute of src driven by shuffle
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+// LoadShuffle/Rearrange for Long and Double
+
+instruct loadShuffleL(vec dst, vec src, vec vtmp, rRegP scratch) %{
+ predicate(is_double_word_type(vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
+ vector_length(n) < 8 && !VM_Version::supports_avx512vl()); // fewer than 8 elements and no AVX512VL
+ match(Set dst (VectorLoadShuffle src));
+ effect(TEMP dst, TEMP vtmp, TEMP scratch);
+ format %{ "vector_load_shuffle $dst, $src\t! using $vtmp and $scratch as TEMP" %}
+ ins_encode %{
+ assert(UseAVX >= 2, "required");
+
+ int vlen_enc = vector_length_encoding(this); // encoding derived from the result vector
+ // Create a double word shuffle mask from long shuffle mask
+ // only double word shuffle instruction available on these platforms
+
+ // Multiply each shuffle by two to get double word index
+ __ vpmovzxbq($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc); // zero-extend each byte index of src to a qword
+ __ vpsllq($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc); // qword << 1, i.e. index * 2
+
+ // Duplicate each double word shuffle
+ __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc); // copy of the index placed in the upper dword of each qword
+ __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); // both dwords of each qword now hold the dword index
+
+ // Add one to get alternate double word index
+ __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, $scratch$$Register); // dword-wise add of the constant; scratch is passed for addressing it
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rearrangeL(vec dst, vec src, vec shuffle) %{
+ predicate(is_double_word_type(vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
+ vector_length(n) < 8 && !VM_Version::supports_avx512vl()); // same condition as loadShuffleL, which builds a dword shuffle
+ match(Set dst (VectorRearrange src shuffle));
+ format %{ "vector_rearrange $dst, $shuffle, $src" %}
+ ins_encode %{
+ assert(UseAVX >= 2, "required");
+
+ int vlen_enc = vector_length_encoding(this);
+ __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); // dword (not qword) permute of src driven by shuffle
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct loadShuffleL_evex(vec dst, vec src) %{
+ predicate(is_double_word_type(vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
+ (vector_length(n) == 8 || VM_Version::supports_avx512vl())); // 8 elements, or any length with AVX512VL
+ match(Set dst (VectorLoadShuffle src));
+ format %{ "vector_load_shuffle $dst, $src" %}
+ ins_encode %{
+ assert(UseAVX > 2, "required");
+
+ int vlen_enc = vector_length_encoding(this); // encoding derived from the result vector
+ __ vpmovzxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // zero-extend byte indices to qword indices
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
+ predicate(is_double_word_type(vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
+ (vector_length(n) == 8 || VM_Version::supports_avx512vl())); // same condition as loadShuffleL_evex
+ match(Set dst (VectorRearrange src shuffle));
+ format %{ "vector_rearrange $dst, $shuffle, $src" %}
+ ins_encode %{
+ assert(UseAVX > 2, "required");
+
+ int vlen_enc = vector_length_encoding(this);
+ if (vlen_enc == Assembler::AVX_128bit) {
+ vlen_enc = Assembler::AVX_256bit; // vpermq is never emitted with the 128-bit encoding here
+ }
+ __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); // qword permute of src driven by shuffle
+ %}
+ ins_pipe( pipe_slow );
+%}
+
// --------------------------------- FMA --------------------------------------
// a * b + c
@@ -5299,8 +7755,8 @@ instruct vfmaF_reg(vec a, vec b, vec c) %{
ins_cost(150);
ins_encode %{
assert(UseFMA, "not enabled");
- int vector_len = vector_length_encoding(this);
- __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -5311,8 +7767,8 @@ instruct vfmaF_mem(vec a, memory b, vec c) %{
ins_cost(150);
ins_encode %{
assert(UseFMA, "not enabled");
- int vector_len = vector_length_encoding(this);
- __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -5323,8 +7779,8 @@ instruct vfmaD_reg(vec a, vec b, vec c) %{
ins_cost(150);
ins_encode %{
assert(UseFMA, "not enabled");
- int vector_len = vector_length_encoding(this);
- __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -5335,8 +7791,8 @@ instruct vfmaD_mem(vec a, memory b, vec c) %{
ins_cost(150);
ins_encode %{
assert(UseFMA, "not enabled");
- int vector_len = vector_length_encoding(this);
- __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -5346,7 +7802,7 @@ instruct vfmaD_mem(vec a, memory b, vec c) %{
instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
predicate(UseAVX == 0);
match(Set dst (MulAddVS2VI dst src1));
- format %{ "pmaddwd $dst,$dst,$src1\t! muladd packedStoI" %}
+ format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
ins_encode %{
__ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
%}
@@ -5358,8 +7814,8 @@ instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
match(Set dst (MulAddVS2VI src1 src2));
format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
ins_encode %{
- int vector_len = vector_length_encoding(this);
- __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@@ -5372,8 +7828,8 @@ instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
ins_encode %{
assert(UseAVX > 2, "required");
- int vector_len = vector_length_encoding(this);
- __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
ins_cost(10);
@@ -5387,8 +7843,8 @@ instruct vpopcountI(vec dst, vec src) %{
ins_encode %{
assert(UsePopCountInstruction, "not enabled");
- int vector_len = vector_length_encoding(this);
- __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+ int vlen_enc = vector_length_encoding(this);
+ __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
diff --git a/src/hotspot/cpu/x86/x86_32.ad b/src/hotspot/cpu/x86/x86_32.ad
index e572f3ca895..130cdbed3e1 100644
--- a/src/hotspot/cpu/x86/x86_32.ad
+++ b/src/hotspot/cpu/x86/x86_32.ad
@@ -131,9 +131,7 @@ alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
-// 2) reg_class compiler_method_reg ( /* as def'd in frame section */ )
-// 2) reg_class interpreter_method_reg ( /* as def'd in frame section */ )
-// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
+// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for no registers (empty set).
reg_class no_reg();
@@ -150,7 +148,6 @@ reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePoin
// Class for general registers
reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers (excluding EBP).
-// This register class can be used for implicit null checks on win95.
// It is also safe for use by tailjumps (we don't want to allocate in ebp).
// Used also if the PreserveFramePointer flag is true.
reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
@@ -656,8 +653,9 @@ void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
}
st->print_cr("POPL EBP"); st->print("\t");
if (do_polling() && C->is_method_compilation()) {
- st->print("TEST PollPage,EAX\t! Poll Safepoint");
- st->cr(); st->print("\t");
+ st->print("CMPL rsp, poll_offset[thread] \n\t"
+ "JA #safepoint_stub\t"
+ "# Safepoint: poll for GC");
}
}
#endif
@@ -700,12 +698,16 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
}
if (do_polling() && C->is_method_compilation()) {
- Register pollReg = as_Register(EBX_enc);
+ Register thread = as_Register(EBX_enc);
MacroAssembler masm(&cbuf);
- masm.get_thread(pollReg);
- masm.movl(pollReg, Address(pollReg, in_bytes(Thread::polling_page_offset())));
- masm.relocate(relocInfo::poll_return_type);
- masm.testl(rax, Address(pollReg, 0));
+ __ get_thread(thread);
+ Label dummy_label;
+ Label* code_stub = &dummy_label;
+ if (!C->output()->in_scratch_emit_size()) {
+ code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
+ }
+ __ relocate(relocInfo::poll_return_type);
+ __ safepoint_poll(*code_stub, thread, true /* at_return */, true /* in_nmethod */);
}
}
@@ -1446,57 +1448,6 @@ const bool Matcher::rematerialize_float_constants = true;
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;
-
-void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
- // Get the memory operand from the node
- uint numopnds = node->num_opnds(); // Virtual call for number of operands
- uint skipped = node->oper_input_base(); // Sum of leaves skipped so far
- assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
- uint opcnt = 1; // First operand
- uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
- while( idx >= skipped+num_edges ) {
- skipped += num_edges;
- opcnt++; // Bump operand count
- assert( opcnt < numopnds, "Accessing non-existent operand" );
- num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
- }
-
- MachOper *memory = node->_opnds[opcnt];
- MachOper *new_memory = NULL;
- switch (memory->opcode()) {
- case DIRECT:
- case INDOFFSET32X:
- // No transformation necessary.
- return;
- case INDIRECT:
- new_memory = new indirect_win95_safeOper( );
- break;
- case INDOFFSET8:
- new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
- break;
- case INDOFFSET32:
- new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
- break;
- case INDINDEXOFFSET:
- new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
- break;
- case INDINDEXSCALE:
- new_memory = new indIndexScale_win95_safeOper(memory->scale());
- break;
- case INDINDEXSCALEOFFSET:
- new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
- break;
- case LOAD_LONG_INDIRECT:
- case LOAD_LONG_INDOFFSET32:
- // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
- return;
- default:
- assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
- return;
- }
- node->_opnds[opcnt] = new_memory;
-}
-
// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
const bool Matcher::strict_fp_requires_explicit_rounding = true;
@@ -3190,7 +3141,6 @@ frame %{
// These three registers define part of the calling convention
// between compiled code and the interpreter.
inline_cache_reg(EAX); // Inline Cache Register
- interpreter_method_reg(EBX); // Method Register when calling interpreter
// Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
cisc_spilling_operand_name(indOffset32);
@@ -3315,7 +3265,7 @@ operand immI() %{
%}
// Constant for test vs zero
-operand immI0() %{
+operand immI_0() %{
predicate(n->get_int() == 0);
match(ConI);
@@ -3325,7 +3275,7 @@ operand immI0() %{
%}
// Constant for increment
-operand immI1() %{
+operand immI_1() %{
predicate(n->get_int() == 1);
match(ConI);
@@ -3419,8 +3369,8 @@ operand immI_32_63() %{
interface(CONST_INTER);
%}
-operand immI_1() %{
- predicate( n->get_int() == 1 );
+operand immI_2() %{
+ predicate( n->get_int() == 2 );
match(ConI);
op_cost(0);
@@ -3428,8 +3378,8 @@ operand immI_1() %{
interface(CONST_INTER);
%}
-operand immI_2() %{
- predicate( n->get_int() == 2 );
+operand immI_3() %{
+ predicate( n->get_int() == 3 );
match(ConI);
op_cost(0);
@@ -3437,8 +3387,19 @@ operand immI_2() %{
interface(CONST_INTER);
%}
-operand immI_3() %{
- predicate( n->get_int() == 3 );
+operand immI_4()
+%{
+ predicate(n->get_int() == 4); // accepts only the integer constant 4
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immI_8()
+%{
+ predicate(n->get_int() == 8);
match(ConI);
op_cost(0);
@@ -3815,6 +3776,18 @@ operand eRegP() %{
interface(REG_INTER);
%}
+operand rRegP() %{
+ constraint(ALLOC_IN_RC(int_reg)); // allocated from the int_reg register class
+ match(RegP); // any pointer register ...
+ match(eAXRegP); // ... as well as the pointer operands listed below
+ match(eBXRegP);
+ match(eCXRegP);
+ match(eDIRegP);
+
+ format %{ %}
+ interface(REG_INTER);
+%}
+
// On windows95, EBP is not safe to use for implicit null tests.
operand eRegP_no_EBP() %{
constraint(ALLOC_IN_RC(int_reg_no_ebp));
@@ -3947,6 +3920,15 @@ operand eADXRegL_low_only() %{
interface(REG_INTER);
%}
+// Flags register, used as output of compare instructions
+operand rFlagsReg() %{
+ constraint(ALLOC_IN_RC(int_flags)); // same register class that eFlagsReg is constrained to
+ match(RegFlags);
+
+ format %{ "EFLAGS" %}
+ interface(REG_INTER);
+%}
+
// Flags register, used as output of compare instructions
operand eFlagsReg() %{
constraint(ALLOC_IN_RC(int_flags));
@@ -4077,6 +4059,14 @@ operand regF() %{
interface(REG_INTER);
%}
+operand legRegF() %{
+ predicate( UseSSE>=1 ); // only usable when UseSSE is at least 1
+ constraint(ALLOC_IN_RC(float_reg_legacy)); // float register restricted to the float_reg_legacy class
+ match(RegF);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
// Float register operands
operand vlRegF() %{
constraint(ALLOC_IN_RC(float_reg_vl));
@@ -4096,6 +4086,14 @@ operand regD() %{
%}
// Double register operands
+operand legRegD() %{
+ predicate( UseSSE>=2 ); // only usable when UseSSE is at least 2
+ constraint(ALLOC_IN_RC(double_reg_legacy)); // double register restricted to the double_reg_legacy class
+ match(RegD);
+ format %{ %}
+ interface(REG_INTER);
+%}
+
operand vlRegD() %{
constraint(ALLOC_IN_RC(double_reg_vl));
match(RegD);
@@ -4356,98 +4354,6 @@ operand stackSlotL(sRegL reg) %{
%}
%}
-//----------Memory Operands - Win95 Implicit Null Variants----------------
-// Indirect Memory Operand
-operand indirect_win95_safe(eRegP_no_EBP reg)
-%{
- constraint(ALLOC_IN_RC(int_reg));
- match(reg);
-
- op_cost(100);
- format %{ "[$reg]" %}
- interface(MEMORY_INTER) %{
- base($reg);
- index(0x4);
- scale(0x0);
- disp(0x0);
- %}
-%}
-
-// Indirect Memory Plus Short Offset Operand
-operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
-%{
- match(AddP reg off);
-
- op_cost(100);
- format %{ "[$reg + $off]" %}
- interface(MEMORY_INTER) %{
- base($reg);
- index(0x4);
- scale(0x0);
- disp($off);
- %}
-%}
-
-// Indirect Memory Plus Long Offset Operand
-operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
-%{
- match(AddP reg off);
-
- op_cost(100);
- format %{ "[$reg + $off]" %}
- interface(MEMORY_INTER) %{
- base($reg);
- index(0x4);
- scale(0x0);
- disp($off);
- %}
-%}
-
-// Indirect Memory Plus Index Register Plus Offset Operand
-operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
-%{
- match(AddP (AddP reg ireg) off);
-
- op_cost(100);
- format %{"[$reg + $off + $ireg]" %}
- interface(MEMORY_INTER) %{
- base($reg);
- index($ireg);
- scale(0x0);
- disp($off);
- %}
-%}
-
-// Indirect Memory Times Scale Plus Index Register
-operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
-%{
- match(AddP reg (LShiftI ireg scale));
-
- op_cost(100);
- format %{"[$reg + $ireg << $scale]" %}
- interface(MEMORY_INTER) %{
- base($reg);
- index($ireg);
- scale($scale);
- disp(0x0);
- %}
-%}
-
-// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
-operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
-%{
- match(AddP (AddP reg (LShiftI ireg scale)) off);
-
- op_cost(100);
- format %{"[$reg + $off + $ireg << $scale]" %}
- interface(MEMORY_INTER) %{
- base($reg);
- index($ireg);
- scale($scale);
- disp($off);
- %}
-%}
-
//----------Conditional Branch Operands----------------------------------------
// Comparison Op - This is the operation of the comparison, and is limited to
// the following set of codes:
@@ -5846,6 +5752,46 @@ instruct loadKlass(eRegP dst, memory mem) %{
ins_pipe( ialu_reg_mem );
%}
+// Move Float: XMM register-to-register copy from a regF source into a legRegF destination
+instruct MoveF2LEG(legRegF dst, regF src) %{
+ match(Set dst src);
+ format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
+ ins_encode %{
+ __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+// Move Float: XMM register-to-register copy from a legRegF source into a regF destination
+instruct MoveLEG2F(regF dst, legRegF src) %{
+ match(Set dst src);
+ format %{ "movss $dst,$src\t# if src != dst load float (4 bytes)" %}
+ ins_encode %{
+ __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+// Move Double: XMM register-to-register copy from a regD source into a legRegD destination
+instruct MoveD2LEG(legRegD dst, regD src) %{
+ match(Set dst src);
+ format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
+// Move Double: XMM register-to-register copy from a legRegD source into a regD destination
+instruct MoveLEG2D(regD dst, legRegD src) %{
+ match(Set dst src);
+ format %{ "movsd $dst,$src\t# if src != dst load double (8 bytes)" %}
+ ins_encode %{
+ __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+ %}
+ ins_pipe( fpu_reg_reg );
+%}
+
// Load Double
instruct loadDPR(regDPR dst, memory mem) %{
predicate(UseSSE<=1);
@@ -5971,7 +5917,7 @@ instruct loadConI(rRegI dst, immI src) %{
%}
// Load Constant zero
-instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
+instruct loadConI0(rRegI dst, immI_0 src, eFlagsReg cr) %{
match(Set dst src);
effect(KILL cr);
@@ -7083,7 +7029,7 @@ instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
ins_pipe( ialu_reg );
%}
-instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
+instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
predicate(UseIncDec);
match(Set dst (AddI dst src));
effect(KILL cr);
@@ -7183,7 +7129,7 @@ instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
ins_pipe( ialu_mem_imm );
%}
-instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
+instruct incI_mem(memory dst, immI_1 src, eFlagsReg cr) %{
match(Set dst (StoreI dst (AddI (LoadI dst) src)));
effect(KILL cr);
@@ -7552,7 +7498,7 @@ instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
%}
// Subtract from a pointer
-instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
+instruct subP_eReg(eRegP dst, rRegI src, immI_0 zero, eFlagsReg cr) %{
match(Set dst (AddP dst (SubI zero src)));
effect(KILL cr);
@@ -7563,7 +7509,7 @@ instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
ins_pipe( ialu_reg_reg );
%}
-instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
+instruct negI_eReg(rRegI dst, immI_0 zero, eFlagsReg cr) %{
match(Set dst (SubI zero dst));
effect(KILL cr);
@@ -8017,7 +7963,7 @@ instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlag
// Integer Shift Instructions
// Shift Left by one
-instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
+instruct shlI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
match(Set dst (LShiftI dst shift));
effect(KILL cr);
@@ -8053,7 +7999,7 @@ instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
%}
// Arithmetic shift right by one
-instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
+instruct sarI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
match(Set dst (RShiftI dst shift));
effect(KILL cr);
@@ -8065,7 +8011,7 @@ instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
%}
// Arithmetic shift right by one
-instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
+instruct sarI_mem_1(memory dst, immI_1 shift, eFlagsReg cr) %{
match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
effect(KILL cr);
format %{ "SAR $dst,$shift" %}
@@ -8110,7 +8056,7 @@ instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
%}
// Logical shift right by one
-instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
+instruct shrI_eReg_1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
match(Set dst (URShiftI dst shift));
effect(KILL cr);
@@ -8266,7 +8212,7 @@ instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1
ins_pipe(ialu_reg_mem);
%}
-instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
+instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, eFlagsReg cr) %{
match(Set dst (AndI (SubI imm_zero src) src));
predicate(UseBMI1Instructions);
effect(KILL cr);
@@ -8279,7 +8225,7 @@ instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
ins_pipe(ialu_reg);
%}
-instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
+instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, eFlagsReg cr) %{
match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
predicate(UseBMI1Instructions);
effect(KILL cr);
@@ -8431,7 +8377,7 @@ instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
// ROL/ROR
// ROL expand
-instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
+instruct rolI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
effect(USE_DEF dst, USE shift, KILL cr);
format %{ "ROL $dst, $shift" %}
@@ -8460,7 +8406,7 @@ instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
// end of ROL expand
// ROL 32bit by one once
-instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
+instruct rolI_eReg_i1(rRegI dst, immI_1 lshift, immI_M1 rshift, eFlagsReg cr) %{
match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
expand %{
@@ -8479,7 +8425,7 @@ instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
%}
// ROL 32bit var by var once
-instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
+instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
expand %{
@@ -8497,7 +8443,7 @@ instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr
%}
// ROR expand
-instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
+instruct rorI_eReg_imm1(rRegI dst, immI_1 shift, eFlagsReg cr) %{
effect(USE_DEF dst, USE shift, KILL cr);
format %{ "ROR $dst, $shift" %}
@@ -8526,7 +8472,7 @@ instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
// end of ROR expand
// ROR right once
-instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
+instruct rorI_eReg_i1(rRegI dst, immI_1 rshift, immI_M1 lshift, eFlagsReg cr) %{
match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
expand %{
@@ -8545,7 +8491,7 @@ instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
%}
// ROR 32bit var by var once
-instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
+instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI_0 zero, eFlagsReg cr) %{
match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
expand %{
@@ -8713,7 +8659,7 @@ instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
ins_pipe(pipe_slow);
%}
-instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
+instruct cmpLTMask0(rRegI dst, immI_0 zero, eFlagsReg cr) %{
match(Set dst (CmpLTMask dst zero));
effect(DEF dst, KILL cr);
ins_cost(100);
@@ -8827,7 +8773,7 @@ instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
ins_pipe(ialu_reg_reg);
%}
-instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
+instruct overflowNegI_rReg(eFlagsReg cr, immI_0 zero, eAXRegI op2)
%{
match(Set cr (OverflowSubI zero op2));
effect(DEF cr, USE_KILL op2);
@@ -11824,12 +11770,12 @@ instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2
ins_pipe( pipe_slow );
%}
-instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
+instruct string_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
- predicate(UseSSE42Intrinsics);
+ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
- format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
ins_encode %{
__ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
$vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
@@ -11837,6 +11783,20 @@ instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
ins_pipe( pipe_slow );
%}
+instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
+ eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
+ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
+ effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
+ format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
+ ins_encode %{
+ __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
+ $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
+
// fast array equals
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
@@ -11965,7 +11925,7 @@ instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
ins_pipe( ialu_cr_reg_mem );
%}
-instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
+instruct testI_reg( eFlagsReg cr, rRegI src, immI_0 zero ) %{
match(Set cr (CmpI src zero));
effect( DEF cr, USE src );
@@ -11975,7 +11935,7 @@ instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
ins_pipe( ialu_cr_reg_imm );
%}
-instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
+instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI_0 zero ) %{
match(Set cr (CmpI (AndI src con) zero));
format %{ "TEST $src,$con" %}
@@ -11984,7 +11944,7 @@ instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
ins_pipe( ialu_cr_reg_imm );
%}
-instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
+instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI_0 zero ) %{
match(Set cr (CmpI (AndI src mem) zero));
format %{ "TEST $src,$mem" %}
@@ -12034,7 +11994,7 @@ instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
// ins_encode( OpcP, RegMem( op1, op2) );
//%}
-instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
+instruct testU_reg( eFlagsRegU cr, rRegI src, immI_0 zero ) %{
match(Set cr (CmpU src zero));
format %{ "TESTu $src,$src" %}
@@ -12111,7 +12071,7 @@ instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
-instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
+instruct testP_Reg_mem( eFlagsReg cr, memory op, immI_0 zero ) %{
match(Set cr (CmpP (LoadP op) zero));
format %{ "TEST $op,0xFFFFFFFF" %}
@@ -13142,6 +13102,28 @@ instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst,
ins_pipe( pipe_cmov_reg_long );
%}
+instruct cmovLL_reg_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, eRegL src) %{
+ match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
+ predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
+ ins_cost(400);
+ format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
+ "CMOV$cmp $dst.hi,$src.hi" %}
+ opcode(0x0F,0x40);
+ ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
+ ins_pipe( pipe_cmov_reg_long );
+%}
+
+instruct cmovLL_mem_LEGT_U(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, eRegL dst, load_long_memory src) %{
+ match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
+ predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
+ ins_cost(500);
+ format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
+ "CMOV$cmp $dst.hi,$src.hi+4" %}
+ opcode(0x0F,0x40);
+ ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
+ ins_pipe( pipe_cmov_reg_long );
+%}
+
// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
@@ -13482,7 +13464,7 @@ instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
// match(Set dst (CopyI src));
// %}
//
-// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
+// instruct incI_eReg(rRegI dst, immI_1 src, eFlagsReg cr) %{
// match(Set dst (AddI dst src));
// effect(KILL cr);
// %}
diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad
index fe1b44741a8..16cf230d602 100644
--- a/src/hotspot/cpu/x86/x86_64.ad
+++ b/src/hotspot/cpu/x86/x86_64.ad
@@ -161,9 +161,7 @@ alloc_class chunk0(R10, R10_H,
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
-// 2) reg_class compiler_method_reg ( /* as def'd in frame section */ )
-// 2) reg_class interpreter_method_reg ( /* as def'd in frame section */ )
-// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
+// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Empty register class.
@@ -932,8 +930,8 @@ void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
st->print_cr("popq rbp");
if (do_polling() && C->is_method_compilation()) {
st->print("\t");
- st->print_cr("movq rscratch1, poll_offset[r15_thread] #polling_page_address\n\t"
- "testl rax, [rscratch1]\t"
+ st->print_cr("cmpq rsp, poll_offset[r15_thread] \n\t"
+ "ja #safepoint_stub\t"
"# Safepoint: poll for GC");
}
}
@@ -980,9 +978,13 @@ void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
if (do_polling() && C->is_method_compilation()) {
MacroAssembler _masm(&cbuf);
- __ movq(rscratch1, Address(r15_thread, Thread::polling_page_offset()));
+ Label dummy_label;
+ Label* code_stub = &dummy_label;
+ if (!C->output()->in_scratch_emit_size()) {
+ code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
+ }
__ relocate(relocInfo::poll_return_type);
- __ testl(rax, Address(rscratch1, 0));
+ __ safepoint_poll(*code_stub, r15_thread, true /* at_return */, true /* in_nmethod */);
}
}
@@ -1657,9 +1659,6 @@ const bool Matcher::rematerialize_float_constants = true; // XXX
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;
-// No-op on amd64
-void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
-
// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;
@@ -2738,8 +2737,6 @@ frame
// These three registers define part of the calling convention
// between compiled code and the interpreter.
inline_cache_reg(RAX); // Inline Cache Register
- interpreter_method_reg(RBX); // Method Register when
- // calling interpreter
// Optional: name the operand used by cisc-spilling to access
// [stack_pointer + offset]
@@ -2867,7 +2864,7 @@ operand immI()
%}
// Constant for test vs zero
-operand immI0()
+operand immI_0()
%{
predicate(n->get_int() == 0);
match(ConI);
@@ -2878,7 +2875,7 @@ operand immI0()
%}
// Constant for increment
-operand immI1()
+operand immI_1()
%{
predicate(n->get_int() == 1);
match(ConI);
@@ -2899,6 +2896,36 @@ operand immI_M1()
interface(CONST_INTER);
%}
+operand immI_2()
+%{
+ predicate(n->get_int() == 2);
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immI_4()
+%{
+ predicate(n->get_int() == 4);
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immI_8()
+%{
+ predicate(n->get_int() == 8);
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
// Valid scale values for addressing modes
operand immI2()
%{
@@ -2909,6 +2936,16 @@ operand immI2()
interface(CONST_INTER);
%}
+operand immU7()
+%{
+ predicate((0 <= n->get_int()) && (n->get_int() <= 0x7F));
+ match(ConI);
+
+ op_cost(5);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
operand immI8()
%{
predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80));
@@ -5203,19 +5240,19 @@ instruct maxF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp,
match(Set dst (MaxF a b));
effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
format %{
- "blendvps $btmp,$b,$a,$b \n\t"
- "blendvps $atmp,$a,$b,$b \n\t"
+ "vblendvps $btmp,$b,$a,$b \n\t"
+ "vblendvps $atmp,$a,$b,$b \n\t"
"vmaxss $tmp,$atmp,$btmp \n\t"
- "cmpps.unordered $btmp,$atmp,$atmp \n\t"
- "blendvps $dst,$tmp,$atmp,$btmp \n\t"
+ "vcmpps.unordered $btmp,$atmp,$atmp \n\t"
+ "vblendvps $dst,$tmp,$atmp,$btmp \n\t"
%}
ins_encode %{
int vector_len = Assembler::AVX_128bit;
- __ blendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
- __ blendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
+ __ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
+ __ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
__ vmaxss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
- __ cmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
- __ blendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
+ __ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
+ __ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5239,19 +5276,19 @@ instruct maxD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp,
match(Set dst (MaxD a b));
effect(USE a, USE b, TEMP atmp, TEMP btmp, TEMP tmp);
format %{
- "blendvpd $btmp,$b,$a,$b \n\t"
- "blendvpd $atmp,$a,$b,$b \n\t"
+ "vblendvpd $btmp,$b,$a,$b \n\t"
+ "vblendvpd $atmp,$a,$b,$b \n\t"
"vmaxsd $tmp,$atmp,$btmp \n\t"
- "cmppd.unordered $btmp,$atmp,$atmp \n\t"
- "blendvpd $dst,$tmp,$atmp,$btmp \n\t"
+ "vcmppd.unordered $btmp,$atmp,$atmp \n\t"
+ "vblendvpd $dst,$tmp,$atmp,$btmp \n\t"
%}
ins_encode %{
int vector_len = Assembler::AVX_128bit;
- __ blendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
- __ blendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
+ __ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vector_len);
+ __ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $b$$XMMRegister, vector_len);
__ vmaxsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
- __ cmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
- __ blendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
+ __ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
+ __ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5275,19 +5312,19 @@ instruct minF_reg(legRegF dst, legRegF a, legRegF b, legRegF tmp, legRegF atmp,
match(Set dst (MinF a b));
effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
format %{
- "blendvps $atmp,$a,$b,$a \n\t"
- "blendvps $btmp,$b,$a,$a \n\t"
+ "vblendvps $atmp,$a,$b,$a \n\t"
+ "vblendvps $btmp,$b,$a,$a \n\t"
"vminss $tmp,$atmp,$btmp \n\t"
- "cmpps.unordered $btmp,$atmp,$atmp \n\t"
- "blendvps $dst,$tmp,$atmp,$btmp \n\t"
+ "vcmpps.unordered $btmp,$atmp,$atmp \n\t"
+ "vblendvps $dst,$tmp,$atmp,$btmp \n\t"
%}
ins_encode %{
int vector_len = Assembler::AVX_128bit;
- __ blendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
- __ blendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
+ __ vblendvps($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
+ __ vblendvps($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
__ vminss($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
- __ cmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
- __ blendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
+ __ vcmpps($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
+ __ vblendvps($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5311,19 +5348,19 @@ instruct minD_reg(legRegD dst, legRegD a, legRegD b, legRegD tmp, legRegD atmp,
match(Set dst (MinD a b));
effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
format %{
- "blendvpd $atmp,$a,$b,$a \n\t"
- "blendvpd $btmp,$b,$a,$a \n\t"
+ "vblendvpd $atmp,$a,$b,$a \n\t"
+ "vblendvpd $btmp,$b,$a,$a \n\t"
"vminsd $tmp,$atmp,$btmp \n\t"
- "cmppd.unordered $btmp,$atmp,$atmp \n\t"
- "blendvpd $dst,$tmp,$atmp,$btmp \n\t"
+ "vcmppd.unordered $btmp,$atmp,$atmp \n\t"
+ "vblendvpd $dst,$tmp,$atmp,$btmp \n\t"
%}
ins_encode %{
int vector_len = Assembler::AVX_128bit;
- __ blendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
- __ blendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
+ __ vblendvpd($atmp$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, vector_len);
+ __ vblendvpd($btmp$$XMMRegister, $b$$XMMRegister, $a$$XMMRegister, $a$$XMMRegister, vector_len);
__ vminsd($tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister);
- __ cmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
- __ blendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
+ __ vcmppd($btmp$$XMMRegister, $atmp$$XMMRegister, $atmp$$XMMRegister, Assembler::_false, vector_len);
+ __ vblendvpd($dst$$XMMRegister, $tmp$$XMMRegister, $atmp$$XMMRegister, $btmp$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}
@@ -5547,7 +5584,7 @@ instruct loadConI(rRegI dst, immI src)
ins_pipe(ialu_reg_fat); // XXX
%}
-instruct loadConI0(rRegI dst, immI0 src, rFlagsReg cr)
+instruct loadConI0(rRegI dst, immI_0 src, rFlagsReg cr)
%{
match(Set dst src);
effect(KILL cr);
@@ -5983,7 +6020,7 @@ instruct storeImmNKlass(memory mem, immNKlass src)
%}
// Store Integer Immediate
-instruct storeImmI0(memory mem, immI0 zero)
+instruct storeImmI0(memory mem, immI_0 zero)
%{
predicate(UseCompressedOops && (CompressedOops::base() == NULL));
match(Set mem (StoreI mem zero));
@@ -6033,7 +6070,7 @@ instruct storeImmL(memory mem, immL32 src)
%}
// Store Short/Char Immediate
-instruct storeImmC0(memory mem, immI0 zero)
+instruct storeImmC0(memory mem, immI_0 zero)
%{
predicate(UseCompressedOops && (CompressedOops::base() == NULL));
match(Set mem (StoreC mem zero));
@@ -6059,7 +6096,7 @@ instruct storeImmI16(memory mem, immI16 src)
%}
// Store Byte Immediate
-instruct storeImmB0(memory mem, immI0 zero)
+instruct storeImmB0(memory mem, immI_0 zero)
%{
predicate(UseCompressedOops && (CompressedOops::base() == NULL));
match(Set mem (StoreB mem zero));
@@ -6084,7 +6121,7 @@ instruct storeImmB(memory mem, immI8 src)
%}
// Store CMS card-mark Immediate
-instruct storeImmCM0_reg(memory mem, immI0 zero)
+instruct storeImmCM0_reg(memory mem, immI_0 zero)
%{
predicate(UseCompressedOops && (CompressedOops::base() == NULL));
match(Set mem (StoreCM mem zero));
@@ -6097,7 +6134,7 @@ instruct storeImmCM0_reg(memory mem, immI0 zero)
ins_pipe(ialu_mem_reg);
%}
-instruct storeImmCM0(memory mem, immI0 src)
+instruct storeImmCM0(memory mem, immI_0 src)
%{
match(Set mem (StoreCM mem src));
@@ -7182,7 +7219,7 @@ instruct addI_mem_imm(memory dst, immI src, rFlagsReg cr)
ins_pipe(ialu_mem_imm);
%}
-instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
+instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
%{
predicate(UseIncDec);
match(Set dst (AddI dst src));
@@ -7194,7 +7231,7 @@ instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
ins_pipe(ialu_reg);
%}
-instruct incI_mem(memory dst, immI1 src, rFlagsReg cr)
+instruct incI_mem(memory dst, immI_1 src, rFlagsReg cr)
%{
predicate(UseIncDec);
match(Set dst (StoreI dst (AddI (LoadI dst) src)));
@@ -8077,7 +8114,7 @@ instruct subL_mem_imm(memory dst, immL32 src, rFlagsReg cr)
// Subtract from a pointer
// XXX hmpf???
-instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
+instruct subP_rReg(rRegP dst, rRegI src, immI_0 zero, rFlagsReg cr)
%{
match(Set dst (AddP dst (SubI zero src)));
effect(KILL cr);
@@ -8088,7 +8125,7 @@ instruct subP_rReg(rRegP dst, rRegI src, immI0 zero, rFlagsReg cr)
ins_pipe(ialu_reg_reg);
%}
-instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
+instruct negI_rReg(rRegI dst, immI_0 zero, rFlagsReg cr)
%{
match(Set dst (SubI zero dst));
effect(KILL cr);
@@ -8099,7 +8136,19 @@ instruct negI_rReg(rRegI dst, immI0 zero, rFlagsReg cr)
ins_pipe(ialu_reg);
%}
-instruct negI_mem(memory dst, immI0 zero, rFlagsReg cr)
+instruct negI_rReg_2(rRegI dst, rFlagsReg cr)
+%{
+ match(Set dst (NegI dst));
+ effect(KILL cr);
+
+ format %{ "negl $dst\t# int" %}
+ ins_encode %{
+ __ negl($dst$$Register);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
+instruct negI_mem(memory dst, immI_0 zero, rFlagsReg cr)
%{
match(Set dst (StoreI dst (SubI zero (LoadI dst))));
effect(KILL cr);
@@ -8121,6 +8170,18 @@ instruct negL_rReg(rRegL dst, immL0 zero, rFlagsReg cr)
ins_pipe(ialu_reg);
%}
+instruct negL_rReg_2(rRegL dst, rFlagsReg cr)
+%{
+ match(Set dst (NegL dst)); // in-place negate of a long register operand
+ effect(KILL cr); // the flags register operand is declared clobbered
+
+ format %{ "negq $dst\t# long" %}
+ ins_encode %{
+ __ negq($dst$$Register);
+ %}
+ ins_pipe(ialu_reg);
+%}
+
instruct negL_mem(memory dst, immL0 zero, rFlagsReg cr)
%{
match(Set dst (StoreL dst (SubL zero (LoadL dst))));
@@ -8446,7 +8507,7 @@ instruct modL_rReg(rdx_RegL rdx, rax_RegL rax, no_rax_rdx_RegL div,
// Integer Shift Instructions
// Shift Left by one
-instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
+instruct salI_rReg_1(rRegI dst, immI_1 shift, rFlagsReg cr)
%{
match(Set dst (LShiftI dst shift));
effect(KILL cr);
@@ -8458,7 +8519,7 @@ instruct salI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
%}
// Shift Left by one
-instruct salI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
+instruct salI_mem_1(memory dst, immI_1 shift, rFlagsReg cr)
%{
match(Set dst (StoreI dst (LShiftI (LoadI dst) shift)));
effect(KILL cr);
@@ -8518,7 +8579,7 @@ instruct salI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%}
// Arithmetic shift right by one
-instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
+instruct sarI_rReg_1(rRegI dst, immI_1 shift, rFlagsReg cr)
%{
match(Set dst (RShiftI dst shift));
effect(KILL cr);
@@ -8530,7 +8591,7 @@ instruct sarI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
%}
// Arithmetic shift right by one
-instruct sarI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
+instruct sarI_mem_1(memory dst, immI_1 shift, rFlagsReg cr)
%{
match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
effect(KILL cr);
@@ -8590,7 +8651,7 @@ instruct sarI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%}
// Logical shift right by one
-instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
+instruct shrI_rReg_1(rRegI dst, immI_1 shift, rFlagsReg cr)
%{
match(Set dst (URShiftI dst shift));
effect(KILL cr);
@@ -8602,7 +8663,7 @@ instruct shrI_rReg_1(rRegI dst, immI1 shift, rFlagsReg cr)
%}
// Logical shift right by one
-instruct shrI_mem_1(memory dst, immI1 shift, rFlagsReg cr)
+instruct shrI_mem_1(memory dst, immI_1 shift, rFlagsReg cr)
%{
match(Set dst (StoreI dst (URShiftI (LoadI dst) shift)));
effect(KILL cr);
@@ -8663,7 +8724,7 @@ instruct shrI_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
// Long Shift Instructions
// Shift Left by one
-instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
+instruct salL_rReg_1(rRegL dst, immI_1 shift, rFlagsReg cr)
%{
match(Set dst (LShiftL dst shift));
effect(KILL cr);
@@ -8675,7 +8736,7 @@ instruct salL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
%}
// Shift Left by one
-instruct salL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
+instruct salL_mem_1(memory dst, immI_1 shift, rFlagsReg cr)
%{
match(Set dst (StoreL dst (LShiftL (LoadL dst) shift)));
effect(KILL cr);
@@ -8736,7 +8797,7 @@ instruct salL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%}
// Arithmetic shift right by one
-instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
+instruct sarL_rReg_1(rRegL dst, immI_1 shift, rFlagsReg cr)
%{
match(Set dst (RShiftL dst shift));
effect(KILL cr);
@@ -8748,7 +8809,7 @@ instruct sarL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
%}
// Arithmetic shift right by one
-instruct sarL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
+instruct sarL_mem_1(memory dst, immI_1 shift, rFlagsReg cr)
%{
match(Set dst (StoreL dst (RShiftL (LoadL dst) shift)));
effect(KILL cr);
@@ -8809,7 +8870,7 @@ instruct sarL_mem_CL(memory dst, rcx_RegI shift, rFlagsReg cr)
%}
// Logical shift right by one
-instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
+instruct shrL_rReg_1(rRegL dst, immI_1 shift, rFlagsReg cr)
%{
match(Set dst (URShiftL dst shift));
effect(KILL cr);
@@ -8821,7 +8882,7 @@ instruct shrL_rReg_1(rRegL dst, immI1 shift, rFlagsReg cr)
%}
// Logical shift right by one
-instruct shrL_mem_1(memory dst, immI1 shift, rFlagsReg cr)
+instruct shrL_mem_1(memory dst, immI_1 shift, rFlagsReg cr)
%{
match(Set dst (StoreL dst (URShiftL (LoadL dst) shift)));
effect(KILL cr);
@@ -9193,7 +9254,7 @@ instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1
ins_pipe(ialu_reg);
%}
-instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, rFlagsReg cr) %{
+instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI_0 imm_zero, rFlagsReg cr) %{
match(Set dst (AndI (SubI imm_zero src) src));
predicate(UseBMI1Instructions);
effect(KILL cr);
@@ -9206,7 +9267,7 @@ instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, rFlagsReg cr) %{
ins_pipe(ialu_reg);
%}
-instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, rFlagsReg cr) %{
+instruct blsiI_rReg_mem(rRegI dst, memory src, immI_0 imm_zero, rFlagsReg cr) %{
match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
predicate(UseBMI1Instructions);
effect(KILL cr);
@@ -9889,7 +9950,7 @@ instruct cmpLTMask(rRegI dst, rRegI p, rRegI q, rFlagsReg cr)
ins_pipe(pipe_slow);
%}
-instruct cmpLTMask0(rRegI dst, immI0 zero, rFlagsReg cr)
+instruct cmpLTMask0(rRegI dst, immI_0 zero, rFlagsReg cr)
%{
match(Set dst (CmpLTMask dst zero));
effect(KILL cr);
@@ -11003,13 +11064,13 @@ instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI
ins_pipe( pipe_slow );
%}
-instruct string_indexofU_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
+instruct string_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
%{
- predicate(UseSSE42Intrinsics);
+ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U));
match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
- format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
ins_encode %{
__ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
$tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
@@ -11017,6 +11078,20 @@ instruct string_indexofU_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
ins_pipe( pipe_slow );
%}
+instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
+ rbx_RegI result, legRegD tmp_vec1, legRegD tmp_vec2, legRegD tmp_vec3, rcx_RegI tmp, rFlagsReg cr)
+%{
+ predicate(UseSSE42Intrinsics && (((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L));
+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
+ effect(TEMP tmp_vec1, TEMP tmp_vec2, TEMP tmp_vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
+ format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result // KILL all" %}
+ ins_encode %{
+ __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
+ $tmp_vec1$$XMMRegister, $tmp_vec2$$XMMRegister, $tmp_vec3$$XMMRegister, $tmp$$Register);
+ %}
+ ins_pipe( pipe_slow );
+%}
+
// fast string equals
instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
@@ -11222,7 +11297,7 @@ instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2)
ins_pipe(ialu_reg_reg);
%}
-instruct overflowNegI_rReg(rFlagsReg cr, immI0 zero, rax_RegI op2)
+instruct overflowNegI_rReg(rFlagsReg cr, immI_0 zero, rax_RegI op2)
%{
match(Set cr (OverflowSubI zero op2));
effect(DEF cr, USE_KILL op2);
@@ -11331,7 +11406,7 @@ instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2)
ins_pipe(ialu_cr_reg_mem);
%}
-instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
+instruct testI_reg(rFlagsReg cr, rRegI src, immI_0 zero)
%{
match(Set cr (CmpI src zero));
@@ -11341,7 +11416,7 @@ instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero)
ins_pipe(ialu_cr_reg_imm);
%}
-instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
+instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI_0 zero)
%{
match(Set cr (CmpI (AndI src con) zero));
@@ -11351,7 +11426,7 @@ instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero)
ins_pipe(ialu_cr_reg_imm);
%}
-instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero)
+instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI_0 zero)
%{
match(Set cr (CmpI (AndI src (LoadI mem)) zero));
@@ -11405,7 +11480,7 @@ instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2)
// // ins_encode( OpcP, reg_mem( op1, op2) );
// //%}
-instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero)
+instruct testU_reg(rFlagsRegU cr, rRegI src, immI_0 zero)
%{
match(Set cr (CmpU src zero));
@@ -11743,7 +11818,7 @@ instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm)
ins_pipe(ialu_cr_reg_mem);
%}
-instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU8 imm, immI0 zero)
+instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU7 imm, immI_0 zero)
%{
match(Set cr (CmpI (AndI (LoadUB mem) imm) zero));
@@ -11753,7 +11828,7 @@ instruct testUB_mem_imm(rFlagsReg cr, memory mem, immU8 imm, immI0 zero)
ins_pipe(ialu_cr_reg_mem);
%}
-instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI0 zero)
+instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI_0 zero)
%{
match(Set cr (CmpI (AndI (LoadB mem) imm) zero));
@@ -12476,7 +12551,7 @@ instruct tlsLoadP(r15_RegP dst) %{
// match(Set dst (CopyI src));
// %}
//
-// instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr)
+// instruct incI_rReg(rRegI dst, immI_1 src, rFlagsReg cr)
// %{
// match(Set dst (AddI dst src));
// effect(KILL cr);
diff --git a/src/hotspot/cpu/zero/assembler_zero.cpp b/src/hotspot/cpu/zero/assembler_zero.cpp
index b23048bb82b..706e020123a 100644
--- a/src/hotspot/cpu/zero/assembler_zero.cpp
+++ b/src/hotspot/cpu/zero/assembler_zero.cpp
@@ -66,12 +66,6 @@ void MacroAssembler::advance(int bytes) {
code_section()->set_end(code_section()->end() + bytes);
}
-RegisterOrConstant MacroAssembler::delayed_value_impl(
- intptr_t* delayed_value_addr, Register tmpl, int offset) {
- ShouldNotCallThis();
- return RegisterOrConstant();
-}
-
void MacroAssembler::store_oop(jobject obj) {
code_section()->relocate(pc(), oop_Relocation::spec_for_immediate());
emit_address((address) obj);
diff --git a/src/hotspot/cpu/zero/assembler_zero.hpp b/src/hotspot/cpu/zero/assembler_zero.hpp
index 1edf2c7df49..ae4c58e8ac7 100644
--- a/src/hotspot/cpu/zero/assembler_zero.hpp
+++ b/src/hotspot/cpu/zero/assembler_zero.hpp
@@ -54,9 +54,6 @@ class MacroAssembler : public Assembler {
void bang_stack_with_offset(int offset);
bool needs_explicit_null_check(intptr_t offset);
bool uses_implicit_null_check(void* address);
- RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
- Register tmp, int offset);
- public:
void advance(int bytes);
void store_oop(jobject obj);
void store_Metadata(Metadata* obj);
diff --git a/src/hotspot/cpu/zero/bytecodeInterpreter_zero.cpp b/src/hotspot/cpu/zero/bytecodeInterpreter_zero.cpp
index d20fe3e4f7f..f165de3a086 100644
--- a/src/hotspot/cpu/zero/bytecodeInterpreter_zero.cpp
+++ b/src/hotspot/cpu/zero/bytecodeInterpreter_zero.cpp
@@ -47,7 +47,6 @@ const char *BytecodeInterpreter::name_of_field_at_address(address addr) {
DO(_constants);
DO(_method);
DO(_mirror);
- DO(_mdx);
DO(_stack);
DO(_msg);
DO(_result);
@@ -84,7 +83,6 @@ void BytecodeInterpreter::layout_interpreterState(interpreterState istate,
istate->set_msg(BytecodeInterpreter::method_resume);
istate->set_bcp_advance(0);
istate->set_oop_temp(NULL);
- istate->set_mdx(NULL);
if (caller->is_interpreted_frame()) {
interpreterState prev = caller->get_interpreterState();
prev->set_callee(method);
diff --git a/src/hotspot/cpu/zero/bytecodeInterpreter_zero.hpp b/src/hotspot/cpu/zero/bytecodeInterpreter_zero.hpp
index 9e3c6e209a4..07166fbef76 100644
--- a/src/hotspot/cpu/zero/bytecodeInterpreter_zero.hpp
+++ b/src/hotspot/cpu/zero/bytecodeInterpreter_zero.hpp
@@ -153,22 +153,4 @@
#define SET_LOCALS_LONG_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->l = \
((VMJavaVal64*)(addr))->l)
-// VMSlots implementation
-
-#define VMSLOTS_SLOT(offset) ((intptr_t*)&vmslots[(offset)])
-#define VMSLOTS_ADDR(offset) ((address)vmslots[(offset)])
-#define VMSLOTS_INT(offset) (*((jint*)&vmslots[(offset)]))
-#define VMSLOTS_FLOAT(offset) (*((jfloat*)&vmslots[(offset)]))
-#define VMSLOTS_OBJECT(offset) ((oop)vmslots[(offset)])
-#define VMSLOTS_DOUBLE(offset) (((VMJavaVal64*)&vmslots[(offset) - 1])->d)
-#define VMSLOTS_LONG(offset) (((VMJavaVal64*)&vmslots[(offset) - 1])->l)
-
-#define SET_VMSLOTS_SLOT(value, offset) (*(intptr_t*)&vmslots[(offset)] = *(intptr_t *)(value))
-#define SET_VMSLOTS_ADDR(value, offset) (*((address *)&vmslots[(offset)]) = (value))
-#define SET_VMSLOTS_INT(value, offset) (*((jint *)&vmslots[(offset)]) = (value))
-#define SET_VMSLOTS_FLOAT(value, offset) (*((jfloat *)&vmslots[(offset)]) = (value))
-#define SET_VMSLOTS_OBJECT(value, offset) (*((oop *)&vmslots[(offset)]) = (value))
-#define SET_VMSLOTS_DOUBLE(value, offset) (((VMJavaVal64*)&vmslots[(offset) - 1])->d = (value))
-#define SET_VMSLOTS_LONG(value, offset) (((VMJavaVal64*)&vmslots[(offset) - 1])->l = (value))
-
#endif // CPU_ZERO_BYTECODEINTERPRETER_ZERO_HPP
diff --git a/src/hotspot/cpu/zero/frame_zero.inline.hpp b/src/hotspot/cpu/zero/frame_zero.inline.hpp
index 11472461d64..396e189a5db 100644
--- a/src/hotspot/cpu/zero/frame_zero.inline.hpp
+++ b/src/hotspot/cpu/zero/frame_zero.inline.hpp
@@ -107,7 +107,8 @@ inline oop* frame::interpreter_frame_mirror_addr() const {
}
inline intptr_t* frame::interpreter_frame_mdp_addr() const {
- return (intptr_t*) &(get_interpreterState()->_mdx);
+ fatal("Should not call this: Zero never profiles");
+ return NULL; // silence compiler warnings
}
inline intptr_t* frame::interpreter_frame_tos_address() const {
diff --git a/src/hotspot/cpu/zero/interp_masm_zero.hpp b/src/hotspot/cpu/zero/interp_masm_zero.hpp
index 5d8aa9a3bdc..a109b12dec0 100644
--- a/src/hotspot/cpu/zero/interp_masm_zero.hpp
+++ b/src/hotspot/cpu/zero/interp_masm_zero.hpp
@@ -35,14 +35,6 @@
class InterpreterMacroAssembler : public MacroAssembler {
public:
InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {}
-
- public:
- RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
- Register tmp,
- int offset) {
- ShouldNotCallThis();
- return RegisterOrConstant();
- }
};
#endif // CPU_ZERO_INTERP_MASM_ZERO_HPP
diff --git a/src/hotspot/cpu/zero/stack_zero.inline.hpp b/src/hotspot/cpu/zero/stack_zero.inline.hpp
index 0c85af5dfde..f363de57180 100644
--- a/src/hotspot/cpu/zero/stack_zero.inline.hpp
+++ b/src/hotspot/cpu/zero/stack_zero.inline.hpp
@@ -47,7 +47,7 @@ inline void ZeroStack::overflow_check(int required_words, TRAPS) {
// to use under normal circumstances. Note that the returned
// value can be negative.
inline int ZeroStack::abi_stack_available(Thread *thread) const {
- guarantee(Thread::current() == thread, "should run in the same thread");
+ assert(Thread::current() == thread, "should run in the same thread");
int stack_used = thread->stack_base() - (address) &stack_used
+ (StackOverflow::stack_guard_zone_size() + StackOverflow::stack_shadow_zone_size());
int stack_free = thread->stack_size() - stack_used;
diff --git a/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp b/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp
index 7c4c1f73908..a865ef37a2f 100644
--- a/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp
+++ b/src/hotspot/cpu/zero/zeroInterpreter_zero.cpp
@@ -70,10 +70,11 @@ void ZeroInterpreter::initialize_code() {
// Allow c++ interpreter to do one initialization now that switches are set, etc.
BytecodeInterpreter start_msg(BytecodeInterpreter::initialize);
- if (JvmtiExport::can_post_interpreter_events())
- BytecodeInterpreter::runWithChecks(&start_msg);
- else
- BytecodeInterpreter::run(&start_msg);
+ if (JvmtiExport::can_post_interpreter_events()) {
+ BytecodeInterpreter::run<true>(&start_msg);
+ } else {
+ BytecodeInterpreter::run<false>(&start_msg);
+ }
}
void ZeroInterpreter::invoke_method(Method* method, address entry_point, TRAPS) {
@@ -169,10 +170,11 @@ void ZeroInterpreter::main_loop(int recurse, TRAPS) {
thread->set_last_Java_frame();
// Call the interpreter
- if (JvmtiExport::can_post_interpreter_events())
- BytecodeInterpreter::runWithChecks(istate);
- else
- BytecodeInterpreter::run(istate);
+ if (JvmtiExport::can_post_interpreter_events()) {
+ BytecodeInterpreter::run<true>(istate);
+ } else {
+ BytecodeInterpreter::run<false>(istate);
+ }
fixup_after_potential_safepoint();
// Clear the frame anchor
@@ -596,6 +598,9 @@ int ZeroInterpreter::accessor_entry(Method* method, intptr_t UNUSED, TRAPS) {
break;
}
if (entry->is_volatile()) {
+ if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+ OrderAccess::fence();
+ }
switch (entry->flag_state()) {
case ctos:
SET_LOCALS_INT(object->char_field_acquire(entry->f2_as_index()), 0);
@@ -694,56 +699,6 @@ int ZeroInterpreter::empty_entry(Method* method, intptr_t UNUSED, TRAPS) {
return 0;
}
-// The new slots will be inserted before slot insert_before.
-// Slots < insert_before will have the same slot number after the insert.
-// Slots >= insert_before will become old_slot + num_slots.
-void ZeroInterpreter::insert_vmslots(int insert_before, int num_slots, TRAPS) {
- JavaThread *thread = THREAD->as_Java_thread();
- ZeroStack *stack = thread->zero_stack();
-
- // Allocate the space
- stack->overflow_check(num_slots, CHECK);
- stack->alloc(num_slots * wordSize);
- intptr_t *vmslots = stack->sp();
-
- // Shuffle everything up
- for (int i = 0; i < insert_before; i++)
- SET_VMSLOTS_SLOT(VMSLOTS_SLOT(i + num_slots), i);
-}
-
-void ZeroInterpreter::remove_vmslots(int first_slot, int num_slots, TRAPS) {
- JavaThread *thread = THREAD->as_Java_thread();
- ZeroStack *stack = thread->zero_stack();
- intptr_t *vmslots = stack->sp();
-
- // Move everything down
- for (int i = first_slot - 1; i >= 0; i--)
- SET_VMSLOTS_SLOT(VMSLOTS_SLOT(i), i + num_slots);
-
- // Deallocate the space
- stack->set_sp(stack->sp() + num_slots);
-}
-
-BasicType ZeroInterpreter::result_type_of_handle(oop method_handle) {
- oop method_type = java_lang_invoke_MethodHandle::type(method_handle);
- oop return_type = java_lang_invoke_MethodType::rtype(method_type);
- return java_lang_Class::as_BasicType(return_type, (Klass* *) NULL);
-}
-
-intptr_t* ZeroInterpreter::calculate_unwind_sp(ZeroStack* stack,
- oop method_handle) {
- oop method_type = java_lang_invoke_MethodHandle::type(method_handle);
- int argument_slots = java_lang_invoke_MethodType::ptype_slot_count(method_type);
-
- return stack->sp() + argument_slots;
-}
-
-JRT_ENTRY(void, ZeroInterpreter::throw_exception(JavaThread* thread,
- Symbol* name,
- char* message))
- THROW_MSG(name, message);
-JRT_END
-
InterpreterFrame *InterpreterFrame::build(Method* const method, TRAPS) {
JavaThread *thread = THREAD->as_Java_thread();
ZeroStack *stack = thread->zero_stack();
@@ -796,7 +751,6 @@ InterpreterFrame *InterpreterFrame::build(Method* const method, TRAPS) {
istate->set_constants(method->constants()->cache());
istate->set_msg(BytecodeInterpreter::method_entry);
istate->set_oop_temp(NULL);
- istate->set_mdx(NULL);
istate->set_callee(NULL);
istate->set_monitor_base((BasicObjectLock *) stack->sp());
diff --git a/src/hotspot/cpu/zero/zeroInterpreter_zero.hpp b/src/hotspot/cpu/zero/zeroInterpreter_zero.hpp
index e534ff2fa29..2e6239c7f02 100644
--- a/src/hotspot/cpu/zero/zeroInterpreter_zero.hpp
+++ b/src/hotspot/cpu/zero/zeroInterpreter_zero.hpp
@@ -41,12 +41,4 @@
// Main loop of normal_entry
static void main_loop(int recurse, TRAPS);
- private:
- // Helpers for method_handle_entry
- static void insert_vmslots(int insert_before, int num_slots, TRAPS);
- static void remove_vmslots(int first_slot, int num_slots, TRAPS);
- static BasicType result_type_of_handle(oop method_handle);
- static intptr_t* calculate_unwind_sp(ZeroStack* stack, oop method_handle);
- static void throw_exception(JavaThread* thread, Symbol* name,char *msg=NULL);
-
#endif // CPU_ZERO_CPPINTERPRETER_ZERO_HPP
diff --git a/src/hotspot/os/aix/globals_aix.hpp b/src/hotspot/os/aix/globals_aix.hpp
index b6b67a4f3bb..1203b09c883 100644
--- a/src/hotspot/os/aix/globals_aix.hpp
+++ b/src/hotspot/os/aix/globals_aix.hpp
@@ -88,7 +88,6 @@
// Use Use64KPages or Use16MPages instead.
define_pd_global(bool, UseLargePages, false);
define_pd_global(bool, UseLargePagesIndividualAllocation, false);
-define_pd_global(bool, UseOSErrorReporting, false);
define_pd_global(bool, UseThreadPriorities, true) ;
#endif // OS_AIX_GLOBALS_AIX_HPP
diff --git a/src/hotspot/os/aix/os_aix.cpp b/src/hotspot/os/aix/os_aix.cpp
index 552c345563a..71cc3056a55 100644
--- a/src/hotspot/os/aix/os_aix.cpp
+++ b/src/hotspot/os/aix/os_aix.cpp
@@ -2236,7 +2236,7 @@ bool os::can_execute_large_page_memory() {
return false;
}
-char* os::pd_attempt_reserve_memory_at(char* requested_addr, size_t bytes, int file_desc) {
+char* os::pd_attempt_map_memory_to_file_at(char* requested_addr, size_t bytes, int file_desc) {
assert(file_desc >= 0, "file_desc is not valid");
char* result = NULL;
diff --git a/src/hotspot/os/aix/safepointMechanism_aix.cpp b/src/hotspot/os/aix/safepointMechanism_aix.cpp
index cda4301239f..44b01a51afe 100644
--- a/src/hotspot/os/aix/safepointMechanism_aix.cpp
+++ b/src/hotspot/os/aix/safepointMechanism_aix.cpp
@@ -37,6 +37,10 @@ void SafepointMechanism::pd_initialize() {
return;
}
+ // Poll bit values
+ _poll_word_armed_value = poll_bit();
+ _poll_word_disarmed_value = ~_poll_word_armed_value;
+
// Allocate one protected page
char* map_address = (char*)MAP_FAILED;
const size_t page_size = os::vm_page_size();
@@ -104,8 +108,8 @@ void SafepointMechanism::pd_initialize() {
if (!os::guard_memory((char*)_polling_page, page_size)) {
fatal("Could not protect polling page");
}
- intptr_t bad_page_val = reinterpret_cast<intptr_t>(map_address),
- good_page_val = bad_page_val + os::vm_page_size();
- _poll_armed_value = reinterpret_cast<void*>(bad_page_val + poll_bit());
- _poll_disarmed_value = reinterpret_cast<void*>(good_page_val);
+ uintptr_t bad_page_val = reinterpret_cast<uintptr_t>(map_address),
+ good_page_val = bad_page_val + os::vm_page_size();
+ _poll_page_armed_value = bad_page_val + poll_bit();
+ _poll_page_disarmed_value = good_page_val;
}
diff --git a/src/hotspot/os/bsd/globals_bsd.hpp b/src/hotspot/os/bsd/globals_bsd.hpp
index 6c8939a6dc0..b36173655df 100644
--- a/src/hotspot/os/bsd/globals_bsd.hpp
+++ b/src/hotspot/os/bsd/globals_bsd.hpp
@@ -44,7 +44,6 @@
//
define_pd_global(bool, UseLargePages, false);
define_pd_global(bool, UseLargePagesIndividualAllocation, false);
-define_pd_global(bool, UseOSErrorReporting, false);
define_pd_global(bool, UseThreadPriorities, true) ;
#endif // OS_BSD_GLOBALS_BSD_HPP
diff --git a/src/hotspot/os/bsd/os_bsd.cpp b/src/hotspot/os/bsd/os_bsd.cpp
index 68196755732..d95cb6807d2 100644
--- a/src/hotspot/os/bsd/os_bsd.cpp
+++ b/src/hotspot/os/bsd/os_bsd.cpp
@@ -1905,7 +1905,7 @@ bool os::can_execute_large_page_memory() {
return false;
}
-char* os::pd_attempt_reserve_memory_at(char* requested_addr, size_t bytes, int file_desc) {
+char* os::pd_attempt_map_memory_to_file_at(char* requested_addr, size_t bytes, int file_desc) {
assert(file_desc >= 0, "file_desc is not valid");
char* result = pd_attempt_reserve_memory_at(requested_addr, bytes);
if (result != NULL) {
diff --git a/src/hotspot/os/linux/cgroupSubsystem_linux.cpp b/src/hotspot/os/linux/cgroupSubsystem_linux.cpp
index fada2a732bf..fb653c762bc 100644
--- a/src/hotspot/os/linux/cgroupSubsystem_linux.cpp
+++ b/src/hotspot/os/linux/cgroupSubsystem_linux.cpp
@@ -294,14 +294,15 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
// Skip cgroup2 fs lines on hybrid or unified hierarchy.
continue;
}
- any_cgroup_mounts_found = true;
while ((token = strsep(&cptr, ",")) != NULL) {
if (strcmp(token, "memory") == 0) {
+ any_cgroup_mounts_found = true;
assert(cg_infos[MEMORY_IDX]._mount_path == NULL, "stomping of _mount_path");
cg_infos[MEMORY_IDX]._mount_path = os::strdup(tmpmount);
cg_infos[MEMORY_IDX]._root_mount_path = os::strdup(tmproot);
cg_infos[MEMORY_IDX]._data_complete = true;
} else if (strcmp(token, "cpuset") == 0) {
+ any_cgroup_mounts_found = true;
if (cg_infos[CPUSET_IDX]._mount_path != NULL) {
// On some systems duplicate cpuset controllers get mounted in addition to
// the main cgroup controllers most likely under /sys/fs/cgroup. In that
@@ -321,11 +322,13 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
cg_infos[CPUSET_IDX]._root_mount_path = os::strdup(tmproot);
cg_infos[CPUSET_IDX]._data_complete = true;
} else if (strcmp(token, "cpu") == 0) {
+ any_cgroup_mounts_found = true;
assert(cg_infos[CPU_IDX]._mount_path == NULL, "stomping of _mount_path");
cg_infos[CPU_IDX]._mount_path = os::strdup(tmpmount);
cg_infos[CPU_IDX]._root_mount_path = os::strdup(tmproot);
cg_infos[CPU_IDX]._data_complete = true;
} else if (strcmp(token, "cpuacct") == 0) {
+ any_cgroup_mounts_found = true;
assert(cg_infos[CPUACCT_IDX]._mount_path == NULL, "stomping of _mount_path");
cg_infos[CPUACCT_IDX]._mount_path = os::strdup(tmpmount);
cg_infos[CPUACCT_IDX]._root_mount_path = os::strdup(tmproot);
@@ -339,7 +342,7 @@ bool CgroupSubsystemFactory::determine_type(CgroupInfo* cg_infos,
// Neither cgroup2 nor cgroup filesystems mounted via /proc/self/mountinfo
// No point in continuing.
if (!any_cgroup_mounts_found) {
- log_trace(os, container)("No cgroup controllers mounted.");
+ log_trace(os, container)("No relevant cgroup controllers mounted.");
cleanup(cg_infos);
*flags = INVALID_CGROUPS_NO_MOUNT;
return false;
diff --git a/src/hotspot/os/linux/globals_linux.hpp b/src/hotspot/os/linux/globals_linux.hpp
index f2a889285af..a37fd28f0e4 100644
--- a/src/hotspot/os/linux/globals_linux.hpp
+++ b/src/hotspot/os/linux/globals_linux.hpp
@@ -79,7 +79,10 @@
"be dumped into the corefile.") \
\
product(bool, UseCpuAllocPath, false, DIAGNOSTIC, \
- "Use CPU_ALLOC code path in os::active_processor_count ")
+ "Use CPU_ALLOC code path in os::active_processor_count ") \
+ \
+ product(bool, DumpPerfMapAtExit, false, DIAGNOSTIC, \
+ "Write map file for Linux perf tool at exit")
// end of RUNTIME_OS_FLAGS
@@ -89,7 +92,6 @@
//
define_pd_global(bool, UseLargePages, false);
define_pd_global(bool, UseLargePagesIndividualAllocation, false);
-define_pd_global(bool, UseOSErrorReporting, false);
define_pd_global(bool, UseThreadPriorities, true) ;
#endif // OS_LINUX_GLOBALS_LINUX_HPP
diff --git a/src/hotspot/os/linux/osContainer_linux.hpp b/src/hotspot/os/linux/osContainer_linux.hpp
index 8775288d450..21801b7dc4b 100644
--- a/src/hotspot/os/linux/osContainer_linux.hpp
+++ b/src/hotspot/os/linux/osContainer_linux.hpp
@@ -41,8 +41,6 @@ class OSContainer: AllStatic {
static bool _is_containerized;
static int _active_processor_count;
- static jlong read_memory_limit_in_bytes();
-
public:
static void init();
static inline bool is_containerized();
diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp
index 85560ccbbb0..e7e332c16b5 100644
--- a/src/hotspot/os/linux/os_linux.cpp
+++ b/src/hotspot/os/linux/os_linux.cpp
@@ -104,7 +104,6 @@
# include
# include
# include
-# include
# include
# include
# include
@@ -137,6 +136,17 @@
// for timer info max values which include all bits
#define ALL_64_BITS CONST64(0xFFFFFFFFFFFFFFFF)
+#ifdef MUSL_LIBC
+// dlvsym is not a part of POSIX
+// and musl libc doesn't implement it.
+static void *dlvsym(void *handle,
+ const char *symbol,
+ const char *version) {
+ // load the latest version of symbol
+ return dlsym(handle, symbol);
+}
+#endif
+
enum CoredumpFilterBit {
FILE_BACKED_PVT_BIT = 1 << 2,
FILE_BACKED_SHARED_BIT = 1 << 3,
@@ -156,7 +166,7 @@ int (*os::Linux::_pthread_setname_np)(pthread_t, const char*) = NULL;
pthread_t os::Linux::_main_thread;
int os::Linux::_page_size = -1;
bool os::Linux::_supports_fast_thread_cpu_time = false;
-const char * os::Linux::_glibc_version = NULL;
+const char * os::Linux::_libc_version = NULL;
const char * os::Linux::_libpthread_version = NULL;
size_t os::Linux::_default_large_page_size = 0;
@@ -510,17 +520,24 @@ void os::Linux::libpthread_init() {
#error "glibc too old (< 2.3.2)"
#endif
+#ifdef MUSL_LIBC
+ // confstr() from musl libc returns EINVAL for
+ // _CS_GNU_LIBC_VERSION and _CS_GNU_LIBPTHREAD_VERSION
+ os::Linux::set_libc_version("musl - unknown");
+ os::Linux::set_libpthread_version("musl - unknown");
+#else
size_t n = confstr(_CS_GNU_LIBC_VERSION, NULL, 0);
assert(n > 0, "cannot retrieve glibc version");
char *str = (char *)malloc(n, mtInternal);
confstr(_CS_GNU_LIBC_VERSION, str, n);
- os::Linux::set_glibc_version(str);
+ os::Linux::set_libc_version(str);
n = confstr(_CS_GNU_LIBPTHREAD_VERSION, NULL, 0);
assert(n > 0, "cannot retrieve pthread version");
str = (char *)malloc(n, mtInternal);
confstr(_CS_GNU_LIBPTHREAD_VERSION, str, n);
os::Linux::set_libpthread_version(str);
+#endif
}
/////////////////////////////////////////////////////////////////////////////
@@ -1935,7 +1952,7 @@ void * os::Linux::dll_load_in_vmthread(const char *filename, char *ebuf,
StackOverflow* overflow_state = jt->stack_overflow_state();
if (!overflow_state->stack_guard_zone_unused() && // Stack not yet fully initialized
overflow_state->stack_guards_enabled()) { // No pending stack overflow exceptions
- if (!os::guard_memory((char *)jt->stack_end(), overflow_state->stack_guard_zone_size())) {
+ if (!os::guard_memory((char *)jt->stack_end(), StackOverflow::stack_guard_zone_size())) {
warning("Attempt to reguard stack yellow zone failed.");
}
}
@@ -2211,7 +2228,7 @@ void os::get_summary_os_info(char* buf, size_t buflen) {
void os::Linux::print_libversion_info(outputStream* st) {
// libc, pthread
st->print("libc: ");
- st->print("%s ", os::Linux::glibc_version());
+ st->print("%s ", os::Linux::libc_version());
st->print("%s ", os::Linux::libpthread_version());
st->cr();
}
@@ -3070,6 +3087,8 @@ bool os::Linux::libnuma_init() {
if (handle != NULL) {
set_numa_node_to_cpus(CAST_TO_FN_PTR(numa_node_to_cpus_func_t,
libnuma_dlsym(handle, "numa_node_to_cpus")));
+ set_numa_node_to_cpus_v2(CAST_TO_FN_PTR(numa_node_to_cpus_v2_func_t,
+ libnuma_v2_dlsym(handle, "numa_node_to_cpus")));
set_numa_max_node(CAST_TO_FN_PTR(numa_max_node_func_t,
libnuma_dlsym(handle, "numa_max_node")));
set_numa_num_configured_nodes(CAST_TO_FN_PTR(numa_num_configured_nodes_func_t,
@@ -3198,7 +3217,17 @@ void os::Linux::rebuild_cpu_to_node_map() {
if (cpu_map[j] != 0) {
for (size_t k = 0; k < BitsPerCLong; k++) {
if (cpu_map[j] & (1UL << k)) {
- cpu_to_node()->at_put(j * BitsPerCLong + k, closest_node);
+ int cpu_index = j * BitsPerCLong + k;
+
+#ifndef PRODUCT
+ if (UseDebuggerErgo1 && cpu_index >= (int)cpu_num) {
+ // Some debuggers limit the processor count without
+ // intercepting the NUMA APIs. Just fake the values.
+ cpu_index = 0;
+ }
+#endif
+
+ cpu_to_node()->at_put(cpu_index, closest_node);
}
}
}
@@ -3208,6 +3237,26 @@ void os::Linux::rebuild_cpu_to_node_map() {
FREE_C_HEAP_ARRAY(unsigned long, cpu_map);
}
+int os::Linux::numa_node_to_cpus(int node, unsigned long *buffer, int bufferlen) {
+ // use the latest version of numa_node_to_cpus if available
+ if (_numa_node_to_cpus_v2 != NULL) {
+
+ // libnuma bitmask struct
+ struct bitmask {
+ unsigned long size; /* number of bits in the map */
+ unsigned long *maskp;
+ };
+
+ struct bitmask mask;
+ mask.maskp = (unsigned long *)buffer;
+ mask.size = bufferlen * 8;
+ return _numa_node_to_cpus_v2(node, &mask);
+ } else if (_numa_node_to_cpus != NULL) {
+ return _numa_node_to_cpus(node, buffer, bufferlen);
+ }
+ return -1;
+}
+
int os::Linux::get_node_by_cpu(int cpu_id) {
if (cpu_to_node() != NULL && cpu_id >= 0 && cpu_id < cpu_to_node()->length()) {
return cpu_to_node()->at(cpu_id);
@@ -3219,6 +3268,7 @@ GrowableArray<int>* os::Linux::_cpu_to_node;
GrowableArray<int>* os::Linux::_nindex_to_node;
os::Linux::sched_getcpu_func_t os::Linux::_sched_getcpu;
os::Linux::numa_node_to_cpus_func_t os::Linux::_numa_node_to_cpus;
+os::Linux::numa_node_to_cpus_v2_func_t os::Linux::_numa_node_to_cpus_v2;
os::Linux::numa_max_node_func_t os::Linux::_numa_max_node;
os::Linux::numa_num_configured_nodes_func_t os::Linux::_numa_num_configured_nodes;
os::Linux::numa_available_func_t os::Linux::_numa_available;
@@ -4164,7 +4214,7 @@ bool os::can_execute_large_page_memory() {
return UseTransparentHugePages || UseHugeTLBFS;
}
-char* os::pd_attempt_reserve_memory_at(char* requested_addr, size_t bytes, int file_desc) {
+char* os::pd_attempt_map_memory_to_file_at(char* requested_addr, size_t bytes, int file_desc) {
assert(file_desc >= 0, "file_desc is not valid");
char* result = pd_attempt_reserve_memory_at(requested_addr, bytes);
if (result != NULL) {
@@ -4321,6 +4371,40 @@ jlong os::Linux::fast_thread_cpu_time(clockid_t clockid) {
extern void report_error(char* file_name, int line_no, char* title,
char* format, ...);
+// Some linux distributions (notably: Alpine Linux) include the
+// grsecurity in the kernel. Of particular interest from a JVM perspective
+// is PaX (https://pax.grsecurity.net/), which adds some security features
+// related to page attributes. Specifically, the MPROTECT PaX functionality
+// (https://pax.grsecurity.net/docs/mprotect.txt) prevents dynamic
+// code generation by disallowing a (previously) writable page to be
+// marked as executable. This is, of course, exactly what HotSpot does
+// for both JIT compiled method, as well as for stubs, adapters, etc.
+//
+// Instead of crashing "lazily" when trying to make a page executable,
+// this code probes for the presence of PaX and reports the failure
+// eagerly.
+static void check_pax(void) {
+ // Zero doesn't generate code dynamically, so no need to perform the PaX check
+#ifndef ZERO
+ size_t size = os::Linux::page_size();
+
+ void* p = ::mmap(NULL, size, PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ if (p == MAP_FAILED) {
+ log_debug(os)("os_linux.cpp: check_pax: mmap failed (%s)" , os::strerror(errno));
+ vm_exit_out_of_memory(size, OOM_MMAP_ERROR, "failed to allocate memory for PaX check.");
+ }
+
+ int res = ::mprotect(p, size, PROT_WRITE|PROT_EXEC);
+ if (res == -1) {
+ log_debug(os)("os_linux.cpp: check_pax: mprotect failed (%s)" , os::strerror(errno));
+ vm_exit_during_initialization(
+ "Failed to mark memory page as executable - check if grsecurity/PaX is enabled");
+ }
+
+ ::munmap(p, size);
+#endif
+}
+
// this is called _before_ most of the global arguments have been parsed
void os::init(void) {
char dummy; // used to get a guess on initial stack address
@@ -4354,6 +4438,8 @@ void os::init(void) {
Linux::_pthread_setname_np =
(int(*)(pthread_t, const char*))dlsym(RTLD_DEFAULT, "pthread_setname_np");
+ check_pax();
+
os::Posix::init();
initial_time_count = javaTimeNanos();
@@ -4493,7 +4579,7 @@ jint os::init_2(void) {
Linux::libpthread_init();
Linux::sched_getcpu_init();
log_info(os)("HotSpot is running with %s, %s",
- Linux::glibc_version(), Linux::libpthread_version());
+ Linux::libc_version(), Linux::libpthread_version());
if (UseNUMA || UseNUMAInterleaving) {
Linux::numa_init();
@@ -4549,6 +4635,12 @@ jint os::init_2(void) {
set_coredump_filter(FILE_BACKED_SHARED_BIT);
}
+ if (DumpPerfMapAtExit && FLAG_IS_DEFAULT(UseCodeCacheFlushing)) {
+ // Disable code cache flushing to ensure the map file written at
+ // exit contains all nmethods generated during execution.
+ FLAG_SET_DEFAULT(UseCodeCacheFlushing, false);
+ }
+
return JNI_OK;
}
@@ -4686,7 +4778,16 @@ int os::active_processor_count() {
uint os::processor_id() {
const int id = Linux::sched_getcpu();
- assert(id >= 0 && id < _processor_count, "Invalid processor id");
+
+#ifndef PRODUCT
+ if (UseDebuggerErgo1 && id >= _processor_count) {
+ // Some debuggers limit the processor count without limiting
+ // the returned processor ids. Fake the processor id.
+ return 0;
+ }
+#endif
+
+ assert(id >= 0 && id < _processor_count, "Invalid processor id [%d]", id);
return (uint)id;
}
diff --git a/src/hotspot/os/linux/os_linux.hpp b/src/hotspot/os/linux/os_linux.hpp
index 45d3bbc9d74..26e40d2bdfb 100644
--- a/src/hotspot/os/linux/os_linux.hpp
+++ b/src/hotspot/os/linux/os_linux.hpp
@@ -42,7 +42,7 @@ class Linux {
static address _initial_thread_stack_bottom;
static uintptr_t _initial_thread_stack_size;
- static const char *_glibc_version;
+ static const char *_libc_version;
static const char *_libpthread_version;
static bool _supports_fast_thread_cpu_time;
@@ -69,7 +69,7 @@ class Linux {
static int commit_memory_impl(char* addr, size_t bytes,
size_t alignment_hint, bool exec);
- static void set_glibc_version(const char *s) { _glibc_version = s; }
+ static void set_libc_version(const char *s) { _libc_version = s; }
static void set_libpthread_version(const char *s) { _libpthread_version = s; }
static void rebuild_cpu_to_node_map();
@@ -139,10 +139,8 @@ class Linux {
static intptr_t* ucontext_get_sp(const ucontext_t* uc);
static intptr_t* ucontext_get_fp(const ucontext_t* uc);
- static bool get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr);
-
// GNU libc and libpthread version strings
- static const char *glibc_version() { return _glibc_version; }
+ static const char *libc_version() { return _libc_version; }
static const char *libpthread_version() { return _libpthread_version; }
static void libpthread_init();
@@ -183,6 +181,7 @@ class Linux {
typedef int (*sched_getcpu_func_t)(void);
typedef int (*numa_node_to_cpus_func_t)(int node, unsigned long *buffer, int bufferlen);
+ typedef int (*numa_node_to_cpus_v2_func_t)(int node, void *mask);
typedef int (*numa_max_node_func_t)(void);
typedef int (*numa_num_configured_nodes_func_t)(void);
typedef int (*numa_available_func_t)(void);
@@ -199,6 +198,7 @@ class Linux {
static sched_getcpu_func_t _sched_getcpu;
static numa_node_to_cpus_func_t _numa_node_to_cpus;
+ static numa_node_to_cpus_v2_func_t _numa_node_to_cpus_v2;
static numa_max_node_func_t _numa_max_node;
static numa_num_configured_nodes_func_t _numa_num_configured_nodes;
static numa_available_func_t _numa_available;
@@ -220,6 +220,7 @@ class Linux {
static void set_sched_getcpu(sched_getcpu_func_t func) { _sched_getcpu = func; }
static void set_numa_node_to_cpus(numa_node_to_cpus_func_t func) { _numa_node_to_cpus = func; }
+ static void set_numa_node_to_cpus_v2(numa_node_to_cpus_v2_func_t func) { _numa_node_to_cpus_v2 = func; }
static void set_numa_max_node(numa_max_node_func_t func) { _numa_max_node = func; }
static void set_numa_num_configured_nodes(numa_num_configured_nodes_func_t func) { _numa_num_configured_nodes = func; }
static void set_numa_available(numa_available_func_t func) { _numa_available = func; }
@@ -249,9 +250,7 @@ class Linux {
public:
static int sched_getcpu() { return _sched_getcpu != NULL ? _sched_getcpu() : -1; }
- static int numa_node_to_cpus(int node, unsigned long *buffer, int bufferlen) {
- return _numa_node_to_cpus != NULL ? _numa_node_to_cpus(node, buffer, bufferlen) : -1;
- }
+ static int numa_node_to_cpus(int node, unsigned long *buffer, int bufferlen);
static int numa_max_node() { return _numa_max_node != NULL ? _numa_max_node() : -1; }
static int numa_num_configured_nodes() {
return _numa_num_configured_nodes != NULL ? _numa_num_configured_nodes() : -1;
diff --git a/src/hotspot/os/posix/os_posix.cpp b/src/hotspot/os/posix/os_posix.cpp
index b819f4c202f..7298fb21638 100644
--- a/src/hotspot/os/posix/os_posix.cpp
+++ b/src/hotspot/os/posix/os_posix.cpp
@@ -30,6 +30,7 @@
#include "utilities/globalDefinitions.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/interfaceSupport.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
#include "services/memTracker.hpp"
#include "runtime/atomic.hpp"
#include "runtime/java.hpp"
@@ -296,37 +297,18 @@ char* os::replace_existing_mapping_with_file_mapping(char* base, size_t size, in
return map_memory_to_file(base, size, fd);
}
-// Multiple threads can race in this code, and can remap over each other with MAP_FIXED,
-// so on posix, unmap the section at the start and at the end of the chunk that we mapped
-// rather than unmapping and remapping the whole chunk to get requested alignment.
-char* os::reserve_memory_aligned(size_t size, size_t alignment, int file_desc) {
+static size_t calculate_aligned_extra_size(size_t size, size_t alignment) {
assert((alignment & (os::vm_allocation_granularity() - 1)) == 0,
"Alignment must be a multiple of allocation granularity (page size)");
assert((size & (alignment -1)) == 0, "size must be 'alignment' aligned");
size_t extra_size = size + alignment;
assert(extra_size >= size, "overflow, size is too large to allow alignment");
+ return extra_size;
+}
- char* extra_base;
- if (file_desc != -1) {
- // For file mapping, we do not call os:reserve_memory_with_fd since:
- // - we later chop away parts of the mapping using os::release_memory and that could fail if the
- // original mmap call had been tied to an fd.
- // - The memory API os::reserve_memory uses is an implementation detail. It may (and usually is)
- // mmap but it also may System V shared memory which cannot be uncommitted as a whole, so
- // chopping off and unmapping excess bits back and front (see below) would not work.
- extra_base = reserve_mmapped_memory(extra_size, NULL);
- if (extra_base != NULL) {
- MemTracker::record_virtual_memory_reserve((address)extra_base, extra_size, CALLER_PC);
- }
- } else {
- extra_base = os::reserve_memory(extra_size);
- }
-
- if (extra_base == NULL) {
- return NULL;
- }
-
+// After a bigger chunk was mapped, unmaps start and end parts to get the requested alignment.
+static char* chop_extra_memory(size_t size, size_t alignment, char* extra_base, size_t extra_size) {
// Do manual alignment
char* aligned_base = align_up(extra_base, alignment);
@@ -348,13 +330,39 @@ char* os::reserve_memory_aligned(size_t size, size_t alignment, int file_desc) {
os::release_memory(extra_base + begin_offset + size, end_offset);
}
- if (file_desc != -1) {
- // After we have an aligned address, we can replace anonymous mapping with file mapping
- if (replace_existing_mapping_with_file_mapping(aligned_base, size, file_desc) == NULL) {
- vm_exit_during_initialization(err_msg("Error in mapping Java heap at the given filesystem directory"));
- }
- MemTracker::record_virtual_memory_commit((address)aligned_base, size, CALLER_PC);
+ return aligned_base;
+}
+
+// Multiple threads can race in this code, and can remap over each other with MAP_FIXED,
+// so on posix, unmap the section at the start and at the end of the chunk that we mapped
+// rather than unmapping and remapping the whole chunk to get requested alignment.
+char* os::reserve_memory_aligned(size_t size, size_t alignment) {
+ size_t extra_size = calculate_aligned_extra_size(size, alignment);
+ char* extra_base = os::reserve_memory(extra_size);
+ if (extra_base == NULL) {
+ return NULL;
}
+ return chop_extra_memory(size, alignment, extra_base, extra_size);
+}
+
+char* os::map_memory_to_file_aligned(size_t size, size_t alignment, int file_desc) {
+ size_t extra_size = calculate_aligned_extra_size(size, alignment);
+ // For file mapping, we do not call os::map_memory_to_file(size, fd) since:
+ // - we later chop away parts of the mapping using os::release_memory and that could fail if the
+ // original mmap call had been tied to an fd.
+ // - The memory API os::reserve_memory uses is an implementation detail. It may be (and usually is)
+ // mmap, but it may also be System V shared memory, which cannot be uncommitted as a whole, so
+ // chopping off and unmapping excess bits back and front (see below) would not work.
+ char* extra_base = reserve_mmapped_memory(extra_size, NULL);
+ if (extra_base == NULL) {
+ return NULL;
+ }
+ char* aligned_base = chop_extra_memory(size, alignment, extra_base, extra_size);
+ // After we have an aligned address, we can replace anonymous mapping with file mapping
+ if (replace_existing_mapping_with_file_mapping(aligned_base, size, file_desc) == NULL) {
+ vm_exit_during_initialization(err_msg("Error in mapping Java heap at the given filesystem directory"));
+ }
+ MemTracker::record_virtual_memory_commit((address)aligned_base, size, CALLER_PC);
return aligned_base;
}
@@ -907,6 +915,123 @@ size_t os::Posix::get_initial_stack_size(ThreadType thr_type, size_t req_stack_s
return stack_size;
}
+#ifndef ZERO
+#ifndef ARM
+static bool get_frame_at_stack_banging_point(JavaThread* thread, address pc, const void* ucVoid, frame* fr) {
+ if (Interpreter::contains(pc)) {
+ // interpreter performs stack banging after the fixed frame header has
+ // been generated while the compilers perform it before. To maintain
+ // semantic consistency between interpreted and compiled frames, the
+ // method returns the Java sender of the current frame.
+ *fr = os::fetch_frame_from_context(ucVoid);
+ if (!fr->is_first_java_frame()) {
+ // get_frame_at_stack_banging_point() is only called when we
+ // have well defined stacks so java_sender() calls do not need
+ // to assert safe_for_sender() first.
+ *fr = fr->java_sender();
+ }
+ } else {
+ // more complex code with compiled code
+ assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above");
+ CodeBlob* cb = CodeCache::find_blob(pc);
+ if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) {
+ // Not sure where the pc points to, fallback to default
+ // stack overflow handling
+ return false;
+ } else {
+ // in compiled code, the stack banging is performed just after the return pc
+ // has been pushed on the stack
+ *fr = os::fetch_compiled_frame_from_context(ucVoid);
+ if (!fr->is_java_frame()) {
+ assert(!fr->is_first_frame(), "Safety check");
+ // See java_sender() comment above.
+ *fr = fr->java_sender();
+ }
+ }
+ }
+ assert(fr->is_java_frame(), "Safety check");
+ return true;
+}
+#endif // ARM
+
+// Returns true if the signal handler should just continue, i.e. return right after calling this.
+bool os::Posix::handle_stack_overflow(JavaThread* thread, address addr, address pc,
+ const void* ucVoid, address* stub) {
+ // stack overflow
+ StackOverflow* overflow_state = thread->stack_overflow_state();
+ if (overflow_state->in_stack_yellow_reserved_zone(addr)) {
+ if (thread->thread_state() == _thread_in_Java) {
+#ifndef ARM
+ // arm32 doesn't have this
+ if (overflow_state->in_stack_reserved_zone(addr)) {
+ frame fr;
+ if (get_frame_at_stack_banging_point(thread, pc, ucVoid, &fr)) {
+ assert(fr.is_java_frame(), "Must be a Java frame");
+ frame activation =
+ SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr);
+ if (activation.sp() != NULL) {
+ overflow_state->disable_stack_reserved_zone();
+ if (activation.is_interpreted_frame()) {
+ overflow_state->set_reserved_stack_activation((address)(activation.fp()
+ // Some platforms use frame pointers for interpreter frames, others use initial sp.
+#if !defined(PPC64) && !defined(S390)
+ + frame::interpreter_frame_initial_sp_offset
+#endif
+ ));
+ } else {
+ overflow_state->set_reserved_stack_activation((address)activation.unextended_sp());
+ }
+ return true; // just continue
+ }
+ }
+ }
+#endif // ARM
+ // Throw a stack overflow exception. Guard pages will be reenabled
+ // while unwinding the stack.
+ overflow_state->disable_stack_yellow_reserved_zone();
+ *stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW);
+ } else {
+ // Thread was in the vm or native code. Return and try to finish.
+ overflow_state->disable_stack_yellow_reserved_zone();
+ return true; // just continue
+ }
+ } else if (overflow_state->in_stack_red_zone(addr)) {
+ // Fatal red zone violation. Disable the guard pages and fall through
+ // to handle_unexpected_exception way down below.
+ overflow_state->disable_stack_red_zone();
+ tty->print_raw_cr("An irrecoverable stack overflow has occurred.");
+
+ // This is a likely cause, but hard to verify. Let's just print
+ // it as a hint.
+ tty->print_raw_cr("Please check if any of your loaded .so files has "
+ "enabled executable stack (see man page execstack(8))");
+
+ } else {
+#if !defined(AIX) && !defined(__APPLE__)
+ // bsd and aix don't have this
+
+ // Accessing stack address below sp may cause SEGV if current
+ // thread has MAP_GROWSDOWN stack. This should only happen when
+ // current thread was created by user code with MAP_GROWSDOWN flag
+ // and then attached to VM. See notes in os_linux.cpp.
+ if (thread->osthread()->expanding_stack() == 0) {
+ thread->osthread()->set_expanding_stack();
+ if (os::Linux::manually_expand_stack(thread, addr)) {
+ thread->osthread()->clear_expanding_stack();
+ return true; // just continue
+ }
+ thread->osthread()->clear_expanding_stack();
+ } else {
+ fatal("recursive segv. expanding stack.");
+ }
+#else
+ tty->print_raw_cr("SIGSEGV happened inside stack but outside yellow and red zone.");
+#endif // AIX or BSD
+ }
+ return false;
+}
+#endif // ZERO
+
bool os::Posix::is_root(uid_t uid){
return ROOT_UID == uid;
}
diff --git a/src/hotspot/os/posix/os_posix.hpp b/src/hotspot/os/posix/os_posix.hpp
index 514932f57d3..7a6e7e60479 100644
--- a/src/hotspot/os/posix/os_posix.hpp
+++ b/src/hotspot/os/posix/os_posix.hpp
@@ -106,6 +106,10 @@ class Posix {
#endif
static void to_RTC_abstime(timespec* abstime, int64_t millis);
+
+ static bool handle_stack_overflow(JavaThread* thread, address addr, address pc,
+ const void* ucVoid,
+ address* stub);
};
/*
diff --git a/src/hotspot/os/posix/signals_posix.cpp b/src/hotspot/os/posix/signals_posix.cpp
index 4eea0664566..94251da145d 100644
--- a/src/hotspot/os/posix/signals_posix.cpp
+++ b/src/hotspot/os/posix/signals_posix.cpp
@@ -26,6 +26,7 @@
#include "jvm.h"
#include "logging/log.hpp"
+#include "runtime/atomic.hpp"
#include "runtime/globals.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/os.hpp"
@@ -71,10 +72,6 @@ extern "C" {
static sigset_t check_signal_done;
static bool check_signals = true;
-// This boolean allows users to forward their own non-matching signals
-// to JVM_handle_bsd_signal/JVM_handle_linux_signal, harmlessly.
-static bool signal_handlers_are_installed = false;
-
debug_only(static bool signal_sets_initialized = false);
static sigset_t unblocked_sigs, vm_sigs, preinstalled_sigs;
struct sigaction sigact[NSIG];
@@ -261,6 +258,8 @@ static const struct {
{ -1, NULL }
};
+static const char* get_signal_name(int sig, char* out, size_t outlen);
+
////////////////////////////////////////////////////////////////////////////////
// sun.misc.Signal support
@@ -313,6 +312,8 @@ static int check_pending_signals() {
}
} while (threadIsSuspended);
}
+ ShouldNotReachHere();
+ return 0; // Satisfy compiler
}
int os::signal_wait() {
@@ -408,15 +409,76 @@ bool PosixSignals::chained_handler(int sig, siginfo_t* siginfo, void* context) {
return chained;
}
+///// Synchronous (non-deferrable) error signals (ILL, SEGV, FPE, BUS, TRAP):
+
+// These signals are special because they cannot be deferred and, if they
+// happen while delivery is blocked for the receiving thread, will cause UB
+// (in practice typically resulting in sudden process deaths or hangs, see
+// JDK-8252533). So we must take care never to block them when we cannot be
+// absolutely sure they won't happen. In practice, this is always.
+//
+// Relevant Posix quote:
+// "The behavior of a process is undefined after it ignores a SIGFPE, SIGILL,
+// SIGSEGV, or SIGBUS signal that was not generated by kill(), sigqueue(), or
+// raise()."
+//
+// We also include SIGTRAP in that list of never-to-block-signals. While not
+// mentioned by the Posix documentation, in our (SAP's) experience blocking it
+// causes similar problems. Besides, during normal operation - outside of error
+// handling - SIGTRAP may be used for implicit NULL checking, so it makes sense
+// to never block it.
+//
+// We deal with those signals in two ways:
+// - we just never explicitly block them, which includes not accidentally blocking
+// them via sa_mask when establishing signal handlers.
+// - as an additional safety measure, at the entrance of a signal handler, we
+// unblock them explicitly.
+
+static void add_error_signals_to_set(sigset_t* set) {
+ sigaddset(set, SIGILL);
+ sigaddset(set, SIGBUS);
+ sigaddset(set, SIGFPE);
+ sigaddset(set, SIGSEGV);
+ sigaddset(set, SIGTRAP);
+}
+
+static void remove_error_signals_from_set(sigset_t* set) {
+ sigdelset(set, SIGILL);
+ sigdelset(set, SIGBUS);
+ sigdelset(set, SIGFPE);
+ sigdelset(set, SIGSEGV);
+ sigdelset(set, SIGTRAP);
+}
+
+// Unblock all signals whose delivery cannot be deferred and which, if they happen
+// while delivery is blocked, would cause crashes or hangs (JDK-8252533).
+void PosixSignals::unblock_error_signals() {
+ sigset_t set;
+ sigemptyset(&set);
+ add_error_signals_to_set(&set);
+ ::pthread_sigmask(SIG_UNBLOCK, &set, NULL);
+}
+
+class ErrnoPreserver: public StackObj {
+ const int _saved;
+public:
+ ErrnoPreserver() : _saved(errno) {}
+ ~ErrnoPreserver() { errno = _saved; }
+};
+
////////////////////////////////////////////////////////////////////////////////
-// signal handling (except suspend/resume)
+// JVM_handle_(linux|aix|bsd)_signal()
-// This routine may be used by user applications as a "hook" to catch signals.
+// This routine is the shared part of the central hotspot signal handler. It can
+// also be called by a user application, if a user application prefers to do
+// signal handling itself - in that case it needs to pass signals the VM
+// internally uses on to the VM first.
+//
// The user-defined signal handler must pass unrecognized signals to this
// routine, and if it returns true (non-zero), then the signal handler must
// return immediately. If the flag "abort_if_unrecognized" is true, then this
-// routine will never retun false (zero), but instead will execute a VM panic
-// routine kill the process.
+// routine will never return false (zero), but instead will execute a VM panic
+// routine to kill the process.
//
// If this routine returns false, it is OK to call it again. This allows
// the user-defined signal handler to perform checks either before or after
@@ -438,72 +500,116 @@ bool PosixSignals::chained_handler(int sig, siginfo_t* siginfo, void* context) {
//
#if defined(BSD)
-extern "C" JNIEXPORT int JVM_handle_bsd_signal(int signo, siginfo_t* siginfo,
- void* ucontext,
- int abort_if_unrecognized);
+#define JVM_HANDLE_XXX_SIGNAL JVM_handle_bsd_signal
#elif defined(AIX)
-extern "C" JNIEXPORT int JVM_handle_aix_signal(int signo, siginfo_t* siginfo,
- void* ucontext,
- int abort_if_unrecognized);
+#define JVM_HANDLE_XXX_SIGNAL JVM_handle_aix_signal
+#elif defined(LINUX)
+#define JVM_HANDLE_XXX_SIGNAL JVM_handle_linux_signal
#else
-extern "C" JNIEXPORT int JVM_handle_linux_signal(int signo, siginfo_t* siginfo,
- void* ucontext,
- int abort_if_unrecognized);
+#error who are you?
#endif
-#if defined(AIX)
+extern "C" JNIEXPORT
+int JVM_HANDLE_XXX_SIGNAL(int sig, siginfo_t* info,
+ void* ucVoid, int abort_if_unrecognized)
+{
+ assert(info != NULL && ucVoid != NULL, "sanity");
-// Set thread signal mask (for some reason on AIX sigthreadmask() seems
-// to be the thing to call; documentation is not terribly clear about whether
-// pthread_sigmask also works, and if it does, whether it does the same.
-bool set_thread_signal_mask(int how, const sigset_t* set, sigset_t* oset) {
- const int rc = ::pthread_sigmask(how, set, oset);
- // return value semantics differ slightly for error case:
- // pthread_sigmask returns error number, sigthreadmask -1 and sets global errno
- // (so, pthread_sigmask is more theadsafe for error handling)
- // But success is always 0.
- return rc == 0 ? true : false;
-}
+ // Note: it's not uncommon that JNI code uses signal/sigset to install,
+ // then restore, certain signal handlers (e.g. to temporarily block SIGPIPE,
+ // or have a SIGILL handler when detecting CPU type). When that happens,
+ // this handler might be invoked with junk info/ucVoid. To avoid an unnecessary
+ // crash when libjsig is not preloaded, try to handle signals that do not require
+ // siginfo/ucontext first.
-// Function to unblock all signals which are, according
-// to POSIX, typical program error signals. If they happen while being blocked,
-// they typically will bring down the process immediately.
-bool unblock_program_error_signals() {
- sigset_t set;
- sigemptyset(&set);
- sigaddset(&set, SIGILL);
- sigaddset(&set, SIGBUS);
- sigaddset(&set, SIGFPE);
- sigaddset(&set, SIGSEGV);
- return set_thread_signal_mask(SIG_UNBLOCK, &set, NULL);
-}
+ // Preserve errno value over signal handler.
+ // (note: RAII ok here, even with JFR thread crash protection, see below).
+ ErrnoPreserver ep;
-#endif
+ // Unblock all synchronous error signals (see JDK-8252533)
+ PosixSignals::unblock_error_signals();
-// Renamed from 'signalHandler' to avoid collision with other shared libs.
-static void javaSignalHandler(int sig, siginfo_t* info, void* uc) {
- assert(info != NULL && uc != NULL, "it must be old kernel");
+ ucontext_t* const uc = (ucontext_t*) ucVoid;
+ Thread* const t = Thread::current_or_null_safe();
-// TODO: reconcile the differences between Linux/BSD vs AIX here!
-#if defined(AIX)
- // Never leave program error signals blocked;
- // on all our platforms they would bring down the process immediately when
- // getting raised while being blocked.
- unblock_program_error_signals();
+ // Handle JFR thread crash protection.
+ // Note: this may cause us to longjmp away. Do not use any code before this
+ // point which really needs any form of epilogue code running, eg RAII objects.
+ os::ThreadCrashProtection::check_crash_protection(sig, t);
+
+ bool signal_was_handled = false;
+
+ // Handle assertion poison page accesses.
+#ifdef CAN_SHOW_REGISTERS_ON_ASSERT
+ if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) {
+ signal_was_handled = handle_assert_poison_fault(ucVoid, info->si_addr);
+ }
#endif
- int orig_errno = errno; // Preserve errno value over signal handler.
-#if defined(BSD)
- JVM_handle_bsd_signal(sig, info, uc, true);
-#elif defined(AIX)
- JVM_handle_aix_signal(sig, info, uc, true);
+ // Ignore SIGPIPE and SIGXFSZ (4229104, 6499219).
+ if (sig == SIGPIPE || sig == SIGXFSZ) {
+ PosixSignals::chained_handler(sig, info, ucVoid);
+ signal_was_handled = true; // unconditionally.
+ }
+
+ // Call platform dependent signal handler.
+ if (!signal_was_handled) {
+ JavaThread* const jt = (t != NULL && t->is_Java_thread()) ? (JavaThread*) t : NULL;
+ signal_was_handled = PosixSignals::pd_hotspot_signal_handler(sig, info, uc, jt);
+ }
+
+ // From here on, if the signal had not been handled, it is a fatal error.
+
+ // Give the chained signal handler - should it exist - a shot.
+ if (!signal_was_handled) {
+ signal_was_handled = PosixSignals::chained_handler(sig, info, ucVoid);
+ }
+
+ // Invoke fatal error handling.
+ if (!signal_was_handled && abort_if_unrecognized) {
+ // Extract pc from context for the error handler to display.
+ address pc = NULL;
+ if (uc != NULL) {
+ // prepare fault pc address for error reporting.
+ if (S390_ONLY(sig == SIGILL || sig == SIGFPE) NOT_S390(false)) {
+ pc = (address)info->si_addr;
+ } else {
+ pc = PosixSignals::ucontext_get_pc(uc);
+ }
+ }
+#if defined(ZERO) && !defined(PRODUCT)
+ char buf[64];
+ VMError::report_and_die(t, sig, pc, info, ucVoid,
+ "\n#"
+ "\n# /--------------------\\"
+ "\n# | %-7s |"
+ "\n# \\---\\ /--------------/"
+ "\n# /"
+ "\n# [-] |\\_/| "
+ "\n# (+)=C |o o|__ "
+ "\n# | | =-*-=__\\ "
+ "\n# OOO c_c_(___)",
+ get_signal_name(sig, buf, sizeof(buf)));
#else
- JVM_handle_linux_signal(sig, info, uc, true);
+ VMError::report_and_die(t, sig, pc, info, ucVoid);
#endif
- errno = orig_errno;
+ // VMError should not return.
+ ShouldNotReachHere();
+ }
+ return signal_was_handled;
+}
+
+// Entry point for the hotspot signal handler.
+static void javaSignalHandler(int sig, siginfo_t* info, void* ucVoid) {
+ // Do not add any code here!
+ // Only add code to either JVM_HANDLE_XXX_SIGNAL or PosixSignals::pd_hotspot_signal_handler.
+ (void)JVM_HANDLE_XXX_SIGNAL(sig, info, ucVoid, true);
}
static void UserHandler(int sig, void *siginfo, void *context) {
+
+ PosixSignals::unblock_error_signals();
+
// Ctrl-C is pressed during error reporting, likely because the error
// handler fails to abort. Let VM die immediately.
if (sig == SIGINT && VMError::is_error_reported()) {
@@ -702,23 +808,7 @@ void* os::signal(int signal_number, void* handler) {
struct sigaction sigAct, oldSigAct;
sigfillset(&(sigAct.sa_mask));
-
-#if defined(AIX)
- // Do not block out synchronous signals in the signal handler.
- // Blocking synchronous signals only makes sense if you can really
- // be sure that those signals won't happen during signal handling,
- // when the blocking applies. Normal signal handlers are lean and
- // do not cause signals. But our signal handlers tend to be "risky"
- // - secondary SIGSEGV, SIGILL, SIGBUS' may and do happen.
- // On AIX, PASE there was a case where a SIGSEGV happened, followed
- // by a SIGILL, which was blocked due to the signal mask. The process
- // just hung forever. Better to crash from a secondary signal than to hang.
- sigdelset(&(sigAct.sa_mask), SIGSEGV);
- sigdelset(&(sigAct.sa_mask), SIGBUS);
- sigdelset(&(sigAct.sa_mask), SIGILL);
- sigdelset(&(sigAct.sa_mask), SIGFPE);
- sigdelset(&(sigAct.sa_mask), SIGTRAP);
-#endif
+ remove_error_signals_from_set(&(sigAct.sa_mask));
sigAct.sa_flags = SA_RESTART|SA_SIGINFO;
sigAct.sa_handler = CAST_TO_FN_PTR(sa_handler_t, handler);
@@ -763,9 +853,7 @@ void os::run_periodic_checks() {
do_signal_check(SIGBUS);
do_signal_check(SIGPIPE);
do_signal_check(SIGXFSZ);
-#if defined(PPC64)
- do_signal_check(SIGTRAP);
-#endif
+ PPC64_ONLY(do_signal_check(SIGTRAP);)
// ReduceSignalUsage allows the user to override these handlers
// see comments at the very top and jvm_md.h
@@ -932,7 +1020,6 @@ static bool is_valid_signal(int sig) {
#endif
}
-// Returned string is a constant. For unknown signals "UNKNOWN" is returned.
static const char* get_signal_name(int sig, char* out, size_t outlen) {
const char* ret = NULL;
@@ -1072,7 +1159,7 @@ int os::get_signal_number(const char* signal_name) {
return -1;
}
-void set_signal_handler(int sig, bool set_installed) {
+void set_signal_handler(int sig) {
// Check for overwrite.
struct sigaction oldAct;
sigaction(sig, (struct sigaction*)NULL, &oldAct);
@@ -1083,7 +1170,7 @@ void set_signal_handler(int sig, bool set_installed) {
if (oldhand != CAST_FROM_FN_PTR(void*, SIG_DFL) &&
oldhand != CAST_FROM_FN_PTR(void*, SIG_IGN) &&
oldhand != CAST_FROM_FN_PTR(void*, (sa_sigaction_t)javaSignalHandler)) {
- if (AllowUserSignalHandlers || !set_installed) {
+ if (AllowUserSignalHandlers) {
// Do not overwrite; user takes responsibility to forward to us.
return;
} else if (UseSignalChaining) {
@@ -1099,13 +1186,9 @@ void set_signal_handler(int sig, bool set_installed) {
struct sigaction sigAct;
sigfillset(&(sigAct.sa_mask));
- sigAct.sa_handler = SIG_DFL;
- if (!set_installed) {
- sigAct.sa_flags = SA_SIGINFO|SA_RESTART;
- } else {
- sigAct.sa_sigaction = javaSignalHandler;
- sigAct.sa_flags = SA_SIGINFO|SA_RESTART;
- }
+ remove_error_signals_from_set(&(sigAct.sa_mask));
+ sigAct.sa_sigaction = javaSignalHandler;
+ sigAct.sa_flags = SA_SIGINFO|SA_RESTART;
#if defined(__APPLE__)
// Needed for main thread as XNU (Mac OS X kernel) will only deliver SIGSEGV
// (which starts as SIGBUS) on main thread with faulting address inside "stack+guard pages"
@@ -1132,87 +1215,75 @@ void set_signal_handler(int sig, bool set_installed) {
assert(oldhand2 == oldhand, "no concurrent signal handler installation");
}
-// install signal handlers for signals that HotSpot needs to
-// handle in order to support Java-level exception handling.
-
-bool PosixSignals::are_signal_handlers_installed() {
- return signal_handlers_are_installed;
-}
-
// install signal handlers for signals that HotSpot needs to
// handle in order to support Java-level exception handling.
void PosixSignals::install_signal_handlers() {
- if (!signal_handlers_are_installed) {
- signal_handlers_are_installed = true;
-
- // signal-chaining
- typedef void (*signal_setting_t)();
- signal_setting_t begin_signal_setting = NULL;
- signal_setting_t end_signal_setting = NULL;
- begin_signal_setting = CAST_TO_FN_PTR(signal_setting_t,
- dlsym(RTLD_DEFAULT, "JVM_begin_signal_setting"));
- if (begin_signal_setting != NULL) {
- end_signal_setting = CAST_TO_FN_PTR(signal_setting_t,
- dlsym(RTLD_DEFAULT, "JVM_end_signal_setting"));
- get_signal_action = CAST_TO_FN_PTR(get_signal_t,
- dlsym(RTLD_DEFAULT, "JVM_get_signal_action"));
- libjsig_is_loaded = true;
- assert(UseSignalChaining, "should enable signal-chaining");
- }
- if (libjsig_is_loaded) {
- // Tell libjsig jvm is setting signal handlers
- (*begin_signal_setting)();
- }
- set_signal_handler(SIGSEGV, true);
- set_signal_handler(SIGPIPE, true);
- set_signal_handler(SIGBUS, true);
- set_signal_handler(SIGILL, true);
- set_signal_handler(SIGFPE, true);
-#if defined(PPC64) || defined(AIX)
- set_signal_handler(SIGTRAP, true);
-#endif
- set_signal_handler(SIGXFSZ, true);
+ // signal-chaining
+ typedef void (*signal_setting_t)();
+ signal_setting_t begin_signal_setting = NULL;
+ signal_setting_t end_signal_setting = NULL;
+ begin_signal_setting = CAST_TO_FN_PTR(signal_setting_t,
+ dlsym(RTLD_DEFAULT, "JVM_begin_signal_setting"));
+ if (begin_signal_setting != NULL) {
+ end_signal_setting = CAST_TO_FN_PTR(signal_setting_t,
+ dlsym(RTLD_DEFAULT, "JVM_end_signal_setting"));
+ get_signal_action = CAST_TO_FN_PTR(get_signal_t,
+ dlsym(RTLD_DEFAULT, "JVM_get_signal_action"));
+ libjsig_is_loaded = true;
+ assert(UseSignalChaining, "should enable signal-chaining");
+ }
+ if (libjsig_is_loaded) {
+ // Tell libjsig jvm is setting signal handlers
+ (*begin_signal_setting)();
+ }
+
+ set_signal_handler(SIGSEGV);
+ set_signal_handler(SIGPIPE);
+ set_signal_handler(SIGBUS);
+ set_signal_handler(SIGILL);
+ set_signal_handler(SIGFPE);
+ PPC64_ONLY(set_signal_handler(SIGTRAP);)
+ set_signal_handler(SIGXFSZ);
#if defined(__APPLE__)
- // In Mac OS X 10.4, CrashReporter will write a crash log for all 'fatal' signals, including
- // signals caught and handled by the JVM. To work around this, we reset the mach task
- // signal handler that's placed on our process by CrashReporter. This disables
- // CrashReporter-based reporting.
- //
- // This work-around is not necessary for 10.5+, as CrashReporter no longer intercedes
- // on caught fatal signals.
- //
- // Additionally, gdb installs both standard BSD signal handlers, and mach exception
- // handlers. By replacing the existing task exception handler, we disable gdb's mach
- // exception handling, while leaving the standard BSD signal handlers functional.
- kern_return_t kr;
- kr = task_set_exception_ports(mach_task_self(),
- EXC_MASK_BAD_ACCESS | EXC_MASK_ARITHMETIC,
- MACH_PORT_NULL,
- EXCEPTION_STATE_IDENTITY,
- MACHINE_THREAD_STATE);
-
- assert(kr == KERN_SUCCESS, "could not set mach task signal handler");
+ // In Mac OS X 10.4, CrashReporter will write a crash log for all 'fatal' signals, including
+ // signals caught and handled by the JVM. To work around this, we reset the mach task
+ // signal handler that's placed on our process by CrashReporter. This disables
+ // CrashReporter-based reporting.
+ //
+ // This work-around is not necessary for 10.5+, as CrashReporter no longer intercedes
+ // on caught fatal signals.
+ //
+ // Additionally, gdb installs both standard BSD signal handlers, and mach exception
+ // handlers. By replacing the existing task exception handler, we disable gdb's mach
+ // exception handling, while leaving the standard BSD signal handlers functional.
+ kern_return_t kr;
+ kr = task_set_exception_ports(mach_task_self(),
+ EXC_MASK_BAD_ACCESS | EXC_MASK_ARITHMETIC,
+ MACH_PORT_NULL,
+ EXCEPTION_STATE_IDENTITY,
+ MACHINE_THREAD_STATE);
+
+ assert(kr == KERN_SUCCESS, "could not set mach task signal handler");
#endif
+ if (libjsig_is_loaded) {
+ // Tell libjsig jvm finishes setting signal handlers
+ (*end_signal_setting)();
+ }
+
+ // We don't activate signal checker if libjsig is in place, we trust ourselves
+ // and if UserSignalHandler is installed all bets are off.
+ // Log that signal checking is off only if -verbose:jni is specified.
+ if (CheckJNICalls) {
if (libjsig_is_loaded) {
- // Tell libjsig jvm finishes setting signal handlers
- (*end_signal_setting)();
+ log_debug(jni, resolve)("Info: libjsig is activated, all active signal checking is disabled");
+ check_signals = false;
}
-
- // We don't activate signal checker if libjsig is in place, we trust ourselves
- // and if UserSignalHandler is installed all bets are off.
- // Log that signal checking is off only if -verbose:jni is specified.
- if (CheckJNICalls) {
- if (libjsig_is_loaded) {
- log_debug(jni, resolve)("Info: libjsig is activated, all active signal checking is disabled");
- check_signals = false;
- }
- if (AllowUserSignalHandlers) {
- log_debug(jni, resolve)("Info: AllowUserSignalHandlers is activated, all active signal checking is disabled");
- check_signals = false;
- }
+ if (AllowUserSignalHandlers) {
+ log_debug(jni, resolve)("Info: AllowUserSignalHandlers is activated, all active signal checking is disabled");
+ check_signals = false;
}
}
}
@@ -1303,10 +1374,6 @@ bool PosixSignals::is_sig_ignored(int sig) {
}
}
-int PosixSignals::unblock_thread_signal_mask(const sigset_t *set) {
- return pthread_sigmask(SIG_UNBLOCK, set, NULL);
-}
-
address PosixSignals::ucontext_get_pc(const ucontext_t* ctx) {
#if defined(AIX)
return os::Aix::ucontext_get_pc(ctx);
@@ -1354,9 +1421,7 @@ void PosixSignals::signal_sets_init() {
sigaddset(&unblocked_sigs, SIGSEGV);
sigaddset(&unblocked_sigs, SIGBUS);
sigaddset(&unblocked_sigs, SIGFPE);
- #if defined(PPC64) || defined(AIX)
- sigaddset(&unblocked_sigs, SIGTRAP);
- #endif
+ PPC64_ONLY(sigaddset(&unblocked_sigs, SIGTRAP);)
sigaddset(&unblocked_sigs, SR_signum);
if (!ReduceSignalUsage) {
@@ -1470,10 +1535,13 @@ static void suspend_save_context(OSThread *osthread, siginfo_t* siginfo, ucontex
// Currently only ever called on the VMThread and JavaThreads (PC sampling)
//
static void SR_handler(int sig, siginfo_t* siginfo, ucontext_t* context) {
+
// Save and restore errno to avoid confusing native code with EINTR
// after sigsuspend.
int old_errno = errno;
+ PosixSignals::unblock_error_signals();
+
Thread* thread = Thread::current_or_null_safe();
assert(thread != NULL, "Missing current thread in SR_handler");
@@ -1567,6 +1635,7 @@ int PosixSignals::SR_initialize() {
// SR_signum is blocked by default.
pthread_sigmask(SIG_BLOCK, NULL, &act.sa_mask);
+ remove_error_signals_from_set(&(act.sa_mask));
if (sigaction(SR_signum, &act, 0) == -1) {
return -1;
diff --git a/src/hotspot/os/posix/signals_posix.hpp b/src/hotspot/os/posix/signals_posix.hpp
index a2f7d955e14..7f0f1b55884 100644
--- a/src/hotspot/os/posix/signals_posix.hpp
+++ b/src/hotspot/os/posix/signals_posix.hpp
@@ -38,14 +38,16 @@ class PosixSignals : public AllStatic {
public:
- static bool are_signal_handlers_installed();
+ // The platform dependent parts of the central hotspot signal handler.
+ // Returns true if the signal had been recognized and handled, false if not. If true, caller should
+ // return from signal handling.
+ static bool pd_hotspot_signal_handler(int sig, siginfo_t* info, ucontext_t* uc, JavaThread* thread);
+
static void install_signal_handlers();
static bool is_sig_ignored(int sig);
static void signal_sets_init();
- // unblocks the signal masks for current thread
- static int unblock_thread_signal_mask(const sigset_t *set);
static void hotspot_sigmask(Thread* thread);
static void print_signal_handler(outputStream* st, int sig, char* buf, size_t buflen);
@@ -64,6 +66,11 @@ class PosixSignals : public AllStatic {
// sun.misc.Signal support
static void jdk_misc_signal_init();
+
+ // Unblock all signals whose delivery cannot be deferred and which, if they happen
+ // while delivery is blocked, would cause crashes or hangs (see JDK-8252533).
+ static void unblock_error_signals();
+
};
#endif // OS_POSIX_SIGNALS_POSIX_HPP
diff --git a/src/hotspot/os/posix/vmError_posix.cpp b/src/hotspot/os/posix/vmError_posix.cpp
index 9c83d263e71..bde46e28741 100644
--- a/src/hotspot/os/posix/vmError_posix.cpp
+++ b/src/hotspot/os/posix/vmError_posix.cpp
@@ -101,15 +101,8 @@ address VMError::get_resetted_sighandler(int sig) {
}
static void crash_handler(int sig, siginfo_t* info, void* ucVoid) {
- // unmask current signal
- sigset_t newset;
- sigemptyset(&newset);
- sigaddset(&newset, sig);
- // also unmask other synchronous signals
- for (int i = 0; i < NUM_SIGNALS; i++) {
- sigaddset(&newset, SIGNALS[i]);
- }
- PosixSignals::unblock_thread_signal_mask(&newset);
+
+ PosixSignals::unblock_error_signals();
// support safefetch faults in error handling
ucontext_t* const uc = (ucontext_t*) ucVoid;
@@ -139,16 +132,10 @@ static void crash_handler(int sig, siginfo_t* info, void* ucVoid) {
}
void VMError::reset_signal_handlers() {
- // install signal handlers for all synchronous program error signals
- sigset_t newset;
- sigemptyset(&newset);
-
for (int i = 0; i < NUM_SIGNALS; i++) {
save_signal(i, SIGNALS[i]);
os::signal(SIGNALS[i], CAST_FROM_FN_PTR(void *, crash_handler));
- sigaddset(&newset, SIGNALS[i]);
}
- PosixSignals::unblock_thread_signal_mask(&newset);
}
// Write a hint to the stream in case siginfo relates to a segv/bus error
diff --git a/src/hotspot/os/windows/globals_windows.hpp b/src/hotspot/os/windows/globals_windows.hpp
index a712e102a76..61157041f88 100644
--- a/src/hotspot/os/windows/globals_windows.hpp
+++ b/src/hotspot/os/windows/globals_windows.hpp
@@ -28,24 +28,25 @@
//
// Declare Windows specific flags. They are not available on other platforms.
//
-#define RUNTIME_OS_FLAGS(develop, \
- develop_pd, \
- product, \
- product_pd, \
- notproduct, \
- range, \
- constraint)
+#define RUNTIME_OS_FLAGS(develop, \
+ develop_pd, \
+ product, \
+ product_pd, \
+ notproduct, \
+ range, \
+ constraint) \
+ \
+product(bool, UseOSErrorReporting, false, \
+ "Let VM fatal error propagate to the OS (ie. WER on Windows)")
// end of RUNTIME_OS_FLAGS
-
//
// Defines Windows-specific default values. The flags are available on all
// platforms, but they may have different default values on other platforms.
//
define_pd_global(bool, UseLargePages, false);
define_pd_global(bool, UseLargePagesIndividualAllocation, true);
-define_pd_global(bool, UseOSErrorReporting, false); // for now.
define_pd_global(bool, UseThreadPriorities, true) ;
#endif // OS_WINDOWS_GLOBALS_WINDOWS_HPP
diff --git a/src/hotspot/os/windows/os_windows.cpp b/src/hotspot/os/windows/os_windows.cpp
index 0416605e309..3b829cddac7 100644
--- a/src/hotspot/os/windows/os_windows.cpp
+++ b/src/hotspot/os/windows/os_windows.cpp
@@ -2144,6 +2144,8 @@ static int check_pending_signals() {
}
} while (threadIsSuspended);
}
+ ShouldNotReachHere();
+ return 0; // Satisfy compiler
}
int os::signal_wait() {
@@ -2354,7 +2356,7 @@ static inline void report_error(Thread* t, DWORD exception_code,
address addr, void* siginfo, void* context) {
VMError::report_and_die(t, exception_code, addr, siginfo, context);
- // If UseOsErrorReporting, this will return here and save the error file
+ // If UseOSErrorReporting, this will return here and save the error file
// somewhere where we can find it in the minidump.
}
@@ -3137,7 +3139,7 @@ void os::split_reserved_memory(char *base, size_t size, size_t split) {
// Multiple threads can race in this code but it's not possible to unmap small sections of
// virtual space to get requested alignment, like posix-like os's.
// Windows prevents multiple thread from remapping over each other so this loop is thread-safe.
-char* os::reserve_memory_aligned(size_t size, size_t alignment, int file_desc) {
+static char* map_or_reserve_memory_aligned(size_t size, size_t alignment, int file_desc) {
assert((alignment & (os::vm_allocation_granularity() - 1)) == 0,
"Alignment must be a multiple of allocation granularity (page size)");
assert((size & (alignment -1)) == 0, "size must be 'alignment' aligned");
@@ -3148,7 +3150,9 @@ char* os::reserve_memory_aligned(size_t size, size_t alignment, int file_desc) {
char* aligned_base = NULL;
do {
- char* extra_base = os::reserve_memory_with_fd(extra_size, file_desc);
+ char* extra_base = file_desc != -1 ?
+ os::map_memory_to_file(extra_size, file_desc) :
+ os::reserve_memory(extra_size);
if (extra_base == NULL) {
return NULL;
}
@@ -3161,13 +3165,23 @@ char* os::reserve_memory_aligned(size_t size, size_t alignment, int file_desc) {
os::release_memory(extra_base, extra_size);
}
- aligned_base = os::attempt_reserve_memory_at(aligned_base, size, file_desc);
+ aligned_base = file_desc != -1 ?
+ os::attempt_map_memory_to_file_at(aligned_base, size, file_desc) :
+ os::attempt_reserve_memory_at(aligned_base, size);
} while (aligned_base == NULL);
return aligned_base;
}
+char* os::reserve_memory_aligned(size_t size, size_t alignment) {
+ return map_or_reserve_memory_aligned(size, alignment, -1 /* file_desc */);
+}
+
+char* os::map_memory_to_file_aligned(size_t size, size_t alignment, int fd) {
+ return map_or_reserve_memory_aligned(size, alignment, fd);
+}
+
char* os::pd_reserve_memory(size_t bytes) {
return pd_attempt_reserve_memory_at(NULL /* addr */, bytes);
}
@@ -3205,7 +3219,7 @@ char* os::pd_attempt_reserve_memory_at(char* addr, size_t bytes) {
return res;
}
-char* os::pd_attempt_reserve_memory_at(char* requested_addr, size_t bytes, int file_desc) {
+char* os::pd_attempt_map_memory_to_file_at(char* requested_addr, size_t bytes, int file_desc) {
assert(file_desc >= 0, "file_desc is not valid");
return map_memory_to_file(requested_addr, bytes, file_desc);
}
diff --git a/src/hotspot/os_cpu/aix_ppc/os_aix_ppc.cpp b/src/hotspot/os_cpu/aix_ppc/os_aix_ppc.cpp
index 94e515932f2..f214eee454a 100644
--- a/src/hotspot/os_cpu/aix_ppc/os_aix_ppc.cpp
+++ b/src/hotspot/os_cpu/aix_ppc/os_aix_ppc.cpp
@@ -142,40 +142,11 @@ frame os::fetch_frame_from_context(const void* ucVoid) {
return fr;
}
-bool os::Aix::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) {
- address pc = (address) os::Aix::ucontext_get_pc(uc);
- if (Interpreter::contains(pc)) {
- // Interpreter performs stack banging after the fixed frame header has
- // been generated while the compilers perform it before. To maintain
- // semantic consistency between interpreted and compiled frames, the
- // method returns the Java sender of the current frame.
- *fr = os::fetch_frame_from_context(uc);
- if (!fr->is_first_java_frame()) {
- assert(fr->safe_for_sender(thread), "Safety check");
- *fr = fr->java_sender();
- }
- } else {
- // More complex code with compiled code.
- assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above");
- CodeBlob* cb = CodeCache::find_blob(pc);
- if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) {
- // Not sure where the pc points to, fallback to default
- // stack overflow handling. In compiled code, we bang before
- // the frame is complete.
- return false;
- } else {
- intptr_t* sp = os::Aix::ucontext_get_sp(uc);
- address lr = ucontext_get_lr(uc);
- *fr = frame(sp, lr);
- if (!fr->is_java_frame()) {
- assert(fr->safe_for_sender(thread), "Safety check");
- assert(!fr->is_first_frame(), "Safety check");
- *fr = fr->java_sender();
- }
- }
- }
- assert(fr->is_java_frame(), "Safety check");
- return true;
+frame os::fetch_compiled_frame_from_context(const void* ucVoid) {
+ const ucontext_t* uc = (const ucontext_t*)ucVoid;
+ intptr_t* sp = os::Aix::ucontext_get_sp(uc);
+ address lr = ucontext_get_lr(uc);
+ return frame(sp, lr);
}
frame os::get_sender_for_C_frame(frame* fr) {
@@ -197,45 +168,8 @@ frame os::current_frame() {
return os::get_sender_for_C_frame(&tmp);
}
-// Utility functions
-
-extern "C" JNIEXPORT int
-JVM_handle_aix_signal(int sig, siginfo_t* info, void* ucVoid, int abort_if_unrecognized) {
-
- ucontext_t* uc = (ucontext_t*) ucVoid;
-
- Thread* t = Thread::current_or_null_safe();
-
- SignalHandlerMark shm(t);
-
- // Note: it's not uncommon that JNI code uses signal/sigset to install
- // then restore certain signal handler (e.g. to temporarily block SIGPIPE,
- // or have a SIGILL handler when detecting CPU type). When that happens,
- // JVM_handle_aix_signal() might be invoked with junk info/ucVoid. To
- // avoid unnecessary crash when libjsig is not preloaded, try handle signals
- // that do not require siginfo/ucontext first.
-
- if (sig == SIGPIPE) {
- if (PosixSignals::chained_handler(sig, info, ucVoid)) {
- return 1;
- } else {
- // Ignoring SIGPIPE - see bugs 4229104
- return 1;
- }
- }
-
- JavaThread* thread = NULL;
- VMThread* vmthread = NULL;
- if (PosixSignals::are_signal_handlers_installed()) {
- if (t != NULL) {
- if(t->is_Java_thread()) {
- thread = t->as_Java_thread();
- }
- else if(t->is_VM_thread()) {
- vmthread = (VMThread *)t;
- }
- }
- }
+bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info,
+ ucontext_t* uc, JavaThread* thread) {
// Decide if this trap can be handled by a stub.
address stub = NULL;
@@ -257,8 +191,8 @@ JVM_handle_aix_signal(int sig, siginfo_t* info, void* ucVoid, int abort_if_unrec
}
}
- if (info == NULL || uc == NULL || thread == NULL && vmthread == NULL) {
- goto run_chained_handler;
+ if (info == NULL || uc == NULL) {
+ return false; // Fatal error
}
// If we are a java thread...
@@ -267,56 +201,13 @@ JVM_handle_aix_signal(int sig, siginfo_t* info, void* ucVoid, int abort_if_unrec
// Handle ALL stack overflow variations here
if (sig == SIGSEGV && thread->is_in_full_stack(addr)) {
// stack overflow
- StackOverflow* overflow_state = thread->stack_overflow_state();
-
- //
- // If we are in a yellow zone and we are inside java, we disable the yellow zone and
- // throw a stack overflow exception.
- // If we are in native code or VM C code, we report-and-die. The original coding tried
- // to continue with yellow zone disabled, but that doesn't buy us much and prevents
- // hs_err_pid files.
- if (overflow_state->in_stack_yellow_reserved_zone(addr)) {
- if (thread->thread_state() == _thread_in_Java) {
- if (overflow_state->in_stack_reserved_zone(addr)) {
- frame fr;
- if (os::Aix::get_frame_at_stack_banging_point(thread, uc, &fr)) {
- assert(fr.is_java_frame(), "Must be a Javac frame");
- frame activation =
- SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr);
- if (activation.sp() != NULL) {
- overflow_state->disable_stack_reserved_zone();
- if (activation.is_interpreted_frame()) {
- overflow_state->set_reserved_stack_activation((address)activation.fp());
- } else {
- overflow_state->set_reserved_stack_activation((address)activation.unextended_sp());
- }
- return 1;
- }
- }
- }
- // Throw a stack overflow exception.
- // Guard pages will be reenabled while unwinding the stack.
- overflow_state->disable_stack_yellow_reserved_zone();
- stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW);
- goto run_stub;
- } else {
- // Thread was in the vm or native code. Return and try to finish.
- overflow_state->disable_stack_yellow_reserved_zone();
- return 1;
- }
- } else if (overflow_state->in_stack_red_zone(addr)) {
- // Fatal red zone violation. Disable the guard pages and fall through
- // to handle_unexpected_exception way down below.
- overflow_state->disable_stack_red_zone();
- tty->print_raw_cr("An irrecoverable stack overflow has occurred.");
- goto report_and_die;
+ if (os::Posix::handle_stack_overflow(thread, addr, pc, uc, &stub)) {
+ return true; // continue
+ } else if (stub != NULL) {
+ goto run_stub;
} else {
- // This means a segv happened inside our stack, but not in
- // the guarded zone. I'd like to know when this happens,
- tty->print_raw_cr("SIGSEGV happened inside stack but outside yellow and red zone.");
- goto report_and_die;
+ return false; // Fatal error
}
-
} // end handle SIGSEGV inside stack boundaries
if (thread->thread_state() == _thread_in_Java) {
@@ -355,17 +246,6 @@ JVM_handle_aix_signal(int sig, siginfo_t* info, void* ucVoid, int abort_if_unrec
// happens rarely. In heap based and disjoint base compressd oop modes also loads
// are used for null checks.
- // A VM-related SIGILL may only occur if we are not in the zero page.
- // On AIX, we get a SIGILL if we jump to 0x0 or to somewhere else
- // in the zero page, because it is filled with 0x0. We ignore
- // explicit SIGILLs in the zero page.
- if (sig == SIGILL && (pc < (address) 0x200)) {
- if (TraceTraps) {
- tty->print_raw_cr("SIGILL happened inside zero page.");
- }
- goto report_and_die;
- }
-
int stop_type = -1;
// Handle signal from NativeJump::patch_verified_entry().
if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) {
@@ -458,10 +338,7 @@ JVM_handle_aix_signal(int sig, siginfo_t* info, void* ucVoid, int abort_if_unrec
tty->print_cr("trap: %s: %s (SIGTRAP, stop type %d)", msg, detail_msg, stop_type);
}
- va_list detail_args;
- VMError::report_and_die(INTERNAL_ERROR, msg, detail_msg, detail_args, thread,
- pc, info, ucVoid, NULL, 0, 0);
- va_end(detail_args);
+ return false; // Fatal error
}
else if (sig == SIGBUS) {
@@ -477,7 +354,7 @@ JVM_handle_aix_signal(int sig, siginfo_t* info, void* ucVoid, int abort_if_unrec
}
next_pc = SharedRuntime::handle_unsafe_access(thread, next_pc);
os::Aix::ucontext_set_pc(uc, next_pc);
- return 1;
+ return true;
}
}
}
@@ -502,7 +379,7 @@ JVM_handle_aix_signal(int sig, siginfo_t* info, void* ucVoid, int abort_if_unrec
}
next_pc = SharedRuntime::handle_unsafe_access(thread, next_pc);
os::Aix::ucontext_set_pc(uc, next_pc);
- return 1;
+ return true;
}
}
@@ -524,32 +401,10 @@ JVM_handle_aix_signal(int sig, siginfo_t* info, void* ucVoid, int abort_if_unrec
// Save all thread context in case we need to restore it.
if (thread != NULL) thread->set_saved_exception_pc(pc);
os::Aix::ucontext_set_pc(uc, stub);
- return 1;
+ return true;
}
-run_chained_handler:
-
- // signal-chaining
- if (PosixSignals::chained_handler(sig, info, ucVoid)) {
- return 1;
- }
- if (!abort_if_unrecognized) {
- // caller wants another chance, so give it to him
- return 0;
- }
-
-report_and_die:
-
- // Use sigthreadmask instead of sigprocmask on AIX and unmask current signal.
- sigset_t newset;
- sigemptyset(&newset);
- sigaddset(&newset, sig);
- sigthreadmask(SIG_UNBLOCK, &newset, NULL);
-
- VMError::report_and_die(t, sig, pc, info, ucVoid);
-
- ShouldNotReachHere();
- return 0;
+ return false; // Fatal error
}
void os::Aix::init_thread_fpu_state(void) {
diff --git a/src/hotspot/os_cpu/bsd_x86/os_bsd_x86.cpp b/src/hotspot/os_cpu/bsd_x86/os_bsd_x86.cpp
index c6c72b9c92d..0b1a2424d2b 100644
--- a/src/hotspot/os_cpu/bsd_x86/os_bsd_x86.cpp
+++ b/src/hotspot/os_cpu/bsd_x86/os_bsd_x86.cpp
@@ -339,41 +339,12 @@ frame os::fetch_frame_from_context(const void* ucVoid) {
return frame(sp, fp, epc);
}
-bool os::Bsd::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) {
- address pc = (address) os::Bsd::ucontext_get_pc(uc);
- if (Interpreter::contains(pc)) {
- // interpreter performs stack banging after the fixed frame header has
- // been generated while the compilers perform it before. To maintain
- // semantic consistency between interpreted and compiled frames, the
- // method returns the Java sender of the current frame.
- *fr = os::fetch_frame_from_context(uc);
- if (!fr->is_first_java_frame()) {
- // get_frame_at_stack_banging_point() is only called when we
- // have well defined stacks so java_sender() calls do not need
- // to assert safe_for_sender() first.
- *fr = fr->java_sender();
- }
- } else {
- // more complex code with compiled code
- assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above");
- CodeBlob* cb = CodeCache::find_blob(pc);
- if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) {
- // Not sure where the pc points to, fallback to default
- // stack overflow handling
- return false;
- } else {
- *fr = os::fetch_frame_from_context(uc);
- // in compiled code, the stack banging is performed just after the return pc
- // has been pushed on the stack
- *fr = frame(fr->sp() + 1, fr->fp(), (address)*(fr->sp()));
- if (!fr->is_java_frame()) {
- // See java_sender() comment above.
- *fr = fr->java_sender();
- }
- }
- }
- assert(fr->is_java_frame(), "Safety check");
- return true;
+frame os::fetch_compiled_frame_from_context(const void* ucVoid) {
+ const ucontext_t* uc = (const ucontext_t*)ucVoid;
+ frame fr = os::fetch_frame_from_context(uc);
+ // in compiled code, the stack banging is performed just after the return pc
+ // has been pushed on the stack
+ return frame(fr.sp() + 1, fr.fp(), (address)*(fr.sp()));
}
// By default, gcc always save frame pointer (%ebp/%rbp) on stack. It may get
@@ -414,57 +385,14 @@ frame os::current_frame() {
}
}
-// Utility functions
-
// From IA32 System Programming Guide
enum {
trap_page_fault = 0xE
};
-extern "C" JNIEXPORT int
-JVM_handle_bsd_signal(int sig,
- siginfo_t* info,
- void* ucVoid,
- int abort_if_unrecognized) {
- ucontext_t* uc = (ucontext_t*) ucVoid;
+bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info,
+ ucontext_t* uc, JavaThread* thread) {
- Thread* t = Thread::current_or_null_safe();
-
- // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away
- // (no destructors can be run)
- os::ThreadCrashProtection::check_crash_protection(sig, t);
-
- SignalHandlerMark shm(t);
-
- // Note: it's not uncommon that JNI code uses signal/sigset to install
- // then restore certain signal handler (e.g. to temporarily block SIGPIPE,
- // or have a SIGILL handler when detecting CPU type). When that happens,
- // JVM_handle_bsd_signal() might be invoked with junk info/ucVoid. To
- // avoid unnecessary crash when libjsig is not preloaded, try handle signals
- // that do not require siginfo/ucontext first.
-
- if (sig == SIGPIPE || sig == SIGXFSZ) {
- // allow chained handler to go first
- if (PosixSignals::chained_handler(sig, info, ucVoid)) {
- return true;
- } else {
- // Ignoring SIGPIPE/SIGXFSZ - see bugs 4229104 or 6499219
- return true;
- }
- }
-
- JavaThread* thread = NULL;
- VMThread* vmthread = NULL;
- if (PosixSignals::are_signal_handlers_installed()) {
- if (t != NULL ){
- if(t->is_Java_thread()) {
- thread = t->as_Java_thread();
- }
- else if(t->is_VM_thread()){
- vmthread = (VMThread *)t;
- }
- }
- }
/*
NOTE: does not seem to work on bsd.
if (info == NULL || info->si_code <= 0 || info->si_code == SI_NOINFO) {
@@ -485,7 +413,7 @@ JVM_handle_bsd_signal(int sig,
if (StubRoutines::is_safefetch_fault(pc)) {
os::Bsd::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc));
- return 1;
+ return true;
}
// Handle ALL stack overflow variations here
@@ -495,40 +423,8 @@ JVM_handle_bsd_signal(int sig,
// check if fault address is within thread stack
if (thread->is_in_full_stack(addr)) {
// stack overflow
- StackOverflow* overflow_state = thread->stack_overflow_state();
- if (overflow_state->in_stack_yellow_reserved_zone(addr)) {
- if (thread->thread_state() == _thread_in_Java) {
- if (overflow_state->in_stack_reserved_zone(addr)) {
- frame fr;
- if (os::Bsd::get_frame_at_stack_banging_point(thread, uc, &fr)) {
- assert(fr.is_java_frame(), "Must be a Java frame");
- frame activation = SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr);
- if (activation.sp() != NULL) {
- overflow_state->disable_stack_reserved_zone();
- if (activation.is_interpreted_frame()) {
- overflow_state->set_reserved_stack_activation((address)(
- activation.fp() + frame::interpreter_frame_initial_sp_offset));
- } else {
- overflow_state->set_reserved_stack_activation((address)activation.unextended_sp());
- }
- return 1;
- }
- }
- }
- // Throw a stack overflow exception. Guard pages will be reenabled
- // while unwinding the stack.
- overflow_state->disable_stack_yellow_reserved_zone();
- stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW);
- } else {
- // Thread was in the vm or native code. Return and try to finish.
- overflow_state->disable_stack_yellow_reserved_zone();
- return 1;
- }
- } else if (overflow_state->in_stack_red_zone(addr)) {
- // Fatal red zone violation. Disable the guard pages and fall through
- // to handle_unexpected_exception way down below.
- overflow_state->disable_stack_red_zone();
- tty->print_raw_cr("An irrecoverable stack overflow has occurred.");
+ if (os::Posix::handle_stack_overflow(thread, addr, pc, uc, &stub)) {
+ return true; // continue
}
}
}
@@ -611,7 +507,7 @@ JVM_handle_bsd_signal(int sig,
int op = pc[0];
if (op == 0xDB) {
// FIST
- // TODO: The encoding of D2I in i486.ad can cause an exception
+ // TODO: The encoding of D2I in x86_32.ad can cause an exception
// prior to the fist instruction if there was an invalid operation
// pending. We want to dismiss that exception. From the win_32
// side it also seems that if it really was the fist causing
@@ -740,29 +636,6 @@ JVM_handle_bsd_signal(int sig,
return true;
}
- // signal-chaining
- if (PosixSignals::chained_handler(sig, info, ucVoid)) {
- return true;
- }
-
- if (!abort_if_unrecognized) {
- // caller wants another chance, so give it to him
- return false;
- }
-
- if (pc == NULL && uc != NULL) {
- pc = os::Bsd::ucontext_get_pc(uc);
- }
-
- // unmask current signal
- sigset_t newset;
- sigemptyset(&newset);
- sigaddset(&newset, sig);
- sigprocmask(SIG_UNBLOCK, &newset, NULL);
-
- VMError::report_and_die(t, sig, pc, info, ucVoid);
-
- ShouldNotReachHere();
return false;
}
diff --git a/src/hotspot/os_cpu/bsd_zero/atomic_bsd_zero.hpp b/src/hotspot/os_cpu/bsd_zero/atomic_bsd_zero.hpp
index 1c6f2155ef8..b9ee395bc30 100644
--- a/src/hotspot/os_cpu/bsd_zero/atomic_bsd_zero.hpp
+++ b/src/hotspot/os_cpu/bsd_zero/atomic_bsd_zero.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright 2007, 2008, 2011, 2015, Red Hat, Inc.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -184,7 +184,9 @@ inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I add_value,
#ifdef M68K
return add_using_helper(m68k_add_and_fetch, dest, add_value);
#else
- return __sync_add_and_fetch(dest, add_value);
+ D res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE);
+ FULL_MEM_BARRIER;
+ return res;
#endif // M68K
#endif // ARM
}
@@ -196,7 +198,9 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(D volatile* dest, I add_value,
STATIC_ASSERT(8 == sizeof(I));
STATIC_ASSERT(8 == sizeof(D));
- return __sync_add_and_fetch(dest, add_value);
+ D res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE);
+ FULL_MEM_BARRIER;
+ return res;
}
template<>
@@ -255,7 +259,12 @@ inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest,
#ifdef M68K
return cmpxchg_using_helper(m68k_compare_and_swap, dest, compare_value, exchange_value);
#else
- return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
+ T value = compare_value;
+ FULL_MEM_BARRIER;
+ __atomic_compare_exchange(dest, &value, &exchange_value, /*weak*/false,
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+ FULL_MEM_BARRIER;
+ return value;
#endif // M68K
#endif // ARM
}
@@ -267,7 +276,13 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest,
T exchange_value,
atomic_memory_order order) const {
STATIC_ASSERT(8 == sizeof(T));
- return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
+
+ T value = compare_value;
+ FULL_MEM_BARRIER;
+ __atomic_compare_exchange(dest, &value, &exchange_value, /*weak*/false,
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+ FULL_MEM_BARRIER;
+ return value;
}
template<>
diff --git a/src/hotspot/os_cpu/bsd_zero/os_bsd_zero.cpp b/src/hotspot/os_cpu/bsd_zero/os_bsd_zero.cpp
index 0e0dfeca71c..1ab2001f5ab 100644
--- a/src/hotspot/os_cpu/bsd_zero/os_bsd_zero.cpp
+++ b/src/hotspot/os_cpu/bsd_zero/os_bsd_zero.cpp
@@ -115,18 +115,10 @@ frame os::fetch_frame_from_context(const void* ucVoid) {
return frame();
}
-extern "C" JNIEXPORT int
-JVM_handle_bsd_signal(int sig,
- siginfo_t* info,
- void* ucVoid,
- int abort_if_unrecognized) {
- ucontext_t* uc = (ucontext_t*) ucVoid;
+bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info,
+ ucontext_t* uc, JavaThread* thread) {
- Thread* t = Thread::current_or_null_safe();
-
- SignalHandlerMark shm(t);
-
- // handle SafeFetch faults
+ // handle SafeFetch faults the zero way
if (sig == SIGSEGV || sig == SIGBUS) {
sigjmp_buf* const pjb = get_jmp_buf_for_continuation();
if (pjb) {
@@ -134,37 +126,6 @@ JVM_handle_bsd_signal(int sig,
}
}
- // Note: it's not uncommon that JNI code uses signal/sigset to
- // install then restore certain signal handler (e.g. to temporarily
- // block SIGPIPE, or have a SIGILL handler when detecting CPU
- // type). When that happens, JVM_handle_bsd_signal() might be
- // invoked with junk info/ucVoid. To avoid unnecessary crash when
- // libjsig is not preloaded, try handle signals that do not require
- // siginfo/ucontext first.
-
- if (sig == SIGPIPE || sig == SIGXFSZ) {
- // allow chained handler to go first
- if (PosixSignals::chained_handler(sig, info, ucVoid)) {
- return true;
- } else {
- // Ignoring SIGPIPE/SIGXFSZ - see bugs 4229104 or 6499219
- return true;
- }
- }
-
- JavaThread* thread = NULL;
- VMThread* vmthread = NULL;
- if (PosixSignals::are_signal_handlers_installed()) {
- if (t != NULL ){
- if(t->is_Java_thread()) {
- thread = t->as_Java_thread();
- }
- else if(t->is_VM_thread()){
- vmthread = (VMThread *)t;
- }
- }
- }
-
if (info != NULL && thread != NULL) {
// Handle ALL stack overflow variations here
if (sig == SIGSEGV || sig == SIGBUS) {
@@ -172,13 +133,14 @@ JVM_handle_bsd_signal(int sig,
// check if fault address is within thread stack
if (thread->is_in_full_stack(addr)) {
+ StackOverflow* overflow_state = thread->stack_overflow_state();
// stack overflow
- if (thread->in_stack_yellow_reserved_zone(addr)) {
- thread->disable_stack_yellow_reserved_zone();
+ if (overflow_state->in_stack_yellow_reserved_zone(addr)) {
+ overflow_state->disable_stack_yellow_reserved_zone();
ShouldNotCallThis();
}
- else if (thread->in_stack_red_zone(addr)) {
- thread->disable_stack_red_zone();
+ else if (overflow_state->in_stack_red_zone(addr)) {
+ overflow_state->disable_stack_red_zone();
ShouldNotCallThis();
}
}
@@ -203,36 +165,6 @@ JVM_handle_bsd_signal(int sig,
}*/
}
- // signal-chaining
- if (PosixSignals::chained_handler(sig, info, ucVoid)) {
- return true;
- }
-
- if (!abort_if_unrecognized) {
- // caller wants another chance, so give it to him
- return false;
- }
-
-#ifndef PRODUCT
- if (sig == SIGSEGV) {
- fatal("\n#"
- "\n# /--------------------\\"
- "\n# | segmentation fault |"
- "\n# \\---\\ /--------------/"
- "\n# /"
- "\n# [-] |\\_/| "
- "\n# (+)=C |o o|__ "
- "\n# | | =-*-=__\\ "
- "\n# OOO c_c_(___)");
- }
-#endif // !PRODUCT
-
- const char *fmt =
- "caught unhandled signal " INT32_FORMAT " at address " PTR_FORMAT;
- char buf[128];
-
- sprintf(buf, fmt, sig, info->si_addr);
- fatal(buf);
return false;
}
@@ -436,6 +368,7 @@ extern "C" {
long long unsigned int oldval,
long long unsigned int newval) {
ShouldNotCallThis();
+ return 0; // silence compiler warnings
}
};
#endif // !_LP64
diff --git a/src/hotspot/os_cpu/linux_aarch64/os_linux_aarch64.cpp b/src/hotspot/os_cpu/linux_aarch64/os_linux_aarch64.cpp
index d4bca5cd374..9ba2179a80c 100644
--- a/src/hotspot/os_cpu/linux_aarch64/os_linux_aarch64.cpp
+++ b/src/hotspot/os_cpu/linux_aarch64/os_linux_aarch64.cpp
@@ -75,7 +75,6 @@
# include
# include
# include
-# include
#define REG_FP 29
#define REG_LR 30
@@ -134,44 +133,16 @@ frame os::fetch_frame_from_context(const void* ucVoid) {
return frame(sp, fp, epc);
}
-bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) {
- address pc = (address) os::Linux::ucontext_get_pc(uc);
- if (Interpreter::contains(pc)) {
- // interpreter performs stack banging after the fixed frame header has
- // been generated while the compilers perform it before. To maintain
- // semantic consistency between interpreted and compiled frames, the
- // method returns the Java sender of the current frame.
- *fr = os::fetch_frame_from_context(uc);
- if (!fr->is_first_java_frame()) {
- assert(fr->safe_for_sender(thread), "Safety check");
- *fr = fr->java_sender();
- }
- } else {
- // more complex code with compiled code
- assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above");
- CodeBlob* cb = CodeCache::find_blob(pc);
- if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) {
- // Not sure where the pc points to, fallback to default
- // stack overflow handling
- return false;
- } else {
- // In compiled code, the stack banging is performed before LR
- // has been saved in the frame. LR is live, and SP and FP
- // belong to the caller.
- intptr_t* fp = os::Linux::ucontext_get_fp(uc);
- intptr_t* sp = os::Linux::ucontext_get_sp(uc);
- address pc = (address)(uc->uc_mcontext.regs[REG_LR]
+frame os::fetch_compiled_frame_from_context(const void* ucVoid) {
+ const ucontext_t* uc = (const ucontext_t*)ucVoid;
+ // In compiled code, the stack banging is performed before LR
+ // has been saved in the frame. LR is live, and SP and FP
+ // belong to the caller.
+ intptr_t* fp = os::Linux::ucontext_get_fp(uc);
+ intptr_t* sp = os::Linux::ucontext_get_sp(uc);
+ address pc = (address)(uc->uc_mcontext.regs[REG_LR]
- NativeInstruction::instruction_size);
- *fr = frame(sp, fp, pc);
- if (!fr->is_java_frame()) {
- assert(fr->safe_for_sender(thread), "Safety check");
- assert(!fr->is_first_frame(), "Safety check");
- *fr = fr->java_sender();
- }
- }
- }
- assert(fr->is_java_frame(), "Safety check");
- return true;
+ return frame(sp, fp, pc);
}
// By default, gcc always saves frame pointer rfp on this stack. This
@@ -193,58 +164,9 @@ NOINLINE frame os::current_frame() {
}
}
-extern "C" JNIEXPORT int
-JVM_handle_linux_signal(int sig,
- siginfo_t* info,
- void* ucVoid,
- int abort_if_unrecognized) {
- ucontext_t* uc = (ucontext_t*) ucVoid;
-
- Thread* t = Thread::current_or_null_safe();
-
- // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away
- // (no destructors can be run)
- os::ThreadCrashProtection::check_crash_protection(sig, t);
-
- SignalHandlerMark shm(t);
-
- // Note: it's not uncommon that JNI code uses signal/sigset to install
- // then restore certain signal handler (e.g. to temporarily block SIGPIPE,
- // or have a SIGILL handler when detecting CPU type). When that happens,
- // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To
- // avoid unnecessary crash when libjsig is not preloaded, try handle signals
- // that do not require siginfo/ucontext first.
-
- if (sig == SIGPIPE || sig == SIGXFSZ) {
- // allow chained handler to go first
- if (PosixSignals::chained_handler(sig, info, ucVoid)) {
- return true;
- } else {
- // Ignoring SIGPIPE/SIGXFSZ - see bugs 4229104 or 6499219
- return true;
- }
- }
-
-#ifdef CAN_SHOW_REGISTERS_ON_ASSERT
- if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) {
- if (handle_assert_poison_fault(ucVoid, info->si_addr)) {
- return 1;
- }
- }
-#endif
+bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info,
+ ucontext_t* uc, JavaThread* thread) {
- JavaThread* thread = NULL;
- VMThread* vmthread = NULL;
- if (PosixSignals::are_signal_handlers_installed()) {
- if (t != NULL ){
- if(t->is_Java_thread()) {
- thread = t->as_Java_thread();
- }
- else if(t->is_VM_thread()){
- vmthread = (VMThread *)t;
- }
- }
- }
/*
NOTE: does not seem to work on linux.
if (info == NULL || info->si_code <= 0 || info->si_code == SI_NOINFO) {
@@ -265,7 +187,7 @@ JVM_handle_linux_signal(int sig,
if (StubRoutines::is_safefetch_fault(pc)) {
os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc));
- return 1;
+ return true;
}
address addr = (address) info->si_addr;
@@ -279,62 +201,8 @@ JVM_handle_linux_signal(int sig,
if (sig == SIGSEGV) {
// check if fault address is within thread stack
if (thread->is_in_full_stack(addr)) {
- StackOverflow* overflow_state = thread->stack_overflow_state();
- // stack overflow
- if (overflow_state->in_stack_yellow_reserved_zone(addr)) {
- if (thread->thread_state() == _thread_in_Java) {
- if (overflow_state->in_stack_reserved_zone(addr)) {
- frame fr;
- if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) {
- assert(fr.is_java_frame(), "Must be a Java frame");
- frame activation =
- SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr);
- if (activation.sp() != NULL) {
- overflow_state->disable_stack_reserved_zone();
- if (activation.is_interpreted_frame()) {
- overflow_state->set_reserved_stack_activation((address)(
- activation.fp() + frame::interpreter_frame_initial_sp_offset));
- } else {
- overflow_state->set_reserved_stack_activation((address)activation.unextended_sp());
- }
- return 1;
- }
- }
- }
- // Throw a stack overflow exception. Guard pages will be reenabled
- // while unwinding the stack.
- overflow_state->disable_stack_yellow_reserved_zone();
- stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW);
- } else {
- // Thread was in the vm or native code. Return and try to finish.
- overflow_state->disable_stack_yellow_reserved_zone();
- return 1;
- }
- } else if (overflow_state->in_stack_red_zone(addr)) {
- // Fatal red zone violation. Disable the guard pages and fall through
- // to handle_unexpected_exception way down below.
- overflow_state->disable_stack_red_zone();
- tty->print_raw_cr("An irrecoverable stack overflow has occurred.");
-
- // This is a likely cause, but hard to verify. Let's just print
- // it as a hint.
- tty->print_raw_cr("Please check if any of your loaded .so files has "
- "enabled executable stack (see man page execstack(8))");
- } else {
- // Accessing stack address below sp may cause SEGV if current
- // thread has MAP_GROWSDOWN stack. This should only happen when
- // current thread was created by user code with MAP_GROWSDOWN flag
- // and then attached to VM. See notes in os_linux.cpp.
- if (thread->osthread()->expanding_stack() == 0) {
- thread->osthread()->set_expanding_stack();
- if (os::Linux::manually_expand_stack(thread, addr)) {
- thread->osthread()->clear_expanding_stack();
- return 1;
- }
- thread->osthread()->clear_expanding_stack();
- } else {
- fatal("recursive segv. expanding stack.");
- }
+ if (os::Posix::handle_stack_overflow(thread, addr, pc, uc, &stub)) {
+ return true; // continue
}
}
}
@@ -377,10 +245,7 @@ JVM_handle_linux_signal(int sig,
tty->print_cr("trap: %s: (SIGILL)", msg);
}
- va_list detail_args;
- VMError::report_and_die(INTERNAL_ERROR, msg, detail_msg, detail_args, thread,
- pc, info, ucVoid, NULL, 0, 0);
- va_end(detail_args);
+ return false; // Fatal error
}
else
@@ -426,30 +291,8 @@ JVM_handle_linux_signal(int sig,
return true;
}
- // signal-chaining
- if (PosixSignals::chained_handler(sig, info, ucVoid)) {
- return true;
- }
-
- if (!abort_if_unrecognized) {
- // caller wants another chance, so give it to him
- return false;
- }
-
- if (pc == NULL && uc != NULL) {
- pc = os::Linux::ucontext_get_pc(uc);
- }
-
- // unmask current signal
- sigset_t newset;
- sigemptyset(&newset);
- sigaddset(&newset, sig);
- sigprocmask(SIG_UNBLOCK, &newset, NULL);
-
- VMError::report_and_die(t, sig, pc, info, ucVoid);
+ return false; // Not handled here
- ShouldNotReachHere();
- return true; // Mute compiler
}
void os::Linux::init_thread_fpu_state(void) {
diff --git a/src/hotspot/os_cpu/linux_aarch64/thread_linux_aarch64.hpp b/src/hotspot/os_cpu/linux_aarch64/thread_linux_aarch64.hpp
index 2a6c3a74689..5a1f273c548 100644
--- a/src/hotspot/os_cpu/linux_aarch64/thread_linux_aarch64.hpp
+++ b/src/hotspot/os_cpu/linux_aarch64/thread_linux_aarch64.hpp
@@ -27,15 +27,6 @@
#define OS_CPU_LINUX_AARCH64_THREAD_LINUX_AARCH64_HPP
private:
-#ifdef ASSERT
- // spill stack holds N callee-save registers at each Java call and
- // grows downwards towards limit
- // we need limit to check we have space for a spill and base so we
- // can identify all live spill frames at GC (eventually)
- address _spill_stack;
- address _spill_stack_base;
- address _spill_stack_limit;
-#endif // ASSERT
void pd_initialize() {
_anchor.clear();
diff --git a/src/hotspot/os_cpu/linux_aarch64/vm_version_linux_aarch64.cpp b/src/hotspot/os_cpu/linux_aarch64/vm_version_linux_aarch64.cpp
index d5fb3f4680b..199a096d7f9 100644
--- a/src/hotspot/os_cpu/linux_aarch64/vm_version_linux_aarch64.cpp
+++ b/src/hotspot/os_cpu/linux_aarch64/vm_version_linux_aarch64.cpp
@@ -59,6 +59,10 @@
#define HWCAP_DCPOP (1<<16)
#endif
+#ifndef HWCAP_SHA3
+#define HWCAP_SHA3 (1 << 17)
+#endif
+
#ifndef HWCAP_SHA512
#define HWCAP_SHA512 (1 << 21)
#endif
@@ -82,7 +86,7 @@ int VM_Version::get_current_sve_vector_length() {
return prctl(PR_SVE_GET_VL);
}
-int VM_Version::set_and_get_current_sve_vector_lenght(int length) {
+int VM_Version::set_and_get_current_sve_vector_length(int length) {
assert(_features & CPU_SVE, "should not call this");
int new_length = prctl(PR_SVE_SET_VL, length);
return new_length;
@@ -103,6 +107,7 @@ void VM_Version::get_os_cpu_info() {
static_assert(CPU_CRC32 == HWCAP_CRC32);
static_assert(CPU_LSE == HWCAP_ATOMICS);
static_assert(CPU_DCPOP == HWCAP_DCPOP);
+ static_assert(CPU_SHA3 == HWCAP_SHA3);
static_assert(CPU_SHA512 == HWCAP_SHA512);
static_assert(CPU_SVE == HWCAP_SVE);
_features = auxv & (
@@ -116,6 +121,7 @@ void VM_Version::get_os_cpu_info() {
HWCAP_CRC32 |
HWCAP_ATOMICS |
HWCAP_DCPOP |
+ HWCAP_SHA3 |
HWCAP_SHA512 |
HWCAP_SVE);
@@ -136,7 +142,6 @@ void VM_Version::get_os_cpu_info() {
_zva_length = 4 << (dczid_el0 & 0xf);
}
- int cpu_lines = 0;
if (FILE *f = fopen("/proc/cpuinfo", "r")) {
// need a large buffer as the flags line may include lots of text
char buf[1024], *p;
@@ -145,7 +150,6 @@ void VM_Version::get_os_cpu_info() {
long v = strtol(p+1, NULL, 0);
if (strncmp(buf, "CPU implementer", sizeof "CPU implementer" - 1) == 0) {
_cpu = v;
- cpu_lines++;
} else if (strncmp(buf, "CPU variant", sizeof "CPU variant" - 1) == 0) {
_variant = v;
} else if (strncmp(buf, "CPU part", sizeof "CPU part" - 1) == 0) {
@@ -162,5 +166,4 @@ void VM_Version::get_os_cpu_info() {
}
fclose(f);
}
- guarantee(cpu_lines == os::processor_count(), "core count should be consistent");
}
diff --git a/src/hotspot/os_cpu/linux_arm/os_linux_arm.cpp b/src/hotspot/os_cpu/linux_arm/os_linux_arm.cpp
index ab8481c749b..41733be6ca3 100644
--- a/src/hotspot/os_cpu/linux_arm/os_linux_arm.cpp
+++ b/src/hotspot/os_cpu/linux_arm/os_linux_arm.cpp
@@ -241,19 +241,9 @@ address check_vfp3_32_fault_instr = NULL;
address check_simd_fault_instr = NULL;
address check_mp_ext_fault_instr = NULL;
-// Utility functions
-extern "C" int JVM_handle_linux_signal(int sig, siginfo_t* info,
- void* ucVoid, int abort_if_unrecognized) {
- ucontext_t* uc = (ucontext_t*) ucVoid;
-
- Thread* t = Thread::current_or_null_safe();
-
- // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away
- // (no destructors can be run)
- os::ThreadCrashProtection::check_crash_protection(sig, t);
-
- SignalHandlerMark shm(t);
+bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info,
+ ucontext_t* uc, JavaThread* thread) {
if (sig == SIGILL &&
((info->si_addr == (caddr_t)check_simd_fault_instr)
@@ -267,44 +257,6 @@ extern "C" int JVM_handle_linux_signal(int sig, siginfo_t* info,
return true;
}
- // Note: it's not uncommon that JNI code uses signal/sigset to install
- // then restore certain signal handler (e.g. to temporarily block SIGPIPE,
- // or have a SIGILL handler when detecting CPU type). When that happens,
- // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To
- // avoid unnecessary crash when libjsig is not preloaded, try handle signals
- // that do not require siginfo/ucontext first.
-
- if (sig == SIGPIPE || sig == SIGXFSZ) {
- // allow chained handler to go first
- if (PosixSignals::chained_handler(sig, info, ucVoid)) {
- return true;
- } else {
- // Ignoring SIGPIPE/SIGXFSZ - see bugs 4229104 or 6499219
- return true;
- }
- }
-
-#ifdef CAN_SHOW_REGISTERS_ON_ASSERT
- if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) {
- if (handle_assert_poison_fault(ucVoid, info->si_addr)) {
- return 1;
- }
- }
-#endif
-
- JavaThread* thread = NULL;
- VMThread* vmthread = NULL;
- if (PosixSignals::are_signal_handlers_installed()) {
- if (t != NULL ){
- if(t->is_Java_thread()) {
- thread = t->as_Java_thread();
- }
- else if(t->is_VM_thread()){
- vmthread = (VMThread *)t;
- }
- }
- }
-
address stub = NULL;
address pc = NULL;
bool unsafe_access = false;
@@ -318,7 +270,7 @@ extern "C" int JVM_handle_linux_signal(int sig, siginfo_t* info,
if (StubRoutines::is_safefetch_fault(pc)) {
os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc));
- return 1;
+ return true;
}
// check if fault address is within thread stack
if (thread->is_in_full_stack(addr)) {
@@ -332,7 +284,7 @@ extern "C" int JVM_handle_linux_signal(int sig, siginfo_t* info,
stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW);
} else {
// Thread was in the vm or native code. Return and try to finish.
- return 1;
+ return true;
}
} else if (overflow_state->in_stack_red_zone(addr)) {
// Fatal red zone violation. Disable the guard pages and fall through
@@ -348,7 +300,7 @@ extern "C" int JVM_handle_linux_signal(int sig, siginfo_t* info,
thread->osthread()->set_expanding_stack();
if (os::Linux::manually_expand_stack(thread, addr)) {
thread->osthread()->clear_expanding_stack();
- return 1;
+ return true;
}
thread->osthread()->clear_expanding_stack();
} else {
@@ -441,30 +393,8 @@ extern "C" int JVM_handle_linux_signal(int sig, siginfo_t* info,
return true;
}
- // signal-chaining
- if (PosixSignals::chained_handler(sig, info, ucVoid)) {
- return true;
- }
-
- if (!abort_if_unrecognized) {
- // caller wants another chance, so give it to him
- return false;
- }
-
- if (pc == NULL && uc != NULL) {
- pc = os::Linux::ucontext_get_pc(uc);
- }
-
- // unmask current signal
- sigset_t newset;
- sigemptyset(&newset);
- sigaddset(&newset, sig);
- sigprocmask(SIG_UNBLOCK, &newset, NULL);
-
- VMError::report_and_die(t, sig, pc, info, ucVoid);
-
- ShouldNotReachHere();
return false;
+
}
void os::Linux::init_thread_fpu_state(void) {
diff --git a/src/hotspot/os_cpu/linux_ppc/os_linux_ppc.cpp b/src/hotspot/os_cpu/linux_ppc/os_linux_ppc.cpp
index c820886ba80..ac4d1b72eb3 100644
--- a/src/hotspot/os_cpu/linux_ppc/os_linux_ppc.cpp
+++ b/src/hotspot/os_cpu/linux_ppc/os_linux_ppc.cpp
@@ -162,40 +162,11 @@ frame os::fetch_frame_from_context(const void* ucVoid) {
return frame(sp, epc);
}
-bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) {
- address pc = (address) os::Linux::ucontext_get_pc(uc);
- if (Interpreter::contains(pc)) {
- // Interpreter performs stack banging after the fixed frame header has
- // been generated while the compilers perform it before. To maintain
- // semantic consistency between interpreted and compiled frames, the
- // method returns the Java sender of the current frame.
- *fr = os::fetch_frame_from_context(uc);
- if (!fr->is_first_java_frame()) {
- assert(fr->safe_for_sender(thread), "Safety check");
- *fr = fr->java_sender();
- }
- } else {
- // More complex code with compiled code.
- assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above");
- CodeBlob* cb = CodeCache::find_blob(pc);
- if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) {
- // Not sure where the pc points to, fallback to default
- // stack overflow handling. In compiled code, we bang before
- // the frame is complete.
- return false;
- } else {
- intptr_t* sp = os::Linux::ucontext_get_sp(uc);
- address lr = ucontext_get_lr(uc);
- *fr = frame(sp, lr);
- if (!fr->is_java_frame()) {
- assert(fr->safe_for_sender(thread), "Safety check");
- assert(!fr->is_first_frame(), "Safety check");
- *fr = fr->java_sender();
- }
- }
- }
- assert(fr->is_java_frame(), "Safety check");
- return true;
+frame os::fetch_compiled_frame_from_context(const void* ucVoid) {  // Build a frame from the SP and LR held in the ucontext_t passed as ucVoid.
+ const ucontext_t* uc = (const ucontext_t*)ucVoid;
+ intptr_t* sp = os::Linux::ucontext_get_sp(uc);
+ address lr = ucontext_get_lr(uc);  // LR is read from the context instead of the context's pc
+ return frame(sp, lr);  // LR takes the pc slot of frame(sp, pc); sp is used unchanged
}
frame os::get_sender_for_C_frame(frame* fr) {
@@ -217,34 +188,8 @@ frame os::current_frame() {
return os::get_sender_for_C_frame(&tmp);
}
-// Utility functions
-
-extern "C" JNIEXPORT int
-JVM_handle_linux_signal(int sig,
- siginfo_t* info,
- void* ucVoid,
- int abort_if_unrecognized) {
- ucontext_t* uc = (ucontext_t*) ucVoid;
-
- Thread* t = Thread::current_or_null_safe();
-
- SignalHandlerMark shm(t);
-
- // Note: it's not uncommon that JNI code uses signal/sigset to install
- // then restore certain signal handler (e.g. to temporarily block SIGPIPE,
- // or have a SIGILL handler when detecting CPU type). When that happens,
- // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To
- // avoid unnecessary crash when libjsig is not preloaded, try handle signals
- // that do not require siginfo/ucontext first.
-
- if (sig == SIGPIPE) {
- if (PosixSignals::chained_handler(sig, info, ucVoid)) {
- return true;
- } else {
- // Ignoring SIGPIPE - see bugs 4229104
- return true;
- }
- }
+bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info,
+ ucontext_t* uc, JavaThread* thread) {
// Make the signal handler transaction-aware by checking the existence of a
// second (transactional) context with MSR TS bits active. If the signal is
@@ -268,26 +213,6 @@ JVM_handle_linux_signal(int sig,
}
}
-#ifdef CAN_SHOW_REGISTERS_ON_ASSERT
- if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) {
- if (handle_assert_poison_fault(ucVoid, info->si_addr)) {
- return 1;
- }
- }
-#endif
-
- JavaThread* thread = NULL;
- VMThread* vmthread = NULL;
- if (PosixSignals::are_signal_handlers_installed()) {
- if (t != NULL) {
- if(t->is_Java_thread()) {
- thread = t->as_Java_thread();
- } else if(t->is_VM_thread()) {
- vmthread = (VMThread *)t;
- }
- }
- }
-
// Moved SafeFetch32 handling outside thread!=NULL conditional block to make
// it work if no associated JavaThread object exists.
if (uc) {
@@ -327,60 +252,8 @@ JVM_handle_linux_signal(int sig,
// Check if fault address is within thread stack.
if (thread->is_in_full_stack(addr)) {
// stack overflow
- StackOverflow* overflow_state = thread->stack_overflow_state();
- if (overflow_state->in_stack_yellow_reserved_zone(addr)) {
- if (thread->thread_state() == _thread_in_Java) {
- if (overflow_state->in_stack_reserved_zone(addr)) {
- frame fr;
- if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) {
- assert(fr.is_java_frame(), "Must be a Javac frame");
- frame activation =
- SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr);
- if (activation.sp() != NULL) {
- overflow_state->disable_stack_reserved_zone();
- if (activation.is_interpreted_frame()) {
- overflow_state->set_reserved_stack_activation((address)activation.fp());
- } else {
- overflow_state->set_reserved_stack_activation((address)activation.unextended_sp());
- }
- return 1;
- }
- }
- }
- // Throw a stack overflow exception.
- // Guard pages will be reenabled while unwinding the stack.
- overflow_state->disable_stack_yellow_reserved_zone();
- stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW);
- } else {
- // Thread was in the vm or native code. Return and try to finish.
- overflow_state->disable_stack_yellow_reserved_zone();
- return 1;
- }
- } else if (overflow_state->in_stack_red_zone(addr)) {
- // Fatal red zone violation. Disable the guard pages and fall through
- // to handle_unexpected_exception way down below.
- overflow_state->disable_stack_red_zone();
- tty->print_raw_cr("An irrecoverable stack overflow has occurred.");
-
- // This is a likely cause, but hard to verify. Let's just print
- // it as a hint.
- tty->print_raw_cr("Please check if any of your loaded .so files has "
- "enabled executable stack (see man page execstack(8))");
- } else {
- // Accessing stack address below sp may cause SEGV if current
- // thread has MAP_GROWSDOWN stack. This should only happen when
- // current thread was created by user code with MAP_GROWSDOWN flag
- // and then attached to VM. See notes in os_linux.cpp.
- if (thread->osthread()->expanding_stack() == 0) {
- thread->osthread()->set_expanding_stack();
- if (os::Linux::manually_expand_stack(thread, addr)) {
- thread->osthread()->clear_expanding_stack();
- return 1;
- }
- thread->osthread()->clear_expanding_stack();
- } else {
- fatal("recursive segv. expanding stack.");
- }
+ if (os::Posix::handle_stack_overflow(thread, addr, pc, uc, &stub)) {
+ return true; // continue
}
}
}
@@ -389,17 +262,6 @@ JVM_handle_linux_signal(int sig,
// Java thread running in Java code => find exception handler if any
// a fault inside compiled code, the interpreter, or a stub
- // A VM-related SIGILL may only occur if we are not in the zero page.
- // On AIX, we get a SIGILL if we jump to 0x0 or to somewhere else
- // in the zero page, because it is filled with 0x0. We ignore
- // explicit SIGILLs in the zero page.
- if (sig == SIGILL && (pc < (address) 0x200)) {
- if (TraceTraps) {
- tty->print_raw_cr("SIGILL happened inside zero page.");
- }
- goto report_and_die;
- }
-
CodeBlob *cb = NULL;
int stop_type = -1;
// Handle signal from NativeJump::patch_verified_entry().
@@ -487,10 +349,7 @@ JVM_handle_linux_signal(int sig,
tty->print_cr("trap: %s: %s (SIGTRAP, stop type %d)", msg, detail_msg, stop_type);
}
- va_list detail_args;
- VMError::report_and_die(INTERNAL_ERROR, msg, detail_msg, detail_args, thread,
- pc, info, ucVoid, NULL, 0, 0);
- va_end(detail_args);
+ return false; // Fatal error
}
else if (sig == SIGBUS) {
@@ -548,31 +407,8 @@ JVM_handle_linux_signal(int sig,
return true;
}
- // signal-chaining
- if (PosixSignals::chained_handler(sig, info, ucVoid)) {
- return true;
- }
-
- if (!abort_if_unrecognized) {
- // caller wants another chance, so give it to him
- return false;
- }
-
- if (pc == NULL && uc != NULL) {
- pc = os::Linux::ucontext_get_pc(uc);
- }
-
-report_and_die:
- // unmask current signal
- sigset_t newset;
- sigemptyset(&newset);
- sigaddset(&newset, sig);
- sigprocmask(SIG_UNBLOCK, &newset, NULL);
-
- VMError::report_and_die(t, sig, pc, info, ucVoid);
-
- ShouldNotReachHere();
return false;
+
}
void os::Linux::init_thread_fpu_state(void) {
diff --git a/src/hotspot/os_cpu/linux_s390/os_linux_s390.cpp b/src/hotspot/os_cpu/linux_s390/os_linux_s390.cpp
index 97473c2b99f..d3d73053089 100644
--- a/src/hotspot/os_cpu/linux_s390/os_linux_s390.cpp
+++ b/src/hotspot/os_cpu/linux_s390/os_linux_s390.cpp
@@ -145,40 +145,11 @@ frame os::fetch_frame_from_context(const void* ucVoid) {
return frame(sp, epc);
}
-bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) {
- address pc = (address) os::Linux::ucontext_get_pc(uc);
- if (Interpreter::contains(pc)) {
- // Interpreter performs stack banging after the fixed frame header has
- // been generated while the compilers perform it before. To maintain
- // semantic consistency between interpreted and compiled frames, the
- // method returns the Java sender of the current frame.
- *fr = os::fetch_frame_from_context(uc);
- if (!fr->is_first_java_frame()) {
- assert(fr->safe_for_sender(thread), "Safety check");
- *fr = fr->java_sender();
- }
- } else {
- // More complex code with compiled code.
- assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above");
- CodeBlob* cb = CodeCache::find_blob(pc);
- if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) {
- // Not sure where the pc points to, fallback to default
- // stack overflow handling. In compiled code, we bang before
- // the frame is complete.
- return false;
- } else {
- intptr_t* sp = os::Linux::ucontext_get_sp(uc);
- address lr = ucontext_get_lr(uc);
- *fr = frame(sp, lr);
- if (!fr->is_java_frame()) {
- assert(fr->safe_for_sender(thread), "Safety check");
- assert(!fr->is_first_frame(), "Safety check");
- *fr = fr->java_sender();
- }
- }
- }
- assert(fr->is_java_frame(), "Safety check");
- return true;
+frame os::fetch_compiled_frame_from_context(const void* ucVoid) {  // Build a frame from the SP and LR held in the ucontext_t passed as ucVoid.
+ const ucontext_t* uc = (const ucontext_t*)ucVoid;
+ intptr_t* sp = os::Linux::ucontext_get_sp(uc);
+ address lr = ucontext_get_lr(uc);  // value of ucontext_get_lr(), read instead of the context's pc
+ return frame(sp, lr);  // that value takes the pc slot of frame(sp, pc); sp is used unchanged
}
frame os::get_sender_for_C_frame(frame* fr) {
@@ -233,60 +204,8 @@ frame os::current_frame() {
}
}
-// Utility functions
-
-extern "C" JNIEXPORT int
-JVM_handle_linux_signal(int sig,
- siginfo_t* info,
- void* ucVoid,
- int abort_if_unrecognized) {
- ucontext_t* uc = (ucontext_t*) ucVoid;
-
- Thread* t = Thread::current_or_null_safe();
-
- // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away
- // (no destructors can be run).
- os::ThreadCrashProtection::check_crash_protection(sig, t);
-
- SignalHandlerMark shm(t);
-
- // Note: it's not uncommon that JNI code uses signal/sigset to install
- // then restore certain signal handler (e.g. to temporarily block SIGPIPE,
- // or have a SIGILL handler when detecting CPU type). When that happens,
- // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To
- // avoid unnecessary crash when libjsig is not preloaded, try handle signals
- // that do not require siginfo/ucontext first.
-
- if (sig == SIGPIPE) {
- if (PosixSignals::chained_handler(sig, info, ucVoid)) {
- return true;
- } else {
- if (PrintMiscellaneous && (WizardMode || Verbose)) {
- warning("Ignoring SIGPIPE - see bug 4229104");
- }
- return true;
- }
- }
-
-#ifdef CAN_SHOW_REGISTERS_ON_ASSERT
- if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) {
- if (handle_assert_poison_fault(ucVoid, info->si_addr)) {
- return 1;
- }
- }
-#endif
-
- JavaThread* thread = NULL;
- VMThread* vmthread = NULL;
- if (PosixSignals::are_signal_handlers_installed()) {
- if (t != NULL) {
- if(t->is_Java_thread()) {
- thread = t->as_Java_thread();
- } else if(t->is_VM_thread()) {
- vmthread = (VMThread *)t;
- }
- }
- }
+bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info,
+ ucontext_t* uc, JavaThread* thread) {
// Moved SafeFetch32 handling outside thread!=NULL conditional block to make
// it work if no associated JavaThread object exists.
@@ -323,60 +242,8 @@ JVM_handle_linux_signal(int sig,
// Check if fault address is within thread stack.
if (thread->is_in_full_stack(addr)) {
// stack overflow
- StackOverflow* overflow_state = thread->stack_overflow_state();
- if (overflow_state->in_stack_yellow_reserved_zone(addr)) {
- if (thread->thread_state() == _thread_in_Java) {
- if (overflow_state->in_stack_reserved_zone(addr)) {
- frame fr;
- if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) {
- assert(fr.is_java_frame(), "Must be a Javac frame");
- frame activation =
- SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr);
- if (activation.sp() != NULL) {
- overflow_state->disable_stack_reserved_zone();
- if (activation.is_interpreted_frame()) {
- overflow_state->set_reserved_stack_activation((address)activation.fp());
- } else {
- overflow_state->set_reserved_stack_activation((address)activation.unextended_sp());
- }
- return 1;
- }
- }
- }
- // Throw a stack overflow exception.
- // Guard pages will be reenabled while unwinding the stack.
- overflow_state->disable_stack_yellow_reserved_zone();
- stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW);
- } else {
- // Thread was in the vm or native code. Return and try to finish.
- overflow_state->disable_stack_yellow_reserved_zone();
- return 1;
- }
- } else if (overflow_state->in_stack_red_zone(addr)) {
- // Fatal red zone violation. Disable the guard pages and fall through
- // to handle_unexpected_exception way down below.
- overflow_state->disable_stack_red_zone();
- tty->print_raw_cr("An irrecoverable stack overflow has occurred.");
-
- // This is a likely cause, but hard to verify. Let's just print
- // it as a hint.
- tty->print_raw_cr("Please check if any of your loaded .so files has "
- "enabled executable stack (see man page execstack(8))");
- } else {
- // Accessing stack address below sp may cause SEGV if current
- // thread has MAP_GROWSDOWN stack. This should only happen when
- // current thread was created by user code with MAP_GROWSDOWN flag
- // and then attached to VM. See notes in os_linux.cpp.
- if (thread->osthread()->expanding_stack() == 0) {
- thread->osthread()->set_expanding_stack();
- if (os::Linux::manually_expand_stack(thread, addr)) {
- thread->osthread()->clear_expanding_stack();
- return 1;
- }
- thread->osthread()->clear_expanding_stack();
- } else {
- fatal("recursive segv. expanding stack.");
- }
+ if (os::Posix::handle_stack_overflow(thread, addr, pc, uc, &stub)) {
+ return true; // continue
}
}
}
@@ -500,38 +367,8 @@ JVM_handle_linux_signal(int sig,
return true;
}
- // signal-chaining
- if (PosixSignals::chained_handler(sig, info, ucVoid)) {
- return true;
- }
-
- if (!abort_if_unrecognized) {
- // caller wants another chance, so give it to him
- return false;
- }
-
- if (pc == NULL && uc != NULL) {
- pc = os::Linux::ucontext_get_pc(uc);
- }
-
- // unmask current signal
- sigset_t newset;
- sigemptyset(&newset);
- sigaddset(&newset, sig);
- sigprocmask(SIG_UNBLOCK, &newset, NULL);
-
- // Hand down correct pc for SIGILL, SIGFPE. pc from context
- // usually points to the instruction after the failing instruction.
- // Note: this should be combined with the trap_pc handling above,
- // because it handles the same issue.
- if (sig == SIGILL || sig == SIGFPE) {
- pc = (address)info->si_addr;
- }
-
- VMError::report_and_die(t, sig, pc, info, ucVoid);
-
- ShouldNotReachHere();
return false;
+
}
void os::Linux::init_thread_fpu_state(void) {
diff --git a/src/hotspot/os_cpu/linux_x86/os_linux_x86.cpp b/src/hotspot/os_cpu/linux_x86/os_linux_x86.cpp
index aacbcce08c3..6c977fc96f1 100644
--- a/src/hotspot/os_cpu/linux_x86/os_linux_x86.cpp
+++ b/src/hotspot/os_cpu/linux_x86/os_linux_x86.cpp
@@ -148,43 +148,11 @@ frame os::fetch_frame_from_context(const void* ucVoid) {
return frame(sp, fp, epc);
}
-bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) {
- address pc = (address) os::Linux::ucontext_get_pc(uc);
- if (Interpreter::contains(pc)) {
- // interpreter performs stack banging after the fixed frame header has
- // been generated while the compilers perform it before. To maintain
- // semantic consistency between interpreted and compiled frames, the
- // method returns the Java sender of the current frame.
- *fr = os::fetch_frame_from_context(uc);
- if (!fr->is_first_java_frame()) {
- // get_frame_at_stack_banging_point() is only called when we
- // have well defined stacks so java_sender() calls do not need
- // to assert safe_for_sender() first.
- *fr = fr->java_sender();
- }
- } else {
- // more complex code with compiled code
- assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above");
- CodeBlob* cb = CodeCache::find_blob(pc);
- if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) {
- // Not sure where the pc points to, fallback to default
- // stack overflow handling
- return false;
- } else {
- // in compiled code, the stack banging is performed just after the return pc
- // has been pushed on the stack
- intptr_t* fp = os::Linux::ucontext_get_fp(uc);
- intptr_t* sp = os::Linux::ucontext_get_sp(uc);
- *fr = frame(sp + 1, fp, (address)*sp);
- if (!fr->is_java_frame()) {
- assert(!fr->is_first_frame(), "Safety check");
- // See java_sender() comment above.
- *fr = fr->java_sender();
- }
- }
- }
- assert(fr->is_java_frame(), "Safety check");
- return true;
+frame os::fetch_compiled_frame_from_context(const void* ucVoid) {  // Build a frame from the FP and SP held in the ucontext_t passed as ucVoid.
+ const ucontext_t* uc = (const ucontext_t*)ucVoid;
+ intptr_t* fp = os::Linux::ucontext_get_fp(uc);
+ intptr_t* sp = os::Linux::ucontext_get_sp(uc);  // NOTE(review): dereferenced below, so sp must point at readable stack memory - confirm callers guarantee this
+ return frame(sp + 1, fp, (address)*sp);  // word at sp is used as the pc, sp + 1 as the frame's sp; presumably the just-pushed return pc - confirm
}
// By default, gcc always save frame pointer (%ebp/%rbp) on stack. It may get
@@ -232,59 +200,10 @@ enum {
trap_page_fault = 0xE
};
-extern "C" JNIEXPORT int
-JVM_handle_linux_signal(int sig,
- siginfo_t* info,
- void* ucVoid,
- int abort_if_unrecognized) {
- ucontext_t* uc = (ucontext_t*) ucVoid;
-
- Thread* t = Thread::current_or_null_safe();
-
- // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away
- // (no destructors can be run)
- os::ThreadCrashProtection::check_crash_protection(sig, t);
-
- SignalHandlerMark shm(t);
-
- // Note: it's not uncommon that JNI code uses signal/sigset to install
- // then restore certain signal handler (e.g. to temporarily block SIGPIPE,
- // or have a SIGILL handler when detecting CPU type). When that happens,
- // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To
- // avoid unnecessary crash when libjsig is not preloaded, try handle signals
- // that do not require siginfo/ucontext first.
-
- if (sig == SIGPIPE || sig == SIGXFSZ) {
- // allow chained handler to go first
- if (PosixSignals::chained_handler(sig, info, ucVoid)) {
- return true;
- } else {
- // Ignoring SIGPIPE/SIGXFSZ - see bugs 4229104 or 6499219
- return true;
- }
- }
-
-#ifdef CAN_SHOW_REGISTERS_ON_ASSERT
- if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) {
- if (handle_assert_poison_fault(ucVoid, info->si_addr)) {
- return 1;
- }
- }
-#endif
+bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info,
+ ucontext_t* uc, JavaThread* thread) {
- JavaThread* thread = NULL;
- VMThread* vmthread = NULL;
- if (PosixSignals::are_signal_handlers_installed()) {
- if (t != NULL ){
- if(t->is_Java_thread()) {
- thread = t->as_Java_thread();
- }
- else if(t->is_VM_thread()){
- vmthread = (VMThread *)t;
- }
- }
- }
-/*
+ /*
NOTE: does not seem to work on linux.
if (info == NULL || info->si_code <= 0 || info->si_code == SI_NOINFO) {
// can't decode this kind of signal
@@ -304,7 +223,7 @@ JVM_handle_linux_signal(int sig,
if (StubRoutines::is_safefetch_fault(pc)) {
os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc));
- return 1;
+ return true;
}
#ifndef AMD64
@@ -324,61 +243,8 @@ JVM_handle_linux_signal(int sig,
// check if fault address is within thread stack
if (thread->is_in_full_stack(addr)) {
// stack overflow
- StackOverflow* overflow_state = thread->stack_overflow_state();
- if (overflow_state->in_stack_yellow_reserved_zone(addr)) {
- if (thread->thread_state() == _thread_in_Java) {
- if (overflow_state->in_stack_reserved_zone(addr)) {
- frame fr;
- if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) {
- assert(fr.is_java_frame(), "Must be a Java frame");
- frame activation =
- SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr);
- if (activation.sp() != NULL) {
- overflow_state->disable_stack_reserved_zone();
- if (activation.is_interpreted_frame()) {
- overflow_state->set_reserved_stack_activation((address)(
- activation.fp() + frame::interpreter_frame_initial_sp_offset));
- } else {
- overflow_state->set_reserved_stack_activation((address)activation.unextended_sp());
- }
- return 1;
- }
- }
- }
- // Throw a stack overflow exception. Guard pages will be reenabled
- // while unwinding the stack.
- overflow_state->disable_stack_yellow_reserved_zone();
- stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW);
- } else {
- // Thread was in the vm or native code. Return and try to finish.
- overflow_state->disable_stack_yellow_reserved_zone();
- return 1;
- }
- } else if (overflow_state->in_stack_red_zone(addr)) {
- // Fatal red zone violation. Disable the guard pages and fall through
- // to handle_unexpected_exception way down below.
- overflow_state->disable_stack_red_zone();
- tty->print_raw_cr("An irrecoverable stack overflow has occurred.");
-
- // This is a likely cause, but hard to verify. Let's just print
- // it as a hint.
- tty->print_raw_cr("Please check if any of your loaded .so files has "
- "enabled executable stack (see man page execstack(8))");
- } else {
- // Accessing stack address below sp may cause SEGV if current
- // thread has MAP_GROWSDOWN stack. This should only happen when
- // current thread was created by user code with MAP_GROWSDOWN flag
- // and then attached to VM. See notes in os_linux.cpp.
- if (thread->osthread()->expanding_stack() == 0) {
- thread->osthread()->set_expanding_stack();
- if (os::Linux::manually_expand_stack(thread, addr)) {
- thread->osthread()->clear_expanding_stack();
- return 1;
- }
- thread->osthread()->clear_expanding_stack();
- } else {
- fatal("recursive segv. expanding stack.");
- }
+ if (os::Posix::handle_stack_overflow(thread, addr, pc, uc, &stub)) {
+ return true; // continue
}
}
}
@@ -426,7 +292,7 @@ JVM_handle_linux_signal(int sig,
int op = pc[0];
if (op == 0xDB) {
// FIST
- // TODO: The encoding of D2I in i486.ad can cause an exception
+ // TODO: The encoding of D2I in x86_32.ad can cause an exception
// prior to the fist instruction if there was an invalid operation
// pending. We want to dismiss that exception. From the win_32
// side it also seems that if it really was the fist causing
@@ -555,30 +421,7 @@ JVM_handle_linux_signal(int sig,
return true;
}
- // signal-chaining
- if (PosixSignals::chained_handler(sig, info, ucVoid)) {
- return true;
- }
-
- if (!abort_if_unrecognized) {
- // caller wants another chance, so give it to him
- return false;
- }
-
- if (pc == NULL && uc != NULL) {
- pc = os::Linux::ucontext_get_pc(uc);
- }
-
- // unmask current signal
- sigset_t newset;
- sigemptyset(&newset);
- sigaddset(&newset, sig);
- sigprocmask(SIG_UNBLOCK, &newset, NULL);
-
- VMError::report_and_die(t, sig, pc, info, ucVoid);
-
- ShouldNotReachHere();
- return true; // Mute compiler
+ return false;
}
void os::Linux::init_thread_fpu_state(void) {
@@ -838,8 +681,8 @@ void os::workaround_expand_exec_shield_cs_limit() {
if (os::is_primordial_thread()) {
address limit = Linux::initial_thread_stack_bottom();
if (! DisablePrimordialThreadGuardPages) {
- limit += JavaThread::stack_red_zone_size() +
- JavaThread::stack_yellow_zone_size();
+ limit += StackOverflow::stack_red_zone_size() +
+ StackOverflow::stack_yellow_zone_size();
}
os::Linux::expand_stack_to(limit);
}
@@ -860,7 +703,7 @@ void os::workaround_expand_exec_shield_cs_limit() {
* we don't have much control or understanding of the address space, just let it slide.
*/
char* hint = (char*)(Linux::initial_thread_stack_bottom() -
- (JavaThread::stack_guard_zone_size() + page_size));
+ (StackOverflow::stack_guard_zone_size() + page_size));
char* codebuf = os::attempt_reserve_memory_at(hint, page_size);
if (codebuf == NULL) {
diff --git a/src/hotspot/os_cpu/linux_zero/atomic_linux_zero.hpp b/src/hotspot/os_cpu/linux_zero/atomic_linux_zero.hpp
index fb6e350cf05..e373b90bd9b 100644
--- a/src/hotspot/os_cpu/linux_zero/atomic_linux_zero.hpp
+++ b/src/hotspot/os_cpu/linux_zero/atomic_linux_zero.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright 2007, 2008, 2011, 2015, Red Hat, Inc.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@@ -49,7 +49,9 @@ inline D Atomic::PlatformAdd<4>::add_and_fetch(D volatile* dest, I add_value,
STATIC_ASSERT(4 == sizeof(I));
STATIC_ASSERT(4 == sizeof(D));
- return __sync_add_and_fetch(dest, add_value);
+ D res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE);
+ FULL_MEM_BARRIER;
+ return res;
}
template<>
@@ -58,7 +60,10 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(D volatile* dest, I add_value,
atomic_memory_order order) const {
STATIC_ASSERT(8 == sizeof(I));
STATIC_ASSERT(8 == sizeof(D));
- return __sync_add_and_fetch(dest, add_value);
+
+ D res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELEASE);
+ FULL_MEM_BARRIER;
+ return res;
}
template<>
@@ -103,7 +108,13 @@ inline T Atomic::PlatformCmpxchg<4>::operator()(T volatile* dest,
T exchange_value,
atomic_memory_order order) const {
STATIC_ASSERT(4 == sizeof(T));
- return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
+
+ T value = compare_value;
+ FULL_MEM_BARRIER;
+ __atomic_compare_exchange(dest, &value, &exchange_value, /*weak*/false,
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+ FULL_MEM_BARRIER;
+ return value;
}
template<>
@@ -113,7 +124,13 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T volatile* dest,
T exchange_value,
atomic_memory_order order) const {
STATIC_ASSERT(8 == sizeof(T));
- return __sync_val_compare_and_swap(dest, compare_value, exchange_value);
+
+ FULL_MEM_BARRIER;
+ T value = compare_value;
+ __atomic_compare_exchange(dest, &value, &exchange_value, /*weak*/false,
+ __ATOMIC_RELAXED, __ATOMIC_RELAXED);
+ FULL_MEM_BARRIER;
+ return value;
}
template<>
diff --git a/src/hotspot/os_cpu/linux_zero/os_linux_zero.cpp b/src/hotspot/os_cpu/linux_zero/os_linux_zero.cpp
index de09ba311b5..8ff5ac61c5c 100644
--- a/src/hotspot/os_cpu/linux_zero/os_linux_zero.cpp
+++ b/src/hotspot/os_cpu/linux_zero/os_linux_zero.cpp
@@ -111,16 +111,8 @@ frame os::fetch_frame_from_context(const void* ucVoid) {
return frame(NULL, NULL); // silence compile warnings
}
-extern "C" JNIEXPORT int
-JVM_handle_linux_signal(int sig,
- siginfo_t* info,
- void* ucVoid,
- int abort_if_unrecognized) {
- ucontext_t* uc = (ucontext_t*) ucVoid;
-
- Thread* t = Thread::current_or_null_safe();
-
- SignalHandlerMark shm(t);
+bool PosixSignals::pd_hotspot_signal_handler(int sig, siginfo_t* info,
+ ucontext_t* uc, JavaThread* thread) {
// handle SafeFetch faults
if (sig == SIGSEGV || sig == SIGBUS) {
@@ -130,37 +122,6 @@ JVM_handle_linux_signal(int sig,
}
}
- // Note: it's not uncommon that JNI code uses signal/sigset to
- // install then restore certain signal handler (e.g. to temporarily
- // block SIGPIPE, or have a SIGILL handler when detecting CPU
- // type). When that happens, JVM_handle_linux_signal() might be
- // invoked with junk info/ucVoid. To avoid unnecessary crash when
- // libjsig is not preloaded, try handle signals that do not require
- // siginfo/ucontext first.
-
- if (sig == SIGPIPE || sig == SIGXFSZ) {
- // allow chained handler to go first
- if (PosixSignals::chained_handler(sig, info, ucVoid)) {
- return true;
- } else {
- // Ignoring SIGPIPE/SIGXFSZ - see bugs 4229104 or 6499219
- return true;
- }
- }
-
- JavaThread* thread = NULL;
- VMThread* vmthread = NULL;
- if (PosixSignals::are_signal_handlers_installed()) {
- if (t != NULL ){
- if(t->is_Java_thread()) {
- thread = t->as_Java_thread();
- }
- else if(t->is_VM_thread()){
- vmthread = (VMThread *)t;
- }
- }
- }
-
if (info != NULL && thread != NULL) {
// Handle ALL stack overflow variations here
if (sig == SIGSEGV) {
@@ -218,40 +179,8 @@ JVM_handle_linux_signal(int sig,
}*/
}
- // signal-chaining
- if (PosixSignals::chained_handler(sig, info, ucVoid)) {
- return true;
- }
-
- if (!abort_if_unrecognized) {
- // caller wants another chance, so give it to him
- return false;
- }
-
-#ifndef PRODUCT
- if (sig == SIGSEGV) {
- fatal("\n#"
- "\n# /--------------------\\"
- "\n# | segmentation fault |"
- "\n# \\---\\ /--------------/"
- "\n# /"
- "\n# [-] |\\_/| "
- "\n# (+)=C |o o|__ "
- "\n# | | =-*-=__\\ "
- "\n# OOO c_c_(___)");
- }
-#endif // !PRODUCT
-
- char buf[64];
-
- sprintf(buf, "caught unhandled signal %d", sig);
+ return false; // Fatal error
-// Silence -Wformat-security warning for fatal()
-PRAGMA_DIAG_PUSH
-PRAGMA_FORMAT_NONLITERAL_IGNORED
- fatal(buf);
-PRAGMA_DIAG_POP
- return true; // silence compiler warnings
}
void os::Linux::init_thread_fpu_state(void) {
@@ -481,7 +410,7 @@ extern "C" {
long long unsigned int oldval,
long long unsigned int newval) {
ShouldNotCallThis();
- return 0; // silence compiler compiler warnings
+ return 0; // silence compiler warnings
}
};
#endif // !_LP64
diff --git a/src/hotspot/os_cpu/windows_aarch64/thread_windows_aarch64.hpp b/src/hotspot/os_cpu/windows_aarch64/thread_windows_aarch64.hpp
index 2b004fd75af..bcf43c8b088 100644
--- a/src/hotspot/os_cpu/windows_aarch64/thread_windows_aarch64.hpp
+++ b/src/hotspot/os_cpu/windows_aarch64/thread_windows_aarch64.hpp
@@ -27,16 +27,6 @@
private:
-#ifdef ASSERT
- // spill stack holds N callee-save registers at each Java call and
- // grows downwards towards limit
- // we need limit to check we have space for a spill and base so we
- // can identify all live spill frames at GC (eventually)
- address _spill_stack;
- address _spill_stack_base;
- address _spill_stack_limit;
-#endif // ASSERT
-
void pd_initialize() {
_anchor.clear();
}
diff --git a/src/hotspot/os_cpu/windows_aarch64/vm_version_windows_aarch64.cpp b/src/hotspot/os_cpu/windows_aarch64/vm_version_windows_aarch64.cpp
index 80270aa62e1..825faddde01 100644
--- a/src/hotspot/os_cpu/windows_aarch64/vm_version_windows_aarch64.cpp
+++ b/src/hotspot/os_cpu/windows_aarch64/vm_version_windows_aarch64.cpp
@@ -32,7 +32,7 @@ int VM_Version::get_current_sve_vector_length() {
return 0;
}
-int VM_Version::set_and_get_current_sve_vector_lenght(int length) {
+int VM_Version::set_and_get_current_sve_vector_length(int length) {
assert(_features & CPU_SVE, "should not call this");
ShouldNotReachHere();
return 0;
diff --git a/src/hotspot/share/adlc/adlparse.cpp b/src/hotspot/share/adlc/adlparse.cpp
index 198ee4eb586..7d62f861686 100644
--- a/src/hotspot/share/adlc/adlparse.cpp
+++ b/src/hotspot/share/adlc/adlparse.cpp
@@ -1006,7 +1006,8 @@ void ADLParser::frame_parse(void) {
skipws();
}
if (strcmp(token,"interpreter_method_reg")==0) {
- interpreter_method_parse(frame, false);
+ parse_err(WARN, "Using obsolete Token, interpreter_method_reg");
+ skipws();
}
if (strcmp(token,"cisc_spilling_operand_name")==0) {
cisc_spilling_operand_name_parse(frame, false);
@@ -1134,11 +1135,6 @@ void ADLParser::inline_cache_parse(FrameForm *frame, bool native) {
frame->_inline_cache_reg = parse_one_arg("inline cache reg entry");
}
-//------------------------------interpreter_method_parse------------------
-void ADLParser::interpreter_method_parse(FrameForm *frame, bool native) {
- frame->_interpreter_method_reg = parse_one_arg("method reg entry");
-}
-
//------------------------------cisc_spilling_operand_parse---------------------
void ADLParser::cisc_spilling_operand_name_parse(FrameForm *frame, bool native) {
frame->_cisc_spilling_operand_name = parse_one_arg("cisc spilling operand name");
diff --git a/src/hotspot/share/adlc/forms.cpp b/src/hotspot/share/adlc/forms.cpp
index ef72faa4a57..204f693cca3 100644
--- a/src/hotspot/share/adlc/forms.cpp
+++ b/src/hotspot/share/adlc/forms.cpp
@@ -268,6 +268,7 @@ Form::DataType Form::is_load_from_memory(const char *opType) const {
if( strcmp(opType,"LoadRange")==0 ) return Form::idealI;
if( strcmp(opType,"LoadS")==0 ) return Form::idealS;
if( strcmp(opType,"LoadVector")==0 ) return Form::idealV;
+ if( strcmp(opType,"LoadVectorGather")==0 ) return Form::idealV;
assert( strcmp(opType,"Load") != 0, "Must type Loads" );
return Form::none;
}
@@ -284,6 +285,7 @@ Form::DataType Form::is_store_to_memory(const char *opType) const {
if( strcmp(opType,"StoreN")==0) return Form::idealN;
if( strcmp(opType,"StoreNKlass")==0) return Form::idealNKlass;
if( strcmp(opType,"StoreVector")==0 ) return Form::idealV;
+ if( strcmp(opType,"StoreVectorScatter")==0 ) return Form::idealV;
assert( strcmp(opType,"Store") != 0, "Must type Stores" );
return Form::none;
}
diff --git a/src/hotspot/share/adlc/formsopt.hpp b/src/hotspot/share/adlc/formsopt.hpp
index 0ee97160e3a..400c2690a9a 100644
--- a/src/hotspot/share/adlc/formsopt.hpp
+++ b/src/hotspot/share/adlc/formsopt.hpp
@@ -336,7 +336,6 @@ class FrameForm : public Form {
// Public Data
char *_sync_stack_slots;
char *_inline_cache_reg;
- char *_interpreter_method_reg;
char *_interpreter_frame_pointer_reg;
char *_cisc_spilling_operand_name;
char *_frame_pointer;
diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp
index 4ac726bdf6d..42d3cc5d10d 100644
--- a/src/hotspot/share/adlc/formssel.cpp
+++ b/src/hotspot/share/adlc/formssel.cpp
@@ -3484,7 +3484,7 @@ int MatchNode::needs_ideal_memory_edge(FormDict &globals) const {
"StoreB","StoreC","Store" ,"StoreFP",
"LoadI", "LoadL", "LoadP" ,"LoadN", "LoadD" ,"LoadF" ,
"LoadB" , "LoadUB", "LoadUS" ,"LoadS" ,"Load" ,
- "StoreVector", "LoadVector",
+ "StoreVector", "LoadVector", "LoadVectorGather", "StoreVectorScatter",
"LoadRange", "LoadKlass", "LoadNKlass", "LoadL_unaligned", "LoadD_unaligned",
"LoadPLocked",
"StorePConditional", "StoreIConditional", "StoreLConditional",
@@ -3801,6 +3801,7 @@ void MatchNode::count_commutative_op(int& count) {
"MaxV", "MinV",
"MulI","MulL","MulF","MulD",
"MulVB","MulVS","MulVI","MulVL","MulVF","MulVD",
+ "MinV","MaxV",
"OrI","OrL",
"OrV",
"XorI","XorL",
@@ -4151,8 +4152,9 @@ bool MatchRule::is_vector() const {
"MulVB","MulVS","MulVI","MulVL","MulVF","MulVD",
"CMoveVD", "CMoveVF",
"DivVF","DivVD",
+ "MinV","MaxV",
"AbsVB","AbsVS","AbsVI","AbsVL","AbsVF","AbsVD",
- "NegVF","NegVD",
+ "NegVF","NegVD","NegVI",
"SqrtVD","SqrtVF",
"AndV" ,"XorV" ,"OrV",
"MaxV", "MinV",
@@ -4169,6 +4171,12 @@ bool MatchRule::is_vector() const {
"URShiftVB","URShiftVS","URShiftVI","URShiftVL",
"ReplicateB","ReplicateS","ReplicateI","ReplicateL","ReplicateF","ReplicateD",
"RoundDoubleModeV","RotateLeftV" , "RotateRightV", "LoadVector","StoreVector",
+ "LoadVectorGather", "StoreVectorScatter",
+ "VectorTest", "VectorLoadMask", "VectorStoreMask", "VectorBlend", "VectorInsert",
+ "VectorRearrange","VectorLoadShuffle", "VectorLoadConst",
+ "VectorCastB2X", "VectorCastS2X", "VectorCastI2X",
+ "VectorCastL2X", "VectorCastF2X", "VectorCastD2X",
+ "VectorMaskWrapper", "VectorMaskCmp", "VectorReinterpret",
"FmaVD", "FmaVF","PopCountVI",
// Next are not supported currently.
"PackB","PackS","PackI","PackL","PackF","PackD","Pack2L","Pack2D",
diff --git a/src/hotspot/share/adlc/output_c.cpp b/src/hotspot/share/adlc/output_c.cpp
index 674748dd1aa..c6083533b58 100644
--- a/src/hotspot/share/adlc/output_c.cpp
+++ b/src/hotspot/share/adlc/output_c.cpp
@@ -1150,10 +1150,9 @@ static void check_peepconstraints(FILE *fp, FormDict &globals, PeepMatch *pmatch
//
// Check for equivalence
//
- // fprintf(fp, "phase->eqv( ");
- // fprintf(fp, "inst%d->in(%d+%d) /* %s */, inst%d->in(%d+%d) /* %s */",
- // left_index, left_op_base, left_op_index, left_op,
- // right_index, right_op_base, right_op_index, right_op );
+ // fprintf(fp, "(inst%d->_opnds[%d]->reg(ra_,inst%d%s) /* %d.%s */ == /* %d.%s */ inst%d->_opnds[%d]->reg(ra_,inst%d%s)",
+ // left_index, left_op_index, left_index, left_reg_index, left_index, left_op,
+ // right_index, right_op, right_index, right_op_index, right_index, right_reg_index);
// fprintf(fp, ")");
//
switch( left_interface_type ) {
@@ -3001,7 +3000,7 @@ void ArchDesc::define_oper_interface(FILE *fp, OperandForm &oper, FormDict &glob
// Provide a non-NULL return for disp_as_type() that will allow adr_type()
// to correctly compute the access type for alias analysis.
//
- // See BugId 4796752, operand indOffset32X in i486.ad
+ // See BugId 4796752, operand indOffset32X in x86_32.ad
int idx = rep_var_to_constant_index(disp, oper, globals);
fprintf(fp," virtual const TypePtr *disp_as_type() const { return _c%d; }\n", idx);
}
@@ -4192,14 +4191,7 @@ void ArchDesc::buildFrameMethods(FILE *fp_cpp) {
fprintf(fp_cpp,"int Matcher::inline_cache_reg_encode() {");
fprintf(fp_cpp," return _regEncode[inline_cache_reg()]; }\n\n");
- // Interpreter's Method Register, mask definition, and encoding
- fprintf(fp_cpp,"OptoReg::Name Matcher::interpreter_method_reg() {");
- fprintf(fp_cpp," return OptoReg::Name(%s_num); }\n\n",
- _frame->_interpreter_method_reg);
- fprintf(fp_cpp,"int Matcher::interpreter_method_reg_encode() {");
- fprintf(fp_cpp," return _regEncode[interpreter_method_reg()]; }\n\n");
-
- // Interpreter's Frame Pointer Register, mask definition, and encoding
+ // Interpreter's Frame Pointer Register
fprintf(fp_cpp,"OptoReg::Name Matcher::interpreter_frame_pointer_reg() {");
if (_frame->_interpreter_frame_pointer_reg == NULL)
fprintf(fp_cpp," return OptoReg::Bad; }\n\n");
diff --git a/src/hotspot/share/asm/assembler.cpp b/src/hotspot/share/asm/assembler.cpp
index 3a0b3ce0b3b..66e3052bdeb 100644
--- a/src/hotspot/share/asm/assembler.cpp
+++ b/src/hotspot/share/asm/assembler.cpp
@@ -199,19 +199,6 @@ void Label::patch_instructions(MacroAssembler* masm) {
continue;
}
-#ifdef ASSERT
- // Cross-section branches only work if the
- // intermediate section boundaries are frozen.
- if (target_sect != branch_sect) {
- for (int n = MIN2(target_sect, branch_sect),
- nlimit = (target_sect + branch_sect) - n;
- n < nlimit; n++) {
- CodeSection* cs = cb->code_section(n);
- assert(cs->is_frozen(), "cross-section branch needs stable offsets");
- }
- }
-#endif //ASSERT
-
// Push the target offset into the branch instruction.
masm->pd_patch_instruction(branch, target, file, line);
}
diff --git a/src/hotspot/share/asm/assembler.hpp b/src/hotspot/share/asm/assembler.hpp
index a4c5d7d4554..8f76130522e 100644
--- a/src/hotspot/share/asm/assembler.hpp
+++ b/src/hotspot/share/asm/assembler.hpp
@@ -431,22 +431,6 @@ class AbstractAssembler : public ResourceObj {
return ptr;
}
- // Bootstrapping aid to cope with delayed determination of constants.
- // Returns a static address which will eventually contain the constant.
- // The value zero (NULL) stands instead of a constant which is still uncomputed.
- // Thus, the eventual value of the constant must not be zero.
- // This is fine, since this is designed for embedding object field
- // offsets in code which must be generated before the object class is loaded.
- // Field offsets are never zero, since an object's header (mark word)
- // is located at offset zero.
- RegisterOrConstant delayed_value(int(*value_fn)(), Register tmp, int offset = 0);
- RegisterOrConstant delayed_value(address(*value_fn)(), Register tmp, int offset = 0);
- virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, Register tmp, int offset) = 0;
- // Last overloading is platform-dependent; look in assembler_.cpp.
- static intptr_t* delayed_value_addr(int(*constant_fn)());
- static intptr_t* delayed_value_addr(address(*constant_fn)());
- static void update_delayed_values();
-
// Bang stack to trigger StackOverflowError at a safe location
// implementation delegates to machine-specific bang_stack_with_offset
void generate_stack_overflow_check( int frame_size_in_bytes );
diff --git a/src/hotspot/share/asm/codeBuffer.cpp b/src/hotspot/share/asm/codeBuffer.cpp
index 558dee23ecc..db2178f9977 100644
--- a/src/hotspot/share/asm/codeBuffer.cpp
+++ b/src/hotspot/share/asm/codeBuffer.cpp
@@ -140,7 +140,7 @@ CodeBuffer::~CodeBuffer() {
// Claim is that stack allocation ensures resources are cleaned up.
// This is resource clean up, let's hope that all were properly copied out.
- free_strings();
+ NOT_PRODUCT(free_strings();)
#ifdef ASSERT
// Save allocation type to execute assert in ~ResourceObj()
@@ -176,28 +176,6 @@ void CodeBuffer::initialize_section_size(CodeSection* cs, csize_t size) {
if (_insts.has_locs()) cs->initialize_locs(1);
}
-void CodeBuffer::freeze_section(CodeSection* cs) {
- CodeSection* next_cs = (cs == consts())? NULL: code_section(cs->index()+1);
- csize_t frozen_size = cs->size();
- if (next_cs != NULL) {
- frozen_size = next_cs->align_at_start(frozen_size);
- }
- address old_limit = cs->limit();
- address new_limit = cs->start() + frozen_size;
- relocInfo* old_locs_limit = cs->locs_limit();
- relocInfo* new_locs_limit = cs->locs_end();
- // Patch the limits.
- cs->_limit = new_limit;
- cs->_locs_limit = new_locs_limit;
- cs->_frozen = true;
- if (next_cs != NULL && !next_cs->is_allocated() && !next_cs->is_frozen()) {
- // Give remaining buffer space to the following section.
- next_cs->initialize(new_limit, old_limit - new_limit);
- next_cs->initialize_shared_locs(new_locs_limit,
- old_locs_limit - new_locs_limit);
- }
-}
-
void CodeBuffer::set_blob(BufferBlob* blob) {
_blob = blob;
if (blob != NULL) {
@@ -257,23 +235,19 @@ int CodeBuffer::locator(address addr) const {
return -1;
}
-address CodeBuffer::locator_address(int locator) const {
- if (locator < 0) return NULL;
- address start = code_section(locator_sect(locator))->start();
- return start + locator_pos(locator);
-}
bool CodeBuffer::is_backward_branch(Label& L) {
return L.is_bound() && insts_end() <= locator_address(L.loc());
}
+#ifndef PRODUCT
address CodeBuffer::decode_begin() {
address begin = _insts.start();
if (_decode_begin != NULL && _decode_begin > begin)
begin = _decode_begin;
return begin;
}
-
+#endif // !PRODUCT
GrowableArray* CodeBuffer::create_patch_overflow() {
if (_overflow_arena == NULL) {
@@ -505,18 +479,6 @@ void CodeBuffer::compute_final_layout(CodeBuffer* dest) const {
} else {
guarantee(padding == 0, "In first iteration no padding should be needed.");
}
- #ifdef ASSERT
- if (prev_cs != NULL && prev_cs->is_frozen() && n < (SECT_LIMIT - 1)) {
- // Make sure the ends still match up.
- // This is important because a branch in a frozen section
- // might target code in a following section, via a Label,
- // and without a relocation record. See Label::patch_instructions.
- address dest_start = buf+buf_offset;
- csize_t start2start = cs->start() - prev_cs->start();
- csize_t dest_start2start = dest_start - prev_dest_cs->start();
- assert(start2start == dest_start2start, "cannot stretch frozen sect");
- }
- #endif //ASSERT
prev_dest_cs = dest_cs;
prev_cs = cs;
}
@@ -752,7 +714,7 @@ void CodeBuffer::copy_code_to(CodeBlob* dest_blob) {
relocate_code_to(&dest);
// transfer strings and comments from buffer to blob
- dest_blob->set_strings(_code_strings);
+ NOT_PRODUCT(dest_blob->set_strings(_code_strings);)
// Done moving code bytes; were they the right size?
assert((int)align_up(dest.total_content_size(), oopSize) == dest_blob->content_size(), "sanity");
@@ -895,9 +857,6 @@ void CodeBuffer::expand(CodeSection* which_cs, csize_t amount) {
// Resizing must be allowed
{
if (blob() == NULL) return; // caller must check for blob == NULL
- for (int n = 0; n < (int)SECT_LIMIT; n++) {
- guarantee(!code_section(n)->is_frozen(), "resizing not allowed when frozen");
- }
}
// Figure new capacity for each section.
@@ -957,12 +916,11 @@ void CodeBuffer::expand(CodeSection* which_cs, csize_t amount) {
debug_only(Copy::fill_to_bytes(bxp->_total_start, bxp->_total_size,
badCodeHeapFreeVal));
- _decode_begin = NULL; // sanity
-
// Make certain that the new sections are all snugly inside the new blob.
verify_section_allocation();
#ifndef PRODUCT
+ _decode_begin = NULL; // sanity
if (PrintNMethods && (WizardMode || Verbose)) {
tty->print("expanded CodeBuffer:");
this->print();
@@ -1032,10 +990,6 @@ void CodeBuffer::log_section_sizes(const char* name) {
#ifndef PRODUCT
-void CodeSection::decode() {
- Disassembler::decode(start(), end());
-}
-
void CodeBuffer::block_comment(intptr_t offset, const char * comment) {
if (_collect_comments) {
_code_strings.add_comment(offset, comment);
@@ -1054,8 +1008,12 @@ class CodeString: public CHeapObj {
CodeString* _prev;
intptr_t _offset;
+ static long allocated_code_strings;
+
~CodeString() {
assert(_next == NULL && _prev == NULL, "wrong interface for freeing list");
+ allocated_code_strings--;
+ log_trace(codestrings)("Freeing CodeString [%s] (%p)", _string, (void*)_string);
os::free((void*)_string);
}
@@ -1064,12 +1022,14 @@ class CodeString: public CHeapObj {
public:
CodeString(const char * string, intptr_t offset = -1)
: _next(NULL), _prev(NULL), _offset(offset) {
+ allocated_code_strings++;
_string = os::strdup(string, mtCode);
+ log_trace(codestrings)("Created CodeString [%s] (%p)", _string, (void*)_string);
}
const char * string() const { return _string; }
intptr_t offset() const { assert(_offset >= 0, "offset for non comment?"); return _offset; }
- CodeString* next() const { return _next; }
+ CodeString* next() const { return _next; }
void set_next(CodeString* next) {
_next = next;
@@ -1094,6 +1054,10 @@ class CodeString: public CHeapObj {
}
};
+// For tracing statistics only. Updated with plain (non-atomic) increment and
+// decrement, so the reported value may be inexact.
+long CodeString::allocated_code_strings = 0;
+
CodeString* CodeStrings::find(intptr_t offset) const {
CodeString* a = _strings->first_comment();
while (a != NULL && a->offset() != offset) {
@@ -1116,7 +1080,7 @@ void CodeStrings::add_comment(intptr_t offset, const char * comment) {
CodeString* c = new CodeString(comment, offset);
CodeString* inspos = (_strings == NULL) ? NULL : find_last(offset);
- if (inspos) {
+ if (inspos != NULL) {
// insert after already existing comments with same offset
c->set_next(inspos->next());
inspos->set_next(c);
@@ -1130,21 +1094,10 @@ void CodeStrings::add_comment(intptr_t offset, const char * comment) {
}
}
-void CodeStrings::assign(CodeStrings& other) {
- other.check_valid();
- assert(is_null(), "Cannot assign onto non-empty CodeStrings");
- _strings = other._strings;
- _strings_last = other._strings_last;
-#ifdef ASSERT
- _defunct = false;
-#endif
- other.set_null_and_invalidate();
-}
-
// Deep copy of CodeStrings for consistent memory management.
-// Only used for actual disassembly so this is cheaper than reference counting
-// for the "normal" fastdebug case.
void CodeStrings::copy(CodeStrings& other) {
+ log_debug(codestrings)("Copying %d Codestring(s)", other.count());
+
other.check_valid();
check_valid();
assert(is_null(), "Cannot copy onto non-empty CodeStrings");
@@ -1152,7 +1105,11 @@ void CodeStrings::copy(CodeStrings& other) {
CodeString** ps = &_strings;
CodeString* prev = NULL;
while (n != NULL) {
- *ps = new CodeString(n->string(),n->offset());
+ if (n->is_comment()) {
+ *ps = new CodeString(n->string(), n->offset());
+ } else {
+ *ps = new CodeString(n->string());
+ }
(*ps)->_prev = prev;
prev = *ps;
ps = &((*ps)->_next);
@@ -1162,13 +1119,6 @@ void CodeStrings::copy(CodeStrings& other) {
const char* CodeStrings::_prefix = " ;; "; // default: can be changed via set_prefix
-// Check if any block comments are pending for the given offset.
-bool CodeStrings::has_block_comment(intptr_t offset) const {
- if (_strings == NULL) return false;
- CodeString* c = find(offset);
- return c != NULL;
-}
-
void CodeStrings::print_block_comment(outputStream* stream, intptr_t offset) const {
check_valid();
if (_strings != NULL) {
@@ -1184,8 +1134,19 @@ void CodeStrings::print_block_comment(outputStream* stream, intptr_t offset) con
}
}
-// Also sets isNull()
+// Walks the _next chain starting at _strings and returns the number of
+// CodeString nodes visited; a NULL _strings list yields 0.
+int CodeStrings::count() const {
+  int total = 0;
+  for (CodeString* node = _strings; node != NULL; node = node->_next) {
+    total++;
+  }
+  return total;
+}
+
+// Also sets is_null()
void CodeStrings::free() {
+ log_debug(codestrings)("Freeing %d out of approx. %ld CodeString(s), ", count(), CodeString::allocated_code_strings);
CodeString* n = _strings;
while (n) {
// unlink the node from the list saving a pointer to the next
@@ -1215,15 +1176,14 @@ const char* CodeStrings::add_string(const char * string) {
void CodeBuffer::decode() {
ttyLocker ttyl;
- Disassembler::decode(decode_begin(), insts_end(), tty);
+ Disassembler::decode(decode_begin(), insts_end(), tty NOT_PRODUCT(COMMA &strings()));
_decode_begin = insts_end();
}
void CodeSection::print(const char* name) {
csize_t locs_size = locs_end() - locs_start();
- tty->print_cr(" %7s.code = " PTR_FORMAT " : " PTR_FORMAT " : " PTR_FORMAT " (%d of %d)%s",
- name, p2i(start()), p2i(end()), p2i(limit()), size(), capacity(),
- is_frozen()? " [frozen]": "");
+ tty->print_cr(" %7s.code = " PTR_FORMAT " : " PTR_FORMAT " : " PTR_FORMAT " (%d of %d)",
+ name, p2i(start()), p2i(end()), p2i(limit()), size(), capacity());
tty->print_cr(" %7s.locs = " PTR_FORMAT " : " PTR_FORMAT " : " PTR_FORMAT " (%d of %d) point=%d",
name, p2i(locs_start()), p2i(locs_end()), p2i(locs_limit()), locs_size, locs_capacity(), locs_point_off());
if (PrintRelocations) {
@@ -1246,10 +1206,4 @@ void CodeBuffer::print() {
}
}
-// Directly disassemble code buffer.
-void CodeBuffer::decode(address start, address end) {
- ttyLocker ttyl;
- Disassembler::decode(this, start, end, tty);
-}
-
#endif // PRODUCT
diff --git a/src/hotspot/share/asm/codeBuffer.hpp b/src/hotspot/share/asm/codeBuffer.hpp
index 6363ea41193..255342cbbd0 100644
--- a/src/hotspot/share/asm/codeBuffer.hpp
+++ b/src/hotspot/share/asm/codeBuffer.hpp
@@ -92,7 +92,6 @@ class CodeSection {
relocInfo* _locs_limit; // first byte after relocation information buf
address _locs_point; // last relocated position (grows upward)
bool _locs_own; // did I allocate the locs myself?
- bool _frozen; // no more expansion of this section
bool _scratch_emit; // Buffer is used for scratch emit, don't relocate.
char _index; // my section number (SECT_INST, etc.)
CodeBuffer* _outer; // enclosing CodeBuffer
@@ -109,7 +108,6 @@ class CodeSection {
_locs_limit = NULL;
_locs_point = NULL;
_locs_own = false;
- _frozen = false;
_scratch_emit = false;
debug_only(_index = (char)-1);
debug_only(_outer = (CodeBuffer*)badAddress);
@@ -161,12 +159,10 @@ class CodeSection {
address locs_point() const { return _locs_point; }
csize_t locs_point_off() const{ return (csize_t)(_locs_point - _start); }
csize_t locs_capacity() const { return (csize_t)(_locs_limit - _locs_start); }
- csize_t locs_remaining()const { return (csize_t)(_locs_limit - _locs_end); }
int index() const { return _index; }
bool is_allocated() const { return _start != NULL; }
bool is_empty() const { return _start == _end; }
- bool is_frozen() const { return _frozen; }
bool has_locs() const { return _locs_end != NULL; }
// Mark scratch buffer.
@@ -184,8 +180,6 @@ class CodeSection {
void set_end(address pc) { assert(allocates2(pc), "not in CodeBuffer memory: " INTPTR_FORMAT " <= " INTPTR_FORMAT " <= " INTPTR_FORMAT, p2i(_start), p2i(pc), p2i(_limit)); _end = pc; }
void set_mark(address pc) { assert(contains2(pc), "not in codeBuffer");
_mark = pc; }
- void set_mark_off(int offset) { assert(contains2(offset+_start),"not in codeBuffer");
- _mark = offset + _start; }
void set_mark() { _mark = _end; }
void clear_mark() { _mark = NULL; }
@@ -259,10 +253,6 @@ class CodeSection {
csize_t align_at_start(csize_t off) const { return (csize_t) align_up(off, alignment()); }
- // Mark a section frozen. Assign its remaining space to
- // the following section. It will never expand after this point.
- inline void freeze(); // { _outer->freeze_section(this); }
-
// Ensure there's enough space left in the current section.
// Return true if there was an expansion.
bool maybe_expand_to_ensure_remaining(csize_t amount);
@@ -284,20 +274,18 @@ class CodeStrings {
bool _defunct; // Zero bit pattern is "valid", see memset call in decode_env::decode_env
#endif
static const char* _prefix; // defaults to " ;; "
-#endif
CodeString* find(intptr_t offset) const;
CodeString* find_last(intptr_t offset) const;
void set_null_and_invalidate() {
-#ifndef PRODUCT
_strings = NULL;
_strings_last = NULL;
#ifdef ASSERT
_defunct = true;
-#endif
#endif
}
+#endif
public:
CodeStrings() {
@@ -310,6 +298,7 @@ class CodeStrings {
#endif
}
+#ifndef PRODUCT
bool is_null() {
#ifdef ASSERT
return _strings == NULL;
@@ -318,30 +307,25 @@ class CodeStrings {
#endif
}
- const char* add_string(const char * string) PRODUCT_RETURN_(return NULL;);
+ const char* add_string(const char * string);
- void add_comment(intptr_t offset, const char * comment) PRODUCT_RETURN;
- bool has_block_comment(intptr_t offset) const;
- void print_block_comment(outputStream* stream, intptr_t offset) const PRODUCT_RETURN;
- // MOVE strings from other to this; invalidate other.
- void assign(CodeStrings& other) PRODUCT_RETURN;
+ void add_comment(intptr_t offset, const char * comment);
+ void print_block_comment(outputStream* stream, intptr_t offset) const;
+ int count() const;
// COPY strings from other to this; leave other valid.
- void copy(CodeStrings& other) PRODUCT_RETURN;
+ void copy(CodeStrings& other);
// FREE strings; invalidate this.
- void free() PRODUCT_RETURN;
+ void free();
// Guarantee that _strings are used at most once; assign and free invalidate a buffer.
inline void check_valid() const {
-#ifdef ASSERT
assert(!_defunct, "Use of invalid CodeStrings");
-#endif
}
static void set_prefix(const char *prefix) {
-#ifndef PRODUCT
_prefix = prefix;
-#endif
}
+#endif // !PRODUCT
};
// A CodeBuffer describes a memory space into which assembly
@@ -410,8 +394,7 @@ class CodeBuffer: public StackObj {
csize_t _total_size; // size in bytes of combined memory buffer
OopRecorder* _oop_recorder;
- CodeStrings _code_strings;
- bool _collect_comments; // Indicate if we need to collect block comments at all.
+
OopRecorder _default_oop_recorder; // override with initialize_oop_recorder
Arena* _overflow_arena;
@@ -421,8 +404,12 @@ class CodeBuffer: public StackObj {
bool _immutable_PIC;
#endif
- address _decode_begin; // start address for decode
+#ifndef PRODUCT
+ CodeStrings _code_strings;
+ bool _collect_comments; // Indicate if we need to collect block comments at all.
+ address _decode_begin; // start address for decode
address decode_begin();
+#endif
void initialize_misc(const char * name) {
// all pointers other than code_start/end and those inside the sections
@@ -431,14 +418,15 @@ class CodeBuffer: public StackObj {
_before_expand = NULL;
_blob = NULL;
_oop_recorder = NULL;
- _decode_begin = NULL;
_overflow_arena = NULL;
- _code_strings = CodeStrings();
_last_insn = NULL;
#if INCLUDE_AOT
_immutable_PIC = false;
#endif
+#ifndef PRODUCT
+ _decode_begin = NULL;
+ _code_strings = CodeStrings();
// Collect block comments, but restrict collection to cases where a disassembly is output.
_collect_comments = ( PrintAssembly
|| PrintStubCode
@@ -447,6 +435,7 @@ class CodeBuffer: public StackObj {
|| PrintSignatureHandlers
|| UnlockDiagnosticVMOptions
);
+#endif
}
void initialize(address code_start, csize_t code_size) {
@@ -464,8 +453,6 @@ class CodeBuffer: public StackObj {
void initialize_section_size(CodeSection* cs, csize_t size);
- void freeze_section(CodeSection* cs);
-
// helper for CodeBuffer::expand()
void take_over_code_from(CodeBuffer* cs);
@@ -557,7 +544,11 @@ class CodeBuffer: public StackObj {
static int locator_sect(int locator) { return locator & sect_mask; }
static int locator(int pos, int sect) { return (pos << sect_bits) | sect; }
int locator(address addr) const;
- address locator_address(int locator) const;
+ address locator_address(int locator) const {
+ if (locator < 0) return NULL;
+ address start = code_section(locator_sect(locator))->start();
+ return start + locator_pos(locator);
+ }
// Heuristic for pre-packing the taken/not-taken bit of a predicted branch.
bool is_backward_branch(Label& L);
@@ -574,10 +565,8 @@ class CodeBuffer: public StackObj {
address insts_begin() const { return _insts.start(); }
address insts_end() const { return _insts.end(); }
void set_insts_end(address end) { _insts.set_end(end); }
- address insts_limit() const { return _insts.limit(); }
address insts_mark() const { return _insts.mark(); }
void set_insts_mark() { _insts.set_mark(); }
- void clear_insts_mark() { _insts.clear_mark(); }
// is there anything in the buffer other than the current section?
bool is_pure() const { return insts_size() == total_content_size(); }
@@ -635,35 +624,21 @@ class CodeBuffer: public StackObj {
// Override default oop recorder.
void initialize_oop_recorder(OopRecorder* r);
- OopRecorder* oop_recorder() const { return _oop_recorder; }
- CodeStrings& strings() { return _code_strings; }
+ OopRecorder* oop_recorder() const { return _oop_recorder; }
address last_insn() const { return _last_insn; }
void set_last_insn(address a) { _last_insn = a; }
void clear_last_insn() { set_last_insn(NULL); }
+#ifndef PRODUCT
+ CodeStrings& strings() { return _code_strings; }
+
void free_strings() {
if (!_code_strings.is_null()) {
_code_strings.free(); // sets _strings Null as a side-effect.
}
}
-
- // Directly disassemble code buffer.
- // Print the comment associated with offset on stream, if there is one.
- virtual void print_block_comment(outputStream* stream, address block_begin) {
-#ifndef PRODUCT
- intptr_t offset = (intptr_t)(block_begin - _total_start); // I assume total_start is not correct for all code sections.
- _code_strings.print_block_comment(stream, offset);
-#endif
- }
- bool has_block_comment(address block_begin) {
-#ifndef PRODUCT
- intptr_t offset = (intptr_t)(block_begin - _total_start); // I assume total_start is not correct for all code sections.
- return _code_strings.has_block_comment(offset);
-#else
- return false;
#endif
- }
// Code generation
void relocate(address at, RelocationHolder const& rspec, int format = 0) {
@@ -688,9 +663,6 @@ class CodeBuffer: public StackObj {
}
}
- // Transform an address from the code in this code buffer to a specified code buffer
- address transform_address(const CodeBuffer &cb, address addr) const;
-
void block_comment(intptr_t offset, const char * comment) PRODUCT_RETURN;
const char* code_string(const char* str) PRODUCT_RETURN_(return NULL;);
@@ -719,11 +691,6 @@ class CodeBuffer: public StackObj {
};
-
-inline void CodeSection::freeze() {
- _outer->freeze_section(this);
-}
-
inline bool CodeSection::maybe_expand_to_ensure_remaining(csize_t amount) {
if (remaining() < amount) { _outer->expand(this, amount); return true; }
return false;
diff --git a/src/hotspot/share/c1/c1_CFGPrinter.cpp b/src/hotspot/share/c1/c1_CFGPrinter.cpp
index 21244d92a18..9cd617d5428 100644
--- a/src/hotspot/share/c1/c1_CFGPrinter.cpp
+++ b/src/hotspot/share/c1/c1_CFGPrinter.cpp
@@ -325,7 +325,7 @@ void CFGPrinterOutput::print_intervals(IntervalList* intervals, const char* name
for (int i = 0; i < intervals->length(); i++) {
if (intervals->at(i) != NULL) {
- intervals->at(i)->print_on(output());
+ intervals->at(i)->print_on(output(), true);
}
}
diff --git a/src/hotspot/share/c1/c1_CodeStubs.hpp b/src/hotspot/share/c1/c1_CodeStubs.hpp
index cd45e39f0b6..961f33ca69e 100644
--- a/src/hotspot/share/c1/c1_CodeStubs.hpp
+++ b/src/hotspot/share/c1/c1_CodeStubs.hpp
@@ -89,6 +89,28 @@ class CodeStubList: public GrowableArray {
}
};
+class C1SafepointPollStub: public CodeStub {
+ private:
+ uintptr_t _safepoint_offset;
+
+ public:
+ C1SafepointPollStub() :
+ _safepoint_offset(0) {
+ }
+
+ uintptr_t safepoint_offset() { return _safepoint_offset; }
+ void set_safepoint_offset(uintptr_t safepoint_offset) { _safepoint_offset = safepoint_offset; }
+
+ virtual void emit_code(LIR_Assembler* e);
+ virtual void visit(LIR_OpVisitState* visitor) {
+ // don't pass in the code emit info since it's processed in the fast path
+ visitor->do_slow_case();
+ }
+#ifndef PRODUCT
+ virtual void print_name(outputStream* out) const { out->print("C1SafepointPollStub"); }
+#endif // PRODUCT
+};
+
class CounterOverflowStub: public CodeStub {
private:
CodeEmitInfo* _info;
diff --git a/src/hotspot/share/c1/c1_IR.hpp b/src/hotspot/share/c1/c1_IR.hpp
index ab63b32c14b..10501291e88 100644
--- a/src/hotspot/share/c1/c1_IR.hpp
+++ b/src/hotspot/share/c1/c1_IR.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -244,7 +244,11 @@ class IRScopeDebugInfo: public CompilationResourceObj {
bool reexecute = topmost ? should_reexecute() : false;
bool return_oop = false; // This flag will be ignored since it used only for C2 with escape analysis.
bool rethrow_exception = false;
- recorder->describe_scope(pc_offset, methodHandle(), scope()->method(), bci(), reexecute, rethrow_exception, is_method_handle_invoke, return_oop, locvals, expvals, monvals);
+ bool has_ea_local_in_scope = false;
+ bool arg_escape = false;
+ recorder->describe_scope(pc_offset, methodHandle(), scope()->method(), bci(),
+ reexecute, rethrow_exception, is_method_handle_invoke, return_oop,
+ has_ea_local_in_scope, arg_escape, locvals, expvals, monvals);
}
};
diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp
index e0e2b2fb6e4..1d6d33a2be1 100644
--- a/src/hotspot/share/c1/c1_LIR.cpp
+++ b/src/hotspot/share/c1/c1_LIR.cpp
@@ -23,11 +23,13 @@
*/
#include "precompiled.hpp"
+#include "c1/c1_CodeStubs.hpp"
#include "c1/c1_InstructionPrinter.hpp"
#include "c1/c1_LIR.hpp"
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_ValueStack.hpp"
#include "ci/ciInstance.hpp"
+#include "runtime/safepointMechanism.inline.hpp"
#include "runtime/sharedRuntime.hpp"
Register LIR_OprDesc::as_register() const {
@@ -447,7 +449,6 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
case lir_fld: // input always valid, result and info always invalid
case lir_push: // input always valid, result and info always invalid
case lir_pop: // input always valid, result and info always invalid
- case lir_return: // input always valid, result and info always invalid
case lir_leal: // input and result always valid, info always invalid
case lir_monaddr: // input and result always valid, info always invalid
case lir_null_check: // input and info always valid, result always invalid
@@ -463,6 +464,19 @@ void LIR_OpVisitState::visit(LIR_Op* op) {
break;
}
+ case lir_return:
+ {
+ assert(op->as_OpReturn() != NULL, "must be");
+ LIR_OpReturn* op_ret = (LIR_OpReturn*)op;
+
+ if (op_ret->_info) do_info(op_ret->_info);
+ if (op_ret->_opr->is_valid()) do_input(op_ret->_opr);
+ if (op_ret->_result->is_valid()) do_output(op_ret->_result);
+ if (op_ret->stub() != NULL) do_stub(op_ret->stub());
+
+ break;
+ }
+
case lir_safepoint:
{
assert(op->as_Op1() != NULL, "must be");
@@ -948,6 +962,15 @@ bool LIR_OpVisitState::no_operands(LIR_Op* op) {
}
#endif
+// LIR_OpReturn
+LIR_OpReturn::LIR_OpReturn(LIR_Opr opr) :
+ LIR_Op1(lir_return, opr, (CodeEmitInfo*)NULL /* info */),
+ _stub(NULL) {
+ if (VM_Version::supports_stack_watermark_barrier()) {
+ _stub = new C1SafepointPollStub();
+ }
+}
+
//---------------------------------------------------
diff --git a/src/hotspot/share/c1/c1_LIR.hpp b/src/hotspot/share/c1/c1_LIR.hpp
index 787a22fff11..dcd2d50b327 100644
--- a/src/hotspot/share/c1/c1_LIR.hpp
+++ b/src/hotspot/share/c1/c1_LIR.hpp
@@ -36,6 +36,7 @@ class LIR_Assembler;
class CodeEmitInfo;
class CodeStub;
class CodeStubList;
+class C1SafepointPollStub;
class ArrayCopyStub;
class LIR_Op;
class ciType;
@@ -856,6 +857,7 @@ class LIR_Op1;
class LIR_OpBranch;
class LIR_OpConvert;
class LIR_OpAllocObj;
+class LIR_OpReturn;
class LIR_OpRoundFP;
class LIR_Op2;
class LIR_OpDelay;
@@ -1116,6 +1118,7 @@ class LIR_Op: public CompilationResourceObj {
virtual LIR_OpAllocObj* as_OpAllocObj() { return NULL; }
virtual LIR_OpRoundFP* as_OpRoundFP() { return NULL; }
virtual LIR_OpBranch* as_OpBranch() { return NULL; }
+ virtual LIR_OpReturn* as_OpReturn() { return NULL; }
virtual LIR_OpRTCall* as_OpRTCall() { return NULL; }
virtual LIR_OpConvert* as_OpConvert() { return NULL; }
virtual LIR_Op0* as_Op0() { return NULL; }
@@ -1439,6 +1442,18 @@ class LIR_OpBranch: public LIR_Op {
virtual void print_instr(outputStream* out) const PRODUCT_RETURN;
};
+class LIR_OpReturn: public LIR_Op1 {
+ friend class LIR_OpVisitState;
+
+ private:
+ C1SafepointPollStub* _stub;
+
+ public:
+ LIR_OpReturn(LIR_Opr opr);
+
+ C1SafepointPollStub* stub() const { return _stub; }
+ virtual LIR_OpReturn* as_OpReturn() { return this; }
+};
class ConversionStub;
@@ -2094,9 +2109,8 @@ class LIR_List: public CompilationResourceObj {
void metadata2reg (Metadata* o, LIR_Opr reg) { assert(reg->type() == T_METADATA, "bad reg"); append(new LIR_Op1(lir_move, LIR_OprFact::metadataConst(o), reg)); }
void klass2reg_patch(Metadata* o, LIR_Opr reg, CodeEmitInfo* info);
- void return_op(LIR_Opr result) { append(new LIR_Op1(lir_return, result)); }
-
void safepoint(LIR_Opr tmp, CodeEmitInfo* info) { append(new LIR_Op1(lir_safepoint, tmp, info)); }
+ void return_op(LIR_Opr result) { append(new LIR_OpReturn(result)); }
void convert(Bytecodes::Code code, LIR_Opr left, LIR_Opr dst, ConversionStub* stub = NULL/*, bool is_32bit = false*/) { append(new LIR_OpConvert(code, left, dst, stub)); }
diff --git a/src/hotspot/share/c1/c1_LIRAssembler.cpp b/src/hotspot/share/c1/c1_LIRAssembler.cpp
index 370f2dad241..c625ac063a6 100644
--- a/src/hotspot/share/c1/c1_LIRAssembler.cpp
+++ b/src/hotspot/share/c1/c1_LIRAssembler.cpp
@@ -521,9 +521,15 @@ void LIR_Assembler::emit_op1(LIR_Op1* op) {
break;
}
- case lir_return:
- return_op(op->in_opr());
+ case lir_return: {
+ assert(op->as_OpReturn() != NULL, "sanity");
+ LIR_OpReturn *ret_op = (LIR_OpReturn*)op;
+ return_op(ret_op->in_opr(), ret_op->stub());
+ if (ret_op->stub() != NULL) {
+ append_code_stub(ret_op->stub());
+ }
break;
+ }
case lir_safepoint:
if (compilation()->debug_info_recorder()->last_pc_offset() == code_offset()) {
diff --git a/src/hotspot/share/c1/c1_LIRAssembler.hpp b/src/hotspot/share/c1/c1_LIRAssembler.hpp
index ab4f82ddcd3..a40b084a7bf 100644
--- a/src/hotspot/share/c1/c1_LIRAssembler.hpp
+++ b/src/hotspot/share/c1/c1_LIRAssembler.hpp
@@ -157,7 +157,7 @@ class LIR_Assembler: public CompilationResourceObj {
// particular sparc uses this for delay slot filling.
void peephole(LIR_List* list);
- void return_op(LIR_Opr result);
+ void return_op(LIR_Opr result, C1SafepointPollStub* code_stub);
// returns offset of poll instruction
int safepoint_poll(LIR_Opr result, CodeEmitInfo* info);
diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp
index f5713821a81..782fa7ada03 100644
--- a/src/hotspot/share/c1/c1_LinearScan.cpp
+++ b/src/hotspot/share/c1/c1_LinearScan.cpp
@@ -3212,6 +3212,12 @@ void LinearScan::print_reg_num(outputStream* out, int reg_num) {
return;
}
+ LIR_Opr opr = get_operand(reg_num);
+ assert(opr->is_valid(), "unknown register");
+ opr->print(out);
+}
+
+LIR_Opr LinearScan::get_operand(int reg_num) {
LIR_Opr opr = LIR_OprFact::illegal();
#ifdef X86
@@ -3231,9 +3237,9 @@ void LinearScan::print_reg_num(outputStream* out, int reg_num) {
opr = LIR_OprFact::single_xmm(reg_num - pd_first_xmm_reg);
#endif
} else {
- assert(false, "unknown register");
+ // reg_num == -1 or a virtual register, return the illegal operand
}
- opr->print(out);
+ return opr;
}
Interval* LinearScan::find_interval_at(int reg_num) const {
@@ -4598,7 +4604,7 @@ bool Interval::intersects_any_children_of(Interval* interval) const {
#ifndef PRODUCT
-void Interval::print_on(outputStream* out) const {
+void Interval::print_on(outputStream* out, bool is_cfg_printer) const {
const char* SpillState2Name[] = { "no definition", "no spill store", "one spill store", "store at definition", "start in memory", "no optimization" };
const char* UseKind2Name[] = { "N", "L", "S", "M" };
@@ -4608,18 +4614,29 @@ void Interval::print_on(outputStream* out) const {
} else {
type_name = type2name(type());
}
-
out->print("%d %s ", reg_num(), type_name);
- if (reg_num() < LIR_OprDesc::vreg_base) {
- LinearScan::print_reg_num(out, assigned_reg());
- } else if (assigned_reg() != -1 && (LinearScan::num_physical_regs(type()) == 1 || assigned_regHi() != -1)) {
- LinearScan::calc_operand_for_interval(this)->print(out);
+
+ if (is_cfg_printer) {
+ // Special version for compatibility with C1 Visualizer.
+ LIR_Opr opr = LinearScan::get_operand(reg_num());
+ if (opr->is_valid()) {
+ out->print("\"");
+ opr->print(out);
+ out->print("\" ");
+ }
} else {
- // Virtual register that has no assigned register yet.
- out->print("[ANY]");
+ // Improved output for normal debugging.
+ if (reg_num() < LIR_OprDesc::vreg_base) {
+ LinearScan::print_reg_num(out, assigned_reg());
+ } else if (assigned_reg() != -1 && (LinearScan::num_physical_regs(type()) == 1 || assigned_regHi() != -1)) {
+ LinearScan::calc_operand_for_interval(this)->print(out);
+ } else {
+ // Virtual register that has no assigned register yet.
+ out->print("[ANY]");
+ }
+ out->print(" ");
}
-
- out->print(" %d %d ", split_parent()->reg_num(), (register_hint(false) != NULL ? register_hint(false)->reg_num() : -1));
+ out->print("%d %d ", split_parent()->reg_num(), (register_hint(false) != NULL ? register_hint(false)->reg_num() : -1));
// print ranges
Range* cur = _first;
@@ -5436,7 +5453,7 @@ bool LinearScanWalker::alloc_free_reg(Interval* cur) {
hint_reg = register_hint->assigned_reg();
hint_regHi = register_hint->assigned_regHi();
- if (allocator()->is_precolored_cpu_interval(register_hint)) {
+ if (_num_phys_regs == 2 && allocator()->is_precolored_cpu_interval(register_hint)) {
assert(hint_reg != any_reg && hint_regHi == any_reg, "must be for fixed intervals");
hint_regHi = hint_reg + 1; // connect e.g. eax-edx
}
@@ -6410,7 +6427,7 @@ void ControlFlowOptimizer::delete_jumps_to_return(BlockList* code) {
if (pred_last_branch->block() == block && pred_last_branch->cond() == lir_cond_always && pred_last_branch->info() == NULL) {
// replace the jump to a return with a direct return
// Note: currently the edge between the blocks is not deleted
- pred_instructions->at_put(pred_instructions->length() - 1, new LIR_Op1(lir_return, return_opr));
+ pred_instructions->at_put(pred_instructions->length() - 1, new LIR_OpReturn(return_opr));
#ifdef ASSERT
return_converted.set_bit(pred->block_id());
#endif
diff --git a/src/hotspot/share/c1/c1_LinearScan.hpp b/src/hotspot/share/c1/c1_LinearScan.hpp
index 8d86a0d7836..0249453d9c1 100644
--- a/src/hotspot/share/c1/c1_LinearScan.hpp
+++ b/src/hotspot/share/c1/c1_LinearScan.hpp
@@ -369,6 +369,7 @@ class LinearScan : public CompilationResourceObj {
void print_lir(int level, const char* label, bool hir_valid = true);
static void print_reg_num(int reg_num) { print_reg_num(tty, reg_num); }
static void print_reg_num(outputStream* out, int reg_num);
+ static LIR_Opr get_operand(int reg_num);
#endif
#ifdef ASSERT
@@ -633,7 +634,11 @@ class Interval : public CompilationResourceObj {
// printing
#ifndef PRODUCT
void print() const { print_on(tty); }
- void print_on(outputStream* out) const;
+ void print_on(outputStream* out) const {
+ print_on(out, false);
+ }
+ // Special version for compatibility with C1 Visualizer.
+ void print_on(outputStream* out, bool is_cfg_printer) const;
// Used for debugging
void print_parent() const;
diff --git a/src/hotspot/share/c1/c1_Runtime1.cpp b/src/hotspot/share/c1/c1_Runtime1.cpp
index 9c433f648ec..6c776670cac 100644
--- a/src/hotspot/share/c1/c1_Runtime1.cpp
+++ b/src/hotspot/share/c1/c1_Runtime1.cpp
@@ -63,6 +63,7 @@
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/javaCalls.hpp"
#include "runtime/sharedRuntime.hpp"
+#include "runtime/stackWatermarkSet.hpp"
#include "runtime/threadCritical.hpp"
#include "runtime/vframe.inline.hpp"
#include "runtime/vframeArray.hpp"
@@ -505,6 +506,17 @@ JRT_ENTRY_NO_ASYNC(static address, exception_handler_for_pc_helper(JavaThread* t
thread->set_is_method_handle_return(false);
Handle exception(thread, ex);
+
+ // This function is called when we are about to throw an exception. Therefore,
+ // we have to poll the stack watermark barrier to make sure that not yet safe
+ // stack frames are made safe before returning into them.
+ if (thread->last_frame().cb() == Runtime1::blob_for(Runtime1::handle_exception_from_callee_id)) {
+ // The Runtime1::handle_exception_from_callee_id handler is invoked after the
+ // frame has been unwound. It instead builds its own stub frame, to call the
+ // runtime. But the throwing frame has already been unwound here.
+ StackWatermarkSet::after_unwind(thread);
+ }
+
nm = CodeCache::find_nmethod(pc);
assert(nm != NULL, "this is not an nmethod");
// Adjust the pc as needed/
diff --git a/src/hotspot/share/ci/bcEscapeAnalyzer.hpp b/src/hotspot/share/ci/bcEscapeAnalyzer.hpp
index 8b84a24c449..db77b9c580e 100644
--- a/src/hotspot/share/ci/bcEscapeAnalyzer.hpp
+++ b/src/hotspot/share/ci/bcEscapeAnalyzer.hpp
@@ -88,8 +88,6 @@ class BCEscapeAnalyzer : public ResourceObj {
void set_modified(ArgumentMap vars, int offs, int size);
bool is_recursive_call(ciMethod* callee);
- void add_dependence(ciKlass *klass, ciMethod *meth);
- void propagate_dependencies(ciMethod *meth);
void invoke(StateInfo &state, Bytecodes::Code code, ciMethod* target, ciKlass* holder);
void iterate_one_block(ciBlock *blk, StateInfo &state, GrowableArray &successors);
diff --git a/src/hotspot/share/ci/ciEnv.cpp b/src/hotspot/share/ci/ciEnv.cpp
index 43a66cc7a08..f4d550460d6 100644
--- a/src/hotspot/share/ci/ciEnv.cpp
+++ b/src/hotspot/share/ci/ciEnv.cpp
@@ -59,6 +59,7 @@
#include "oops/objArrayOop.inline.hpp"
#include "oops/oop.inline.hpp"
#include "prims/jvmtiExport.hpp"
+#include "prims/methodHandles.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/init.hpp"
#include "runtime/reflection.hpp"
@@ -241,6 +242,7 @@ bool ciEnv::cache_jvmti_state() {
_jvmti_can_post_on_exceptions = JvmtiExport::can_post_on_exceptions();
_jvmti_can_pop_frame = JvmtiExport::can_pop_frame();
_jvmti_can_get_owned_monitor_info = JvmtiExport::can_get_owned_monitor_info();
+ _jvmti_can_walk_any_space = JvmtiExport::can_walk_any_space();
return _task != NULL && _task->method()->is_old();
}
@@ -270,6 +272,10 @@ bool ciEnv::jvmti_state_changed() const {
JvmtiExport::can_get_owned_monitor_info()) {
return true;
}
+ if (!_jvmti_can_walk_any_space &&
+ JvmtiExport::can_walk_any_space()) {
+ return true;
+ }
return false;
}
@@ -755,34 +761,29 @@ Method* ciEnv::lookup_method(ciInstanceKlass* accessor,
Symbol* sig,
Bytecodes::Code bc,
constantTag tag) {
- // Accessibility checks are performed in ciEnv::get_method_by_index_impl.
- assert(check_klass_accessibility(accessor, holder->get_Klass()), "holder not accessible");
-
InstanceKlass* accessor_klass = accessor->get_instanceKlass();
Klass* holder_klass = holder->get_Klass();
- Method* dest_method;
- LinkInfo link_info(holder_klass, name, sig, accessor_klass, LinkInfo::AccessCheck::required, LinkInfo::LoaderConstraintCheck::required, tag);
+
+ // Accessibility checks are performed in ciEnv::get_method_by_index_impl.
+ assert(check_klass_accessibility(accessor, holder_klass), "holder not accessible");
+
+ LinkInfo link_info(holder_klass, name, sig, accessor_klass,
+ LinkInfo::AccessCheck::required,
+ LinkInfo::LoaderConstraintCheck::required,
+ tag);
switch (bc) {
- case Bytecodes::_invokestatic:
- dest_method =
- LinkResolver::resolve_static_call_or_null(link_info);
- break;
- case Bytecodes::_invokespecial:
- dest_method =
- LinkResolver::resolve_special_call_or_null(link_info);
- break;
- case Bytecodes::_invokeinterface:
- dest_method =
- LinkResolver::linktime_resolve_interface_method_or_null(link_info);
- break;
- case Bytecodes::_invokevirtual:
- dest_method =
- LinkResolver::linktime_resolve_virtual_method_or_null(link_info);
- break;
- default: ShouldNotReachHere();
+ case Bytecodes::_invokestatic:
+ return LinkResolver::resolve_static_call_or_null(link_info);
+ case Bytecodes::_invokespecial:
+ return LinkResolver::resolve_special_call_or_null(link_info);
+ case Bytecodes::_invokeinterface:
+ return LinkResolver::linktime_resolve_interface_method_or_null(link_info);
+ case Bytecodes::_invokevirtual:
+ return LinkResolver::linktime_resolve_virtual_method_or_null(link_info);
+ default:
+ fatal("Unhandled bytecode: %s", Bytecodes::name(bc));
+ return NULL; // silence compiler warnings
}
-
- return dest_method;
}
diff --git a/src/hotspot/share/ci/ciEnv.hpp b/src/hotspot/share/ci/ciEnv.hpp
index 65949bde48a..da7b65a1325 100644
--- a/src/hotspot/share/ci/ciEnv.hpp
+++ b/src/hotspot/share/ci/ciEnv.hpp
@@ -27,6 +27,7 @@
#include "ci/ciClassList.hpp"
#include "ci/ciObjectFactory.hpp"
+#include "classfile/systemDictionary.hpp"
#include "code/debugInfoRec.hpp"
#include "code/dependencies.hpp"
#include "code/exceptionHandlerTable.hpp"
@@ -73,6 +74,7 @@ class ciEnv : StackObj {
bool _jvmti_can_post_on_exceptions;
bool _jvmti_can_pop_frame;
bool _jvmti_can_get_owned_monitor_info; // includes can_get_owned_monitor_stack_depth_info
+ bool _jvmti_can_walk_any_space;
// Cache DTrace flags
bool _dtrace_extended_probes;
@@ -348,6 +350,7 @@ class ciEnv : StackObj {
bool jvmti_can_hotswap_or_post_breakpoint() const { return _jvmti_can_hotswap_or_post_breakpoint; }
bool jvmti_can_post_on_exceptions() const { return _jvmti_can_post_on_exceptions; }
bool jvmti_can_get_owned_monitor_info() const { return _jvmti_can_get_owned_monitor_info; }
+ bool jvmti_can_walk_any_space() const { return _jvmti_can_walk_any_space; }
// Cache DTrace flags
void cache_dtrace_flags();
diff --git a/src/hotspot/share/ci/ciField.cpp b/src/hotspot/share/ci/ciField.cpp
index df4dfc9ae63..f5abad6bc10 100644
--- a/src/hotspot/share/ci/ciField.cpp
+++ b/src/hotspot/share/ci/ciField.cpp
@@ -222,6 +222,7 @@ static bool trust_final_non_static_fields(ciInstanceKlass* holder) {
// Even if general trusting is disabled, trust system-built closures in these packages.
if (holder->is_in_package("java/lang/invoke") || holder->is_in_package("sun/invoke") ||
holder->is_in_package("jdk/internal/foreign") || holder->is_in_package("jdk/incubator/foreign") ||
+ holder->is_in_package("jdk/internal/vm/vector") || holder->is_in_package("jdk/incubator/vector") ||
holder->is_in_package("java/lang"))
return true;
// Trust hidden classes and VM unsafe anonymous classes. They are created via
diff --git a/src/hotspot/share/ci/ciMemberName.cpp b/src/hotspot/share/ci/ciMemberName.cpp
index 978ba2b975c..4306e6e649e 100644
--- a/src/hotspot/share/ci/ciMemberName.cpp
+++ b/src/hotspot/share/ci/ciMemberName.cpp
@@ -34,11 +34,6 @@
// Return: MN.vmtarget
ciMethod* ciMemberName::get_vmtarget() const {
VM_ENTRY_MARK;
- // FIXME: Share code with ciMethodHandle::get_vmtarget
- Metadata* vmtarget = java_lang_invoke_MemberName::vmtarget(get_oop());
- if (vmtarget->is_method())
- return CURRENT_ENV->get_method((Method*) vmtarget);
- // FIXME: What if the vmtarget is a Klass?
- assert(false, "");
- return NULL;
+ Method* vmtarget = java_lang_invoke_MemberName::vmtarget(get_oop());
+ return CURRENT_ENV->get_method(vmtarget);
}
diff --git a/src/hotspot/share/ci/ciMethod.cpp b/src/hotspot/share/ci/ciMethod.cpp
index 3fc9a0f6ebd..de6aace25a9 100644
--- a/src/hotspot/share/ci/ciMethod.cpp
+++ b/src/hotspot/share/ci/ciMethod.cpp
@@ -44,6 +44,7 @@
#include "oops/generateOopMap.hpp"
#include "oops/method.inline.hpp"
#include "oops/oop.inline.hpp"
+#include "prims/methodHandles.hpp"
#include "prims/nativeLookup.hpp"
#include "runtime/deoptimization.hpp"
#include "runtime/handles.inline.hpp"
@@ -1352,6 +1353,11 @@ bool ciMethod::is_unboxing_method() const {
return false;
}
+bool ciMethod::is_vector_method() const {
+ return (holder() == ciEnv::current()->vector_VectorSupport_klass()) &&
+ (intrinsic_id() != vmIntrinsics::_none);
+}
+
BCEscapeAnalyzer *ciMethod::get_bcea() {
#ifdef COMPILER2
if (_bcea == NULL) {
diff --git a/src/hotspot/share/ci/ciMethod.hpp b/src/hotspot/share/ci/ciMethod.hpp
index 371f974a9ae..37eb0da8db3 100644
--- a/src/hotspot/share/ci/ciMethod.hpp
+++ b/src/hotspot/share/ci/ciMethod.hpp
@@ -30,7 +30,7 @@
#include "ci/ciObject.hpp"
#include "ci/ciSignature.hpp"
#include "compiler/methodLiveness.hpp"
-#include "prims/methodHandles.hpp"
+#include "runtime/handles.hpp"
#include "utilities/bitMap.hpp"
class ciMethodBlocks;
@@ -356,6 +356,7 @@ class ciMethod : public ciMetadata {
bool has_reserved_stack_access() const { return _has_reserved_stack_access; }
bool is_boxing_method() const;
bool is_unboxing_method() const;
+ bool is_vector_method() const;
bool is_object_initializer() const;
bool can_be_statically_bound(ciInstanceKlass* context) const;
diff --git a/src/hotspot/share/ci/ciMethodData.cpp b/src/hotspot/share/ci/ciMethodData.cpp
index fae10b58eb2..76dde0b9f74 100644
--- a/src/hotspot/share/ci/ciMethodData.cpp
+++ b/src/hotspot/share/ci/ciMethodData.cpp
@@ -335,7 +335,10 @@ ciProfileData* ciMethodData::data_at(int data_index) {
return NULL;
}
DataLayout* data_layout = data_layout_at(data_index);
+ return data_from(data_layout);
+}
+ciProfileData* ciMethodData::data_from(DataLayout* data_layout) {
switch (data_layout->tag()) {
case DataLayout::no_tag:
default:
@@ -376,6 +379,16 @@ ciProfileData* ciMethodData::next_data(ciProfileData* current) {
return next;
}
+DataLayout* ciMethodData::next_data_layout(DataLayout* current) {
+ int current_index = dp_to_di((address)current);
+ int next_index = current_index + current->size_in_bytes();
+ if (out_of_bounds(next_index)) {
+ return NULL;
+ }
+ DataLayout* next = data_layout_at(next_index);
+ return next;
+}
+
ciProfileData* ciMethodData::bci_to_extra_data(int bci, ciMethod* m, bool& two_free_slots) {
DataLayout* dp = extra_data_base();
DataLayout* end = args_data_limit();
@@ -413,12 +426,12 @@ ciProfileData* ciMethodData::bci_to_extra_data(int bci, ciMethod* m, bool& two_f
ciProfileData* ciMethodData::bci_to_data(int bci, ciMethod* m) {
// If m is not NULL we look for a SpeculativeTrapData entry
if (m == NULL) {
- ciProfileData* data = data_before(bci);
- for ( ; is_valid(data); data = next_data(data)) {
- if (data->bci() == bci) {
- set_hint_di(dp_to_di(data->dp()));
- return data;
- } else if (data->bci() > bci) {
+ DataLayout* data_layout = data_layout_before(bci);
+ for ( ; is_valid(data_layout); data_layout = next_data_layout(data_layout)) {
+ if (data_layout->bci() == bci) {
+ set_hint_di(dp_to_di((address)data_layout));
+ return data_from(data_layout);
+ } else if (data_layout->bci() > bci) {
break;
}
}
diff --git a/src/hotspot/share/ci/ciMethodData.hpp b/src/hotspot/share/ci/ciMethodData.hpp
index 09f692fd7a6..2e78e5f3bad 100644
--- a/src/hotspot/share/ci/ciMethodData.hpp
+++ b/src/hotspot/share/ci/ciMethodData.hpp
@@ -379,7 +379,7 @@ class ciMethodData : public ciMetadata {
// Data entries
intptr_t* _data;
- // Cached hint for data_before()
+ // Cached hint for data_layout_before()
int _hint_di;
// Is data attached? And is it mature?
@@ -445,17 +445,17 @@ class ciMethodData : public ciMetadata {
assert(!out_of_bounds(di), "hint_di out of bounds");
_hint_di = di;
}
- ciProfileData* data_before(int bci) {
+
+ DataLayout* data_layout_before(int bci) {
// avoid SEGV on this edge case
if (data_size() == 0)
return NULL;
- int hint = hint_di();
- if (data_layout_at(hint)->bci() <= bci)
- return data_at(hint);
- return first_data();
+ DataLayout* layout = data_layout_at(hint_di());
+ if (layout->bci() <= bci)
+ return layout;
+ return data_layout_at(first_di());
}
-
// What is the index of the first data entry?
int first_di() { return 0; }
@@ -469,6 +469,7 @@ class ciMethodData : public ciMetadata {
template void dump_replay_data_call_type_helper(outputStream* out, int round, int& count, T* call_type_data);
template void dump_replay_data_receiver_type_helper(outputStream* out, int round, int& count, T* call_type_data);
void dump_replay_data_extra_data_helper(outputStream* out, int round, int& count);
+ ciProfileData* data_from(DataLayout* data_layout);
public:
bool is_method_data() const { return true; }
@@ -519,7 +520,9 @@ class ciMethodData : public ciMetadata {
// Walk through the data in order.
ciProfileData* first_data() { return data_at(first_di()); }
ciProfileData* next_data(ciProfileData* current);
+ DataLayout* next_data_layout(DataLayout* current);
bool is_valid(ciProfileData* current) { return current != NULL; }
+ bool is_valid(DataLayout* current) { return current != NULL; }
DataLayout* extra_data_base() const { return data_layout_at(data_size()); }
DataLayout* args_data_limit() const { return data_layout_at(data_size() + extra_data_size() -
diff --git a/src/hotspot/share/ci/ciMethodHandle.cpp b/src/hotspot/share/ci/ciMethodHandle.cpp
index be6b76bd49a..49de75353a6 100644
--- a/src/hotspot/share/ci/ciMethodHandle.cpp
+++ b/src/hotspot/share/ci/ciMethodHandle.cpp
@@ -36,11 +36,6 @@ ciMethod* ciMethodHandle::get_vmtarget() const {
VM_ENTRY_MARK;
oop form_oop = java_lang_invoke_MethodHandle::form(get_oop());
oop vmentry_oop = java_lang_invoke_LambdaForm::vmentry(form_oop);
- // FIXME: Share code with ciMemberName::get_vmtarget
- Metadata* vmtarget = java_lang_invoke_MemberName::vmtarget(vmentry_oop);
- if (vmtarget->is_method())
- return CURRENT_ENV->get_method((Method*) vmtarget);
- // FIXME: What if the vmtarget is a Klass?
- assert(false, "");
- return NULL;
+ Method* vmtarget = java_lang_invoke_MemberName::vmtarget(vmentry_oop);
+ return CURRENT_ENV->get_method(vmtarget);
}
diff --git a/src/hotspot/share/ci/ciObjectFactory.cpp b/src/hotspot/share/ci/ciObjectFactory.cpp
index f74f5e627a0..fb227a300f3 100644
--- a/src/hotspot/share/ci/ciObjectFactory.cpp
+++ b/src/hotspot/share/ci/ciObjectFactory.cpp
@@ -67,7 +67,7 @@
// sort of balanced binary tree.
GrowableArray* ciObjectFactory::_shared_ci_metadata = NULL;
-ciSymbol* ciObjectFactory::_shared_ci_symbols[vmSymbols::SID_LIMIT];
+ciSymbol* ciObjectFactory::_shared_ci_symbols[vmSymbols::number_of_symbols()];
int ciObjectFactory::_shared_ident_limit = 0;
volatile bool ciObjectFactory::_initialized = false;
@@ -126,18 +126,19 @@ void ciObjectFactory::init_shared_objects() {
{
// Create the shared symbols, but not in _shared_ci_metadata.
- int i;
- for (i = vmSymbols::FIRST_SID; i < vmSymbols::SID_LIMIT; i++) {
- Symbol* vmsym = vmSymbols::symbol_at((vmSymbols::SID) i);
- assert(vmSymbols::find_sid(vmsym) == i, "1-1 mapping");
- ciSymbol* sym = new (_arena) ciSymbol(vmsym, (vmSymbols::SID) i);
+ for (vmSymbolsIterator it = vmSymbolsRange.begin(); it != vmSymbolsRange.end(); ++it) {
+ vmSymbolID index = *it;
+ Symbol* vmsym = vmSymbols::symbol_at(index);
+ assert(vmSymbols::find_sid(vmsym) == index, "1-1 mapping");
+ ciSymbol* sym = new (_arena) ciSymbol(vmsym, index);
init_ident_of(sym);
- _shared_ci_symbols[i] = sym;
+ _shared_ci_symbols[vmSymbols::as_int(index)] = sym;
}
#ifdef ASSERT
- for (i = vmSymbols::FIRST_SID; i < vmSymbols::SID_LIMIT; i++) {
- Symbol* vmsym = vmSymbols::symbol_at((vmSymbols::SID) i);
- ciSymbol* sym = vm_symbol_at((vmSymbols::SID) i);
+ for (vmSymbolsIterator it = vmSymbolsRange.begin(); it != vmSymbolsRange.end(); ++it) {
+ vmSymbolID index = *it;
+ Symbol* vmsym = vmSymbols::symbol_at(index);
+ ciSymbol* sym = vm_symbol_at(index);
assert(sym->get_symbol() == vmsym, "oop must match");
}
assert(ciSymbol::void_class_signature()->get_symbol() == vmSymbols::void_class_signature(), "spot check");
@@ -208,14 +209,14 @@ void ciObjectFactory::init_shared_objects() {
ciSymbol* ciObjectFactory::get_symbol(Symbol* key) {
- vmSymbols::SID sid = vmSymbols::find_sid(key);
- if (sid != vmSymbols::NO_SID) {
+ vmSymbolID sid = vmSymbols::find_sid(key);
+ if (sid != vmSymbolID::NO_SID) {
// do not pollute the main cache with it
return vm_symbol_at(sid);
}
- assert(vmSymbols::find_sid(key) == vmSymbols::NO_SID, "");
- ciSymbol* s = new (arena()) ciSymbol(key, vmSymbols::NO_SID);
+ assert(vmSymbols::find_sid(key) == vmSymbolID::NO_SID, "");
+ ciSymbol* s = new (arena()) ciSymbol(key, vmSymbolID::NO_SID);
_symbols->push(s);
return s;
}
@@ -678,8 +679,8 @@ void ciObjectFactory::insert_non_perm(ciObjectFactory::NonPermObject* &where, oo
// ------------------------------------------------------------------
// ciObjectFactory::vm_symbol_at
// Get the ciSymbol corresponding to some index in vmSymbols.
-ciSymbol* ciObjectFactory::vm_symbol_at(int index) {
- assert(index >= vmSymbols::FIRST_SID && index < vmSymbols::SID_LIMIT, "oob");
+ciSymbol* ciObjectFactory::vm_symbol_at(vmSymbolID sid) {
+ int index = vmSymbols::as_int(sid);
return _shared_ci_symbols[index];
}
diff --git a/src/hotspot/share/ci/ciObjectFactory.hpp b/src/hotspot/share/ci/ciObjectFactory.hpp
index f78ae05a905..ca5a9613eea 100644
--- a/src/hotspot/share/ci/ciObjectFactory.hpp
+++ b/src/hotspot/share/ci/ciObjectFactory.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -28,6 +28,7 @@
#include "ci/ciClassList.hpp"
#include "ci/ciObject.hpp"
#include "utilities/growableArray.hpp"
+#include "utilities/vmEnums.hpp"
// ciObjectFactory
//
@@ -104,7 +105,7 @@ class ciObjectFactory : public ResourceObj {
ciSymbol* get_symbol(Symbol* key);
// Get the ciSymbol corresponding to one of the vmSymbols.
- static ciSymbol* vm_symbol_at(int index);
+ static ciSymbol* vm_symbol_at(vmSymbolID index);
// Get the ciMethod representing an unloaded/unfound method.
ciMethod* get_unloaded_method(ciInstanceKlass* holder,
diff --git a/src/hotspot/share/ci/ciReplay.cpp b/src/hotspot/share/ci/ciReplay.cpp
index b9fec53b08b..552c6346741 100644
--- a/src/hotspot/share/ci/ciReplay.cpp
+++ b/src/hotspot/share/ci/ciReplay.cpp
@@ -494,7 +494,7 @@ class CompileReplay : public StackObj {
return true;
}
- // compile inline ...
+ // compile <klass> <name> <signature> <entry_bci> <comp_level> inline <count> (<depth> <bci> <klass> <name> <signature>)*
void* process_inline(ciMethod* imethod, Method* m, int entry_bci, int comp_level, TRAPS) {
_imethod = m;
_iklass = imethod->holder();
@@ -524,7 +524,7 @@ class CompileReplay : public StackObj {
return NULL;
}
- // compile inline ...
+ // compile <klass> <name> <signature> <entry_bci> <comp_level> inline <count> (<depth> <bci> <klass> <name> <signature>)*
void process_compile(TRAPS) {
Method* method = parse_method(CHECK);
if (had_error()) return;
@@ -606,8 +606,6 @@ class CompileReplay : public StackObj {
}
// ciMethod
- //
- //
void process_ciMethod(TRAPS) {
Method* method = parse_method(CHECK);
if (had_error()) return;
@@ -619,7 +617,7 @@ class CompileReplay : public StackObj {
rec->_instructions_size = parse_int("instructions_size");
}
- // ciMethodData orig # # ... data # # ... oops # ... methods
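+ // ciMethodData <klass> <name> <signature> <state> <current_mileage> orig <length> <byte>* data <length> <ptr>* oops <length> (<offset> <klass>)* methods <length> (<offset> <klass> <name> <signature>)*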
void process_ciMethodData(TRAPS) {
Method* method = parse_method(CHECK);
if (had_error()) return;
@@ -694,7 +692,7 @@ class CompileReplay : public StackObj {
Klass* k = parse_klass(CHECK);
}
- // ciInstanceKlass tag # # # ...
+ // ciInstanceKlass <name> <is_linked> <is_initialized> <length> tag*
//
// Load the klass 'name' and link or initialize it. Verify that the
// constant pool is the same length as 'length' and make sure the
@@ -789,10 +787,12 @@ class CompileReplay : public StackObj {
}
}
+ // staticfield <klass> <name> <signature> <value>
+ //
// Initialize a class and fill in the value for a static field.
// This is useful when the compile was dependent on the value of
// static fields but it's impossible to properly rerun the static
- // initiailizer.
+ // initializer.
void process_staticfield(TRAPS) {
InstanceKlass* k = (InstanceKlass *)parse_klass(CHECK);
@@ -906,6 +906,7 @@ class CompileReplay : public StackObj {
}
#if INCLUDE_JVMTI
+ // JvmtiExport <field> <value>
void process_JvmtiExport(TRAPS) {
const char* field = parse_string();
bool value = parse_int("JvmtiExport flag") != 0;
diff --git a/src/hotspot/share/ci/ciReplay.hpp b/src/hotspot/share/ci/ciReplay.hpp
index c224ba556d7..1bac57debf0 100644
--- a/src/hotspot/share/ci/ciReplay.hpp
+++ b/src/hotspot/share/ci/ciReplay.hpp
@@ -116,7 +116,6 @@ class ciReplay {
static void initialize(ciMethod* method);
static bool is_loaded(Method* method);
- static bool is_loaded(Klass* klass);
static bool should_not_inline(ciMethod* method);
static bool should_inline(void* data, ciMethod* method, int bci, int inline_depth);
diff --git a/src/hotspot/share/ci/ciSignature.hpp b/src/hotspot/share/ci/ciSignature.hpp
index 79341bab7d2..0a150d88e61 100644
--- a/src/hotspot/share/ci/ciSignature.hpp
+++ b/src/hotspot/share/ci/ciSignature.hpp
@@ -50,8 +50,6 @@ class ciSignature : public ResourceObj {
ciSignature(ciKlass* accessing_klass, const constantPoolHandle& cpool, ciSymbol* signature);
ciSignature(ciKlass* accessing_klass, ciSymbol* signature, ciMethodType* method_type);
- void get_all_klasses();
-
Symbol* get_symbol() const { return _symbol->get_symbol(); }
public:
diff --git a/src/hotspot/share/ci/ciSymbol.cpp b/src/hotspot/share/ci/ciSymbol.cpp
index 24de284c53e..32ab241bd3a 100644
--- a/src/hotspot/share/ci/ciSymbol.cpp
+++ b/src/hotspot/share/ci/ciSymbol.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -27,12 +27,13 @@
#include "ci/ciUtilities.inline.hpp"
#include "classfile/symbolTable.hpp"
#include "memory/oopFactory.hpp"
+#include "prims/methodHandles.hpp"
// ------------------------------------------------------------------
// ciSymbol::ciSymbol
//
// Preallocated symbol variant. Used with symbols from vmSymbols.
-ciSymbol::ciSymbol(Symbol* s, vmSymbols::SID sid)
+ciSymbol::ciSymbol(Symbol* s, vmSymbolID sid)
: _symbol(s), _sid(sid)
{
assert(_symbol != NULL, "adding null symbol");
@@ -42,7 +43,7 @@ ciSymbol::ciSymbol(Symbol* s, vmSymbols::SID sid)
// Normal case for non-famous symbols.
ciSymbol::ciSymbol(Symbol* s)
- : _symbol(s), _sid(vmSymbols::NO_SID)
+ : _symbol(s), _sid(vmSymbolID::NO_SID)
{
assert(_symbol != NULL, "adding null symbol");
_symbol->increment_refcount(); // increment ref count
diff --git a/src/hotspot/share/ci/ciSymbol.hpp b/src/hotspot/share/ci/ciSymbol.hpp
index fb05a6239f4..30facfc9c09 100644
--- a/src/hotspot/share/ci/ciSymbol.hpp
+++ b/src/hotspot/share/ci/ciSymbol.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -30,6 +30,7 @@
#include "ci/ciObjectFactory.hpp"
#include "classfile/vmSymbols.hpp"
#include "oops/symbol.hpp"
+#include "utilities/vmEnums.hpp"
// ciSymbol
//
@@ -48,11 +49,11 @@ class ciSymbol : public ciBaseObject {
friend class ciObjArrayKlass;
private:
- const vmSymbols::SID _sid;
+ const vmSymbolID _sid;
DEBUG_ONLY( bool sid_ok() { return vmSymbols::find_sid(get_symbol()) == _sid; } )
ciSymbol(Symbol* s); // normal case, for symbols not mentioned in vmSymbols
- ciSymbol(Symbol* s, vmSymbols::SID sid); // for use with vmSymbols
+ ciSymbol(Symbol* s, vmSymbolID sid); // for use with vmSymbols
Symbol* get_symbol() const { return _symbol; }
@@ -68,7 +69,7 @@ class ciSymbol : public ciBaseObject {
public:
- // The enumeration ID from vmSymbols, or vmSymbols::NO_SID if none.
+ // The enumeration ID from vmSymbols, or vmSymbolID::NO_SID if none.
- vmSymbols::SID sid() const { return _sid; }
+ vmSymbolID sid() const { return _sid; }
// The text of the symbol as a null-terminated utf8 string.
const char* as_utf8();
@@ -98,7 +99,7 @@ class ciSymbol : public ciBaseObject {
static ciSymbol* make(const char* s);
#define CI_SYMBOL_DECLARE(name, ignore_def) \
- static ciSymbol* name() { return ciObjectFactory::vm_symbol_at(vmSymbols::VM_SYMBOL_ENUM_NAME(name)); }
+ static ciSymbol* name() { return ciObjectFactory::vm_symbol_at(VM_SYMBOL_ENUM_NAME(name)); }
VM_SYMBOLS_DO(CI_SYMBOL_DECLARE, CI_SYMBOL_DECLARE)
#undef CI_SYMBOL_DECLARE
diff --git a/src/hotspot/share/classfile/altHashing.cpp b/src/hotspot/share/classfile/altHashing.cpp
index bce06ad5813..a01e6e29fb7 100644
--- a/src/hotspot/share/classfile/altHashing.cpp
+++ b/src/hotspot/share/classfile/altHashing.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012, 2017, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -22,13 +22,30 @@
*
*/
+/*
+ * halfsiphash code adapted from reference implementation
+ * (https://github.com/veorq/SipHash/blob/master/halfsiphash.c)
+ * which is distributed with the following copyright:
+ *
+ * SipHash reference C implementation
+ *
+ * Copyright (c) 2016 Jean-Philippe Aumasson
+ *
+ * To the extent possible under law, the author(s) have dedicated all copyright
+ * and related and neighboring rights to this software to the public domain
+ * worldwide. This software is distributed without any warranty.
+ *
+ * You should have received a copy of the CC0 Public Domain Dedication along
+ * with this software. If not, see
+ * <http://creativecommons.org/publicdomain/zero/1.0/>.
+ */
+
#include "precompiled.hpp"
#include "classfile/altHashing.hpp"
-#include "classfile/symbolTable.hpp"
#include "classfile/systemDictionary.hpp"
#include "oops/markWord.hpp"
#include "oops/oop.inline.hpp"
-#include "runtime/thread.hpp"
+#include "runtime/os.hpp"
// Get the hash code of the classes mirror if it exists, otherwise just
// return a random number, which is one of the possible hash code used for
@@ -40,169 +57,182 @@ static intptr_t object_hash(Klass* k) {
}
// Seed value used for each alternative hash calculated.
-juint AltHashing::compute_seed() {
- jlong nanos = os::javaTimeNanos();
- jlong now = os::javaTimeMillis();
- jint SEED_MATERIAL[8] = {
- (jint) object_hash(SystemDictionary::String_klass()),
- (jint) object_hash(SystemDictionary::System_klass()),
- (jint) os::random(), // current thread isn't a java thread
- (jint) (((julong)nanos) >> 32),
- (jint) nanos,
- (jint) (((julong)now) >> 32),
- (jint) now,
- (jint) (os::javaTimeNanos() >> 2)
+uint64_t AltHashing::compute_seed() {
+ uint64_t nanos = os::javaTimeNanos();
+ uint64_t now = os::javaTimeMillis();
+ uint32_t SEED_MATERIAL[8] = {
+ (uint32_t) object_hash(SystemDictionary::String_klass()),
+ (uint32_t) object_hash(SystemDictionary::System_klass()),
+ (uint32_t) os::random(), // current thread isn't a java thread
+ (uint32_t) (((uint64_t)nanos) >> 32),
+ (uint32_t) nanos,
+ (uint32_t) (((uint64_t)now) >> 32),
+ (uint32_t) now,
+ (uint32_t) (os::javaTimeNanos() >> 2)
};
- return murmur3_32(SEED_MATERIAL, 8);
+ return halfsiphash_64(SEED_MATERIAL, 8);
+}
+
+// Rotates the 32-bit value i left by distance bits.
+// Utility function modelled on java/lang/Integer. Both shift counts are
+// reduced modulo 32 so that a distance of 0 (or any multiple of 32) never
+// shifts a 32-bit operand by 32 bits, which is undefined behavior in C++.
+// For distances 1..31 the result is unchanged from the unmasked form.
+static uint32_t Integer_rotateLeft(uint32_t i, int distance) {
+  return (i << (distance & 31)) | (i >> ((32 - distance) & 31));
}
+// Applies 'rounds' iterations of the HalfSipHash mixing round to the four-word
+// state v, in place. Each iteration is a fixed sequence of 32-bit add, rotate
+// and xor steps; every step reads the result of earlier ones, so the statement
+// order must not be changed.
+static void halfsiphash_rounds(uint32_t v[4], int rounds) {
+ while (rounds-- > 0) {
+ // Mix v[1] into v[0].
+ v[0] += v[1];
+ v[1] = Integer_rotateLeft(v[1], 5);
+ v[1] ^= v[0];
+ v[0] = Integer_rotateLeft(v[0], 16);
+ // Mix v[3] into v[2].
+ v[2] += v[3];
+ v[3] = Integer_rotateLeft(v[3], 8);
+ v[3] ^= v[2];
+ // Cross the halves: v[3] into v[0].
+ v[0] += v[3];
+ v[3] = Integer_rotateLeft(v[3], 7);
+ v[3] ^= v[0];
+ // Cross the halves: v[1] into v[2].
+ v[2] += v[1];
+ v[1] = Integer_rotateLeft(v[1], 13);
+ v[1] ^= v[2];
+ v[2] = Integer_rotateLeft(v[2], 16);
+ }
+}
+
+// Absorbs one 32-bit word into the state v: the word is xor-ed into v[3], the
+// state is mixed for 'rounds' rounds, and the same word is then xor-ed into
+// v[0]. The xor-before / xor-after order is part of the algorithm.
+static void halfsiphash_adddata(uint32_t v[4], uint32_t newdata, int rounds) {
+ v[3] ^= newdata;
+ halfsiphash_rounds(v, rounds);
+ v[0] ^= newdata;
+}
+
+// Initializes the four-word state v from a 64-bit seed: v[0] receives the low
+// 32 bits of the seed and v[1] the high 32 bits; v[2] and v[3] are fixed
+// constants xor-ed with v[0] and v[1] respectively.
+// NOTE(review): the two constants presumably come from the reference
+// halfsiphash.c cited in this file's header comment - confirm against it.
+static void halfsiphash_init32(uint32_t v[4], uint64_t seed) {
+ v[0] = seed & 0xffffffff;
+ // Implicit narrowing to uint32_t keeps the upper half of the seed.
+ v[1] = seed >> 32;
+ v[2] = 0x6c796765 ^ v[0];
+ v[3] = 0x74656462 ^ v[1];
+}
+
+// Initializes the state v for the 64-bit-output variant: identical to
+// halfsiphash_init32(), followed by xor-ing 0xee into v[1].
+static void halfsiphash_init64(uint32_t v[4], uint64_t seed) {
+ halfsiphash_init32(v, seed);
+ v[1] ^= 0xee;
+}
+
+// Finalizes a 32-bit hash: xors 0xff into v[2], mixes the state for 'rounds'
+// rounds and returns v[1] ^ v[3].
+// This helper is not declared in altHashing.hpp, so it is given internal
+// linkage like the other halfsiphash_* helpers in this file.
+static uint32_t halfsiphash_finish32(uint32_t v[4], int rounds) {
+  v[2] ^= 0xff;
+  halfsiphash_rounds(v, rounds);
+  return (v[1] ^ v[3]);
+}
+
+// Finalizes a 64-bit hash from the state v.
+// The low 32 bits are v[1] ^ v[3] after xor-ing 0xee into v[2] and mixing for
+// 'rounds' rounds; the high 32 bits are v[1] ^ v[3] after additionally xor-ing
+// 0xdd into v[1] and mixing for 'rounds' rounds a second time.
+static uint64_t halfsiphash_finish64(uint32_t v[4], int rounds) {
+ uint64_t rv;
+ v[2] ^= 0xee;
+ halfsiphash_rounds(v, rounds);
+ rv = v[1] ^ v[3];
+ v[1] ^= 0xdd;
+ halfsiphash_rounds(v, rounds);
+ // Widen to 64 bits before shifting so the upper word is not lost.
+ rv |= (uint64_t)(v[1] ^ v[3]) << 32;
+ return rv;
+}
-// Murmur3 hashing for Symbol
-juint AltHashing::murmur3_32(juint seed, const jbyte* data, int len) {
- juint h1 = seed;
+// HalfSipHash-2-4 (32-bit output) for Symbols
+uint32_t AltHashing::halfsiphash_32(uint64_t seed, const uint8_t* data, int len) {
+ uint32_t v[4];
+ uint32_t newdata;
+ int off = 0;
int count = len;
- int offset = 0;
+
+ halfsiphash_init32(v, seed);
// body
while (count >= 4) {
- juint k1 = (data[offset] & 0x0FF)
- | (data[offset + 1] & 0x0FF) << 8
- | (data[offset + 2] & 0x0FF) << 16
- | data[offset + 3] << 24;
- count -= 4;
- offset += 4;
+ // Avoid sign extension with 0x0ff
+ newdata = (data[off] & 0x0FF)
+ | (data[off + 1] & 0x0FF) << 8
+ | (data[off + 2] & 0x0FF) << 16
+ | data[off + 3] << 24;
- k1 *= 0xcc9e2d51;
- k1 = Integer_rotateLeft(k1, 15);
- k1 *= 0x1b873593;
+ count -= 4;
+ off += 4;
- h1 ^= k1;
- h1 = Integer_rotateLeft(h1, 13);
- h1 = h1 * 5 + 0xe6546b64;
+ halfsiphash_adddata(v, newdata, 2);
}
// tail
+ newdata = ((uint32_t)len) << 24; // (Byte.SIZE / Byte.SIZE);
if (count > 0) {
- juint k1 = 0;
-
switch (count) {
case 3:
- k1 ^= (data[offset + 2] & 0xff) << 16;
+ newdata |= (data[off + 2] & 0x0ff) << 16;
// fall through
case 2:
- k1 ^= (data[offset + 1] & 0xff) << 8;
+ newdata |= (data[off + 1] & 0x0ff) << 8;
// fall through
case 1:
- k1 ^= (data[offset] & 0xff);
+ newdata |= (data[off] & 0x0ff);
// fall through
- default:
- k1 *= 0xcc9e2d51;
- k1 = Integer_rotateLeft(k1, 15);
- k1 *= 0x1b873593;
- h1 ^= k1;
}
}
- // finalization
- h1 ^= len;
+ halfsiphash_adddata(v, newdata, 2);
- // finalization mix force all bits of a hash block to avalanche
- h1 ^= h1 >> 16;
- h1 *= 0x85ebca6b;
- h1 ^= h1 >> 13;
- h1 *= 0xc2b2ae35;
- h1 ^= h1 >> 16;
-
- return h1;
+ // finalization
+ return halfsiphash_finish32(v, 4);
}
-// Murmur3 hashing for Strings
-juint AltHashing::murmur3_32(juint seed, const jchar* data, int len) {
- juint h1 = seed;
-
+// HalfSipHash-2-4 (32-bit output) for Strings
+uint32_t AltHashing::halfsiphash_32(uint64_t seed, const uint16_t* data, int len) {
+ uint32_t v[4];
+ uint32_t newdata;
int off = 0;
int count = len;
+ halfsiphash_init32(v, seed);
+
// body
while (count >= 2) {
- jchar d1 = data[off++] & 0xFFFF;
- jchar d2 = data[off++];
- juint k1 = (d1 | d2 << 16);
+ uint16_t d1 = data[off++] & 0x0FFFF;
+ uint16_t d2 = data[off++];
+ newdata = (d1 | d2 << 16);
count -= 2;
- k1 *= 0xcc9e2d51;
- k1 = Integer_rotateLeft(k1, 15);
- k1 *= 0x1b873593;
-
- h1 ^= k1;
- h1 = Integer_rotateLeft(h1, 13);
- h1 = h1 * 5 + 0xe6546b64;
+ halfsiphash_adddata(v, newdata, 2);
}
// tail
-
+ newdata = ((uint32_t)len * 2) << 24; // (Character.SIZE / Byte.SIZE);
if (count > 0) {
- juint k1 = (juint)data[off];
-
- k1 *= 0xcc9e2d51;
- k1 = Integer_rotateLeft(k1, 15);
- k1 *= 0x1b873593;
- h1 ^= k1;
+ newdata |= (uint32_t)data[off];
}
+ halfsiphash_adddata(v, newdata, 2);
// finalization
- h1 ^= len * 2; // (Character.SIZE / Byte.SIZE);
-
- // finalization mix force all bits of a hash block to avalanche
- h1 ^= h1 >> 16;
- h1 *= 0x85ebca6b;
- h1 ^= h1 >> 13;
- h1 *= 0xc2b2ae35;
- h1 ^= h1 >> 16;
-
- return h1;
+ return halfsiphash_finish32(v, 4);
}
-// Hash used for the seed.
-juint AltHashing::murmur3_32(juint seed, const jint* data, int len) {
- juint h1 = seed;
+// HalfSipHash-2-4 (64-bit output) for integers (used to create seed)
+uint64_t AltHashing::halfsiphash_64(uint64_t seed, const uint32_t* data, int len) {
+ uint32_t v[4];
int off = 0;
int end = len;
+ halfsiphash_init64(v, seed);
+
// body
while (off < end) {
- juint k1 = (juint)data[off++];
-
- k1 *= 0xcc9e2d51;
- k1 = Integer_rotateLeft(k1, 15);
- k1 *= 0x1b873593;
-
- h1 ^= k1;
- h1 = Integer_rotateLeft(h1, 13);
- h1 = h1 * 5 + 0xe6546b64;
+ halfsiphash_adddata(v, (uint32_t)data[off++], 2);
}
// tail (always empty, as body is always 32-bit chunks)
// finalization
-
- h1 ^= len * 4; // (Integer.SIZE / Byte.SIZE);
-
- // finalization mix force all bits of a hash block to avalanche
- h1 ^= h1 >> 16;
- h1 *= 0x85ebca6b;
- h1 ^= h1 >> 13;
- h1 *= 0xc2b2ae35;
- h1 ^= h1 >> 16;
-
- return h1;
+ halfsiphash_adddata(v, ((uint32_t)len * 4) << 24, 2); // (Integer.SIZE / Byte.SIZE);
+ return halfsiphash_finish64(v, 4);
}
-juint AltHashing::murmur3_32(const jint* data, int len) {
- return murmur3_32(0, data, len);
+// HalfSipHash-2-4 (64-bit output) for integers (used to create seed)
+uint64_t AltHashing::halfsiphash_64(const uint32_t* data, int len) {
+ return halfsiphash_64((uint64_t)0, data, len);
}
diff --git a/src/hotspot/share/classfile/altHashing.hpp b/src/hotspot/share/classfile/altHashing.hpp
index 8af990d26ee..e1726ae5152 100644
--- a/src/hotspot/share/classfile/altHashing.hpp
+++ b/src/hotspot/share/classfile/altHashing.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -26,29 +26,25 @@
#define SHARE_CLASSFILE_ALTHASHING_HPP
#include "jni.h"
-#include "classfile/symbolTable.hpp"
+#include "memory/allocation.hpp"
/**
- * Hashing utilities.
- *
- * Implementation of Murmur3 hashing.
- * This code was translated from src/share/classes/sun/misc/Hashing.java
- * code in the JDK.
+ * Implementation of alternate more secure hashing.
*/
class AltHashing : AllStatic {
friend class AltHashingTest;
- // utility function copied from java/lang/Integer
- static juint Integer_rotateLeft(juint i, int distance) {
- return (i << distance) | (i >> (32 - distance));
- }
- static juint murmur3_32(const jint* data, int len);
- static juint murmur3_32(juint seed, const jint* data, int len);
+ // For the seed computation
+ static uint64_t halfsiphash_64(const uint32_t* data, int len);
+ static uint64_t halfsiphash_64(uint64_t seed, const uint32_t* data, int len);
public:
- static juint compute_seed();
- static juint murmur3_32(juint seed, const jbyte* data, int len);
- static juint murmur3_32(juint seed, const jchar* data, int len);
+ static uint64_t compute_seed();
+
+ // For Symbols
+ static uint32_t halfsiphash_32(uint64_t seed, const uint8_t* data, int len);
+ // For Strings
+ static uint32_t halfsiphash_32(uint64_t seed, const uint16_t* data, int len);
};
#endif // SHARE_CLASSFILE_ALTHASHING_HPP
diff --git a/src/hotspot/share/classfile/classFileError.cpp b/src/hotspot/share/classfile/classFileError.cpp
index f44588efbc4..b3610f8b838 100644
--- a/src/hotspot/share/classfile/classFileError.cpp
+++ b/src/hotspot/share/classfile/classFileError.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -78,6 +78,25 @@ void ClassFileParser::classfile_parse_error(const char* msg,
msg, name, signature, _class_name->as_C_string());
}
+// Raises java.lang.IncompatibleClassChangeError via Exceptions::fthrow.
+// 'msg' is a printf-style format that receives two strings, in this order:
+// the external name of _class_name and the external name of k.
+void ClassFileParser::classfile_icce_error(const char* msg,
+                                           const Klass* k,
+                                           TRAPS) const {
+  assert(_class_name != NULL, "invariant");
+  // The name strings built below are resource-allocated; release them on exit.
+  ResourceMark rm(THREAD);
+  const char* const this_class_name = _class_name->as_klass_external_name();
+  const char* const other_class_name = k->external_name();
+  Exceptions::fthrow(THREAD_AND_LOCATION,
+                     vmSymbols::java_lang_IncompatibleClassChangeError(),
+                     msg, this_class_name, other_class_name);
+}
+
+// Raises java.lang.UnsupportedClassVersionError via Exceptions::fthrow.
+// 'msg' is a printf-style format that receives, in order, class_name as a
+// C string followed by 'major' and 'minor'.
+void ClassFileParser::classfile_ucve_error(const char* msg,
+                                           const Symbol* class_name,
+                                           u2 major,
+                                           u2 minor,
+                                           TRAPS) const {
+  // The C string built below is resource-allocated; release it on exit.
+  ResourceMark rm(THREAD);
+  const char* const name_cstr = class_name->as_C_string();
+  Exceptions::fthrow(THREAD_AND_LOCATION,
+                     vmSymbols::java_lang_UnsupportedClassVersionError(),
+                     msg, name_cstr, major, minor);
+}
+
PRAGMA_DIAG_POP
void StackMapStream::stackmap_format_error(const char* msg, TRAPS) {
diff --git a/src/hotspot/share/classfile/classFileParser.cpp b/src/hotspot/share/classfile/classFileParser.cpp
index e6ab199417b..52767594516 100644
--- a/src/hotspot/share/classfile/classFileParser.cpp
+++ b/src/hotspot/share/classfile/classFileParser.cpp
@@ -212,7 +212,8 @@ void ClassFileParser::parse_constant_pool_entries(const ClassFileStream* const s
if (_major_version < Verifier::INVOKEDYNAMIC_MAJOR_VERSION) {
classfile_parse_error(
"Class file version does not support constant tag %u in class file %s",
- tag, CHECK);
+ tag, THREAD);
+ return;
}
if (tag == JVM_CONSTANT_MethodHandle) {
cfs->guarantee_more(4, CHECK); // ref_kind, method_index, tag/access_flags
@@ -234,7 +235,8 @@ void ClassFileParser::parse_constant_pool_entries(const ClassFileStream* const s
if (_major_version < Verifier::DYNAMICCONSTANT_MAJOR_VERSION) {
classfile_parse_error(
"Class file version does not support constant tag %u in class file %s",
- tag, CHECK);
+ tag, THREAD);
+ return;
}
cfs->guarantee_more(5, CHECK); // bsm_index, nt, tag/access_flags
const u2 bootstrap_specifier_index = cfs->get_u2_fast();
@@ -249,7 +251,8 @@ void ClassFileParser::parse_constant_pool_entries(const ClassFileStream* const s
if (_major_version < Verifier::INVOKEDYNAMIC_MAJOR_VERSION) {
classfile_parse_error(
"Class file version does not support constant tag %u in class file %s",
- tag, CHECK);
+ tag, THREAD);
+ return;
}
cfs->guarantee_more(5, CHECK); // bsm_index, nt, tag/access_flags
const u2 bootstrap_specifier_index = cfs->get_u2_fast();
@@ -368,8 +371,8 @@ void ClassFileParser::parse_constant_pool_entries(const ClassFileStream* const s
default: {
classfile_parse_error("Unknown constant tag %u in class file %s",
tag,
- CHECK);
- break;
+ THREAD);
+ return;
}
} // end of switch(tag)
} // end of for
@@ -562,7 +565,8 @@ void ClassFileParser::parse_constant_pool(const ClassFileStream* const stream,
default: {
classfile_parse_error(
"Bad method handle kind at constant pool index %u in class file %s",
- index, CHECK);
+ index, THREAD);
+ return;
}
} // switch(refkind)
// Keep the ref_index unchanged. It will be indirected at link-time.
@@ -740,7 +744,8 @@ void ClassFileParser::parse_constant_pool(const ClassFileStream* const stream,
name != vmSymbols::object_initializer_name()) {
classfile_parse_error(
"Bad method name at constant pool index %u in class file %s",
- name_ref_index, CHECK);
+ name_ref_index, THREAD);
+ return;
}
}
break;
@@ -762,13 +767,15 @@ void ClassFileParser::parse_constant_pool(const ClassFileStream* const stream,
if (name != vmSymbols::object_initializer_name()) {
classfile_parse_error(
"Bad constructor name at constant pool index %u in class file %s",
- name_ref_index, CHECK);
+ name_ref_index, THREAD);
+ return;
}
} else {
if (name == vmSymbols::object_initializer_name()) {
classfile_parse_error(
"Bad method name at constant pool index %u in class file %s",
- name_ref_index, CHECK);
+ name_ref_index, THREAD);
+ return;
}
}
break;
@@ -1007,7 +1014,7 @@ void ClassFileParser::parse_interfaces(const ClassFileStream* const stream,
}
if (dup) {
classfile_parse_error("Duplicate interface name \"%s\" in class file %s",
- name->as_C_string(), CHECK);
+ name->as_C_string(), THREAD);
}
}
}
@@ -1063,7 +1070,7 @@ void ClassFileParser::verify_constantvalue(const ConstantPool* const cp,
default: {
classfile_parse_error("Unable to set initial value %u in class file %s",
constantvalue_index,
- CHECK);
+ THREAD);
}
}
}
@@ -1350,7 +1357,8 @@ void ClassFileParser::parse_field_attributes(const ClassFileStream* const cfs,
if (is_static && attribute_name == vmSymbols::tag_constant_value()) {
// ignore if non-static
if (constantvalue_index != 0) {
- classfile_parse_error("Duplicate ConstantValue attribute in class file %s", CHECK);
+ classfile_parse_error("Duplicate ConstantValue attribute in class file %s", THREAD);
+ return;
}
check_property(
attribute_length == 2,
@@ -1365,31 +1373,36 @@ void ClassFileParser::parse_field_attributes(const ClassFileStream* const cfs,
if (attribute_length != 0) {
classfile_parse_error(
"Invalid Synthetic field attribute length %u in class file %s",
- attribute_length, CHECK);
+ attribute_length, THREAD);
+ return;
}
is_synthetic = true;
} else if (attribute_name == vmSymbols::tag_deprecated()) { // 4276120
if (attribute_length != 0) {
classfile_parse_error(
"Invalid Deprecated field attribute length %u in class file %s",
- attribute_length, CHECK);
+ attribute_length, THREAD);
+ return;
}
} else if (_major_version >= JAVA_1_5_VERSION) {
if (attribute_name == vmSymbols::tag_signature()) {
if (generic_signature_index != 0) {
classfile_parse_error(
- "Multiple Signature attributes for field in class file %s", CHECK);
+ "Multiple Signature attributes for field in class file %s", THREAD);
+ return;
}
if (attribute_length != 2) {
classfile_parse_error(
"Wrong size %u for field's Signature attribute in class file %s",
- attribute_length, CHECK);
+ attribute_length, THREAD);
+ return;
}
generic_signature_index = parse_generic_signature_attribute(cfs, CHECK);
} else if (attribute_name == vmSymbols::tag_runtime_visible_annotations()) {
if (runtime_visible_annotations != NULL) {
classfile_parse_error(
- "Multiple RuntimeVisibleAnnotations attributes for field in class file %s", CHECK);
+ "Multiple RuntimeVisibleAnnotations attributes for field in class file %s", THREAD);
+ return;
}
runtime_visible_annotations_length = attribute_length;
runtime_visible_annotations = cfs->current();
@@ -1406,7 +1419,8 @@ void ClassFileParser::parse_field_attributes(const ClassFileStream* const cfs,
} else if (attribute_name == vmSymbols::tag_runtime_invisible_annotations()) {
if (runtime_invisible_annotations_exists) {
classfile_parse_error(
- "Multiple RuntimeInvisibleAnnotations attributes for field in class file %s", CHECK);
+ "Multiple RuntimeInvisibleAnnotations attributes for field in class file %s", THREAD);
+ return;
}
runtime_invisible_annotations_exists = true;
if (PreserveAllAnnotations) {
@@ -1418,7 +1432,8 @@ void ClassFileParser::parse_field_attributes(const ClassFileStream* const cfs,
} else if (attribute_name == vmSymbols::tag_runtime_visible_type_annotations()) {
if (runtime_visible_type_annotations != NULL) {
classfile_parse_error(
- "Multiple RuntimeVisibleTypeAnnotations attributes for field in class file %s", CHECK);
+ "Multiple RuntimeVisibleTypeAnnotations attributes for field in class file %s", THREAD);
+ return;
}
runtime_visible_type_annotations_length = attribute_length;
runtime_visible_type_annotations = cfs->current();
@@ -1427,7 +1442,8 @@ void ClassFileParser::parse_field_attributes(const ClassFileStream* const cfs,
} else if (attribute_name == vmSymbols::tag_runtime_invisible_type_annotations()) {
if (runtime_invisible_type_annotations_exists) {
classfile_parse_error(
- "Multiple RuntimeInvisibleTypeAnnotations attributes for field in class file %s", CHECK);
+ "Multiple RuntimeInvisibleTypeAnnotations attributes for field in class file %s", THREAD);
+ return;
} else {
runtime_invisible_type_annotations_exists = true;
}
@@ -1541,14 +1557,13 @@ class ClassFileParser::FieldAllocationCount : public ResourceObj {
}
}
- FieldAllocationType update(bool is_static, BasicType type) {
+ void update(bool is_static, BasicType type) {
FieldAllocationType atype = basic_type_to_atype(is_static, type);
if (atype != BAD_ALLOCATION_TYPE) {
// Make sure there is no overflow with injected fields.
assert(count[atype] < 0xFFFF, "More than 65535 fields");
count[atype]++;
}
- return atype;
}
};
@@ -1688,9 +1703,8 @@ void ClassFileParser::parse_fields(const ClassFileStream* const cfs,
constantvalue_index);
const BasicType type = cp->basic_type_for_signature_at(signature_index);
- // Remember how many oops we encountered and compute allocation type
- const FieldAllocationType atype = fac->update(is_static, type);
- field->set_allocation_type(atype);
+ // Update FieldAllocationCount for this kind of field
+ fac->update(is_static, type);
// After field is initialized with type, we can augment it with aux info
if (parsed_annotations.has_any_annotations()) {
@@ -1726,16 +1740,15 @@ void ClassFileParser::parse_fields(const ClassFileStream* const cfs,
// Injected field
FieldInfo* const field = FieldInfo::from_field_array(fa, index);
- field->initialize(JVM_ACC_FIELD_INTERNAL,
- injected[n].name_index,
- injected[n].signature_index,
+ field->initialize((u2)JVM_ACC_FIELD_INTERNAL,
+ (u2)(injected[n].name_index),
+ (u2)(injected[n].signature_index),
0);
const BasicType type = Signature::basic_type(injected[n].signature());
- // Remember how many oops we encountered and compute allocation type
- const FieldAllocationType atype = fac->update(false, type);
- field->set_allocation_type(atype);
+ // Update FieldAllocationCount for this kind of field
+ fac->update(false, type);
index++;
}
}
@@ -1785,7 +1798,7 @@ void ClassFileParser::parse_fields(const ClassFileStream* const cfs,
}
if (dup) {
classfile_parse_error("Duplicate field name \"%s\" with signature \"%s\" in class file %s",
- name->as_C_string(), sig->as_klass_external_name(), CHECK);
+ name->as_C_string(), sig->as_klass_external_name(), THREAD);
}
}
}
@@ -1953,12 +1966,14 @@ const ClassFileParser::unsafe_u2* ClassFileParser::parse_localvariable_table(con
if (start_pc >= code_length) {
classfile_parse_error(
"Invalid start_pc %u in %s in class file %s",
- start_pc, tbl_name, CHECK_NULL);
+ start_pc, tbl_name, THREAD);
+ return NULL;
}
if (end_pc > code_length) {
classfile_parse_error(
"Invalid length %u in %s in class file %s",
- length, tbl_name, CHECK_NULL);
+ length, tbl_name, THREAD);
+ return NULL;
}
const int cp_size = cp->length();
guarantee_property(valid_symbol_at(name_index),
@@ -2066,53 +2081,53 @@ AnnotationCollector::ID
AnnotationCollector::annotation_index(const ClassLoaderData* loader_data,
const Symbol* name,
const bool can_access_vm_annotations) {
- const vmSymbols::SID sid = vmSymbols::find_sid(name);
+ const vmSymbolID sid = vmSymbols::find_sid(name);
// Privileged code can use all annotations. Other code silently drops some.
const bool privileged = loader_data->is_boot_class_loader_data() ||
loader_data->is_platform_class_loader_data() ||
can_access_vm_annotations;
switch (sid) {
- case vmSymbols::VM_SYMBOL_ENUM_NAME(reflect_CallerSensitive_signature): {
+ case VM_SYMBOL_ENUM_NAME(reflect_CallerSensitive_signature): {
if (_location != _in_method) break; // only allow for methods
if (!privileged) break; // only allow in privileged code
return _method_CallerSensitive;
}
- case vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_internal_vm_annotation_ForceInline_signature): {
+ case VM_SYMBOL_ENUM_NAME(jdk_internal_vm_annotation_ForceInline_signature): {
if (_location != _in_method) break; // only allow for methods
if (!privileged) break; // only allow in privileged code
return _method_ForceInline;
}
- case vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_internal_vm_annotation_DontInline_signature): {
+ case VM_SYMBOL_ENUM_NAME(jdk_internal_vm_annotation_DontInline_signature): {
if (_location != _in_method) break; // only allow for methods
if (!privileged) break; // only allow in privileged code
return _method_DontInline;
}
- case vmSymbols::VM_SYMBOL_ENUM_NAME(java_lang_invoke_InjectedProfile_signature): {
+ case VM_SYMBOL_ENUM_NAME(java_lang_invoke_InjectedProfile_signature): {
if (_location != _in_method) break; // only allow for methods
if (!privileged) break; // only allow in privileged code
return _method_InjectedProfile;
}
- case vmSymbols::VM_SYMBOL_ENUM_NAME(java_lang_invoke_LambdaForm_Compiled_signature): {
+ case VM_SYMBOL_ENUM_NAME(java_lang_invoke_LambdaForm_Compiled_signature): {
if (_location != _in_method) break; // only allow for methods
if (!privileged) break; // only allow in privileged code
return _method_LambdaForm_Compiled;
}
- case vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_internal_vm_annotation_Hidden_signature): {
+ case VM_SYMBOL_ENUM_NAME(jdk_internal_vm_annotation_Hidden_signature): {
if (_location != _in_method) break; // only allow for methods
if (!privileged) break; // only allow in privileged code
return _method_Hidden;
}
- case vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_internal_vm_annotation_IntrinsicCandidate_signature): {
+ case VM_SYMBOL_ENUM_NAME(jdk_internal_vm_annotation_IntrinsicCandidate_signature): {
if (_location != _in_method) break; // only allow for methods
if (!privileged) break; // only allow in privileged code
return _method_IntrinsicCandidate;
}
- case vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_internal_vm_annotation_Stable_signature): {
+ case VM_SYMBOL_ENUM_NAME(jdk_internal_vm_annotation_Stable_signature): {
if (_location != _in_field) break; // only allow for fields
if (!privileged) break; // only allow in privileged code
return _field_Stable;
}
- case vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_internal_vm_annotation_Contended_signature): {
+ case VM_SYMBOL_ENUM_NAME(jdk_internal_vm_annotation_Contended_signature): {
if (_location != _in_field && _location != _in_class) {
break; // only allow for fields and classes
}
@@ -2121,7 +2136,7 @@ AnnotationCollector::annotation_index(const ClassLoaderData* loader_data,
}
return _jdk_internal_vm_annotation_Contended;
}
- case vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_internal_vm_annotation_ReservedStackAccess_signature): {
+ case VM_SYMBOL_ENUM_NAME(jdk_internal_vm_annotation_ReservedStackAccess_signature): {
if (_location != _in_method) break; // only allow for methods
if (RestrictReservedStack && !privileged) break; // honor privileges
return _jdk_internal_vm_annotation_ReservedStackAccess;
@@ -2218,7 +2233,8 @@ void ClassFileParser::copy_localvariable_table(const ConstMethod* cm,
classfile_parse_error("Duplicated LocalVariableTable attribute "
"entry for '%s' in class file %s",
_cp->symbol_at(lvt->name_cp_index)->as_utf8(),
- CHECK);
+ THREAD);
+ return;
}
}
}
@@ -2237,13 +2253,15 @@ void ClassFileParser::copy_localvariable_table(const ConstMethod* cm,
classfile_parse_error("LVTT entry for '%s' in class file %s "
"does not match any LVT entry",
_cp->symbol_at(lvtt_elem.name_cp_index)->as_utf8(),
- CHECK);
+ THREAD);
+ return;
}
} else if ((*entry)->signature_cp_index != 0 && _need_verify) {
classfile_parse_error("Duplicated LocalVariableTypeTable attribute "
"entry for '%s' in class file %s",
_cp->symbol_at(lvtt_elem.name_cp_index)->as_utf8(),
- CHECK);
+ THREAD);
+ return;
} else {
// to add generic signatures into LocalVariableTable
(*entry)->signature_cp_index = lvtt_elem.descriptor_cp_index;
@@ -2361,14 +2379,16 @@ Method* ClassFileParser::parse_method(const ClassFileStream* const cfs,
} else if ((flags & JVM_ACC_STATIC) == JVM_ACC_STATIC) {
flags &= JVM_ACC_STATIC | JVM_ACC_STRICT;
} else {
- classfile_parse_error("Method is not static in class file %s", CHECK_NULL);
+ classfile_parse_error("Method is not static in class file %s", THREAD);
+ return NULL;
}
} else {
verify_legal_method_modifiers(flags, is_interface, name, CHECK_NULL);
}
if (name == vmSymbols::object_initializer_name() && is_interface) {
- classfile_parse_error("Interface cannot have a method named