diff --git a/.github/workflows/submit.yml b/.github/workflows/submit.yml
index 382c4e20872..ebe72381738 100644
--- a/.github/workflows/submit.yml
+++ b/.github/workflows/submit.yml
@@ -9,7 +9,7 @@ on:
platforms:
description: "Platform(s) to execute on"
required: true
- default: "Linux x64, Windows x64, macOS x64"
+ default: "Linux x64, Linux x86, Windows x64, macOS x64"
jobs:
prerequisites:
@@ -18,6 +18,7 @@ jobs:
outputs:
should_run: ${{ steps.check_submit.outputs.should_run }}
bundle_id: ${{ steps.check_bundle_id.outputs.bundle_id }}
+ platform_linux_x86: ${{ steps.check_platforms.outputs.platform_linux_x86 }}
platform_linux_x64: ${{ steps.check_platforms.outputs.platform_linux_x64 }}
platform_windows_x64: ${{ steps.check_platforms.outputs.platform_windows_x64 }}
platform_macos_x64: ${{ steps.check_platforms.outputs.platform_macos_x64 }}
@@ -32,11 +33,13 @@ jobs:
id: check_platforms
run: |
echo "::set-output name=platform_linux_x64::${{ contains(github.event.inputs.platforms, 'linux x64') || (github.event.inputs.platforms == '' && (secrets.JDK_SUBMIT_PLATFORMS == '' || contains(secrets.JDK_SUBMIT_PLATFORMS, 'linux x64'))) }}"
+ echo "::set-output name=platform_linux_x86::${{ contains(github.event.inputs.platforms, 'linux x86') || (github.event.inputs.platforms == '' && (secrets.JDK_SUBMIT_PLATFORMS == '' || contains(secrets.JDK_SUBMIT_PLATFORMS, 'linux x86'))) }}"
echo "::set-output name=platform_windows_x64::${{ contains(github.event.inputs.platforms, 'windows x64') || (github.event.inputs.platforms == '' && (secrets.JDK_SUBMIT_PLATFORMS == '' || contains(secrets.JDK_SUBMIT_PLATFORMS, 'windows x64'))) }}"
echo "::set-output name=platform_macos_x64::${{ contains(github.event.inputs.platforms, 'macos x64') || (github.event.inputs.platforms == '' && (secrets.JDK_SUBMIT_PLATFORMS == '' || contains(secrets.JDK_SUBMIT_PLATFORMS, 'macos x64'))) }}"
if: steps.check_submit.outputs.should_run != 'false'
- name: Determine unique bundle identifier
+ id: check_bundle_id
run: echo "::set-output name=bundle_id::${GITHUB_ACTOR}_${GITHUB_SHA:0:8}"
if: steps.check_submit.outputs.should_run != 'false'
@@ -113,7 +116,7 @@ jobs:
flags: --enable-debug
artifact: -debug
- flavor: build hotspot no-pch
- flags: --disable-precompiled-headers
+ flags: --enable-debug --disable-precompiled-headers
build-target: hotspot
- flavor: build hotspot zero
flags: --enable-debug --disable-precompiled-headers --with-jvm-variants=zero
@@ -348,12 +351,326 @@ jobs:
if: always()
run: echo "logsuffix=`echo ${{ matrix.test }} | sed -e 's!/!_!'g -e 's! !_!'g`" >> $GITHUB_ENV
- - name: Persist test logs
+ - name: Package test results
+ if: always()
+ working-directory: build/run-test-prebuilt/test-results/
+ run: >
+ zip -r9
+ "$HOME/linux-x64${{ matrix.artifact }}_testresults_${{ env.logsuffix }}.zip"
+ .
+ continue-on-error: true
+
+ - name: Package test support
+ if: always()
+ working-directory: build/run-test-prebuilt/test-support/
+ run: >
+ zip -r9
+ "$HOME/linux-x64${{ matrix.artifact }}_testsupport_${{ env.logsuffix }}.zip"
+ .
+ -i *.jtr
+ -i */hs_err*.log
+ -i */replay*.log
+ continue-on-error: true
+
+ - name: Persist test results
+ if: always()
+ uses: actions/upload-artifact@v2
+ with:
+ path: ~/linux-x64${{ matrix.artifact }}_testresults_${{ env.logsuffix }}.zip
+ continue-on-error: true
+
+ - name: Persist test outputs
+ if: always()
+ uses: actions/upload-artifact@v2
+ with:
+ path: ~/linux-x64${{ matrix.artifact }}_testsupport_${{ env.logsuffix }}.zip
+ continue-on-error: true
+
+ linux_x86_build:
+ name: Linux x86
+ runs-on: "ubuntu-latest"
+ needs: prerequisites
+ if: needs.prerequisites.outputs.should_run != 'false' && needs.prerequisites.outputs.platform_linux_x86 != 'false'
+
+ strategy:
+ fail-fast: false
+ matrix:
+ flavor:
+ - build release
+ - build debug
+ include:
+ - flavor: build debug
+ flags: --enable-debug
+ artifact: -debug
+
+ # Reduced 32-bit build uses the same boot JDK as 64-bit build
+ env:
+ JDK_VERSION: "${{ fromJson(needs.prerequisites.outputs.dependencies).DEFAULT_VERSION_FEATURE }}"
+ BOOT_JDK_VERSION: "${{ fromJson(needs.prerequisites.outputs.dependencies).BOOT_JDK_VERSION }}"
+ BOOT_JDK_FILENAME: "${{ fromJson(needs.prerequisites.outputs.dependencies).LINUX_X64_BOOT_JDK_FILENAME }}"
+ BOOT_JDK_URL: "${{ fromJson(needs.prerequisites.outputs.dependencies).LINUX_X64_BOOT_JDK_URL }}"
+ BOOT_JDK_SHA256: "${{ fromJson(needs.prerequisites.outputs.dependencies).LINUX_X64_BOOT_JDK_SHA256 }}"
+
+ steps:
+ - name: Checkout the source
+ uses: actions/checkout@v2
+ with:
+ path: jdk
+
+ - name: Restore boot JDK from cache
+ id: bootjdk
+ uses: actions/cache@v2
+ with:
+ path: ~/bootjdk/${{ env.BOOT_JDK_VERSION }}
+ key: bootjdk-${{ runner.os }}-${{ env.BOOT_JDK_VERSION }}-${{ env.BOOT_JDK_SHA256 }}-v1
+
+ - name: Download boot JDK
+ run: |
+ mkdir -p "${HOME}/bootjdk/${BOOT_JDK_VERSION}"
+ wget -O "${HOME}/bootjdk/${BOOT_JDK_FILENAME}" "${BOOT_JDK_URL}"
+ echo "${BOOT_JDK_SHA256} ${HOME}/bootjdk/${BOOT_JDK_FILENAME}" | sha256sum -c >/dev/null -
+ tar -xf "${HOME}/bootjdk/${BOOT_JDK_FILENAME}" -C "${HOME}/bootjdk/${BOOT_JDK_VERSION}"
+ mv "${HOME}/bootjdk/${BOOT_JDK_VERSION}/"*/* "${HOME}/bootjdk/${BOOT_JDK_VERSION}/"
+ if: steps.bootjdk.outputs.cache-hit != 'true'
+
+ - name: Restore jtreg artifact
+ id: jtreg_restore
+ uses: actions/download-artifact@v2
+ with:
+ name: transient_jtreg_${{ needs.prerequisites.outputs.bundle_id }}
+ path: ~/jtreg/
+ continue-on-error: true
+
+ - name: Restore jtreg artifact (retry)
+ uses: actions/download-artifact@v2
+ with:
+ name: transient_jtreg_${{ needs.prerequisites.outputs.bundle_id }}
+ path: ~/jtreg/
+ if: steps.jtreg_restore.outcome == 'failure'
+
+ - name: Checkout gtest sources
+ uses: actions/checkout@v2
+ with:
+ repository: "google/googletest"
+ ref: "release-${{ fromJson(needs.prerequisites.outputs.dependencies).GTEST_VERSION }}"
+ path: gtest
+
+ # Roll in the multilib environment and its dependencies.
+ # Some multilib libraries do not have proper inter-dependencies, so we have to
+ # install their dependencies manually.
+ - name: Install dependencies
+ run: |
+ sudo dpkg --add-architecture i386
+ sudo apt-get update
+ sudo apt-get install gcc-multilib g++-multilib libfreetype6-dev:i386 libxrandr-dev:i386 libxtst-dev:i386 libtiff-dev:i386 libcupsimage2-dev:i386 libcups2-dev:i386 libasound2-dev:i386
+
+ - name: Configure
+ run: >
+ bash configure
+ --with-conf-name=linux-x86
+ --with-target-bits=32
+ ${{ matrix.flags }}
+ --with-version-opt=${GITHUB_ACTOR}-${GITHUB_SHA}
+ --with-version-build=0
+ --with-boot-jdk=${HOME}/bootjdk/${BOOT_JDK_VERSION}
+ --with-jtreg=${HOME}/jtreg
+ --with-gtest=${GITHUB_WORKSPACE}/gtest
+ --with-default-make-target="product-bundles test-bundles"
+ --with-zlib=system
+ --enable-jtreg-failure-handler
+ working-directory: jdk
+
+ - name: Build
+ run: make CONF_NAME=linux-x86 ${{ matrix.build-target }}
+ working-directory: jdk
+
+ - name: Persist test bundles
+ uses: actions/upload-artifact@v2
+ with:
+ name: transient_jdk-linux-x86${{ matrix.artifact }}_${{ needs.prerequisites.outputs.bundle_id }}
+ path: |
+ jdk/build/linux-x86/bundles/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin${{ matrix.artifact }}.tar.gz
+ jdk/build/linux-x86/bundles/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin-tests${{ matrix.artifact }}.tar.gz
+ if: matrix.build-target == false
+
+ linux_x86_test:
+ name: Linux x86
+ runs-on: "ubuntu-latest"
+ needs:
+ - prerequisites
+ - linux_x86_build
+
+ strategy:
+ fail-fast: false
+ matrix:
+ test:
+ - jdk/tier1 part 1
+ - jdk/tier1 part 2
+ - jdk/tier1 part 3
+ - langtools/tier1
+ - hs/tier1 common
+ - hs/tier1 compiler
+ - hs/tier1 gc
+ - hs/tier1 runtime
+ - hs/tier1 serviceability
+ include:
+ - test: jdk/tier1 part 1
+ suites: test/jdk/:tier1_part1
+ - test: jdk/tier1 part 2
+ suites: test/jdk/:tier1_part2
+ - test: jdk/tier1 part 3
+ suites: test/jdk/:tier1_part3
+ - test: langtools/tier1
+ suites: test/langtools/:tier1
+ - test: hs/tier1 common
+ suites: test/hotspot/jtreg/:tier1_common
+ artifact: -debug
+ - test: hs/tier1 compiler
+ suites: test/hotspot/jtreg/:tier1_compiler
+ artifact: -debug
+ - test: hs/tier1 gc
+ suites: test/hotspot/jtreg/:tier1_gc
+ artifact: -debug
+ - test: hs/tier1 runtime
+ suites: test/hotspot/jtreg/:tier1_runtime
+ artifact: -debug
+ - test: hs/tier1 serviceability
+ suites: test/hotspot/jtreg/:tier1_serviceability
+ artifact: -debug
+
+ # Reduced 32-bit build uses the same boot JDK as 64-bit build
+ env:
+ JDK_VERSION: "${{ fromJson(needs.prerequisites.outputs.dependencies).DEFAULT_VERSION_FEATURE }}"
+ BOOT_JDK_VERSION: "${{ fromJson(needs.prerequisites.outputs.dependencies).BOOT_JDK_VERSION }}"
+ BOOT_JDK_FILENAME: "${{ fromJson(needs.prerequisites.outputs.dependencies).LINUX_X64_BOOT_JDK_FILENAME }}"
+ BOOT_JDK_URL: "${{ fromJson(needs.prerequisites.outputs.dependencies).LINUX_X64_BOOT_JDK_URL }}"
+ BOOT_JDK_SHA256: "${{ fromJson(needs.prerequisites.outputs.dependencies).LINUX_X64_BOOT_JDK_SHA256 }}"
+
+ steps:
+ - name: Checkout the source
+ uses: actions/checkout@v2
+
+ - name: Restore boot JDK from cache
+ id: bootjdk
+ uses: actions/cache@v2
+ with:
+ path: ~/bootjdk/${{ env.BOOT_JDK_VERSION }}
+ key: bootjdk-${{ runner.os }}-${{ env.BOOT_JDK_VERSION }}-${{ env.BOOT_JDK_SHA256 }}-v1
+
+ - name: Download boot JDK
+ run: |
+ mkdir -p "${HOME}/bootjdk/${BOOT_JDK_VERSION}"
+ wget -O "${HOME}/bootjdk/${BOOT_JDK_FILENAME}" "${BOOT_JDK_URL}"
+ echo "${BOOT_JDK_SHA256} ${HOME}/bootjdk/${BOOT_JDK_FILENAME}" | sha256sum -c >/dev/null -
+ tar -xf "${HOME}/bootjdk/${BOOT_JDK_FILENAME}" -C "${HOME}/bootjdk/${BOOT_JDK_VERSION}"
+ mv "${HOME}/bootjdk/${BOOT_JDK_VERSION}/"*/* "${HOME}/bootjdk/${BOOT_JDK_VERSION}/"
+ if: steps.bootjdk.outputs.cache-hit != 'true'
+
+ - name: Restore jtreg artifact
+ id: jtreg_restore
+ uses: actions/download-artifact@v2
+ with:
+ name: transient_jtreg_${{ needs.prerequisites.outputs.bundle_id }}
+ path: ~/jtreg/
+ continue-on-error: true
+
+ - name: Restore jtreg artifact (retry)
+ uses: actions/download-artifact@v2
+ with:
+ name: transient_jtreg_${{ needs.prerequisites.outputs.bundle_id }}
+ path: ~/jtreg/
+ if: steps.jtreg_restore.outcome == 'failure'
+
+ - name: Restore build artifacts
+ id: build_restore
+ uses: actions/download-artifact@v2
+ with:
+ name: transient_jdk-linux-x86${{ matrix.artifact }}_${{ needs.prerequisites.outputs.bundle_id }}
+ path: ~/jdk-linux-x86${{ matrix.artifact }}
+ continue-on-error: true
+
+ - name: Restore build artifacts (retry)
+ uses: actions/download-artifact@v2
+ with:
+ name: transient_jdk-linux-x86${{ matrix.artifact }}_${{ needs.prerequisites.outputs.bundle_id }}
+ path: ~/jdk-linux-x86${{ matrix.artifact }}
+ if: steps.build_restore.outcome == 'failure'
+
+ - name: Unpack jdk
+ run: |
+ mkdir -p "${HOME}/jdk-linux-x86${{ matrix.artifact }}/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin${{ matrix.artifact }}"
+ tar -xf "${HOME}/jdk-linux-x86${{ matrix.artifact }}/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin${{ matrix.artifact }}.tar.gz" -C "${HOME}/jdk-linux-x86${{ matrix.artifact }}/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin${{ matrix.artifact }}"
+
+ - name: Unpack tests
+ run: |
+ mkdir -p "${HOME}/jdk-linux-x86${{ matrix.artifact }}/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin-tests${{ matrix.artifact }}"
+ tar -xf "${HOME}/jdk-linux-x86${{ matrix.artifact }}/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin-tests${{ matrix.artifact }}.tar.gz" -C "${HOME}/jdk-linux-x86${{ matrix.artifact }}/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin-tests${{ matrix.artifact }}"
+
+ - name: Find root of jdk image dir
+ run: |
+ imageroot=`find ${HOME}/jdk-linux-x86${{ matrix.artifact }}/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin${{ matrix.artifact }} -name release -type f`
+ echo "imageroot=`dirname ${imageroot}`" >> $GITHUB_ENV
+
+ - name: Run tests
+ run: >
+ JDK_IMAGE_DIR=${{ env.imageroot }}
+ TEST_IMAGE_DIR=${HOME}/jdk-linux-x86${{ matrix.artifact }}/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin-tests${{ matrix.artifact }}
+ BOOT_JDK=${HOME}/bootjdk/${BOOT_JDK_VERSION}
+ JT_HOME=${HOME}/jtreg
+ make test-prebuilt
+ CONF_NAME=run-test-prebuilt
+ LOG_CMDLINES=true
+ JTREG_VERBOSE=fail,error,time
+ TEST="${{ matrix.suites }}"
+ TEST_OPTS_JAVA_OPTIONS=
+ JTREG_KEYWORDS="!headful"
+ JTREG="JAVA_OPTIONS=-XX:-CreateCoredumpOnCrash"
+
+ - name: Check that all tests executed successfully
+ if: always()
+ run: >
+ if ! grep --include=test-summary.txt -lqr build/*/test-results -e "TEST SUCCESS" ; then
+ cat build/*/test-results/*/text/newfailures.txt ;
+ exit 1 ;
+ fi
+
+ - name: Create suitable test log artifact name
+ if: always()
+ run: echo "logsuffix=`echo ${{ matrix.test }} | sed -e 's!/!_!'g -e 's! !_!'g`" >> $GITHUB_ENV
+
+ - name: Package test results
+ if: always()
+ working-directory: build/run-test-prebuilt/test-results/
+ run: >
+ zip -r9
+ "$HOME/linux-x86${{ matrix.artifact }}_testresults_${{ env.logsuffix }}.zip"
+ .
+ continue-on-error: true
+
+ - name: Package test support
+ if: always()
+ working-directory: build/run-test-prebuilt/test-support/
+ run: >
+ zip -r9
+ "$HOME/linux-x86${{ matrix.artifact }}_testsupport_${{ env.logsuffix }}.zip"
+ .
+ -i *.jtr
+ -i */hs_err*.log
+ -i */replay*.log
+ continue-on-error: true
+
+ - name: Persist test results
if: always()
uses: actions/upload-artifact@v2
with:
- name: linux-x64${{ matrix.artifact }}_testlogs_${{ env.logsuffix }}
- path: build/*/test-results
+ path: ~/linux-x86${{ matrix.artifact }}_testresults_${{ env.logsuffix }}.zip
+ continue-on-error: true
+
+ - name: Persist test outputs
+ if: always()
+ uses: actions/upload-artifact@v2
+ with:
+ path: ~/linux-x86${{ matrix.artifact }}_testsupport_${{ env.logsuffix }}.zip
continue-on-error: true
windows_x64_build:
@@ -635,12 +952,41 @@ jobs:
if: always()
run: echo ("logsuffix=" + ("${{ matrix.test }}" -replace "/", "_" -replace " ", "_")) | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8
- - name: Persist test logs
+ - name: Package test results
+ if: always()
+ working-directory: build/run-test-prebuilt/test-results/
+ run: >
+ $env:Path = "$HOME\cygwin\cygwin64\bin;$env:Path" ;
+ zip -r9
+ "$HOME/windows-x64${{ matrix.artifact }}_testresults_${{ env.logsuffix }}.zip"
+ .
+ continue-on-error: true
+
+ - name: Package test support
+ if: always()
+ working-directory: build/run-test-prebuilt/test-support/
+ run: >
+ $env:Path = "$HOME\cygwin\cygwin64\bin;$env:Path" ;
+ zip -r9
+ "$HOME/windows-x64${{ matrix.artifact }}_testsupport_${{ env.logsuffix }}.zip"
+ .
+ -i *.jtr
+ -i */hs_err*.log
+ -i */replay*.log
+ continue-on-error: true
+
+ - name: Persist test results
if: always()
uses: actions/upload-artifact@v2
with:
- name: windows-x64${{ matrix.artifact }}_testlogs_${{ env.logsuffix }}
- path: build/*/test-results
+ path: ~/windows-x64${{ matrix.artifact }}_testresults_${{ env.logsuffix }}.zip
+ continue-on-error: true
+
+ - name: Persist test outputs
+ if: always()
+ uses: actions/upload-artifact@v2
+ with:
+ path: ~/windows-x64${{ matrix.artifact }}_testsupport_${{ env.logsuffix }}.zip
continue-on-error: true
macos_x64_build:
@@ -890,12 +1236,39 @@ jobs:
if: always()
run: echo "logsuffix=`echo ${{ matrix.test }} | sed -e 's!/!_!'g -e 's! !_!'g`" >> $GITHUB_ENV
- - name: Persist test logs
+ - name: Package test results
+ if: always()
+ working-directory: build/run-test-prebuilt/test-results/
+ run: >
+ zip -r9
+ "$HOME/macos-x64${{ matrix.artifact }}_testresults_${{ env.logsuffix }}.zip"
+ .
+ continue-on-error: true
+
+ - name: Package test support
+ if: always()
+ working-directory: build/run-test-prebuilt/test-support/
+ run: >
+ zip -r9
+ "$HOME/macos-x64${{ matrix.artifact }}_testsupport_${{ env.logsuffix }}.zip"
+ .
+ -i *.jtr
+ -i */hs_err*.log
+ -i */replay*.log
+ continue-on-error: true
+
+ - name: Persist test results
+ if: always()
+ uses: actions/upload-artifact@v2
+ with:
+ path: ~/macos-x64${{ matrix.artifact }}_testresults_${{ env.logsuffix }}.zip
+ continue-on-error: true
+
+ - name: Persist test outputs
if: always()
uses: actions/upload-artifact@v2
with:
- name: macos-x64${{ matrix.artifact }}_testlogs_${{ env.logsuffix }}
- path: build/*/test-results
+ path: ~/macos-x64${{ matrix.artifact }}_testsupport_${{ env.logsuffix }}.zip
continue-on-error: true
artifacts:
@@ -904,7 +1277,9 @@ jobs:
if: always()
continue-on-error: true
needs:
+ - prerequisites
- linux_x64_test
+ - linux_x86_test
- windows_x64_test
- macos_x64_test
diff --git a/.gitignore b/.gitignore
index c34d27c8470..cf21c8919cd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@
/dist/
/.idea/
/.vscode/
+/nbproject/
nbproject/private/
/webrev
/.src-rev
@@ -14,3 +15,4 @@ test/nashorn/lib
NashornProfile.txt
**/JTreport/**
**/JTwork/**
+/src/utils/LogCompilation/target/
diff --git a/.hgignore b/.hgignore
deleted file mode 100644
index 312ce62a641..00000000000
--- a/.hgignore
+++ /dev/null
@@ -1,18 +0,0 @@
-^build/
-^dist/
-^.idea/
-^.vscode/
-nbproject/private/
-^webrev
-^.src-rev$
-^.jib/
-(^|/)\.DS_Store
-(^|/)\.metadata/
-(^|/)\.recommenders/
-test/nashorn/script/external
-test/nashorn/lib
-NashornProfile.txt
-(^|/)JTreport/
-(^|/)JTwork/
-(^|/)\.git/
-^src/utils/hsdis/build/
\ No newline at end of file
diff --git a/doc/building.html b/doc/building.html
index 5f615f9d4ef..318a24aa840 100644
--- a/doc/building.html
+++ b/doc/building.html
@@ -78,6 +78,7 @@
Building the JDK
Native Libraries
Creating And Using Sysroots With qemu-deboostrap
Building for ARM/aarch64
+Building for musl
Verifying the Build
Build Performance
@@ -224,6 +225,8 @@ Linux
sudo apt-get install build-essential
For rpm-based distributions (Fedora, Red Hat, etc), try this:
sudo yum groupinstall "Development Tools"
+For Alpine Linux, aside from basic tooling, install the GNU versions of some programs:
+sudo apk add build-base bash grep zip
AIX
Please consult the AIX section of the Supported Build Platforms OpenJDK Build Wiki page for details about which versions of AIX are supported.
@@ -313,6 +316,7 @@ FreeType
- To install on an apt-based Linux, try running sudo apt-get install libfreetype6-dev.
- To install on an rpm-based Linux, try running sudo yum install freetype-devel.
+- To install on Alpine Linux, try running sudo apk add freetype-dev.
Use --with-freetype-include=<path> and --with-freetype-lib=<path> if configure does not automatically locate the platform FreeType files.
CUPS
@@ -320,6 +324,7 @@ CUPS
- To install on an apt-based Linux, try running sudo apt-get install libcups2-dev.
- To install on an rpm-based Linux, try running sudo yum install cups-devel.
+- To install on Alpine Linux, try running sudo apk add cups-dev.
Use --with-cups=<path> if configure does not properly locate your CUPS files.
X11
@@ -327,6 +332,7 @@ X11
- To install on an apt-based Linux, try running sudo apt-get install libx11-dev libxext-dev libxrender-dev libxrandr-dev libxtst-dev libxt-dev.
- To install on an rpm-based Linux, try running sudo yum install libXtst-devel libXt-devel libXrender-devel libXrandr-devel libXi-devel.
+- To install on Alpine Linux, try running sudo apk add libx11-dev libxext-dev libxrender-dev libxrandr-dev libxtst-dev libxt-dev.
Use --with-x=<path> if configure does not properly locate your X11 files.
ALSA
@@ -334,6 +340,7 @@ ALSA
- To install on an apt-based Linux, try running sudo apt-get install libasound2-dev.
- To install on an rpm-based Linux, try running sudo yum install alsa-lib-devel.
+- To install on Alpine Linux, try running sudo apk add alsa-lib-dev.
Use --with-alsa=<path> if configure does not properly locate your ALSA files.
libffi
@@ -341,6 +348,7 @@ libffi
- To install on an apt-based Linux, try running sudo apt-get install libffi-dev.
- To install on an rpm-based Linux, try running sudo yum install libffi-devel.
+- To install on Alpine Linux, try running sudo apk add libffi-dev.
Use --with-libffi=<path> if configure does not properly locate your libffi files.
@@ -349,6 +357,7 @@ Autoconf
- To install on an apt-based Linux, try running sudo apt-get install autoconf.
- To install on an rpm-based Linux, try running sudo yum install autoconf.
+- To install on Alpine Linux, try running sudo apk add autoconf.
- To install on macOS, try running brew install autoconf.
- To install on Windows, try running <path to Cygwin setup>/setup-x86_64 -q -P autoconf.
@@ -620,21 +629,30 @@ Creating And Using Sys
Fortunately, you can create sysroots for foreign architectures with tools provided by your OS. On Debian/Ubuntu systems, one could use qemu-deboostrap to create the target system chroot, which would have the native libraries and headers specific to that target system. After that, we can use the cross-compiler on the build system, pointing into chroot to get the build dependencies right. This allows building for foreign architectures with native compilation speed.
For example, cross-compiling to AArch64 from x86_64 could be done like this:
-- Install cross-compiler on the build system:
-
-apt install g++-aarch64-linux-gnu gcc-aarch64-linux-gnu
-
-- Create chroot on the build system, configuring it for target system:
-
-sudo qemu-debootstrap --arch=arm64 --verbose \
- --include=fakeroot,build-essential,libx11-dev,libxext-dev,libxrender-dev,libxrandr-dev,libxtst-dev,libxt-dev,libcups2-dev,libfontconfig1-dev,libasound2-dev,libfreetype6-dev,libpng12-dev \
- --resolve-deps jessie /chroots/arm64 http://httpredir.debian.org/debian/
-
-- Configure and build with newly created chroot as sysroot/toolchain-path:
-
-CC=aarch64-linux-gnu-gcc CXX=aarch64-linux-gnu-g++ sh ./configure --openjdk-target=aarch64-linux-gnu --with-sysroot=/chroots/arm64/ --with-toolchain-path=/chroots/arm64/
+Install cross-compiler on the build system:
+apt install g++-aarch64-linux-gnu gcc-aarch64-linux-gnu
+Create chroot on the build system, configuring it for target system:
+sudo qemu-debootstrap \
+ --arch=arm64 \
+ --verbose \
+ --include=fakeroot,symlinks,build-essential,libx11-dev,libxext-dev,libxrender-dev,libxrandr-dev,libxtst-dev,libxt-dev,libcups2-dev,libfontconfig1-dev,libasound2-dev,libfreetype6-dev,libpng-dev \
+ --resolve-deps \
+ buster \
+ ~/sysroot-arm64 \
+ http://httpredir.debian.org/debian/
+Make sure the symlinks inside the newly created chroot point to proper locations:
+sudo chroot ~/sysroot-arm64 symlinks -cr .
+Configure and build with newly created chroot as sysroot/toolchain-path:
+CC=aarch64-linux-gnu-gcc CXX=aarch64-linux-gnu-g++ sh ./configure \
+ --openjdk-target=aarch64-linux-gnu \
+ --with-sysroot=~/sysroot-arm64 \
+ --with-toolchain-path=~/sysroot-arm64 \
+ --with-freetype-lib=~/sysroot-arm64/usr/lib/aarch64-linux-gnu/ \
+ --with-freetype-include=~/sysroot-arm64/usr/include/freetype2/ \
+ --x-libraries=~/sysroot-arm64/usr/lib/aarch64-linux-gnu/
make images
-ls build/linux-aarch64-normal-server-release/
+ls build/linux-aarch64-server-release/
+
The build does not create new files in that chroot, so it can be reused for multiple builds without additional cleanup.
Architectures that are known to successfully cross-compile like this are:
@@ -688,6 +706,15 @@ Creating And Using Sys
Additional architectures might be supported by Debian/Ubuntu Ports.
Building for ARM/aarch64
A common cross-compilation target is the ARM CPU. When building for ARM, it is useful to set the ABI profile. A number of pre-defined ABI profiles are available using --with-abi-profile: arm-vfp-sflt, arm-vfp-hflt, arm-sflt, armv5-vfp-sflt, armv6-vfp-hflt. Note that soft-float ABIs are no longer properly supported by the JDK.
+Building for musl
+Just like it's possible to cross-compile for a different CPU, it's possible to cross-compile for musl libc on a glibc-based build system. A devkit suitable for most target CPU architectures can be obtained from musl.cc. After installing the required packages in the sysroot, configure the build with --openjdk-target:
+sh ./configure --with-jvm-variants=server \
+--with-boot-jdk=$BOOT_JDK \
+--with-build-jdk=$BUILD_JDK \
+--openjdk-target=x86_64-unknown-linux-musl \
+--with-devkit=$DEVKIT \
+--with-sysroot=$SYSROOT
+and run make normally.
Verifying the Build
The build will end up in a directory named like build/linux-arm-normal-server-release.
Inside this build output directory, the images/jdk will contain the newly built JDK, for your target system.
diff --git a/doc/building.md b/doc/building.md
index 47fa445998d..e0ac5c7b6c7 100644
--- a/doc/building.md
+++ b/doc/building.md
@@ -273,6 +273,13 @@ For rpm-based distributions (Fedora, Red Hat, etc), try this:
sudo yum groupinstall "Development Tools"
```
+For Alpine Linux, aside from basic tooling, install the GNU versions of some
+programs:
+
+```
+sudo apk add build-base bash grep zip
+```
+
### AIX
Please consult the AIX section of the [Supported Build Platforms](
@@ -431,6 +438,7 @@ rather than bundling the JDK's own copy.
libfreetype6-dev`.
* To install on an rpm-based Linux, try running `sudo yum install
freetype-devel`.
+ * To install on Alpine Linux, try running `sudo apk add freetype-dev`.
Use `--with-freetype-include=<path>` and `--with-freetype-lib=<path>`
if `configure` does not automatically locate the platform FreeType files.
@@ -445,6 +453,7 @@ your operating system.
libcups2-dev`.
* To install on an rpm-based Linux, try running `sudo yum install
cups-devel`.
+ * To install on Alpine Linux, try running `sudo apk add cups-dev`.
Use `--with-cups=<path>` if `configure` does not properly locate your CUPS
files.
@@ -458,6 +467,8 @@ Linux.
libx11-dev libxext-dev libxrender-dev libxrandr-dev libxtst-dev libxt-dev`.
* To install on an rpm-based Linux, try running `sudo yum install
libXtst-devel libXt-devel libXrender-devel libXrandr-devel libXi-devel`.
+ * To install on Alpine Linux, try running `sudo apk add libx11-dev
+ libxext-dev libxrender-dev libxrandr-dev libxtst-dev libxt-dev`.
Use `--with-x=<path>` if `configure` does not properly locate your X11 files.
@@ -470,6 +481,7 @@ required on Linux. At least version 0.9.1 of ALSA is required.
libasound2-dev`.
* To install on an rpm-based Linux, try running `sudo yum install
alsa-lib-devel`.
+ * To install on Alpine Linux, try running `sudo apk add alsa-lib-dev`.
Use `--with-alsa=<path>` if `configure` does not properly locate your ALSA
files.
@@ -484,6 +496,7 @@ Hotspot.
libffi-dev`.
* To install on an rpm-based Linux, try running `sudo yum install
libffi-devel`.
+ * To install on Alpine Linux, try running `sudo apk add libffi-dev`.
Use `--with-libffi=<path>` if `configure` does not properly locate your libffi
files.
@@ -499,6 +512,7 @@ platforms. At least version 2.69 is required.
autoconf`.
* To install on an rpm-based Linux, try running `sudo yum install
autoconf`.
+ * To install on Alpine Linux, try running `sudo apk add autoconf`.
* To install on macOS, try running `brew install autoconf`.
* To install on Windows, try running `<path to Cygwin setup>/setup-x86_64 -q
-P autoconf`.
@@ -1072,23 +1086,39 @@ for foreign architectures with native compilation speed.
For example, cross-compiling to AArch64 from x86_64 could be done like this:
* Install cross-compiler on the *build* system:
-```
-apt install g++-aarch64-linux-gnu gcc-aarch64-linux-gnu
-```
+ ```
+ apt install g++-aarch64-linux-gnu gcc-aarch64-linux-gnu
+ ```
* Create chroot on the *build* system, configuring it for *target* system:
-```
-sudo qemu-debootstrap --arch=arm64 --verbose \
- --include=fakeroot,build-essential,libx11-dev,libxext-dev,libxrender-dev,libxrandr-dev,libxtst-dev,libxt-dev,libcups2-dev,libfontconfig1-dev,libasound2-dev,libfreetype6-dev,libpng12-dev \
- --resolve-deps jessie /chroots/arm64 http://httpredir.debian.org/debian/
-```
+ ```
+ sudo qemu-debootstrap \
+ --arch=arm64 \
+ --verbose \
+ --include=fakeroot,symlinks,build-essential,libx11-dev,libxext-dev,libxrender-dev,libxrandr-dev,libxtst-dev,libxt-dev,libcups2-dev,libfontconfig1-dev,libasound2-dev,libfreetype6-dev,libpng-dev \
+ --resolve-deps \
+ buster \
+ ~/sysroot-arm64 \
+ http://httpredir.debian.org/debian/
+ ```
+
+ * Make sure the symlinks inside the newly created chroot point to proper locations:
+ ```
+ sudo chroot ~/sysroot-arm64 symlinks -cr .
+ ```
* Configure and build with newly created chroot as sysroot/toolchain-path:
-```
-CC=aarch64-linux-gnu-gcc CXX=aarch64-linux-gnu-g++ sh ./configure --openjdk-target=aarch64-linux-gnu --with-sysroot=/chroots/arm64/ --with-toolchain-path=/chroots/arm64/
-make images
-ls build/linux-aarch64-normal-server-release/
-```
+ ```
+ CC=aarch64-linux-gnu-gcc CXX=aarch64-linux-gnu-g++ sh ./configure \
+ --openjdk-target=aarch64-linux-gnu \
+ --with-sysroot=~/sysroot-arm64 \
+ --with-toolchain-path=~/sysroot-arm64 \
+ --with-freetype-lib=~/sysroot-arm64/usr/lib/aarch64-linux-gnu/ \
+ --with-freetype-include=~/sysroot-arm64/usr/include/freetype2/ \
+ --x-libraries=~/sysroot-arm64/usr/lib/aarch64-linux-gnu/
+ make images
+ ls build/linux-aarch64-server-release/
+ ```
The build does not create new files in that chroot, so it can be reused for multiple builds
without additional cleanup.
@@ -1113,6 +1143,25 @@ available using `--with-abi-profile`: arm-vfp-sflt, arm-vfp-hflt, arm-sflt,
armv5-vfp-sflt, armv6-vfp-hflt. Note that soft-float ABIs are no longer
properly supported by the JDK.
+### Building for musl
+
+Just like it's possible to cross-compile for a different CPU, it's possible to
+cross-compile for musl libc on a glibc-based *build* system.
+A devkit suitable for most target CPU architectures can be obtained from
+[musl.cc](https://musl.cc). After installing the required packages in the
+sysroot, configure the build with `--openjdk-target`:
+
+```
+sh ./configure --with-jvm-variants=server \
+--with-boot-jdk=$BOOT_JDK \
+--with-build-jdk=$BUILD_JDK \
+--openjdk-target=x86_64-unknown-linux-musl \
+--with-devkit=$DEVKIT \
+--with-sysroot=$SYSROOT
+```
+
+and run `make` normally.
+
### Verifying the Build
The build will end up in a directory named like
diff --git a/make/Bundles.gmk b/make/Bundles.gmk
index b7c8ddbfbe7..b52b5720772 100644
--- a/make/Bundles.gmk
+++ b/make/Bundles.gmk
@@ -410,17 +410,43 @@ endif
################################################################################
-ifneq ($(filter docs-bundles, $(MAKECMDGOALS)), )
- DOCS_BUNDLE_FILES := $(call FindFiles, $(DOCS_IMAGE_DIR))
+ifneq ($(filter docs-jdk-bundles, $(MAKECMDGOALS)), )
+ DOCS_JDK_BUNDLE_FILES := $(call FindFiles, $(DOCS_JDK_IMAGE_DIR))
- $(eval $(call SetupBundleFile, BUILD_DOCS_BUNDLE, \
- BUNDLE_NAME := $(DOCS_BUNDLE_NAME), \
- FILES := $(DOCS_BUNDLE_FILES), \
- BASE_DIRS := $(DOCS_IMAGE_DIR), \
+ $(eval $(call SetupBundleFile, BUILD_DOCS_JDK_BUNDLE, \
+ BUNDLE_NAME := $(DOCS_JDK_BUNDLE_NAME), \
+ FILES := $(DOCS_JDK_BUNDLE_FILES), \
+ BASE_DIRS := $(DOCS_JDK_IMAGE_DIR), \
SUBDIR := docs, \
))
- DOCS_TARGETS += $(BUILD_DOCS_BUNDLE)
+ DOCS_JDK_TARGETS += $(BUILD_DOCS_JDK_BUNDLE)
+endif
+
+ifneq ($(filter docs-javase-bundles, $(MAKECMDGOALS)), )
+ DOCS_JAVASE_BUNDLE_FILES := $(call FindFiles, $(DOCS_JAVASE_IMAGE_DIR))
+
+ $(eval $(call SetupBundleFile, BUILD_DOCS_JAVASE_BUNDLE, \
+ BUNDLE_NAME := $(DOCS_JAVASE_BUNDLE_NAME), \
+ FILES := $(DOCS_JAVASE_BUNDLE_FILES), \
+ BASE_DIRS := $(DOCS_JAVASE_IMAGE_DIR), \
+ SUBDIR := docs-javase, \
+ ))
+
+ DOCS_JAVASE_TARGETS += $(BUILD_DOCS_JAVASE_BUNDLE)
+endif
+
+ifneq ($(filter docs-reference-bundles, $(MAKECMDGOALS)), )
+ DOCS_REFERENCE_BUNDLE_FILES := $(call FindFiles, $(DOCS_REFERENCE_IMAGE_DIR))
+
+ $(eval $(call SetupBundleFile, BUILD_DOCS_REFERENCE_BUNDLE, \
+ BUNDLE_NAME := $(DOCS_REFERENCE_BUNDLE_NAME), \
+ FILES := $(DOCS_REFERENCE_BUNDLE_FILES), \
+ BASE_DIRS := $(DOCS_REFERENCE_IMAGE_DIR), \
+ SUBDIR := docs-reference, \
+ ))
+
+ DOCS_REFERENCE_TARGETS += $(BUILD_DOCS_REFERENCE_BUNDLE)
endif
################################################################################
@@ -469,9 +495,12 @@ $(eval $(call IncludeCustomExtension, Bundles.gmk))
product-bundles: $(PRODUCT_TARGETS)
legacy-bundles: $(LEGACY_TARGETS)
test-bundles: $(TEST_TARGETS)
-docs-bundles: $(DOCS_TARGETS)
+docs-jdk-bundles: $(DOCS_JDK_TARGETS)
+docs-javase-bundles: $(DOCS_JAVASE_TARGETS)
+docs-reference-bundles: $(DOCS_REFERENCE_TARGETS)
static-libs-bundles: $(STATIC_LIBS_TARGETS)
jcov-bundles: $(JCOV_TARGETS)
-.PHONY: all default product-bundles test-bundles docs-bundles \
+.PHONY: all default product-bundles test-bundles \
+ docs-jdk-bundles docs-javase-bundles docs-reference-bundles \
static-libs-bundles jcov-bundles
diff --git a/make/CompileJavaModules.gmk b/make/CompileJavaModules.gmk
index c4d25c90122..e8997e0da83 100644
--- a/make/CompileJavaModules.gmk
+++ b/make/CompileJavaModules.gmk
@@ -184,10 +184,6 @@ ifeq ($(call isTargetOs, windows), true)
java.desktop_EXCLUDES += com/sun/java/swing/plaf/gtk
endif
-ifdef BUILD_HEADLESS_ONLY
- java.desktop_EXCLUDES += sun/applet
-endif
-
ifeq ($(call isTargetOs, windows macosx), false)
java.desktop_EXCLUDE_FILES += sun/awt/AWTCharset.java
endif
@@ -389,11 +385,11 @@ endif
################################################################################
-jdk.incubator.jpackage_COPY += .gif .png .txt .spec .script .prerm .preinst \
+jdk.jpackage_COPY += .gif .png .txt .spec .script .prerm .preinst \
.postrm .postinst .list .sh .desktop .copyright .control .plist .template \
.icns .scpt .wxs .wxl .wxi .ico .bmp .tiff
-jdk.incubator.jpackage_CLEAN += .properties
+jdk.jpackage_CLEAN += .properties
################################################################################
@@ -546,6 +542,10 @@ jdk.jfr_DISABLED_WARNINGS += exports
jdk.jfr_COPY := .xsd .xml .dtd
jdk.jfr_JAVAC_FLAGS := -XDstringConcat=inline
+################################################################################
+
+jdk.incubator.vector_DOCLINT += -Xdoclint:all/protected
+
################################################################################
# If this is an imported module that has prebuilt classes, only compile
# module-info.java.
diff --git a/make/CompileModuleTools.gmk b/make/CompileModuleTools.gmk
index c6322e5b36e..18cd42f0612 100644
--- a/make/CompileModuleTools.gmk
+++ b/make/CompileModuleTools.gmk
@@ -33,8 +33,20 @@ include JavaCompilation.gmk
TOOLS_CLASSES_DIR := $(BUILDTOOLS_OUTPUTDIR)/tools_jigsaw_classes
+# When using an external BUILDJDK, make it possible to shortcut building of
+# these tools using the BUILD_JAVAC instead of having to build the complete
+# exploded image first.
+ifeq ($(EXTERNAL_BUILDJDK), true)
+ COMPILER := buildjdk
+ TARGET_RELEASE := $(TARGET_RELEASE_NEWJDK)
+else
+ COMPILER := interim
+ TARGET_RELEASE := $(TARGET_RELEASE_NEWJDK_UPGRADED)
+endif
+
$(eval $(call SetupJavaCompilation, BUILD_JIGSAW_TOOLS, \
- TARGET_RELEASE := $(TARGET_RELEASE_NEWJDK_UPGRADED), \
+ TARGET_RELEASE := $(TARGET_RELEASE), \
+ COMPILER := $(COMPILER), \
SRC := $(TOPDIR)/make/jdk/src/classes, \
INCLUDES := build/tools/deps \
build/tools/docs \
diff --git a/make/CompileToolsJdk.gmk b/make/CompileToolsJdk.gmk
index a671f934998..2f09476aa67 100644
--- a/make/CompileToolsJdk.gmk
+++ b/make/CompileToolsJdk.gmk
@@ -56,7 +56,8 @@ $(eval $(call SetupJavaCompilation, BUILD_TOOLS_JDK, \
DISABLED_WARNINGS := options, \
JAVAC_FLAGS := \
--add-exports java.desktop/sun.awt=ALL-UNNAMED \
- --add-exports java.base/sun.text=ALL-UNNAMED, \
+ --add-exports java.base/sun.text=ALL-UNNAMED \
+ --add-exports java.base/sun.security.util=ALL-UNNAMED, \
))
TARGETS += $(BUILD_TOOLS_JDK)
diff --git a/make/Docs.gmk b/make/Docs.gmk
index 19e962b79af..1f7a0caf819 100644
--- a/make/Docs.gmk
+++ b/make/Docs.gmk
@@ -458,7 +458,7 @@ $(eval $(call SetupApiDocsGeneration, JAVASE_API, \
MODULES := $(JAVASE_MODULES), \
SHORT_NAME := $(JAVASE_SHORT_NAME), \
LONG_NAME := $(JAVASE_LONG_NAME), \
- TARGET_DIR := $(IMAGES_OUTPUTDIR)/javase-docs/api, \
+ TARGET_DIR := $(DOCS_JAVASE_IMAGE_DIR)/api, \
))
# Targets generated are returned in JAVASE_API_JAVADOC_TARGETS and
@@ -476,7 +476,7 @@ $(eval $(call SetupApiDocsGeneration, REFERENCE_API, \
MODULES := $(JAVASE_MODULES), \
SHORT_NAME := $(JAVASE_SHORT_NAME), \
LONG_NAME := $(JAVASE_LONG_NAME), \
- TARGET_DIR := $(IMAGES_OUTPUTDIR)/reference-docs/api, \
+ TARGET_DIR := $(DOCS_REFERENCE_IMAGE_DIR)/api, \
JAVADOC_CMD := $(JAVADOC), \
OPTIONS := $(REFERENCE_OPTIONS), \
TAGS := $(REFERENCE_TAGS), \
diff --git a/make/Main.gmk b/make/Main.gmk
index 493b795d35a..cdb4be67c56 100644
--- a/make/Main.gmk
+++ b/make/Main.gmk
@@ -90,7 +90,6 @@ $(eval $(call SetupTarget, buildtools-jdk, \
$(eval $(call SetupTarget, buildtools-modules, \
MAKEFILE := CompileModuleTools, \
- DEPS := exploded-image-base, \
))
$(eval $(call SetupTarget, buildtools-hotspot, \
@@ -339,7 +338,7 @@ $(eval $(call SetupTarget, test-image-demos-jdk, \
$(eval $(call SetupTarget, generate-summary, \
MAKEFILE := GenerateModuleSummary, \
- DEPS := jmods buildtools-modules, \
+ DEPS := jmods buildtools-modules runnable-buildjdk, \
))
################################################################################
@@ -469,7 +468,7 @@ $(eval $(call SetupTarget, docs-jdk-api-javadoc, \
$(eval $(call SetupTarget, docs-jdk-api-modulegraph, \
MAKEFILE := Docs, \
TARGET := docs-jdk-api-modulegraph, \
- DEPS := exploded-image buildtools-modules, \
+ DEPS := buildtools-modules runnable-buildjdk, \
))
$(eval $(call SetupTarget, docs-javase-api-javadoc, \
@@ -480,7 +479,7 @@ $(eval $(call SetupTarget, docs-javase-api-javadoc, \
$(eval $(call SetupTarget, docs-javase-api-modulegraph, \
MAKEFILE := Docs, \
TARGET := docs-javase-api-modulegraph, \
- DEPS := exploded-image buildtools-modules, \
+ DEPS := buildtools-modules runnable-buildjdk, \
))
$(eval $(call SetupTarget, docs-reference-api-javadoc, \
@@ -491,7 +490,7 @@ $(eval $(call SetupTarget, docs-reference-api-javadoc, \
$(eval $(call SetupTarget, docs-reference-api-modulegraph, \
MAKEFILE := Docs, \
TARGET := docs-reference-api-modulegraph, \
- DEPS := exploded-image buildtools-modules, \
+ DEPS := buildtools-modules runnable-buildjdk, \
))
# The gensrc steps for jdk.jdi create html spec files.
@@ -749,12 +748,24 @@ $(eval $(call SetupTarget, test-bundles, \
DEPS := test-image, \
))
-$(eval $(call SetupTarget, docs-bundles, \
+$(eval $(call SetupTarget, docs-jdk-bundles, \
MAKEFILE := Bundles, \
- TARGET := docs-bundles, \
+ TARGET := docs-jdk-bundles, \
DEPS := docs-image, \
))
+$(eval $(call SetupTarget, docs-javase-bundles, \
+ MAKEFILE := Bundles, \
+ TARGET := docs-javase-bundles, \
+ DEPS := docs-javase-image, \
+))
+
+$(eval $(call SetupTarget, docs-reference-bundles, \
+ MAKEFILE := Bundles, \
+ TARGET := docs-reference-bundles, \
+ DEPS := docs-reference-image, \
+))
+
$(eval $(call SetupTarget, static-libs-bundles, \
MAKEFILE := Bundles, \
TARGET := static-libs-bundles, \
@@ -945,10 +956,13 @@ else
$(JMOD_TARGETS) $(INTERIM_JMOD_TARGETS): java.base-libs java.base-copy \
java.base-gendata jdk.jlink-launchers java
endif
- else
- # The normal non cross compilation case uses needs to wait for the full
+ else ifeq ($(EXTERNAL_BUILDJDK), false)
+ # The normal non cross compilation usecase needs to wait for the full
# exploded-image to avoid a race with the optimize target.
$(JMOD_TARGETS) $(INTERIM_JMOD_TARGETS): exploded-image
+ # The buildtools-modules are used for the exploded-image-optimize target,
+ # but can be built either using the exploded-image or an external BUILDJDK.
+ buildtools-modules: exploded-image-base
endif
# All modules include the main license files from java.base.
@@ -1069,6 +1083,18 @@ ifneq ($(COMPILE_TYPE), cross)
exploded-image: exploded-image-optimize
endif
+# The runnable-buildjdk target guarantees that the buildjdk is done
+# building and ready to be used. The exact set of dependencies it needs
+# depends on what kind of buildjdk is used for the current configuration.
+runnable-buildjdk:
+ifeq ($(CREATE_BUILDJDK), true)
+ ifneq ($(CREATING_BUILDJDK), true)
+ runnable-buildjdk: create-buildjdk
+ endif
+else ifeq ($(EXTERNAL_BUILDJDK), false)
+ runnable-buildjdk: exploded-image
+endif
+
create-buildjdk: create-buildjdk-interim-image
docs-jdk-api: docs-jdk-api-javadoc
@@ -1122,8 +1148,16 @@ ifeq ($(call isTargetOs, macosx), true)
legacy-images: mac-legacy-jre-bundle
endif
-# This target builds the documentation image
-docs-image: docs-jdk
+# These targets build the various documentation images
+docs-jdk-image: docs-jdk
+docs-javase-image: docs-javase
+docs-reference-image: docs-reference
+# The docs-jdk-image is what most users expect to be built
+docs-image: docs-jdk-image
+all-docs-images: docs-jdk-image docs-javase-image docs-reference-image
+
+docs-bundles: docs-jdk-bundles
+all-docs-bundles: docs-jdk-bundles docs-javase-bundles docs-reference-bundles
# This target builds the test image
test-image: prepare-test-image test-image-jdk-jtreg-native \
@@ -1156,7 +1190,7 @@ endif
################################################################################
# all-images builds all our deliverables as images.
-all-images: product-images test-image docs-image
+all-images: product-images test-image all-docs-images
# all-bundles packages all our deliverables as tar.gz bundles.
all-bundles: product-bundles test-bundles docs-bundles static-libs-bundles
@@ -1164,10 +1198,11 @@ all-bundles: product-bundles test-bundles docs-bundles static-libs-bundles
ALL_TARGETS += buildtools hotspot hotspot-libs hotspot-gensrc gensrc gendata \
copy java libs static-libs launchers jmods \
jdk.jdwp.agent-gensrc $(ALL_MODULES) demos \
- exploded-image-base exploded-image \
+ exploded-image-base exploded-image runnable-buildjdk \
create-buildjdk docs-jdk-api docs-javase-api docs-reference-api docs-jdk \
docs-javase docs-reference docs-javadoc mac-bundles product-images legacy-images \
- docs-image test-image all-images \
+ docs-image docs-javase-image docs-reference-image all-docs-images \
+ docs-bundles all-docs-bundles test-image all-images \
all-bundles
################################################################################
diff --git a/make/ReleaseFile.gmk b/make/ReleaseFile.gmk
index 14ebc9c32ae..0424e2fb623 100644
--- a/make/ReleaseFile.gmk
+++ b/make/ReleaseFile.gmk
@@ -53,6 +53,7 @@ define create-info-file
$(call info-file-item, "JAVA_VERSION_DATE", "$(VERSION_DATE)")
$(call info-file-item, "OS_NAME", "$(RELEASE_FILE_OS_NAME)")
$(call info-file-item, "OS_ARCH", "$(RELEASE_FILE_OS_ARCH)")
+ $(call info-file-item, "LIBC", "$(RELEASE_FILE_LIBC)")
endef
# Param 1 - The file containing the MODULES list
diff --git a/make/ToolsJdk.gmk b/make/ToolsJdk.gmk
index 296411559a9..45a0cc8c64e 100644
--- a/make/ToolsJdk.gmk
+++ b/make/ToolsJdk.gmk
@@ -1,5 +1,5 @@
#
-# Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
@@ -68,6 +68,7 @@ TOOL_TZDB = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
build.tools.tzdb.TzdbZoneRulesCompiler
TOOL_BLACKLISTED_CERTS = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
+ --add-exports java.base/sun.security.util=ALL-UNNAMED \
build.tools.blacklistedcertsconverter.BlacklistedCertsConverter
TOOL_MAKEJAVASECURITY = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \
diff --git a/make/autoconf/build-aux/config.guess b/make/autoconf/build-aux/config.guess
index b650b5109d0..14f21a25e8f 100644
--- a/make/autoconf/build-aux/config.guess
+++ b/make/autoconf/build-aux/config.guess
@@ -30,6 +30,17 @@
DIR=`dirname $0`
OUT=`. $DIR/autoconf-config.guess`
+# Detect C library.
+# Use '-gnu' suffix on systems that use glibc.
+# Use '-musl' suffix on systems that use the musl libc.
+echo $OUT | grep -- -linux- > /dev/null 2> /dev/null
+if test $? = 0; then
+ libc_vendor=`ldd --version 2>&1 | sed -n '1s/.*\(musl\).*/\1/p'`
+ if [ x"${libc_vendor}" = x"musl" ]; then
+ OUT=`echo $OUT | sed 's/-gnu/-musl/'`
+ fi
+fi
+
# Test and fix cygwin on x86_64
echo $OUT | grep 86-pc-cygwin > /dev/null 2> /dev/null
if test $? != 0; then
diff --git a/make/autoconf/build-aux/config.sub b/make/autoconf/build-aux/config.sub
index a36e6690728..d0dd001abdf 100644
--- a/make/autoconf/build-aux/config.sub
+++ b/make/autoconf/build-aux/config.sub
@@ -29,6 +29,11 @@
DIR=`dirname $0`
+if echo $* | grep linux-musl >/dev/null ; then
+ echo $*
+ exit
+fi
+
# Allow wsl
if echo $* | grep x86_64-pc-wsl >/dev/null ; then
echo $*
diff --git a/make/autoconf/buildjdk-spec.gmk.in b/make/autoconf/buildjdk-spec.gmk.in
index 7134e34bcee..524f35f417c 100644
--- a/make/autoconf/buildjdk-spec.gmk.in
+++ b/make/autoconf/buildjdk-spec.gmk.in
@@ -54,11 +54,13 @@ IMAGES_OUTPUTDIR := $(patsubst $(OUTPUTDIR)%,$(BUILDJDK_OUTPUTDIR)%,$(IMAGES_OUT
OPENJDK_BUILD_CPU_LEGACY := @OPENJDK_BUILD_CPU_LEGACY@
OPENJDK_BUILD_CPU_LEGACY_LIB := @OPENJDK_BUILD_CPU_LEGACY_LIB@
+OPENJDK_BUILD_LIBC := @OPENJDK_BUILD_LIBC@
OPENJDK_TARGET_CPU := @OPENJDK_BUILD_CPU@
OPENJDK_TARGET_CPU_ARCH := @OPENJDK_BUILD_CPU_ARCH@
OPENJDK_TARGET_CPU_BITS := @OPENJDK_BUILD_CPU_BITS@
OPENJDK_TARGET_CPU_ENDIAN := @OPENJDK_BUILD_CPU_ENDIAN@
OPENJDK_TARGET_CPU_LEGACY := @OPENJDK_BUILD_CPU_LEGACY@
+OPENJDK_TARGET_LIBC := @OPENJDK_BUILD_LIBC@
OPENJDK_TARGET_OS_INCLUDE_SUBDIR := @OPENJDK_BUILD_OS_INCLUDE_SUBDIR@
HOTSPOT_TARGET_OS := @HOTSPOT_BUILD_OS@
@@ -66,6 +68,7 @@ HOTSPOT_TARGET_OS_TYPE := @HOTSPOT_BUILD_OS_TYPE@
HOTSPOT_TARGET_CPU := @HOTSPOT_BUILD_CPU@
HOTSPOT_TARGET_CPU_ARCH := @HOTSPOT_BUILD_CPU_ARCH@
HOTSPOT_TARGET_CPU_DEFINE := @HOTSPOT_BUILD_CPU_DEFINE@
+HOTSPOT_TARGET_LIBC := @HOTSPOT_BUILD_LIBC@
CFLAGS_JDKLIB := @OPENJDK_BUILD_CFLAGS_JDKLIB@
CXXFLAGS_JDKLIB := @OPENJDK_BUILD_CXXFLAGS_JDKLIB@
diff --git a/make/autoconf/flags-cflags.m4 b/make/autoconf/flags-cflags.m4
index 588df7f0011..d4738ad6837 100644
--- a/make/autoconf/flags-cflags.m4
+++ b/make/autoconf/flags-cflags.m4
@@ -231,8 +231,14 @@ AC_DEFUN([FLAGS_SETUP_OPTIMIZATION],
# -D_FORTIFY_SOURCE=2 hardening option needs optimization (at least -O1) enabled
# set for lower O-levels -U_FORTIFY_SOURCE to overwrite previous settings
if test "x$OPENJDK_TARGET_OS" = xlinux -a "x$DEBUG_LEVEL" = "xfastdebug"; then
- ENABLE_FORTIFY_CFLAGS="-D_FORTIFY_SOURCE=2"
DISABLE_FORTIFY_CFLAGS="-U_FORTIFY_SOURCE"
+ # ASan doesn't work well with _FORTIFY_SOURCE
+ # See https://github.com/google/sanitizers/wiki/AddressSanitizer#faq
+ if test "x$ASAN_ENABLED" = xyes; then
+ ENABLE_FORTIFY_CFLAGS="${DISABLE_FORTIFY_CFLAGS}"
+ else
+ ENABLE_FORTIFY_CFLAGS="-D_FORTIFY_SOURCE=2"
+ fi
C_O_FLAG_HIGHEST_JVM="${C_O_FLAG_HIGHEST_JVM} ${ENABLE_FORTIFY_CFLAGS}"
C_O_FLAG_HIGHEST="${C_O_FLAG_HIGHEST} ${ENABLE_FORTIFY_CFLAGS}"
C_O_FLAG_HI="${C_O_FLAG_HI} ${ENABLE_FORTIFY_CFLAGS}"
@@ -558,6 +564,11 @@ AC_DEFUN([FLAGS_SETUP_CFLAGS_HELPER],
fi
fi
+ OS_CFLAGS="$OS_CFLAGS -DLIBC=$OPENJDK_TARGET_LIBC"
+ if test "x$OPENJDK_TARGET_LIBC" = xmusl; then
+ OS_CFLAGS="$OS_CFLAGS -DMUSL_LIBC"
+ fi
+
# Where does this really belong??
if test "x$TOOLCHAIN_TYPE" = xgcc || test "x$TOOLCHAIN_TYPE" = xclang; then
PICFLAG="-fPIC"
@@ -652,16 +663,10 @@ AC_DEFUN([FLAGS_SETUP_CFLAGS_CPU_DEP],
$1_DEFINES_CPU_JDK="${$1_DEFINES_CPU_JDK} -DARCH='\"$FLAGS_CPU_LEGACY\"' \
-D$FLAGS_CPU_LEGACY"
- if test "x$FLAGS_CPU_BITS" = x64; then
- # -D_LP64=1 is only set on linux and mac. Setting on windows causes diff in
- # unpack200.exe.
- if test "x$FLAGS_OS" = xlinux || test "x$FLAGS_OS" = xmacosx; then
- $1_DEFINES_CPU_JDK="${$1_DEFINES_CPU_JDK} -D_LP64=1"
- fi
- if test "x$FLAGS_OS" != xaix; then
- # xlc on AIX defines _LP64=1 by default and issues a warning if we redefine it.
- $1_DEFINES_CPU_JVM="${$1_DEFINES_CPU_JVM} -D_LP64=1"
- fi
+ if test "x$FLAGS_CPU_BITS" = x64 && test "x$FLAGS_OS" != xaix; then
+ # xlc on AIX defines _LP64=1 by default and issues a warning if we redefine it.
+ $1_DEFINES_CPU_JDK="${$1_DEFINES_CPU_JDK} -D_LP64=1"
+ $1_DEFINES_CPU_JVM="${$1_DEFINES_CPU_JVM} -D_LP64=1"
fi
# toolchain dependend, per-cpu
diff --git a/make/autoconf/jdk-options.m4 b/make/autoconf/jdk-options.m4
index 8f58db17d4a..a112a78d624 100644
--- a/make/autoconf/jdk-options.m4
+++ b/make/autoconf/jdk-options.m4
@@ -423,7 +423,10 @@ AC_DEFUN_ONCE([JDKOPT_SETUP_ADDRESS_SANITIZER],
fi
],
IF_ENABLED: [
- ASAN_CFLAGS="-fsanitize=address -fno-omit-frame-pointer"
+ # ASan is simply incompatible with gcc -Wstringop-truncation. See
+ # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85650
+ # It's harmless to be suppressed in clang as well.
+ ASAN_CFLAGS="-fsanitize=address -Wno-stringop-truncation -fno-omit-frame-pointer"
ASAN_LDFLAGS="-fsanitize=address"
JVM_CFLAGS="$JVM_CFLAGS $ASAN_CFLAGS"
JVM_LDFLAGS="$JVM_LDFLAGS $ASAN_LDFLAGS"
diff --git a/make/autoconf/jvm-features.m4 b/make/autoconf/jvm-features.m4
index 04ca7b4e909..5ad791795a7 100644
--- a/make/autoconf/jvm-features.m4
+++ b/make/autoconf/jvm-features.m4
@@ -306,7 +306,7 @@ AC_DEFUN_ONCE([JVM_FEATURES_CHECK_GRAAL],
# Graal is only available where JVMCI is available since it requires JVMCI.
if test "x$OPENJDK_TARGET_CPU" = "xx86_64"; then
AC_MSG_RESULT([yes])
- elif test "x$OPENJDK_TARGET_OS-$OPENJDK_TARGET_CPU" = "xlinux-aarch64"; then
+ elif test "x$OPENJDK_TARGET_CPU" = "xaarch64"; then
AC_MSG_RESULT([yes])
else
AC_MSG_RESULT([no, $OPENJDK_TARGET_CPU])
@@ -340,7 +340,7 @@ AC_DEFUN_ONCE([JVM_FEATURES_CHECK_JVMCI],
AC_MSG_CHECKING([if platform is supported by JVMCI])
if test "x$OPENJDK_TARGET_CPU" = "xx86_64"; then
AC_MSG_RESULT([yes])
- elif test "x$OPENJDK_TARGET_OS-$OPENJDK_TARGET_CPU" = "xlinux-aarch64"; then
+ elif test "x$OPENJDK_TARGET_CPU" = "xaarch64"; then
AC_MSG_RESULT([yes])
else
AC_MSG_RESULT([no, $OPENJDK_TARGET_CPU])
diff --git a/make/autoconf/libraries.m4 b/make/autoconf/libraries.m4
index 5120918aed2..e6aafe01550 100644
--- a/make/autoconf/libraries.m4
+++ b/make/autoconf/libraries.m4
@@ -43,9 +43,11 @@ AC_DEFUN_ONCE([LIB_DETERMINE_DEPENDENCIES],
if test "x$OPENJDK_TARGET_OS" = xwindows || test "x$OPENJDK_TARGET_OS" = xmacosx; then
# No X11 support on windows or macosx
NEEDS_LIB_X11=false
+ elif test "x$ENABLE_HEADLESS_ONLY" = xtrue; then
+ # No X11 support needed when building headless only
+ NEEDS_LIB_X11=false
else
- # All other instances need X11, even if building headless only, libawt still
- # needs X11 headers.
+ # All other instances need X11
NEEDS_LIB_X11=true
fi
diff --git a/make/autoconf/platform.m4 b/make/autoconf/platform.m4
index c0f2446dbd7..2f39d2b0ca7 100644
--- a/make/autoconf/platform.m4
+++ b/make/autoconf/platform.m4
@@ -220,6 +220,24 @@ AC_DEFUN([PLATFORM_EXTRACT_VARS_FROM_OS],
esac
])
+# Support macro for PLATFORM_EXTRACT_TARGET_AND_BUILD.
+# Converts autoconf style OS name to OpenJDK style, into
+# VAR_LIBC.
+AC_DEFUN([PLATFORM_EXTRACT_VARS_FROM_LIBC],
+[
+ case "$1" in
+ *linux*-musl)
+ VAR_LIBC=musl
+ ;;
+ *linux*-gnu)
+ VAR_LIBC=gnu
+ ;;
+ *)
+ VAR_LIBC=default
+ ;;
+ esac
+])
+
# Expects $host_os $host_cpu $build_os and $build_cpu
# and $with_target_bits to have been setup!
#
@@ -237,9 +255,10 @@ AC_DEFUN([PLATFORM_EXTRACT_TARGET_AND_BUILD],
AC_SUBST(OPENJDK_TARGET_AUTOCONF_NAME)
AC_SUBST(OPENJDK_BUILD_AUTOCONF_NAME)
- # Convert the autoconf OS/CPU value to our own data, into the VAR_OS/CPU variables.
+ # Convert the autoconf OS/CPU value to our own data, into the VAR_OS/CPU/LIBC variables.
PLATFORM_EXTRACT_VARS_FROM_OS($build_os)
PLATFORM_EXTRACT_VARS_FROM_CPU($build_cpu)
+ PLATFORM_EXTRACT_VARS_FROM_LIBC($build_os)
# ..and setup our own variables. (Do this explicitly to facilitate searching)
OPENJDK_BUILD_OS="$VAR_OS"
if test "x$VAR_OS_TYPE" != x; then
@@ -256,6 +275,7 @@ AC_DEFUN([PLATFORM_EXTRACT_TARGET_AND_BUILD],
OPENJDK_BUILD_CPU_ARCH="$VAR_CPU_ARCH"
OPENJDK_BUILD_CPU_BITS="$VAR_CPU_BITS"
OPENJDK_BUILD_CPU_ENDIAN="$VAR_CPU_ENDIAN"
+ OPENJDK_BUILD_LIBC="$VAR_LIBC"
AC_SUBST(OPENJDK_BUILD_OS)
AC_SUBST(OPENJDK_BUILD_OS_TYPE)
AC_SUBST(OPENJDK_BUILD_OS_ENV)
@@ -263,13 +283,20 @@ AC_DEFUN([PLATFORM_EXTRACT_TARGET_AND_BUILD],
AC_SUBST(OPENJDK_BUILD_CPU_ARCH)
AC_SUBST(OPENJDK_BUILD_CPU_BITS)
AC_SUBST(OPENJDK_BUILD_CPU_ENDIAN)
+ AC_SUBST(OPENJDK_BUILD_LIBC)
AC_MSG_CHECKING([openjdk-build os-cpu])
AC_MSG_RESULT([$OPENJDK_BUILD_OS-$OPENJDK_BUILD_CPU])
- # Convert the autoconf OS/CPU value to our own data, into the VAR_OS/CPU variables.
+ if test "x$OPENJDK_BUILD_OS" = "xlinux"; then
+ AC_MSG_CHECKING([openjdk-build C library])
+ AC_MSG_RESULT([$OPENJDK_BUILD_LIBC])
+ fi
+
+ # Convert the autoconf OS/CPU value to our own data, into the VAR_OS/CPU/LIBC variables.
PLATFORM_EXTRACT_VARS_FROM_OS($host_os)
PLATFORM_EXTRACT_VARS_FROM_CPU($host_cpu)
+ PLATFORM_EXTRACT_VARS_FROM_LIBC($host_os)
# ... and setup our own variables. (Do this explicitly to facilitate searching)
OPENJDK_TARGET_OS="$VAR_OS"
if test "x$VAR_OS_TYPE" != x; then
@@ -287,6 +314,7 @@ AC_DEFUN([PLATFORM_EXTRACT_TARGET_AND_BUILD],
OPENJDK_TARGET_CPU_BITS="$VAR_CPU_BITS"
OPENJDK_TARGET_CPU_ENDIAN="$VAR_CPU_ENDIAN"
OPENJDK_TARGET_OS_UPPERCASE=`$ECHO $OPENJDK_TARGET_OS | $TR 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'`
+ OPENJDK_TARGET_LIBC="$VAR_LIBC"
AC_SUBST(OPENJDK_TARGET_OS)
AC_SUBST(OPENJDK_TARGET_OS_TYPE)
@@ -296,9 +324,15 @@ AC_DEFUN([PLATFORM_EXTRACT_TARGET_AND_BUILD],
AC_SUBST(OPENJDK_TARGET_CPU_ARCH)
AC_SUBST(OPENJDK_TARGET_CPU_BITS)
AC_SUBST(OPENJDK_TARGET_CPU_ENDIAN)
+ AC_SUBST(OPENJDK_TARGET_LIBC)
AC_MSG_CHECKING([openjdk-target os-cpu])
AC_MSG_RESULT([$OPENJDK_TARGET_OS-$OPENJDK_TARGET_CPU])
+
+ if test "x$OPENJDK_TARGET_OS" = "xlinux"; then
+ AC_MSG_CHECKING([openjdk-target C library])
+ AC_MSG_RESULT([$OPENJDK_TARGET_LIBC])
+ fi
])
# Check if a reduced build (32-bit on 64-bit platforms) is requested, and modify behaviour
@@ -420,7 +454,13 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER],
else
OPENJDK_$1_CPU_BUNDLE="$OPENJDK_$1_CPU"
fi
- OPENJDK_$1_BUNDLE_PLATFORM="${OPENJDK_$1_OS_BUNDLE}-${OPENJDK_$1_CPU_BUNDLE}"
+
+ OPENJDK_$1_LIBC_BUNDLE=""
+ if test "x$OPENJDK_$1_LIBC" = "xmusl"; then
+ OPENJDK_$1_LIBC_BUNDLE="-$OPENJDK_$1_LIBC"
+ fi
+
+ OPENJDK_$1_BUNDLE_PLATFORM="${OPENJDK_$1_OS_BUNDLE}-${OPENJDK_$1_CPU_BUNDLE}${OPENJDK_$1_LIBC_BUNDLE}"
AC_SUBST(OPENJDK_$1_BUNDLE_PLATFORM)
if test "x$COMPILE_TYPE" = "xcross"; then
@@ -493,6 +533,9 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER],
fi
AC_SUBST(HOTSPOT_$1_CPU_DEFINE)
+ HOTSPOT_$1_LIBC=$OPENJDK_$1_LIBC
+ AC_SUBST(HOTSPOT_$1_LIBC)
+
# For historical reasons, the OS include directories have odd names.
OPENJDK_$1_OS_INCLUDE_SUBDIR="$OPENJDK_TARGET_OS"
if test "x$OPENJDK_TARGET_OS" = "xwindows"; then
@@ -518,9 +561,11 @@ AC_DEFUN([PLATFORM_SET_RELEASE_FILE_OS_VALUES],
RELEASE_FILE_OS_NAME="AIX"
fi
RELEASE_FILE_OS_ARCH=${OPENJDK_TARGET_CPU}
+ RELEASE_FILE_LIBC=${OPENJDK_TARGET_LIBC}
AC_SUBST(RELEASE_FILE_OS_NAME)
AC_SUBST(RELEASE_FILE_OS_ARCH)
+ AC_SUBST(RELEASE_FILE_LIBC)
])
AC_DEFUN([PLATFORM_SET_MODULE_TARGET_OS_VALUES],
diff --git a/make/autoconf/spec.gmk.in b/make/autoconf/spec.gmk.in
index 14d7a18a0e8..63dc9a5767d 100644
--- a/make/autoconf/spec.gmk.in
+++ b/make/autoconf/spec.gmk.in
@@ -80,6 +80,8 @@ OPENJDK_TARGET_CPU_ARCH:=@OPENJDK_TARGET_CPU_ARCH@
OPENJDK_TARGET_CPU_BITS:=@OPENJDK_TARGET_CPU_BITS@
OPENJDK_TARGET_CPU_ENDIAN:=@OPENJDK_TARGET_CPU_ENDIAN@
+OPENJDK_TARGET_LIBC:=@OPENJDK_TARGET_LIBC@
+
COMPILE_TYPE:=@COMPILE_TYPE@
# Legacy support
@@ -95,6 +97,8 @@ HOTSPOT_TARGET_CPU := @HOTSPOT_TARGET_CPU@
HOTSPOT_TARGET_CPU_ARCH := @HOTSPOT_TARGET_CPU_ARCH@
HOTSPOT_TARGET_CPU_DEFINE := @HOTSPOT_TARGET_CPU_DEFINE@
+HOTSPOT_TARGET_LIBC := @HOTSPOT_TARGET_LIBC@
+
OPENJDK_TARGET_BUNDLE_PLATFORM:=@OPENJDK_TARGET_BUNDLE_PLATFORM@
JDK_ARCH_ABI_PROP_NAME := @JDK_ARCH_ABI_PROP_NAME@
@@ -109,6 +113,8 @@ OPENJDK_BUILD_CPU_ARCH:=@OPENJDK_BUILD_CPU_ARCH@
OPENJDK_BUILD_CPU_BITS:=@OPENJDK_BUILD_CPU_BITS@
OPENJDK_BUILD_CPU_ENDIAN:=@OPENJDK_BUILD_CPU_ENDIAN@
+OPENJDK_BUILD_LIBC:=@OPENJDK_BUILD_LIBC@
+
OPENJDK_BUILD_OS_INCLUDE_SUBDIR:=@OPENJDK_TARGET_OS_INCLUDE_SUBDIR@
# Target platform value in ModuleTarget class file attribute.
@@ -117,6 +123,7 @@ OPENJDK_MODULE_TARGET_PLATFORM:=@OPENJDK_MODULE_TARGET_PLATFORM@
# OS_* properties in release file
RELEASE_FILE_OS_NAME:=@RELEASE_FILE_OS_NAME@
RELEASE_FILE_OS_ARCH:=@RELEASE_FILE_OS_ARCH@
+RELEASE_FILE_LIBC:=@RELEASE_FILE_LIBC@
SOURCE_DATE := @SOURCE_DATE@
ENABLE_REPRODUCIBLE_BUILD := @ENABLE_REPRODUCIBLE_BUILD@
@@ -637,6 +644,7 @@ JARSIGNER=@FIXPATH@ $(JARSIGNER_CMD)
BUILD_JAVA_FLAGS := @BOOTCYCLE_JVM_ARGS_BIG@
BUILD_JAVA=@FIXPATH@ $(BUILD_JDK)/bin/java $(BUILD_JAVA_FLAGS)
+BUILD_JAVAC=@FIXPATH@ $(BUILD_JDK)/bin/javac
BUILD_JAR=@FIXPATH@ $(BUILD_JDK)/bin/jar
# Interim langtools modules and arguments
@@ -751,7 +759,6 @@ TAR_SUPPORTS_TRANSFORM:=@TAR_SUPPORTS_TRANSFORM@
# Build setup
ENABLE_AOT:=@ENABLE_AOT@
-ENABLE_INTREE_EC:=@ENABLE_INTREE_EC@
USE_EXTERNAL_LIBJPEG:=@USE_EXTERNAL_LIBJPEG@
USE_EXTERNAL_LIBGIF:=@USE_EXTERNAL_LIBGIF@
USE_EXTERNAL_LIBZ:=@USE_EXTERNAL_LIBZ@
@@ -869,10 +876,14 @@ INTERIM_JMODS_DIR := $(SUPPORT_OUTPUTDIR)/interim-jmods
INTERIM_IMAGE_DIR := $(SUPPORT_OUTPUTDIR)/interim-image
# Docs image
-DOCS_IMAGE_SUBDIR := docs
-DOCS_IMAGE_DIR = $(IMAGES_OUTPUTDIR)/$(DOCS_IMAGE_SUBDIR)
+DOCS_JDK_IMAGE_SUBDIR := docs
+DOCS_JDK_IMAGE_DIR = $(IMAGES_OUTPUTDIR)/$(DOCS_JDK_IMAGE_SUBDIR)
+DOCS_JAVASE_IMAGE_SUBDIR := docs-javase
+DOCS_JAVASE_IMAGE_DIR = $(IMAGES_OUTPUTDIR)/$(DOCS_JAVASE_IMAGE_SUBDIR)
+DOCS_REFERENCE_IMAGE_SUBDIR := docs-reference
+DOCS_REFERENCE_IMAGE_DIR = $(IMAGES_OUTPUTDIR)/$(DOCS_REFERENCE_IMAGE_SUBDIR)
# Output docs directly into image
-DOCS_OUTPUTDIR := $(DOCS_IMAGE_DIR)
+DOCS_OUTPUTDIR := $(DOCS_JDK_IMAGE_DIR)
# Static libs image
STATIC_LIBS_IMAGE_SUBDIR := static-libs
@@ -915,7 +926,9 @@ JRE_BUNDLE_NAME := jre-$(BASE_NAME)_bin$(DEBUG_PART).$(JDK_BUNDLE_EXTENSION)
JDK_SYMBOLS_BUNDLE_NAME := jdk-$(BASE_NAME)_bin$(DEBUG_PART)-symbols.tar.gz
TEST_DEMOS_BUNDLE_NAME := jdk-$(BASE_NAME)_bin-tests-demos$(DEBUG_PART).tar.gz
TEST_BUNDLE_NAME := jdk-$(BASE_NAME)_bin-tests$(DEBUG_PART).tar.gz
-DOCS_BUNDLE_NAME := jdk-$(BASE_NAME)_doc-api-spec$(DEBUG_PART).tar.gz
+DOCS_JDK_BUNDLE_NAME := jdk-$(BASE_NAME)_doc-api-spec$(DEBUG_PART).tar.gz
+DOCS_JAVASE_BUNDLE_NAME := javase-$(BASE_NAME)_doc-api-spec$(DEBUG_PART).tar.gz
+DOCS_REFERENCE_BUNDLE_NAME := jdk-reference-$(BASE_NAME)_doc-api-spec$(DEBUG_PART).tar.gz
STATIC_LIBS_BUNDLE_NAME := jdk-$(BASE_NAME)_bin-static-libs$(DEBUG_PART).tar.gz
JCOV_BUNDLE_NAME := jdk-jcov-$(BASE_NAME)_bin$(DEBUG_PART).$(JDK_BUNDLE_EXTENSION)
@@ -924,7 +937,9 @@ JRE_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(JRE_BUNDLE_NAME)
JDK_SYMBOLS_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(JDK_SYMBOLS_BUNDLE_NAME)
TEST_DEMOS_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(TEST_DEMOS_BUNDLE_NAME)
TEST_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(TEST_BUNDLE_NAME)
-DOCS_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(DOCS_BUNDLE_NAME)
+DOCS_JDK_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(DOCS_JDK_BUNDLE_NAME)
+DOCS_JAVASE_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(DOCS_JAVASE_BUNDLE_NAME)
+DOCS_REFERENCE_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(DOCS_REFERENCE_BUNDLE_NAME)
JCOV_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(JCOV_BUNDLE_NAME)
# This macro is called to allow inclusion of closed source counterparts.
diff --git a/make/common/JavaCompilation.gmk b/make/common/JavaCompilation.gmk
index e0b1fb003d4..bedb971115b 100644
--- a/make/common/JavaCompilation.gmk
+++ b/make/common/JavaCompilation.gmk
@@ -202,6 +202,13 @@ define SetupJavaCompilationBody
# If unspecified, default to the new jdk we're building
$1_TARGET_RELEASE := $$(TARGET_RELEASE_BOOTJDK)
endif
+ else ifeq ($$($1_COMPILER), buildjdk)
+ $1_JAVAC_CMD := $$(BUILD_JAVAC)
+
+ ifeq ($$($1_TARGET_RELEASE), )
+ # If unspecified, default to the new jdk we're building
+ $1_TARGET_RELEASE := $$(TARGET_RELEASE_NEWJDK)
+ endif
else ifeq ($$($1_COMPILER), interim)
# Use java server if it is enabled, and the user does not want a specialized
# class path.
@@ -304,9 +311,11 @@ define SetupJavaCompilationBody
ifneq ($$($1_KEEP_DUPS), true)
# Remove duplicate source files by keeping the first found of each duplicate.
# This allows for automatic overrides with custom or platform specific versions
- # source files.
+ # source files. Need to call DoubleDollar as we have java classes with '$' in
+ # their names.
$1_SRCS := $$(strip $$(foreach s, $$($1_SRCS), \
- $$(eval relative_src := $$(call remove-prefixes, $$($1_SRC), $$(s))) \
+ $$(eval relative_src := $$(call remove-prefixes, $$($1_SRC), \
+ $$(call DoubleDollar, $$(s)))) \
$$(if $$($1_$$(relative_src)), \
, \
$$(eval $1_$$(relative_src) := 1) $$(s))))
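
The deduplication rewritten above keeps the first copy of each source-root-relative path, which is what lets custom or platform-specific roots override later ones; DoubleDollar only protects '$' characters (inner-class style file names) from make expansion. A rough Python equivalent of the first-found-wins behaviour, assuming src_roots is ordered with the overriding roots first (sketch only):

```python
import os

def dedup_sources(src_roots, srcs):
    # Keep only the first file seen for each path relative to its source root;
    # later duplicates (from roots listed after the overriding one) are dropped.
    seen, kept = set(), []
    for s in srcs:
        rel = s
        for root in src_roots:
            if s.startswith(root.rstrip(os.sep) + os.sep):
                rel = os.path.relpath(s, root)
                break
        if rel not in seen:
            seen.add(rel)
            kept.append(s)
    return kept

# With src_roots = ["custom/src", "open/src"], a file present under both roots
# keeps only the custom/src copy. No '$'-quoting is needed in Python; the
# DoubleDollar call exists purely because make would otherwise expand '$'.
```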
diff --git a/make/common/JdkNativeCompilation.gmk b/make/common/JdkNativeCompilation.gmk
index 21134966dc0..6a963ac2c49 100644
--- a/make/common/JdkNativeCompilation.gmk
+++ b/make/common/JdkNativeCompilation.gmk
@@ -77,8 +77,10 @@ ifeq ($(STATIC_LIBS), true)
FindStaticLib =
endif
+# Returns the module specific java header dir if it exists.
+# Param 1 - module name
GetJavaHeaderDir = \
- $(wildcard $(SUPPORT_OUTPUTDIR)/headers/$(strip $1))
+ $(if $(strip $1),$(wildcard $(SUPPORT_OUTPUTDIR)/headers/$(strip $1)))
# Process a dir description such as "java.base:headers" into a set of proper absolute paths.
ProcessDir = \
@@ -123,15 +125,27 @@ JDK_RCFLAGS=$(RCFLAGS) \
SetupJdkLibrary = $(NamedParamsMacroTemplate)
define SetupJdkLibraryBody
ifeq ($$($1_OUTPUT_DIR), )
- $1_OUTPUT_DIR := $$(call FindLibDirForModule, $$(MODULE))
+ ifneq ($$(MODULE), )
+ $1_OUTPUT_DIR := $$(call FindLibDirForModule, $$(MODULE))
+ else
+ $$(error Must specify OUTPUT_DIR in a MODULE free context)
+ endif
endif
ifeq ($$($1_OBJECT_DIR), )
- $1_OBJECT_DIR := $$(SUPPORT_OUTPUTDIR)/native/$$(MODULE)/lib$$($1_NAME)
+ ifneq ($$(MODULE), )
+ $1_OBJECT_DIR := $$(SUPPORT_OUTPUTDIR)/native/$$(MODULE)/lib$$($1_NAME)
+ else
+ $$(error Must specify OBJECT_DIR in a MODULE free context)
+ endif
endif
ifeq ($$($1_SRC), )
- $1_SRC := $$(call FindSrcDirsForLib, $$(MODULE), $$($1_NAME))
+ ifneq ($$(MODULE), )
+ $1_SRC := $$(call FindSrcDirsForLib, $$(MODULE), $$($1_NAME))
+ else
+ $$(error Must specify SRC in a MODULE free context)
+ endif
else
$1_SRC := $$(foreach dir, $$($1_SRC), $$(call ProcessDir, $$(dir)))
endif
@@ -165,7 +179,8 @@ define SetupJdkLibraryBody
ifneq ($$($1_HEADERS_FROM_SRC), false)
$1_SRC_HEADER_FLAGS := $$(addprefix -I, $$(wildcard $$($1_SRC)))
endif
- # Always add the java header dir
+
+ # Add the module specific java header dir
$1_SRC_HEADER_FLAGS += $$(addprefix -I, $$(call GetJavaHeaderDir, $$(MODULE)))
ifneq ($$($1_EXTRA_HEADER_DIRS), )
@@ -203,11 +218,19 @@ define SetupJdkExecutableBody
$1_TYPE := EXECUTABLE
ifeq ($$($1_OUTPUT_DIR), )
- $1_OUTPUT_DIR := $$(call FindExecutableDirForModule, $$(MODULE))
+ ifneq ($$(MODULE), )
+ $1_OUTPUT_DIR := $$(call FindExecutableDirForModule, $$(MODULE))
+ else
+ $$(error Must specify OUTPUT_DIR in a MODULE free context)
+ endif
endif
ifeq ($$($1_OBJECT_DIR), )
- $1_OBJECT_DIR := $$(SUPPORT_OUTPUTDIR)/native/$$(MODULE)/$$($1_NAME)
+ ifneq ($$(MODULE), )
+ $1_OBJECT_DIR := $$(SUPPORT_OUTPUTDIR)/native/$$(MODULE)/$$($1_NAME)
+ else
+ $$(error Must specify OBJECT_DIR in a MODULE free context)
+ endif
endif
ifeq ($$($1_VERSIONINFO_RESOURCE), )
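
The JdkNativeCompilation.gmk hunks above all follow the same pattern: derive OUTPUT_DIR, OBJECT_DIR and SRC from $(MODULE) when a module is known, and fail fast with an explicit error when the macro is used in a MODULE-free context. A hedged sketch of that pattern (the default path below is illustrative, not the build's actual layout):

```python
def resolve_output_dir(module, explicit_output_dir):
    # An explicit value always wins; a module-derived default is only
    # available when a module is known; otherwise the caller must pass one.
    if explicit_output_dir:
        return explicit_output_dir
    if module:
        return f"support/modules_libs/{module}"  # illustrative default only
    raise ValueError("Must specify OUTPUT_DIR in a MODULE free context")
```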
diff --git a/make/common/Modules.gmk b/make/common/Modules.gmk
index 72e19840501..10aacff4726 100644
--- a/make/common/Modules.gmk
+++ b/make/common/Modules.gmk
@@ -59,6 +59,7 @@ BOOT_MODULES += \
java.security.sasl \
java.xml \
jdk.incubator.foreign \
+ jdk.incubator.vector \
jdk.internal.vm.ci \
jdk.jfr \
jdk.management \
@@ -124,7 +125,7 @@ endif
JRE_TOOL_MODULES += \
jdk.jdwp.agent \
- jdk.incubator.jpackage \
+ jdk.jpackage \
#
################################################################################
@@ -144,7 +145,8 @@ DOCS_MODULES += \
jdk.editpad \
jdk.hotspot.agent \
jdk.httpserver \
- jdk.incubator.jpackage \
+ jdk.jpackage \
+ jdk.incubator.vector \
jdk.jartool \
jdk.javadoc \
jdk.jcmd \
@@ -226,7 +228,7 @@ endif
# jpackage is only on windows, macosx, and linux
ifeq ($(call isTargetOs, windows macosx linux), false)
- MODULES_FILTER += jdk.incubator.jpackage
+ MODULES_FILTER += jdk.jpackage
endif
################################################################################
diff --git a/make/conf/jib-profiles.js b/make/conf/jib-profiles.js
index e70d6617745..36460fee4b4 100644
--- a/make/conf/jib-profiles.js
+++ b/make/conf/jib-profiles.js
@@ -251,6 +251,8 @@ var getJibProfilesCommon = function (input, data) {
configure_args: concat("--enable-jtreg-failure-handler",
"--with-exclude-translations=de,es,fr,it,ko,pt_BR,sv,ca,tr,cs,sk,ja_JP_A,ja_JP_HA,ja_JP_HI,ja_JP_I,zh_TW,zh_HK",
"--disable-manpages",
+ "--disable-jvm-feature-aot",
+ "--disable-jvm-feature-graal",
"--disable-jvm-feature-shenandoahgc",
versionArgs(input, common))
};
@@ -404,12 +406,11 @@ var getJibProfilesProfiles = function (input, common, data) {
"linux-x64": {
target_os: "linux",
target_cpu: "x64",
- dependencies: ["devkit", "gtest", "graphviz", "pandoc", "graalunit_lib"],
+ dependencies: ["devkit", "gtest", "graphviz", "pandoc"],
configure_args: concat(common.configure_args_64bit,
- "--enable-full-docs", "--with-zlib=system",
+ "--with-zlib=system", "--disable-dtrace",
(isWsl(input) ? [ "--host=x86_64-unknown-linux-gnu",
"--build=x86_64-unknown-linux-gnu" ] : [])),
- default_make_targets: ["docs-bundles"],
},
"linux-x86": {
@@ -424,7 +425,7 @@ var getJibProfilesProfiles = function (input, common, data) {
"macosx-x64": {
target_os: "macosx",
target_cpu: "x64",
- dependencies: ["devkit", "gtest", "pandoc", "graalunit_lib"],
+ dependencies: ["devkit", "gtest", "pandoc"],
configure_args: concat(common.configure_args_64bit, "--with-zlib=system",
"--with-macosx-version-max=10.9.0",
// Use system SetFile instead of the one in the devkit as the
@@ -435,7 +436,7 @@ var getJibProfilesProfiles = function (input, common, data) {
"windows-x64": {
target_os: "windows",
target_cpu: "x64",
- dependencies: ["devkit", "gtest", "pandoc", "graalunit_lib"],
+ dependencies: ["devkit", "gtest", "pandoc"],
configure_args: concat(common.configure_args_64bit),
},
@@ -455,8 +456,6 @@ var getJibProfilesProfiles = function (input, common, data) {
configure_args: [
"--openjdk-target=aarch64-linux-gnu",
"--disable-jvm-feature-jvmci",
- "--disable-jvm-feature-graal",
- "--disable-jvm-feature-aot",
],
},
@@ -680,20 +679,47 @@ var getJibProfilesProfiles = function (input, common, data) {
common.debug_profile_artifacts(artifactData[name]));
});
- profilesArtifacts = {
- "linux-x64": {
+ buildJdkDep = input.build_os + "-" + input.build_cpu + ".jdk";
+ docsProfiles = {
+ "docs": {
+ target_os: input.build_os,
+ target_cpu: input.build_cpu,
+ dependencies: [
+ "boot_jdk", "devkit", "graphviz", "pandoc", buildJdkDep,
+ ],
+ configure_args: concat(
+ "--enable-full-docs",
+ versionArgs(input, common),
+ "--with-build-jdk=" + input.get(buildJdkDep, "home_path")
+ + (input.build_os == "macosx" ? "/Contents/Home" : "")
+ ),
+ default_make_targets: ["all-docs-bundles"],
artifacts: {
doc_api_spec: {
- local: "bundles/\\(jdk.*doc-api-spec.tar.gz\\)",
+ local: "bundles/\\(jdk-" + data.version + ".*doc-api-spec.tar.gz\\)",
remote: [
"bundles/common/jdk-" + data.version + "_doc-api-spec.tar.gz",
"bundles/common/\\1"
],
},
+ javase_doc_api_spec: {
+ local: "bundles/\\(javase-" + data.version + ".*doc-api-spec.tar.gz\\)",
+ remote: [
+ "bundles/common/javase-" + data.version + "_doc-api-spec.tar.gz",
+ "bundles/common/\\1"
+ ],
+ },
+ reference_doc_api_spec: {
+ local: "bundles/\\(jdk-reference-" + data.version + ".*doc-api-spec.tar.gz\\)",
+ remote: [
+ "bundles/common/jdk-reference-" + data.version + "_doc-api-spec.tar.gz",
+ "bundles/common/\\1"
+ ],
+ },
}
}
};
- profiles = concatObjects(profiles, profilesArtifacts);
+ profiles = concatObjects(profiles, docsProfiles);
// Generate open only profiles for all the main and debug profiles.
// Rewrite artifact remote paths by adding "openjdk/GPL".
@@ -960,7 +986,7 @@ var getJibProfilesDependencies = function (input, common) {
var devkit_platform_revisions = {
linux_x64: "gcc10.2.0-OL6.4+1.0",
- macosx_x64: "Xcode11.3.1-MacOSX10.15+1.0",
+ macosx_x64: "Xcode11.3.1-MacOSX10.15+1.1",
windows_x64: "VS2019-16.7.2+1.0",
linux_aarch64: "gcc10.2.0-OL7.6+1.0",
linux_arm: "gcc8.2.0-Fedora27+1.0",
@@ -1126,15 +1152,6 @@ var getJibProfilesDependencies = function (input, common) {
configure_args: "",
},
- graalunit_lib: {
- organization: common.organization,
- ext: "zip",
- revision: "619_Apr_12_2018",
- module: "graalunit-lib",
- configure_args: "--with-graalunit-lib=" + input.get("graalunit_lib", "install_path"),
- environment_name: "GRAALUNIT_LIB"
- },
-
gtest: {
organization: common.organization,
ext: "tar.gz",
diff --git a/make/data/blacklistedcertsconverter/blacklisted.certs.pem b/make/data/blacklistedcertsconverter/blacklisted.certs.pem
index 191e94e12a5..688becbc493 100644
--- a/make/data/blacklistedcertsconverter/blacklisted.certs.pem
+++ b/make/data/blacklistedcertsconverter/blacklisted.certs.pem
@@ -1,8 +1,7 @@
#! java BlacklistedCertsConverter SHA-256
-# The line above must be the first line of the blacklisted.certs.pem
-# file inside src/share/lib/security/. It will be ignored if added in
-# src/closed/share/lib/security/blacklisted.certs.pem.
+# The line above must be the first line of this file. Do not
+# remove it.
// Subject: CN=Digisign Server ID (Enrich),
// OU=457608-K,
diff --git a/make/data/tzdata/VERSION b/make/data/tzdata/VERSION
index e96a6d78497..94ba7462f2e 100644
--- a/make/data/tzdata/VERSION
+++ b/make/data/tzdata/VERSION
@@ -21,4 +21,4 @@
# or visit www.oracle.com if you need additional information or have any
# questions.
#
-tzdata2020a
+tzdata2020d
diff --git a/make/data/tzdata/africa b/make/data/tzdata/africa
index 7947bc55b00..e1c3d8929e6 100644
--- a/make/data/tzdata/africa
+++ b/make/data/tzdata/africa
@@ -87,7 +87,7 @@
# Corrections are welcome.
# Algeria
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Algeria 1916 only - Jun 14 23:00s 1:00 S
Rule Algeria 1916 1919 - Oct Sun>=1 23:00s 0 -
Rule Algeria 1917 only - Mar 24 23:00s 1:00 S
@@ -110,10 +110,9 @@ Rule Algeria 1978 only - Mar 24 1:00 1:00 S
Rule Algeria 1978 only - Sep 22 3:00 0 -
Rule Algeria 1980 only - Apr 25 0:00 1:00 S
Rule Algeria 1980 only - Oct 31 2:00 0 -
-# Shanks & Pottenger give 0:09:20 for Paris Mean Time; go with Howse's
-# more precise 0:09:21.
+# See Europe/Paris for PMT-related transitions.
# Zone NAME STDOFF RULES FORMAT [UNTIL]
-Zone Africa/Algiers 0:12:12 - LMT 1891 Mar 15 0:01
+Zone Africa/Algiers 0:12:12 - LMT 1891 Mar 16
0:09:21 - PMT 1911 Mar 11 # Paris Mean Time
0:00 Algeria WE%sT 1940 Feb 25 2:00
1:00 Algeria CE%sT 1946 Oct 7
@@ -199,7 +198,7 @@ Link Africa/Abidjan Atlantic/St_Helena # St Helena
# Egypt was mean noon at the Great Pyramid, 2:04:30.5, but apparently this
# did not apply to Cairo, Alexandria, or Port Said.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Egypt 1940 only - Jul 15 0:00 1:00 S
Rule Egypt 1940 only - Oct 1 0:00 0 -
Rule Egypt 1941 only - Apr 15 0:00 1:00 S
@@ -434,7 +433,7 @@ Zone Africa/Cairo 2:05:09 - LMT 1900 Oct
# now Ghana observed different DST regimes in different years. For
# lack of better info, use Shanks except treat the minus sign as a
# typo, and assume DST started in 1920 not 1936.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Ghana 1920 1942 - Sep 1 0:00 0:20 -
Rule Ghana 1920 1942 - Dec 31 0:00 0 -
# Zone NAME STDOFF RULES FORMAT [UNTIL]
@@ -524,7 +523,7 @@ Zone Africa/Monrovia -0:43:08 - LMT 1882
# From Paul Eggert (2013-10-25):
# For now, assume they're reverting to the pre-2012 rules of permanent UT +02.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Libya 1951 only - Oct 14 2:00 1:00 S
Rule Libya 1952 only - Jan 1 0:00 0 -
Rule Libya 1953 only - Oct 9 2:00 1:00 S
@@ -647,7 +646,7 @@ Zone Africa/Tripoli 0:52:44 - LMT 1920
# "The trial ended on March 29, 2009, when the clocks moved back by one hour
# at 2am (or 02:00) local time..."
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Mauritius 1982 only - Oct 10 0:00 1:00 -
Rule Mauritius 1983 only - Mar 21 0:00 0 -
Rule Mauritius 2008 only - Oct lastSun 2:00 1:00 -
@@ -898,17 +897,30 @@ Zone Indian/Mauritius 3:50:00 - LMT 1907 # Port Louis
# https://maroc-diplomatique.net/maroc-le-retour-a-lheure-gmt-est-prevu-dimanche-prochain/
# http://aujourdhui.ma/actualite/gmt1-retour-a-lheure-normale-dimanche-prochain-1
#
-# From Paul Eggert (2020-04-14):
+# From Milamber (2020-05-31)
+# In Morocco (where I live), the end of Ramadan (Arabic month) is followed by
+# the Eid al-Fitr, and concretely it's 1 or 2 day offs for the people (with
+# traditional visiting of family, big lunches/dinners, etc.). So for this
+# year the astronomical calculations don't include the following 2 days off in
+# the calc. These 2 days fall in a Sunday/Monday, so it's not acceptable by
+# people to have a time shift during these 2 days off. Perhaps you can modify
+# the (predicted) rules for next years: if the end of Ramadan is a (probable)
+# Friday or Saturday (and so the 2 days off are on a weekend), the next time
+# shift will be the next weekend.
+#
+# From Paul Eggert (2020-05-31):
# For now, guess that in the future Morocco will fall back at 03:00
# the last Sunday before Ramadan, and spring forward at 02:00 the
-# first Sunday after the day after Ramadan. To implement this,
-# transition dates for 2021 through 2087 were determined by running
-# the following program under GNU Emacs 26.3.
-# (let ((islamic-year 1442))
+# first Sunday after two days after Ramadan. To implement this,
+# transition dates and times for 2019 through 2087 were determined by
+# running the following program under GNU Emacs 26.3. (This algorithm
+# also produces the correct transition dates for 2016 through 2018,
+# though the times differ due to Morocco's time zone change in 2018.)
+# (let ((islamic-year 1440))
# (require 'cal-islam)
# (while (< islamic-year 1511)
# (let ((a (calendar-islamic-to-absolute (list 9 1 islamic-year)))
-# (b (1+ (calendar-islamic-to-absolute (list 10 1 islamic-year))))
+# (b (+ 2 (calendar-islamic-to-absolute (list 10 1 islamic-year))))
# (sunday 0))
# (while (/= sunday (mod (setq a (1- a)) 7)))
# (while (/= sunday (mod b 7))
@@ -923,7 +935,7 @@ Zone Indian/Mauritius 3:50:00 - LMT 1907 # Port Louis
# (car (cdr (cdr b))) (calendar-month-name (car b) t) (car (cdr b)))))
# (setq islamic-year (+ 1 islamic-year))))
-# RULE NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Morocco 1939 only - Sep 12 0:00 1:00 -
Rule Morocco 1939 only - Nov 19 0:00 0 -
Rule Morocco 1940 only - Feb 25 0:00 1:00 -
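
The predicted Morocco transitions above follow a simple rule once the Islamic-to-Gregorian conversion is done (the comment above uses GNU Emacs' cal-islam for that step): fall back at 03:00 on the last Sunday strictly before 1 Ramadan, and spring forward at 02:00 on the first Sunday on or after two days past the end of Ramadan. A small Python sketch of just the weekday arithmetic, with the converted dates supplied as inputs:

```python
from datetime import date, timedelta

SUNDAY = 6  # date.weekday(): Monday == 0 ... Sunday == 6

def morocco_transitions(ramadan_start: date, shawwal_start: date):
    # Fall back: last Sunday strictly before 1 Ramadan.
    fall_back = ramadan_start - timedelta(days=1)
    while fall_back.weekday() != SUNDAY:
        fall_back -= timedelta(days=1)
    # Spring forward: first Sunday on or after (1 Shawwal + 2 days),
    # i.e. after the two Eid al-Fitr days off mentioned above.
    spring_forward = shawwal_start + timedelta(days=2)
    while spring_forward.weekday() != SUNDAY:
        spring_forward += timedelta(days=1)
    return fall_back, spring_forward

# If the converter gives 1 Ramadan 1442 = 2021-04-13 and 1 Shawwal 1442 =
# 2021-05-13, this returns 2021-04-11 and 2021-05-16 (cf. the 2021 rules).
print(morocco_transitions(date(2021, 4, 13), date(2021, 5, 13)))
```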
@@ -974,7 +986,7 @@ Rule Morocco 2021 only - May 16 2:00 0 -
Rule Morocco 2022 only - Mar 27 3:00 -1:00 -
Rule Morocco 2022 only - May 8 2:00 0 -
Rule Morocco 2023 only - Mar 19 3:00 -1:00 -
-Rule Morocco 2023 only - Apr 23 2:00 0 -
+Rule Morocco 2023 only - Apr 30 2:00 0 -
Rule Morocco 2024 only - Mar 10 3:00 -1:00 -
Rule Morocco 2024 only - Apr 14 2:00 0 -
Rule Morocco 2025 only - Feb 23 3:00 -1:00 -
@@ -990,7 +1002,7 @@ Rule Morocco 2029 only - Feb 18 2:00 0 -
Rule Morocco 2029 only - Dec 30 3:00 -1:00 -
Rule Morocco 2030 only - Feb 10 2:00 0 -
Rule Morocco 2030 only - Dec 22 3:00 -1:00 -
-Rule Morocco 2031 only - Jan 26 2:00 0 -
+Rule Morocco 2031 only - Feb 2 2:00 0 -
Rule Morocco 2031 only - Dec 14 3:00 -1:00 -
Rule Morocco 2032 only - Jan 18 2:00 0 -
Rule Morocco 2032 only - Nov 28 3:00 -1:00 -
@@ -1006,7 +1018,7 @@ Rule Morocco 2036 only - Nov 23 2:00 0 -
Rule Morocco 2037 only - Oct 4 3:00 -1:00 -
Rule Morocco 2037 only - Nov 15 2:00 0 -
Rule Morocco 2038 only - Sep 26 3:00 -1:00 -
-Rule Morocco 2038 only - Oct 31 2:00 0 -
+Rule Morocco 2038 only - Nov 7 2:00 0 -
Rule Morocco 2039 only - Sep 18 3:00 -1:00 -
Rule Morocco 2039 only - Oct 23 2:00 0 -
Rule Morocco 2040 only - Sep 2 3:00 -1:00 -
@@ -1022,7 +1034,7 @@ Rule Morocco 2044 only - Aug 28 2:00 0 -
Rule Morocco 2045 only - Jul 9 3:00 -1:00 -
Rule Morocco 2045 only - Aug 20 2:00 0 -
Rule Morocco 2046 only - Jul 1 3:00 -1:00 -
-Rule Morocco 2046 only - Aug 5 2:00 0 -
+Rule Morocco 2046 only - Aug 12 2:00 0 -
Rule Morocco 2047 only - Jun 23 3:00 -1:00 -
Rule Morocco 2047 only - Jul 28 2:00 0 -
Rule Morocco 2048 only - Jun 7 3:00 -1:00 -
@@ -1038,7 +1050,7 @@ Rule Morocco 2052 only - Jun 2 2:00 0 -
Rule Morocco 2053 only - Apr 13 3:00 -1:00 -
Rule Morocco 2053 only - May 25 2:00 0 -
Rule Morocco 2054 only - Apr 5 3:00 -1:00 -
-Rule Morocco 2054 only - May 10 2:00 0 -
+Rule Morocco 2054 only - May 17 2:00 0 -
Rule Morocco 2055 only - Mar 28 3:00 -1:00 -
Rule Morocco 2055 only - May 2 2:00 0 -
Rule Morocco 2056 only - Mar 12 3:00 -1:00 -
@@ -1054,7 +1066,7 @@ Rule Morocco 2060 only - Mar 7 2:00 0 -
Rule Morocco 2061 only - Jan 16 3:00 -1:00 -
Rule Morocco 2061 only - Feb 27 2:00 0 -
Rule Morocco 2062 only - Jan 8 3:00 -1:00 -
-Rule Morocco 2062 only - Feb 12 2:00 0 -
+Rule Morocco 2062 only - Feb 19 2:00 0 -
Rule Morocco 2062 only - Dec 31 3:00 -1:00 -
Rule Morocco 2063 only - Feb 4 2:00 0 -
Rule Morocco 2063 only - Dec 16 3:00 -1:00 -
@@ -1070,7 +1082,7 @@ Rule Morocco 2067 only - Dec 11 2:00 0 -
Rule Morocco 2068 only - Oct 21 3:00 -1:00 -
Rule Morocco 2068 only - Dec 2 2:00 0 -
Rule Morocco 2069 only - Oct 13 3:00 -1:00 -
-Rule Morocco 2069 only - Nov 17 2:00 0 -
+Rule Morocco 2069 only - Nov 24 2:00 0 -
Rule Morocco 2070 only - Oct 5 3:00 -1:00 -
Rule Morocco 2070 only - Nov 9 2:00 0 -
Rule Morocco 2071 only - Sep 20 3:00 -1:00 -
@@ -1086,7 +1098,7 @@ Rule Morocco 2075 only - Sep 15 2:00 0 -
Rule Morocco 2076 only - Jul 26 3:00 -1:00 -
Rule Morocco 2076 only - Sep 6 2:00 0 -
Rule Morocco 2077 only - Jul 18 3:00 -1:00 -
-Rule Morocco 2077 only - Aug 22 2:00 0 -
+Rule Morocco 2077 only - Aug 29 2:00 0 -
Rule Morocco 2078 only - Jul 10 3:00 -1:00 -
Rule Morocco 2078 only - Aug 14 2:00 0 -
Rule Morocco 2079 only - Jun 25 3:00 -1:00 -
@@ -1096,13 +1108,13 @@ Rule Morocco 2080 only - Jul 21 2:00 0 -
Rule Morocco 2081 only - Jun 1 3:00 -1:00 -
Rule Morocco 2081 only - Jul 13 2:00 0 -
Rule Morocco 2082 only - May 24 3:00 -1:00 -
-Rule Morocco 2082 only - Jun 28 2:00 0 -
+Rule Morocco 2082 only - Jul 5 2:00 0 -
Rule Morocco 2083 only - May 16 3:00 -1:00 -
Rule Morocco 2083 only - Jun 20 2:00 0 -
Rule Morocco 2084 only - Apr 30 3:00 -1:00 -
Rule Morocco 2084 only - Jun 11 2:00 0 -
Rule Morocco 2085 only - Apr 22 3:00 -1:00 -
-Rule Morocco 2085 only - May 27 2:00 0 -
+Rule Morocco 2085 only - Jun 3 2:00 0 -
Rule Morocco 2086 only - Apr 14 3:00 -1:00 -
Rule Morocco 2086 only - May 19 2:00 0 -
Rule Morocco 2087 only - Mar 30 3:00 -1:00 -
@@ -1203,7 +1215,7 @@ Link Africa/Maputo Africa/Lusaka # Zambia
# Use plain "WAT" and "CAT" for the time zone abbreviations, to be compatible
# with Namibia's neighbors.
-# RULE NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
# Vanguard section, for zic and other parsers that support negative DST.
Rule Namibia 1994 only - Mar 21 0:00 -1:00 WAT
Rule Namibia 1994 2017 - Sep Sun>=1 2:00 0 CAT
@@ -1326,7 +1338,7 @@ Zone Indian/Mahe 3:41:48 - LMT 1906 Jun # Victoria
# See Africa/Nairobi.
# South Africa
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule SA 1942 1943 - Sep Sun>=15 2:00 1:00 -
Rule SA 1943 1944 - Mar Sun>=15 2:00 0 -
# Zone NAME STDOFF RULES FORMAT [UNTIL]
@@ -1359,7 +1371,7 @@ Link Africa/Johannesburg Africa/Mbabane # Eswatini
# Abdalla of NTC, archived at:
# https://mm.icann.org/pipermail/tz/2017-October/025333.html
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Sudan 1970 only - May 1 0:00 1:00 S
Rule Sudan 1970 1985 - Oct 15 0:00 0 -
Rule Sudan 1971 only - Apr 30 0:00 1:00 S
@@ -1447,7 +1459,7 @@ Zone Africa/Juba 2:06:28 - LMT 1931
# http://www.almadenahnews.com/newss/news.php?c=118&id=38036
# http://www.worldtimezone.com/dst_news/dst_news_tunis02.html
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Tunisia 1939 only - Apr 15 23:00s 1:00 S
Rule Tunisia 1939 only - Nov 18 23:00s 0 -
Rule Tunisia 1940 only - Feb 25 23:00s 1:00 S
@@ -1474,9 +1486,7 @@ Rule Tunisia 2005 only - Sep 30 1:00s 0 -
Rule Tunisia 2006 2008 - Mar lastSun 2:00s 1:00 S
Rule Tunisia 2006 2008 - Oct lastSun 2:00s 0 -
-# Shanks & Pottenger give 0:09:20 for Paris Mean Time; go with Howse's
-# more precise 0:09:21.
-# Shanks & Pottenger say the 1911 switch was on Mar 9; go with Howse's Mar 11.
+# See Europe/Paris for PMT-related transitions.
# Zone NAME STDOFF RULES FORMAT [UNTIL]
Zone Africa/Tunis 0:40:44 - LMT 1881 May 12
0:09:21 - PMT 1911 Mar 11 # Paris Mean Time
diff --git a/make/data/tzdata/antarctica b/make/data/tzdata/antarctica
index 6239f837e9f..509fadc29a9 100644
--- a/make/data/tzdata/antarctica
+++ b/make/data/tzdata/antarctica
@@ -93,15 +93,30 @@
# Australian Antarctica Division informed us that Casey changed time
# zone to UTC+11 in "the morning of 22nd October 2016".
+# From Steffen Thorsen (2020-10-02, as corrected):
+# Based on information we have received from the Australian Antarctic
+# Division, Casey station and Macquarie Island station will move to Tasmanian
+# daylight savings time on Sunday 4 October. This will take effect from 0001
+# hrs on Sunday 4 October 2020 and will mean Casey and Macquarie Island will
+# be on the same time zone as Hobart. Some past dates too for this 3 hour
+# time change back and forth between UTC+8 and UTC+11 for Casey:
+# - 2018 Oct 7 4:00 - 2019 Mar 17 3:00 - 2019 Oct 4 3:00 - 2020 Mar 8 3:00
+# and now - 2020 Oct 4 0:01
+
# Zone NAME STDOFF RULES FORMAT [UNTIL]
-Zone Antarctica/Casey 0 - -00 1969
- 8:00 - +08 2009 Oct 18 2:00
+Zone Antarctica/Casey 0 - -00 1969
+ 8:00 - +08 2009 Oct 18 2:00
11:00 - +11 2010 Mar 5 2:00
- 8:00 - +08 2011 Oct 28 2:00
+ 8:00 - +08 2011 Oct 28 2:00
11:00 - +11 2012 Feb 21 17:00u
- 8:00 - +08 2016 Oct 22
+ 8:00 - +08 2016 Oct 22
11:00 - +11 2018 Mar 11 4:00
- 8:00 - +08
+ 8:00 - +08 2018 Oct 7 4:00
+ 11:00 - +11 2019 Mar 17 3:00
+ 8:00 - +08 2019 Oct 4 3:00
+ 11:00 - +11 2020 Mar 8 3:00
+ 8:00 - +08 2020 Oct 4 0:01
+ 11:00 - +11
Zone Antarctica/Davis 0 - -00 1957 Jan 13
7:00 - +07 1964 Nov
0 - -00 1969 Feb
@@ -247,7 +262,7 @@ Zone Antarctica/Syowa 0 - -00 1957 Jan 29
# suggested by Bengt-Inge Larsson comment them out for now, and approximate
# with only UTC and CEST. Uncomment them when 2014b is more prevalent.
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
#Rule Troll 2005 max - Mar 1 1:00u 1:00 +01
Rule Troll 2005 max - Mar lastSun 1:00u 2:00 +02
#Rule Troll 2005 max - Oct lastSun 1:00u 1:00 +01
diff --git a/make/data/tzdata/asia b/make/data/tzdata/asia
index 0700aa46b41..acca6554fa2 100644
--- a/make/data/tzdata/asia
+++ b/make/data/tzdata/asia
@@ -93,7 +93,7 @@
###############################################################################
# These rules are stolen from the 'europe' file.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule EUAsia 1981 max - Mar lastSun 1:00u 1:00 S
Rule EUAsia 1979 1995 - Sep lastSun 1:00u 0 -
Rule EUAsia 1996 max - Oct lastSun 1:00u 0 -
@@ -137,7 +137,7 @@ Zone Asia/Kabul 4:36:48 - LMT 1890
# or
# (brief)
# http://www.worldtimezone.com/dst_news/dst_news_armenia03.html
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Armenia 2011 only - Mar lastSun 2:00s 1:00 -
Rule Armenia 2011 only - Oct lastSun 2:00s 0 -
# Zone NAME STDOFF RULES FORMAT [UNTIL]
@@ -163,7 +163,7 @@ Zone Asia/Yerevan 2:58:00 - LMT 1924 May 2
# http://vestnikkavkaza.net/news/Azerbaijani-Cabinet-of-Ministers-cancels-daylight-saving-time.html
# http://en.apa.az/xeber_azerbaijan_abolishes_daylight_savings_ti_240862.html
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Azer 1997 2015 - Mar lastSun 4:00 1:00 -
Rule Azer 1997 2015 - Oct lastSun 5:00 0 -
# Zone NAME STDOFF RULES FORMAT [UNTIL]
@@ -250,7 +250,7 @@ Zone Asia/Baku 3:19:24 - LMT 1924 May 2
# http://www.thedailystar.net/newDesign/latest_news.php?nid=22817
# http://www.worldtimezone.com/dst_news/dst_news_bangladesh06.html
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Dhaka 2009 only - Jun 19 23:00 1:00 -
Rule Dhaka 2009 only - Dec 31 24:00 0 -
@@ -326,7 +326,7 @@ Zone Asia/Yangon 6:24:47 - LMT 1880 # or Rangoon
# generally esteemed a success, it was announced early in 1920 that it would
# not be repeated."
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Shang 1919 only - Apr 12 24:00 1:00 D
Rule Shang 1919 only - Sep 30 24:00 0 S
@@ -422,7 +422,7 @@ Rule Shang 1919 only - Sep 30 24:00 0 S
# the Yangtze river delta area during that period of time although the scope
# of such use will need to be investigated to determine.
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Shang 1940 only - Jun 1 0:00 1:00 D
Rule Shang 1940 only - Oct 12 24:00 0 S
Rule Shang 1941 only - Mar 15 0:00 1:00 D
@@ -485,7 +485,7 @@ Rule Shang 1948 1949 - Sep 30 24:00 0 S #plan
# to begin on 17 April.
# http://data.people.com.cn/pic/101p/1988/04/1988041201.jpg
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule PRC 1986 only - May 4 2:00 1:00 D
Rule PRC 1986 1991 - Sep Sun>=11 2:00 0 S
Rule PRC 1987 1991 - Apr Sun>=11 2:00 1:00 D
@@ -869,7 +869,7 @@ Zone Asia/Urumqi 5:50:20 - LMT 1928
# or dates for the 1942 and 1945 transitions.
# The Japanese occupation of Hong Kong began 1941-12-25.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule HK 1946 only - Apr 21 0:00 1:00 S
Rule HK 1946 only - Dec 1 3:30s 0 -
Rule HK 1947 only - Apr 13 3:30s 1:00 S
@@ -996,7 +996,7 @@ Zone Asia/Hong_Kong 7:36:42 - LMT 1904 Oct 30 0:36:42
# until 1945-09-21 at 01:00, overriding Shanks & Pottenger.
# Likewise, use Yu-Cheng Chuang's data for DST in Taiwan.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Taiwan 1946 only - May 15 0:00 1:00 D
Rule Taiwan 1946 only - Oct 1 0:00 0 S
Rule Taiwan 1947 only - Apr 15 0:00 1:00 D
@@ -1122,7 +1122,7 @@ Zone Asia/Taipei 8:06:00 - LMT 1896 Jan 1
# The 1904 decree says that Macau changed from the meridian of
# Fortaleza do Monte, presumably the basis for the 7:34:10 for LMT.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Macau 1942 1943 - Apr 30 23:00 1:00 -
Rule Macau 1942 only - Nov 17 23:00 0 -
Rule Macau 1943 only - Sep 30 23:00 0 S
@@ -1180,7 +1180,7 @@ Zone Asia/Macau 7:34:10 - LMT 1904 Oct 30
# Cyprus to remain united in time. Cyprus Mail 2017-10-17.
# https://cyprus-mail.com/2017/10/17/cyprus-remain-united-time/
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Cyprus 1975 only - Apr 13 0:00 1:00 S
Rule Cyprus 1975 only - Oct 12 0:00 0 -
Rule Cyprus 1976 only - May 15 0:00 1:00 S
@@ -1557,7 +1557,7 @@ Zone Asia/Jayapura 9:22:48 - LMT 1932 Nov
# be changed back to its previous state on the 24 hours of the
# thirtieth day of Shahrivar.
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Iran 1978 1980 - Mar 20 24:00 1:00 -
Rule Iran 1978 only - Oct 20 24:00 0 -
Rule Iran 1979 only - Sep 18 24:00 0 -
@@ -1699,7 +1699,7 @@ Zone Asia/Tehran 3:25:44 - LMT 1916
# We have published a short article in English about the change:
# https://www.timeanddate.com/news/time/iraq-dumps-daylight-saving.html
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Iraq 1982 only - May 1 0:00 1:00 -
Rule Iraq 1982 1984 - Oct 1 0:00 0 -
Rule Iraq 1983 only - Mar 31 0:00 1:00 -
@@ -1722,6 +1722,10 @@ Zone Asia/Baghdad 2:57:40 - LMT 1890
# Israel
+# For more info about the motivation for DST in Israel, see:
+# Barak Y. Israel's Daylight Saving Time controversy. Israel Affairs.
+# 2020-08-11. https://doi.org/10.1080/13537121.2020.1806564
+
# From Ephraim Silverberg (2001-01-11):
#
# I coined "IST/IDT" circa 1988. Until then there were three
@@ -1743,7 +1747,7 @@ Zone Asia/Baghdad 2:57:40 - LMT 1890
# family is from India).
# From Shanks & Pottenger:
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Zion 1940 only - Jun 1 0:00 1:00 D
Rule Zion 1942 1944 - Nov 1 0:00 0 S
Rule Zion 1943 only - Apr 1 2:00 1:00 D
@@ -1835,7 +1839,7 @@ Rule Zion 1988 only - Sep 4 0:00 0 S
# (except in 2002) is three nights before Yom Kippur [Day of Atonement]
# (the eve of the 7th of Tishrei in the lunar Hebrew calendar).
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Zion 1989 only - Apr 30 0:00 1:00 D
Rule Zion 1989 only - Sep 3 0:00 0 S
Rule Zion 1990 only - Mar 25 0:00 1:00 D
@@ -1851,7 +1855,7 @@ Rule Zion 1993 only - Sep 5 0:00 0 S
# Ministry of Interior, Jerusalem, Israel. The spokeswoman can be reached by
# calling the office directly at 972-2-6701447 or 972-2-6701448.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Zion 1994 only - Apr 1 0:00 1:00 D
Rule Zion 1994 only - Aug 28 0:00 0 S
Rule Zion 1995 only - Mar 31 0:00 1:00 D
@@ -1871,7 +1875,7 @@ Rule Zion 1995 only - Sep 3 0:00 0 S
#
# where YYYY is the relevant year.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Zion 1996 only - Mar 15 0:00 1:00 D
Rule Zion 1996 only - Sep 16 0:00 0 S
Rule Zion 1997 only - Mar 21 0:00 1:00 D
@@ -1894,7 +1898,7 @@ Rule Zion 1999 only - Sep 3 2:00 0 S
#
# ftp://ftp.cs.huji.ac.il/pub/tz/announcements/2000-2004.ps.gz
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Zion 2000 only - Apr 14 2:00 1:00 D
Rule Zion 2000 only - Oct 6 1:00 0 S
Rule Zion 2001 only - Apr 9 1:00 1:00 D
@@ -1916,7 +1920,7 @@ Rule Zion 2004 only - Sep 22 1:00 0 S
#
# ftp://ftp.cs.huji.ac.il/pub/tz/announcements/2005+beyond.ps
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Zion 2005 2012 - Apr Fri<=1 2:00 1:00 D
Rule Zion 2005 only - Oct 9 2:00 0 S
Rule Zion 2006 only - Oct 1 2:00 0 S
@@ -1936,7 +1940,7 @@ Rule Zion 2012 only - Sep 23 2:00 0 S
# As of 2013, DST starts at 02:00 on the Friday before the last Sunday
# in March. DST ends at 02:00 on the last Sunday of October.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Zion 2013 max - Mar Fri>=23 2:00 1:00 D
Rule Zion 2013 max - Oct lastSun 2:00 0 S
@@ -2036,7 +2040,7 @@ Zone Asia/Jerusalem 2:20:54 - LMT 1880
# do in any POSIX or C platform. The "25:00" assumes zic from 2007 or later,
# which should be safe now.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Japan 1948 only - May Sat>=1 24:00 1:00 D
Rule Japan 1948 1951 - Sep Sat>=8 25:00 0 S
Rule Japan 1949 only - Apr Sat>=1 24:00 1:00 D
@@ -2113,7 +2117,7 @@ Zone Asia/Tokyo 9:18:59 - LMT 1887 Dec 31 15:00u
# From Paul Eggert (2013-12-11):
# As Steffen suggested, consider the past 21-month experiment to be DST.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Jordan 1973 only - Jun 6 0:00 1:00 S
Rule Jordan 1973 1975 - Oct 1 0:00 0 -
Rule Jordan 1974 1977 - May 1 0:00 1:00 S
@@ -2439,7 +2443,7 @@ Zone Asia/Oral 3:25:24 - LMT 1924 May 2 # or Ural'sk
# Our government cancels daylight saving time 6th of August 2005.
# From 2005-08-12 our GMT-offset is +6, w/o any daylight saving.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Kyrgyz 1992 1996 - Apr Sun>=7 0:00s 1:00 -
Rule Kyrgyz 1992 1996 - Sep lastSun 0:00 0 -
Rule Kyrgyz 1997 2005 - Mar lastSun 2:30 1:00 -
@@ -2495,7 +2499,7 @@ Zone Asia/Bishkek 4:58:24 - LMT 1924 May 2
# follow and continued to use GMT+9:00 for interoperability.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule ROK 1948 only - Jun 1 0:00 1:00 D
Rule ROK 1948 only - Sep 12 24:00 0 S
Rule ROK 1949 only - Apr 3 0:00 1:00 D
@@ -2583,7 +2587,7 @@ Zone Asia/Pyongyang 8:23:00 - LMT 1908 Apr 1
# Lebanon
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Lebanon 1920 only - Mar 28 0:00 1:00 S
Rule Lebanon 1920 only - Oct 25 0:00 0 -
Rule Lebanon 1921 only - Apr 3 0:00 1:00 S
@@ -2613,7 +2617,7 @@ Zone Asia/Beirut 2:22:00 - LMT 1880
2:00 Lebanon EE%sT
# Malaysia
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule NBorneo 1935 1941 - Sep 14 0:00 0:20 -
Rule NBorneo 1935 1941 - Dec 14 0:00 0 -
#
@@ -2758,7 +2762,7 @@ Zone Indian/Maldives 4:54:00 - LMT 1880 # Malé
# September daylight saving time ends. Source:
# http://zasag.mn/news/view/8969
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Mongol 1983 1984 - Apr 1 0:00 1:00 -
Rule Mongol 1983 only - Oct 1 0:00 0 -
# Shanks & Pottenger and IATA SSIM say 1990s switches occurred at 00:00,
@@ -2946,7 +2950,7 @@ Zone Asia/Kathmandu 5:41:16 - LMT 1920
# "People laud PM's announcement to end DST"
# http://www.app.com.pk/en_/index.php?option=com_content&task=view&id=99374&Itemid=2
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Pakistan 2002 only - Apr Sun>=2 0:00 1:00 S
Rule Pakistan 2002 only - Oct Sun>=2 0:00 0 -
Rule Pakistan 2008 only - Jun 1 0:00 1:00 S
@@ -3240,15 +3244,42 @@ Zone Asia/Karachi 4:28:12 - LMT 1907
# From Sharef Mustafa (2019-10-18):
# Palestine summer time will end on midnight Oct 26th 2019 ...
-# http://www.palestinecabinet.gov.ps/website/ar/ViewDetails?ID=43948
#
-# From Paul Eggert (2019-04-10):
-# For now, guess spring-ahead transitions are March's last Friday at 00:00.
+# From Steffen Thorsen (2020-10-20):
+# Some sources such as these say, and display on clocks, that DST ended at
+# midnight last year...
+# https://www.amad.ps/ar/post/320006
+#
+# From Tim Parenti (2020-10-20):
+# The report of the Palestinian Cabinet meeting of 2019-10-14 confirms
+# a decision on (translated): "The start of the winter time in Palestine, by
+# delaying the clock by sixty minutes, starting from midnight on Friday /
+# Saturday corresponding to 26/10/2019."
+# http://www.palestinecabinet.gov.ps/portal/meeting/details/43948
+
+# From Sharef Mustafa (2020-10-20):
+# As per the palestinian cabinet announcement yesterday , the day light saving
+# shall [end] on Oct 24th 2020 at 01:00AM by delaying the clock by 60 minutes.
+# http://www.palestinecabinet.gov.ps/portal/Meeting/Details/51584
+
+# From Tim Parenti (2020-10-20):
+# Predict future fall transitions at 01:00 on the Saturday preceding October's
+# last Sunday (i.e., Sat>=24). This is consistent with our predictions since
+# 2016, although the time of the change differed slightly in 2019.
+
+# From Pierre Cashon (2020-10-20):
+# The summer time this year started on March 28 at 00:00.
+# https://wafa.ps/ar_page.aspx?id=GveQNZa872839351758aGveQNZ
+# http://www.palestinecabinet.gov.ps/portal/meeting/details/50284
+# The winter time in 2015 started on October 23 at 01:00.
+# https://wafa.ps/ar_page.aspx?id=CgpCdYa670694628582aCgpCdY
+# http://www.palestinecabinet.gov.ps/portal/meeting/details/27583
#
-# From Tim Parenti (2016-10-19):
-# Predict fall transitions on October's last Saturday at 01:00 from now on.
+# From Paul Eggert (2019-04-10):
+# For now, guess spring-ahead transitions are at 00:00 on the Saturday
+# preceding March's last Sunday (i.e., Sat>=24).
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule EgyptAsia 1957 only - May 10 0:00 1:00 S
Rule EgyptAsia 1957 1958 - Oct 1 0:00 0 -
Rule EgyptAsia 1958 only - May 1 0:00 1:00 S
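
The "Sat>=24" shorthand used in the predictions above means the first Saturday falling on or after the 24th, which in a 31-day month such as March or October is exactly the Saturday before the month's last Sunday. A quick Python check of that reading (sketch only):

```python
from datetime import date, timedelta

def sat_on_or_after_24(year: int, month: int) -> date:
    # tzdata 'Sat>=24': first Saturday on or after the 24th of the month.
    d = date(year, month, 24)
    while d.weekday() != 5:  # 5 == Saturday
        d += timedelta(days=1)
    return d

print(sat_on_or_after_24(2020, 10))  # 2020-10-24, the end date quoted above
```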
@@ -3262,10 +3293,10 @@ Rule Palestine 2004 only - Oct 1 1:00 0 -
Rule Palestine 2005 only - Oct 4 2:00 0 -
Rule Palestine 2006 2007 - Apr 1 0:00 1:00 S
Rule Palestine 2006 only - Sep 22 0:00 0 -
-Rule Palestine 2007 only - Sep Thu>=8 2:00 0 -
+Rule Palestine 2007 only - Sep 13 2:00 0 -
Rule Palestine 2008 2009 - Mar lastFri 0:00 1:00 S
Rule Palestine 2008 only - Sep 1 0:00 0 -
-Rule Palestine 2009 only - Sep Fri>=1 1:00 0 -
+Rule Palestine 2009 only - Sep 4 1:00 0 -
Rule Palestine 2010 only - Mar 26 0:00 1:00 S
Rule Palestine 2010 only - Aug 11 0:00 0 -
Rule Palestine 2011 only - Apr 1 0:01 1:00 S
@@ -3274,12 +3305,16 @@ Rule Palestine 2011 only - Aug 30 0:00 1:00 S
Rule Palestine 2011 only - Sep 30 0:00 0 -
Rule Palestine 2012 2014 - Mar lastThu 24:00 1:00 S
Rule Palestine 2012 only - Sep 21 1:00 0 -
-Rule Palestine 2013 only - Sep Fri>=21 0:00 0 -
-Rule Palestine 2014 2015 - Oct Fri>=21 0:00 0 -
-Rule Palestine 2015 only - Mar lastFri 24:00 1:00 S
+Rule Palestine 2013 only - Sep 27 0:00 0 -
+Rule Palestine 2014 only - Oct 24 0:00 0 -
+Rule Palestine 2015 only - Mar 28 0:00 1:00 S
+Rule Palestine 2015 only - Oct 23 1:00 0 -
Rule Palestine 2016 2018 - Mar Sat>=24 1:00 1:00 S
-Rule Palestine 2016 max - Oct lastSat 1:00 0 -
-Rule Palestine 2019 max - Mar lastFri 0:00 1:00 S
+Rule Palestine 2016 2018 - Oct Sat>=24 1:00 0 -
+Rule Palestine 2019 only - Mar 29 0:00 1:00 S
+Rule Palestine 2019 only - Oct Sat>=24 0:00 0 -
+Rule Palestine 2020 max - Mar Sat>=24 0:00 1:00 S
+Rule Palestine 2020 max - Oct Sat>=24 1:00 0 -
# Zone NAME STDOFF RULES FORMAT [UNTIL]
Zone Asia/Gaza 2:17:52 - LMT 1900 Oct
@@ -3348,7 +3383,7 @@ Zone Asia/Hebron 2:20:23 - LMT 1900 Oct
# influence of the sources. There is no current abbreviation for DST,
# so use "PDT", the usual American style.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Phil 1936 only - Nov 1 0:00 1:00 D
Rule Phil 1937 only - Feb 1 0:00 0 S
Rule Phil 1954 only - Apr 12 0:00 1:00 D
@@ -3496,7 +3531,7 @@ Zone Asia/Colombo 5:19:24 - LMT 1880
5:30 - +0530
# Syria
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Syria 1920 1923 - Apr Sun>=15 2:00 1:00 S
Rule Syria 1920 1923 - Oct Sun>=1 2:00 0 -
Rule Syria 1962 only - Apr 29 2:00 1:00 S
diff --git a/make/data/tzdata/australasia b/make/data/tzdata/australasia
index e66d5ca4d79..1f0fd47959f 100644
--- a/make/data/tzdata/australasia
+++ b/make/data/tzdata/australasia
@@ -36,7 +36,7 @@
# Please see the notes below for the controversy about "EST" versus "AEST" etc.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Aus 1917 only - Jan 1 0:01 1:00 D
Rule Aus 1917 only - Mar 25 2:00 0 S
Rule Aus 1942 only - Jan 1 2:00 1:00 D
@@ -55,7 +55,7 @@ Zone Australia/Darwin 8:43:20 - LMT 1895 Feb
9:30 Aus AC%sT
# Western Australia
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule AW 1974 only - Oct lastSun 2:00s 1:00 D
Rule AW 1975 only - Mar Sun>=1 2:00s 0 S
Rule AW 1983 only - Oct lastSun 2:00s 1:00 D
@@ -93,7 +93,7 @@ Zone Australia/Eucla 8:35:28 - LMT 1895 Dec
# applies to all of the Whitsundays.
# http://www.australia.gov.au/about-australia/australian-story/austn-islands
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule AQ 1971 only - Oct lastSun 2:00s 1:00 D
Rule AQ 1972 only - Feb lastSun 2:00s 0 S
Rule AQ 1989 1991 - Oct lastSun 2:00s 1:00 D
@@ -109,7 +109,7 @@ Zone Australia/Lindeman 9:55:56 - LMT 1895
10:00 Holiday AE%sT
# South Australia
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule AS 1971 1985 - Oct lastSun 2:00s 1:00 D
Rule AS 1986 only - Oct 19 2:00s 1:00 D
Rule AS 1987 2007 - Oct lastSun 2:00s 1:00 D
@@ -137,7 +137,7 @@ Zone Australia/Adelaide 9:14:20 - LMT 1895 Feb
# http://www.bom.gov.au/climate/averages/tables/dst_times.shtml
# says King Island didn't observe DST from WWII until late 1971.
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule AT 1967 only - Oct Sun>=1 2:00s 1:00 D
Rule AT 1968 only - Mar lastSun 2:00s 0 S
Rule AT 1968 1985 - Oct lastSun 2:00s 1:00 D
@@ -170,7 +170,7 @@ Zone Australia/Currie 9:35:28 - LMT 1895 Sep
10:00 AT AE%sT
# Victoria
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule AV 1971 1985 - Oct lastSun 2:00s 1:00 D
Rule AV 1972 only - Feb lastSun 2:00s 0 S
Rule AV 1973 1985 - Mar Sun>=1 2:00s 0 S
@@ -191,7 +191,7 @@ Zone Australia/Melbourne 9:39:52 - LMT 1895 Feb
10:00 AV AE%sT
# New South Wales
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule AN 1971 1985 - Oct lastSun 2:00s 1:00 D
Rule AN 1972 only - Feb 27 2:00s 0 S
Rule AN 1973 1981 - Mar Sun>=1 2:00s 0 S
@@ -220,7 +220,7 @@ Zone Australia/Broken_Hill 9:25:48 - LMT 1895 Feb
9:30 AS AC%sT
# Lord Howe Island
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule LH 1981 1984 - Oct lastSun 2:00 1:00 -
Rule LH 1982 1985 - Mar Sun>=1 2:00 0 -
Rule LH 1985 only - Oct lastSun 2:00 0:30 -
@@ -275,8 +275,9 @@ Zone Antarctica/Macquarie 0 - -00 1899 Nov
10:00 Aus AE%sT 1919 Apr 1 0:00s
0 - -00 1948 Mar 25
10:00 Aus AE%sT 1967
- 10:00 AT AE%sT 2010 Apr 4 3:00
- 11:00 - +11
+ 10:00 AT AE%sT 2010
+ 10:00 1:00 AEDT 2011
+ 10:00 AT AE%sT
# Christmas
# Zone NAME STDOFF RULES FORMAT [UNTIL]
@@ -403,7 +404,20 @@ Zone Indian/Cocos 6:27:40 - LMT 1900
# From Michael Deckers (2019-08-06):
# https://www.laws.gov.fj/LawsAsMade/downloadfile/848
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# From Raymond Kumar (2020-10-08):
+# [DST in Fiji] is from December 20th 2020, till 17th January 2021.
+# From Alan Mintz (2020-10-08):
+# https://www.laws.gov.fj/LawsAsMade/GetFile/1071
+# From Tim Parenti (2020-10-08):
+# https://www.fijivillage.com/news/Daylight-saving-from-Dec-20th-this-year-to-Jan-17th-2021-8rf4x5/
+# "Minister for Employment, Parveen Bala says they had never thought of
+# stopping daylight saving. He says it was just to decide on when it should
+# start and end. Bala says it is a short period..."
+# Since the end date is still in line with our ongoing predictions, assume for
+# now that the later-than-usual start date is a one-time departure from the
+# recent second Sunday in November pattern.
+
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Fiji 1998 1999 - Nov Sun>=1 2:00 1:00 -
Rule Fiji 1999 2000 - Feb lastSun 3:00 0 -
Rule Fiji 2009 only - Nov 29 2:00 1:00 -
@@ -414,7 +428,9 @@ Rule Fiji 2012 2013 - Jan Sun>=18 3:00 0 -
Rule Fiji 2014 only - Jan Sun>=18 2:00 0 -
Rule Fiji 2014 2018 - Nov Sun>=1 2:00 1:00 -
Rule Fiji 2015 max - Jan Sun>=12 3:00 0 -
-Rule Fiji 2019 max - Nov Sun>=8 2:00 1:00 -
+Rule Fiji 2019 only - Nov Sun>=8 2:00 1:00 -
+Rule Fiji 2020 only - Dec 20 2:00 1:00 -
+Rule Fiji 2021 max - Nov Sun>=8 2:00 1:00 -
# Zone NAME STDOFF RULES FORMAT [UNTIL]
Zone Pacific/Fiji 11:55:44 - LMT 1915 Oct 26 # Suva
12:00 Fiji +12/+13
@@ -432,7 +448,7 @@ Zone Pacific/Tahiti -9:58:16 - LMT 1912 Oct # Papeete
# Guam
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
# http://guamlegislature.com/Public_Laws_5th/PL05-025.pdf
# http://documents.guam.gov/wp-content/uploads/E.O.-59-7-Guam-Daylight-Savings-Time-May-6-1959.pdf
Rule Guam 1959 only - Jun 27 2:00 1:00 D
@@ -543,7 +559,7 @@ Zone Pacific/Nauru 11:07:40 - LMT 1921 Jan 15 # Uaobe
12:00 - +12
# New Caledonia
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule NC 1977 1978 - Dec Sun>=1 0:00 1:00 -
Rule NC 1978 1979 - Feb 27 0:00 0 -
Rule NC 1996 only - Dec 1 2:00s 1:00 -
@@ -558,7 +574,7 @@ Zone Pacific/Noumea 11:05:48 - LMT 1912 Jan 13 # Nouméa
# New Zealand
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule NZ 1927 only - Nov 6 2:00 1:00 S
Rule NZ 1928 only - Mar 4 2:00 0 M
Rule NZ 1928 1933 - Oct Sun>=8 2:00 0:30 S
@@ -610,7 +626,7 @@ Link Pacific/Auckland Antarctica/McMurdo
# Cook Is
# From Shanks & Pottenger:
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Cook 1978 only - Nov 12 0:00 0:30 -
Rule Cook 1979 1991 - Mar Sun>=1 0:00 0 -
Rule Cook 1979 1990 - Oct lastSun 0:00 0:30 -
@@ -755,7 +771,7 @@ Link Pacific/Pago_Pago Pacific/Midway # in US minor outlying islands
# That web page currently lists transitions for 2012/3 and 2013/4.
# Assume the pattern instituted in 2012 will continue indefinitely.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule WS 2010 only - Sep lastSun 0:00 1 -
Rule WS 2011 only - Apr Sat>=1 4:00 0 -
Rule WS 2011 only - Sep lastSat 3:00 1 -
@@ -799,7 +815,7 @@ Zone Pacific/Fakaofo -11:24:56 - LMT 1901
13:00 - +13
# Tonga
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Tonga 1999 only - Oct 7 2:00s 1:00 -
Rule Tonga 2000 only - Mar 19 2:00s 0 -
Rule Tonga 2000 2001 - Nov Sun>=1 2:00 1:00 -
@@ -880,7 +896,7 @@ Zone Pacific/Wake 11:06:28 - LMT 1901
# Vanuatu
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Vanuatu 1983 only - Sep 25 0:00 1:00 -
Rule Vanuatu 1984 1991 - Mar Sun>=23 0:00 0 -
Rule Vanuatu 1984 only - Oct 23 0:00 1:00 -
diff --git a/make/data/tzdata/europe b/make/data/tzdata/europe
index 8fed2cf5e98..adb260624dc 100644
--- a/make/data/tzdata/europe
+++ b/make/data/tzdata/europe
@@ -411,7 +411,7 @@
# http://www.irishstatutebook.ie/eli/1926/sro/919/made/en/print
# http://www.irishstatutebook.ie/eli/1947/sro/71/made/en/print
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
# Summer Time Act, 1916
Rule GB-Eire 1916 only - May 21 2:00s 1:00 BST
Rule GB-Eire 1916 only - Oct 1 2:00s 0 GMT
@@ -552,7 +552,7 @@ Link Europe/London Europe/Isle_of_Man
# The following is like GB-Eire and EU, except with standard time in
# summer and negative daylight saving time in winter. It is for when
# negative SAVE values are used.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Eire 1971 only - Oct 31 2:00u -1:00 -
Rule Eire 1972 1980 - Mar Sun>=16 2:00u 0 -
Rule Eire 1972 1980 - Oct Sun>=23 2:00u -1:00 -
@@ -589,7 +589,7 @@ Zone Europe/Dublin -0:25:00 - LMT 1880 Aug 2
# predecessor organization, the European Communities.
# For brevity they are called "EU rules" elsewhere in this file.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule EU 1977 1980 - Apr Sun>=1 1:00u 1:00 S
Rule EU 1977 only - Sep lastSun 1:00u 0 -
Rule EU 1978 only - Oct 1 1:00u 0 -
@@ -629,13 +629,13 @@ Rule C-Eur 1944 only - Oct 2 2:00s 0 -
# corrected in version 2008d). The circumstantial evidence is simply the
# tz database itself, as seen below:
#
-# Zone Europe/Paris 0:09:21 - LMT 1891 Mar 15 0:01
+# Zone Europe/Paris ...
# 0:00 France WE%sT 1945 Sep 16 3:00
#
-# Zone Europe/Monaco 0:29:32 - LMT 1891 Mar 15
+# Zone Europe/Monaco ...
# 0:00 France WE%sT 1945 Sep 16 3:00
#
-# Zone Europe/Belgrade 1:22:00 - LMT 1884
+# Zone Europe/Belgrade ...
# 1:00 1:00 CEST 1945 Sep 16 2:00s
#
# Rule France 1945 only - Sep 16 3:00 0 -
@@ -681,7 +681,7 @@ Rule E-Eur 1996 max - Oct lastSun 0:00 0 -
#
# The 1917-1921 decree URLs are from Alexander Belopolsky (2016-08-23).
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Russia 1917 only - Jul 1 23:00 1:00 MST # Moscow Summer Time
#
# Decree No. 142 (1917-12-22) http://istmat.info/node/28137
@@ -795,7 +795,7 @@ Zone EET 2:00 EU EE%sT
# Albania
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Albania 1940 only - Jun 16 0:00 1:00 S
Rule Albania 1942 only - Nov 2 3:00 0 -
Rule Albania 1943 only - Mar 29 2:00 1:00 S
@@ -849,7 +849,7 @@ Zone Europe/Andorra 0:06:04 - LMT 1901
# In 1946 the end of DST was on Monday, 7 October 1946, at 3:00 am.
# Shanks had this right. Source: Die Weltpresse, 5. Oktober 1946, page 5.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Austria 1920 only - Apr 5 2:00s 1:00 S
Rule Austria 1920 only - Sep 13 2:00s 0 -
Rule Austria 1946 only - Apr 14 2:00s 1:00 S
@@ -936,7 +936,7 @@ Zone Europe/Minsk 1:50:16 - LMT 1880
# The 1918 rules are listed for completeness; they apply to unoccupied Belgium.
# Assume Brussels switched to WET in 1918 when the armistice took effect.
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Belgium 1918 only - Mar 9 0:00s 1:00 S
Rule Belgium 1918 1919 - Oct Sat>=1 23:00s 0 -
Rule Belgium 1919 only - Mar 1 23:00s 1:00 S
@@ -996,7 +996,7 @@ Zone Europe/Brussels 0:17:30 - LMT 1880
# EET -> EETDST is in 03:00 Local time in last Sunday of March ...
# EETDST -> EET is in 04:00 Local time in last Sunday of October
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Bulg 1979 only - Mar 31 23:00 1:00 S
Rule Bulg 1979 only - Oct 1 1:00 0 -
Rule Bulg 1980 1982 - Apr Sat>=1 23:00 1:00 S
@@ -1028,7 +1028,7 @@ Zone Europe/Sofia 1:33:16 - LMT 1880
# We know of no English-language name for historical Czech winter time;
# abbreviate it as "GMT", as it happened to be GMT.
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Czech 1945 only - Apr Mon>=1 2:00s 1:00 S
Rule Czech 1945 only - Oct 1 2:00s 0 -
Rule Czech 1946 only - May 6 2:00s 1:00 S
@@ -1052,17 +1052,16 @@ Zone Europe/Prague 0:57:44 - LMT 1850
# Denmark, Faroe Islands, and Greenland
# From Jesper Nørgaard Welen (2005-04-26):
-# http://www.hum.aau.dk/~poe/tid/tine/DanskTid.htm says that the law
-# [introducing standard time] was in effect from 1894-01-01....
-# The page http://www.retsinfo.dk/_GETDOCI_/ACCN/A18930008330-REGL
+# the law [introducing standard time] was in effect from 1894-01-01....
+# The page https://www.retsinformation.dk/eli/lta/1893/83
# confirms this, and states that the law was put forth 1893-03-29.
#
# The EU [actually, EEC and Euratom] treaty with effect from 1973:
-# http://www.retsinfo.dk/_GETDOCI_/ACCN/A19722110030-REGL
+# https://www.retsinformation.dk/eli/lta/1972/21100
#
# This provoked a new law from 1974 to make possible summer time changes
# in subsequent decrees with the law
-# http://www.retsinfo.dk/_GETDOCI_/ACCN/A19740022330-REGL
+# https://www.retsinformation.dk/eli/lta/1974/223
#
# It seems however that no decree was set forward until 1980. I have
# not found any decree, but in another related law, the effecting DST
@@ -1074,7 +1073,7 @@ Zone Europe/Prague 0:57:44 - LMT 1850
# The law is about the management of the extra hour, concerning
# working hours reported and effect on obligatory-rest rules (which
# was suspended on that night):
-# http://www.retsinfo.dk/_GETDOCI_/ACCN/C19801120554-REGL
+# https://web.archive.org/web/20140104053304/https://www.retsinformation.dk/Forms/R0710.aspx?id=60267
# From Jesper Nørgaard Welen (2005-06-11):
# The Herning Folkeblad (1980-09-26) reported that the night between
@@ -1084,7 +1083,7 @@ Zone Europe/Prague 0:57:44 - LMT 1850
# Hence the "02:00" of the 1980 law refers to standard time, not
# wall-clock time, and so the EU rules were in effect in 1980.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Denmark 1916 only - May 14 23:00 1:00 S
Rule Denmark 1916 only - Sep 30 23:00 0 -
Rule Denmark 1940 only - May 15 0:00 1:00 S
@@ -1186,7 +1185,7 @@ Zone Atlantic/Faroe -0:27:04 - LMT 1908 Jan 11 # Tórshavn
# http://naalakkersuisut.gl/~/media/Nanoq/Files/Attached%20Files/Engelske-tekster/Legislation/Executive%20Order%20National%20Park.rtf
# It is their only National Park.
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Thule 1991 1992 - Mar lastSun 2:00 1:00 D
Rule Thule 1991 1992 - Sep lastSun 2:00 0 S
Rule Thule 1993 2006 - Apr Sun>=1 2:00 1:00 D
@@ -1317,7 +1316,7 @@ Zone Europe/Tallinn 1:39:00 - LMT 1880
# From Paul Eggert (2014-06-14):
# Go with Oja over Shanks.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Finland 1942 only - Apr 2 24:00 1:00 S
Rule Finland 1942 only - Oct 4 1:00 0 -
Rule Finland 1981 1982 - Mar lastSun 2:00 1:00 S
@@ -1349,10 +1348,58 @@ Link Europe/Helsinki Europe/Mariehamn
# Françoise Gauquelin, Problèmes de l'heure résolus en astrologie,
# Guy Trédaniel, Paris 1987
+# From Michael Deckers (2020-06-11):
+# the law of 1891
+# was published on 1891-03-15, so it could only take force on 1891-03-16.
+
+# From Michael Deckers (2020-06-10):
+# Le Gaulois, 1911-03-11, page 1/6, online at
+# https://www.retronews.fr/societe/echo-de-presse/2018/01/29/1911-change-lheure-de-paris
+# ... [ Instantly, all pressure driven clock dials halted... Nine minutes and
+# twenty-one seconds later the hands resumed their circular motion. ]
+# There are also precise reports about how the change was prepared in train
+# stations: all the publicly visible clocks stopped at midnight railway time
+# (or were covered), only the chief of service had a watch, labeled
+# "Heure ancienne", that he kept running until it reached 00:04:21, when
+# he announced "Heure nouvelle". See the "Le Petit Journal 1911-03-11".
+# https://gallica.bnf.fr/ark:/12148/bpt6k6192911/f1.item.zoom
+#
+# From Michael Deckers (2020-06-12):
+# That "all French clocks stopped" for 00:09:21 is a misreading of French
+# newspapers; this sort of adjustment applies only to certain
+# remote-controlled clocks ("pendules pneumatiques", of which there existed
+# perhaps a dozen in Paris, and which simply could not be set back remotely),
+# but not to all the clocks in all French towns and villages. For instance,
+# the following story in the "Courrier de Saône-et-Loire" 1911-03-11, page 2:
+# only works if legal time was stepped back (was not monotone): ...
+# [One can observe that children who had been born at midnight less 5
+# minutes and who had died at midnight of the old time, would turn out to
+# be dead before being born, time having been set back and having
+# suppressed 9 minutes and 25 seconds of their existence, that is, more
+# than they could spend.]
+#
+# From Paul Eggert (2020-06-12):
+# French time in railway stations was legally five minutes behind civil time,
+# which explains why railway "old time" ran to 00:04:21 instead of to 00:09:21.
+# The law's text (which Michael Deckers noted is at
+# ) says only that
+# at 1911-03-11 00:00 legal time was that of Paris mean time delayed by
+# nine minutes and twenty-one seconds, and does not say how the
+# transition from Paris mean time was to occur.
+#
+# tzdb has no way to represent stopped clocks. As the railway practice
+# was to keep a watch running on "old time" to decide when to restart
+# the other clocks, this could be modeled as a transition for "old time" at
+# 00:09:21. However, since the law was ambiguous and clocks outside railway
+# stations were probably done haphazardly with the popular impression being
+# that the transition was done at 00:00 "old time", simply leave the time
+# blank; this causes zic to default to 00:00 "old time" which is good enough.
+# Do something similar for the 1891-03-16 transition. There are similar
+# problems in Algiers, Monaco and Tunis.
#
# Shank & Pottenger seem to use '24:00' ambiguously; resolve it with Whitman.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule France 1916 only - Jun 14 23:00s 1:00 S
Rule France 1916 1919 - Oct Sun>=1 23:00s 0 -
Rule France 1917 only - Mar 24 23:00s 1:00 S
@@ -1412,13 +1459,11 @@ Rule France 1945 only - Sep 16 3:00 0 -
# go with Excoffier's 28/3/76 0hUT and 25/9/76 23hUT.
Rule France 1976 only - Mar 28 1:00 1:00 S
Rule France 1976 only - Sep 26 1:00 0 -
-# Shanks & Pottenger give 0:09:20 for Paris Mean Time, and Whitman 0:09:05,
-# but Howse quotes the actual French legislation as saying 0:09:21.
-# Go with Howse. Howse writes that the time in France was officially based
+# Howse writes that the time in France was officially based
# on PMT-0:09:21 until 1978-08-09, when the time base finally switched to UTC.
# Zone NAME STDOFF RULES FORMAT [UNTIL]
-Zone Europe/Paris 0:09:21 - LMT 1891 Mar 15 0:01
- 0:09:21 - PMT 1911 Mar 11 0:01 # Paris MT
+Zone Europe/Paris 0:09:21 - LMT 1891 Mar 16
+ 0:09:21 - PMT 1911 Mar 11 # Paris Mean Time
# Shanks & Pottenger give 1940 Jun 14 0:00; go with Excoffier and Le Corre.
0:00 France WE%sT 1940 Jun 14 23:00
# Le Corre says Paris stuck with occupied-France time after the liberation;
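Note on the Europe/Paris lines above: with the "0:01" dropped from the UNTIL fields, zic falls back to its default transition time of 00:00 local (old) time for both the 1891-03-16 and the 1911-03-11 steps, which is what the comment block argues for. A minimal java.time sketch (not part of the patch) for checking the resulting transitions against a JDK whose tzdb.dat was rebuilt from this data; the offsets printed depend on that JDK's bundled tzdata:

    import java.time.Instant;
    import java.time.ZoneId;
    import java.time.zone.ZoneOffsetTransition;

    public class ParisTransitions {
        public static void main(String[] args) {
            // Print the pre-1912 Europe/Paris transitions; with this data the
            // LMT -> PMT step should land on 1891-03-16 and the PMT -> WET step
            // on 1911-03-11, both at 00:00 of the old local time.
            ZoneId paris = ZoneId.of("Europe/Paris");
            Instant cutoff = Instant.parse("1912-01-01T00:00:00Z");
            for (ZoneOffsetTransition t : paris.getRules().getTransitions()) {
                if (t.getInstant().isBefore(cutoff)) {
                    System.out.println(t);
                }
            }
        }
    }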
@@ -1447,7 +1492,7 @@ Zone Europe/Paris 0:09:21 - LMT 1891 Mar 15 0:01
# this was equivalent to UT +03, not +04.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Germany 1946 only - Apr 14 2:00s 1:00 S
Rule Germany 1946 only - Oct 7 2:00s 0 -
Rule Germany 1947 1949 - Oct Sun>=1 2:00s 0 -
@@ -1499,7 +1544,7 @@ Zone Europe/Gibraltar -0:21:24 - LMT 1880 Aug 2 0:00s
1:00 EU CE%sT
# Greece
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
# Whitman gives 1932 Jul 5 - Nov 1; go with Shanks & Pottenger.
Rule Greece 1932 only - Jul 7 0:00 1:00 S
Rule Greece 1932 only - Sep 1 0:00 0 -
@@ -1534,38 +1579,73 @@ Zone Europe/Athens 1:34:52 - LMT 1895 Sep 14
2:00 EU EE%sT
# Hungary
-# From Paul Eggert (2014-07-15):
-# Dates for 1916-1945 are taken from:
-# Oross A. Jelen a múlt jövője: a nyári időszámítás Magyarországon 1916-1945.
-# National Archives of Hungary (2012-10-29).
-# http://mnl.gov.hu/a_het_dokumentuma/a_nyari_idoszamitas_magyarorszagon_19161945.html
-# This source does not always give times, which are taken from Shanks
-# & Pottenger (which disagree about the dates).
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
-Rule Hungary 1918 only - Apr 1 3:00 1:00 S
-Rule Hungary 1918 only - Sep 16 3:00 0 -
-Rule Hungary 1919 only - Apr 15 3:00 1:00 S
-Rule Hungary 1919 only - Nov 24 3:00 0 -
+
+# From Michael Deckers (2020-06-09):
+# an Austrian encyclopedia of railroads of 1913, online at
+# http://www.zeno.org/Roell-1912/A/Eisenbahnzeit
+# says that the switch [to CET] happened on 1890-11-01.
+
+# From Géza Nyáry (2020-06-07):
+# Data for 1918-1983 are based on the archive database of Library Hungaricana.
+# The dates are collected from original, scanned governmental orders,
+# bulletins, instructions and public press.
+# [See URLs below.]
+
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
+# https://library.hungaricana.hu/hu/view/OGYK_RT_1918/?pg=238
+# https://library.hungaricana.hu/hu/view/OGYK_RT_1919/?pg=808
+# https://library.hungaricana.hu/hu/view/OGYK_RT_1920/?pg=201
+Rule Hungary 1918 1919 - Apr 15 2:00 1:00 S
+Rule Hungary 1918 1920 - Sep Mon>=15 3:00 0 -
+Rule Hungary 1920 only - Apr 5 2:00 1:00 S
+# https://library.hungaricana.hu/hu/view/OGYK_RT_1945/?pg=882
Rule Hungary 1945 only - May 1 23:00 1:00 S
-Rule Hungary 1945 only - Nov 1 0:00 0 -
+Rule Hungary 1945 only - Nov 1 1:00 0 -
+# https://library.hungaricana.hu/hu/view/Delmagyarorszag_1946_03/?pg=49
Rule Hungary 1946 only - Mar 31 2:00s 1:00 S
-Rule Hungary 1946 1949 - Oct Sun>=1 2:00s 0 -
+# https://library.hungaricana.hu/hu/view/Delmagyarorszag_1946_09/?pg=54
+Rule Hungary 1946 only - Oct 7 2:00 0 -
+# https://library.hungaricana.hu/hu/view/KulfBelfHirek_1947_04_1__001-123/?pg=90
+# https://library.hungaricana.hu/hu/view/DunantuliNaplo_1947_09/?pg=128
+# https://library.hungaricana.hu/hu/view/KulfBelfHirek_1948_03_3__001-123/?pg=304
+# https://library.hungaricana.hu/hu/view/Zala_1948_09/?pg=64
+# https://library.hungaricana.hu/hu/view/SatoraljaujhelyiLeveltar_ZempleniNepujsag_1948/?pg=53
+# https://library.hungaricana.hu/hu/view/SatoraljaujhelyiLeveltar_ZempleniNepujsag_1948/?pg=160
+# https://library.hungaricana.hu/hu/view/UjSzo_1949_01-04/?pg=102
+# https://library.hungaricana.hu/hu/view/KeletMagyarorszag_1949_03/?pg=96
+# https://library.hungaricana.hu/hu/view/Delmagyarorszag_1949_09/?pg=94
Rule Hungary 1947 1949 - Apr Sun>=4 2:00s 1:00 S
-Rule Hungary 1950 only - Apr 17 2:00s 1:00 S
-Rule Hungary 1950 only - Oct 23 2:00s 0 -
-Rule Hungary 1954 1955 - May 23 0:00 1:00 S
-Rule Hungary 1954 1955 - Oct 3 0:00 0 -
-Rule Hungary 1956 only - Jun Sun>=1 0:00 1:00 S
-Rule Hungary 1956 only - Sep lastSun 0:00 0 -
-Rule Hungary 1957 only - Jun Sun>=1 1:00 1:00 S
-Rule Hungary 1957 only - Sep lastSun 3:00 0 -
-Rule Hungary 1980 only - Apr 6 1:00 1:00 S
+Rule Hungary 1947 1949 - Oct Sun>=1 2:00s 0 -
+# https://library.hungaricana.hu/hu/view/DTT_KOZL_TanacsokKozlonye_1954/?pg=513
+Rule Hungary 1954 only - May 23 0:00 1:00 S
+Rule Hungary 1954 only - Oct 3 0:00 0 -
+# https://library.hungaricana.hu/hu/view/DTT_KOZL_TanacsokKozlonye_1955/?pg=398
+Rule Hungary 1955 only - May 22 2:00 1:00 S
+Rule Hungary 1955 only - Oct 2 3:00 0 -
+# https://library.hungaricana.hu/hu/view/HevesMegyeiNepujsag_1956_06/?pg=0
+# https://library.hungaricana.hu/hu/view/EszakMagyarorszag_1956_06/?pg=6
+# https://library.hungaricana.hu/hu/view/SzolnokMegyeiNeplap_1957_04/?pg=120
+# https://library.hungaricana.hu/hu/view/PestMegyeiHirlap_1957_09/?pg=143
+Rule Hungary 1956 1957 - Jun Sun>=1 2:00 1:00 S
+Rule Hungary 1956 1957 - Sep lastSun 3:00 0 -
+# https://library.hungaricana.hu/hu/view/DTT_KOZL_TanacsokKozlonye_1980/?pg=189
+Rule Hungary 1980 only - Apr 6 0:00 1:00 S
+Rule Hungary 1980 only - Sep 28 1:00 0 -
+# https://library.hungaricana.hu/hu/view/DTT_KOZL_TanacsokKozlonye_1980/?pg=1227
+# https://library.hungaricana.hu/hu/view/Delmagyarorszag_1981_01/?pg=79
+# https://library.hungaricana.hu/hu/view/DTT_KOZL_TanacsokKozlonye_1982/?pg=115
+# https://library.hungaricana.hu/hu/view/DTT_KOZL_TanacsokKozlonye_1983/?pg=85
+Rule Hungary 1981 1983 - Mar lastSun 0:00 1:00 S
+Rule Hungary 1981 1983 - Sep lastSun 1:00 0 -
+#
# Zone NAME STDOFF RULES FORMAT [UNTIL]
-Zone Europe/Budapest 1:16:20 - LMT 1890 Oct
+Zone Europe/Budapest 1:16:20 - LMT 1890 Nov 1
1:00 C-Eur CE%sT 1918
- 1:00 Hungary CE%sT 1941 Apr 8
+# https://library.hungaricana.hu/hu/view/OGYK_RT_1941/?pg=1204
+# https://library.hungaricana.hu/hu/view/OGYK_RT_1942/?pg=3955
+ 1:00 Hungary CE%sT 1941 Apr 7 23:00
1:00 C-Eur CE%sT 1945
- 1:00 Hungary CE%sT 1980 Sep 28 2:00s
+ 1:00 Hungary CE%sT 1984
1:00 EU CE%sT
# Iceland
@@ -1601,7 +1681,7 @@ Zone Europe/Budapest 1:16:20 - LMT 1890 Oct
# The information below is taken from the 1988 Almanak; see
# http://www.almanak.hi.is/klukkan.html
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Iceland 1917 1919 - Feb 19 23:00 1:00 -
Rule Iceland 1917 only - Oct 21 1:00 0 -
Rule Iceland 1918 1919 - Nov 16 1:00 0 -
@@ -1693,7 +1773,7 @@ Zone Atlantic/Reykjavik -1:28 - LMT 1908
# to 1944-06-04; although Rome was an open city during this period, it
# was effectively controlled by Germany.
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Italy 1916 only - Jun 3 24:00 1:00 S
Rule Italy 1916 1917 - Sep 30 24:00 0 -
Rule Italy 1917 only - Mar 31 24:00 1:00 S
@@ -1803,7 +1883,7 @@ Link Europe/Rome Europe/San_Marino
# urged Lithuania and Estonia to adopt a similar time policy, but it
# appears that they will not do so....
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Latvia 1989 1996 - Mar lastSun 2:00s 1:00 S
Rule Latvia 1989 1996 - Sep lastSun 2:00s 0 -
@@ -1896,7 +1976,7 @@ Zone Europe/Vilnius 1:41:16 - LMT 1880
# Luxembourg
# Whitman disagrees with most of these dates in minor ways;
# go with Shanks & Pottenger.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Lux 1916 only - May 14 23:00 1:00 S
Rule Lux 1916 only - Oct 1 1:00 0 -
Rule Lux 1917 only - Apr 28 23:00 1:00 S
@@ -1937,7 +2017,7 @@ Zone Europe/Luxembourg 0:24:36 - LMT 1904 Jun
# From Paul Eggert (2016-10-21):
# Assume 1900-1972 was like Rome, overriding Shanks.
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Malta 1973 only - Mar 31 0:00s 1:00 S
Rule Malta 1973 only - Sep 29 0:00s 0 -
Rule Malta 1974 only - Apr 21 0:00s 1:00 S
@@ -2010,7 +2090,7 @@ Zone Europe/Malta 0:58:04 - LMT 1893 Nov 2 0:00s # Valletta
# says the 2014-03-30 spring-forward transition was at 02:00 local time.
# Guess that since 1997 Moldova has switched one hour before the EU.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Moldova 1997 max - Mar lastSun 2:00 1:00 S
Rule Moldova 1997 max - Oct lastSun 3:00 0 -
@@ -2028,11 +2108,24 @@ Zone Europe/Chisinau 1:55:20 - LMT 1880
2:00 Moldova EE%sT
# Monaco
-# Shanks & Pottenger give 0:09:20 for Paris Mean Time; go with Howse's
-# more precise 0:09:21.
+#
+# From Michael Deckers (2020-06-12):
+# In the "Journal de Monaco" of 1892-05-24, online at
+# https://journaldemonaco.gouv.mc/var/jdm/storage/original/application/b1c67c12c5af11b41ea888fb048e4fe8.pdf
+# we read: ...
+# [In virtue of a Sovereign Ordinance of the May 13 of the current [year],
+# legal time in the Principality will be set to, from the date of June 1,
+# 1892 onwards, to the meridian of Paris, as in France.]
+# In the "Journal de Monaco" of 1911-03-28, online at
+# https://journaldemonaco.gouv.mc/var/jdm/storage/original/application/de74ffb7db53d4f599059fe8f0ed482a.pdf
+# we read an ordinance of 1911-03-16: ...
+# [Legal time in the Principality will be set, from the date of promulgation
+# of the present ordinance, to legal time in France.... Consequently, legal
+# time will be retarded by 9 minutes and 21 seconds.]
+#
# Zone NAME STDOFF RULES FORMAT [UNTIL]
-Zone Europe/Monaco 0:29:32 - LMT 1891 Mar 15
- 0:09:21 - PMT 1911 Mar 11 # Paris Mean Time
+Zone Europe/Monaco 0:29:32 - LMT 1892 Jun 1
+ 0:09:21 - PMT 1911 Mar 29 # Paris Mean Time
0:00 France WE%sT 1945 Sep 16 3:00
1:00 France CE%sT 1977
1:00 EU CE%sT
@@ -2080,7 +2173,7 @@ Zone Europe/Monaco 0:29:32 - LMT 1891 Mar 15
# The data entries before 1945 are taken from
# https://www.staff.science.uu.nl/~gent0113/wettijd/wettijd.htm
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Neth 1916 only - May 1 0:00 1:00 NST # Netherlands Summer Time
Rule Neth 1916 only - Oct 1 0:00 0 AMT # Amsterdam Mean Time
Rule Neth 1917 only - Apr 16 2:00s 1:00 NST
@@ -2117,7 +2210,7 @@ Zone Europe/Amsterdam 0:19:32 - LMT 1835
# Norway
# http://met.no/met/met_lex/q_u/sommertid.html (2004-01) agrees with Shanks &
# Pottenger.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Norway 1916 only - May 22 1:00 1:00 S
Rule Norway 1916 only - Sep 30 0:00 0 -
Rule Norway 1945 only - Apr 2 2:00s 1:00 S
@@ -2186,7 +2279,7 @@ Link Europe/Oslo Arctic/Longyearbyen
# The 1919 dates and times can be found in Tygodnik Urzędowy nr 1 (1919-03-20),
# pp 1-2.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Poland 1918 1919 - Sep 16 2:00s 0 -
Rule Poland 1919 only - Apr 15 2:00s 1:00 S
Rule Poland 1944 only - Apr 3 2:00s 1:00 S
@@ -2257,7 +2350,7 @@ Zone Europe/Warsaw 1:24:00 - LMT 1880
# Guess that the Azores changed to EU rules in 1992 (since that's when Portugal
# harmonized with EU rules), and that they stayed +0:00 that winter.
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
# DSH writes that despite Decree 1,469 (1915), the change to the clocks was not
# done every year, depending on what Spain did, because of railroad schedules.
# Go with Shanks & Pottenger.
@@ -2370,7 +2463,7 @@ Zone Atlantic/Madeira -1:07:36 - LMT 1884 # Funchal
# assume that Romania and Moldova switched to EU rules in 1997,
# the same year as Bulgaria.
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Romania 1932 only - May 21 0:00s 1:00 S
Rule Romania 1932 1939 - Oct Sun>=1 0:00s 0 -
Rule Romania 1933 1939 - Apr Sun>=2 0:00s 1:00 S
@@ -3468,14 +3561,14 @@ Link Europe/Prague Europe/Bratislava
# fallback transition from the next day's 00:59... to 00:00.
# From Michael Deckers (2016-12-15):
-# The Royal Decree of 1900-06-26 quoted by Planesas, online at
+# The Royal Decree of 1900-07-26 quoted by Planesas, online at
# https://www.boe.es/datos/pdfs/BOE//1900/209/A00383-00384.pdf
# says in its article 5 (my translation):
# These dispositions will enter into force beginning with the
# instant at which, according to the time indicated in article 1,
# the 1st day of January of 1901 will begin.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Spain 1918 only - Apr 15 23:00 1:00 S
Rule Spain 1918 1919 - Oct 6 24:00s 0 -
Rule Spain 1919 only - Apr 6 23:00 1:00 S
@@ -3612,7 +3705,7 @@ Zone Europe/Stockholm 1:12:12 - LMT 1879 Jan 1
# By the end of the 18th century clocks and watches became commonplace
# and their performance improved enormously. Communities began to keep
# mean time in preference to apparent time - Geneva from 1780 ....
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
# From Whitman (who writes "Midnight?"):
# Rule Swiss 1940 only - Nov 2 0:00 1:00 S
# Rule Swiss 1940 only - Dec 31 0:00 0 -
@@ -3699,7 +3792,7 @@ Zone Europe/Stockholm 1:12:12 - LMT 1879 Jan 1
# 1853-07-16, though it probably occurred at some other date in Zurich, and
# legal civil time probably changed at still some other transition date.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Swiss 1941 1942 - May Mon>=1 1:00 1:00 S
Rule Swiss 1941 1942 - Oct Mon>=1 2:00 0 -
# Zone NAME STDOFF RULES FORMAT [UNTIL]
@@ -3848,7 +3941,7 @@ Zone Europe/Zurich 0:34:08 - LMT 1853 Jul 16 # See above comment.
# Although Google Translate misfires on that source, it looks like
# Turkey reversed last month's decision, and so will stay at +03.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Turkey 1916 only - May 1 0:00 1:00 S
Rule Turkey 1916 only - Oct 1 0:00 0 -
Rule Turkey 1920 only - Mar 28 0:00 1:00 S
@@ -4006,7 +4099,7 @@ Zone Europe/Kiev 2:02:04 - LMT 1880
2:00 1:00 EEST 1991 Sep 29 3:00
2:00 E-Eur EE%sT 1995
2:00 EU EE%sT
-# Ruthenia used CET 1990/1991.
+# Transcarpathia used CET 1990/1991.
# "Uzhhorod" is the transliteration of the Rusyn/Ukrainian pronunciation, but
# "Uzhgorod" is more common in English.
Zone Europe/Uzhgorod 1:29:12 - LMT 1890 Oct
diff --git a/make/data/tzdata/leapseconds b/make/data/tzdata/leapseconds
index fe8e170ed26..e00b297baed 100644
--- a/make/data/tzdata/leapseconds
+++ b/make/data/tzdata/leapseconds
@@ -91,11 +91,11 @@ Leap 2016 Dec 31 23:59:60 + S
# Any additional leap seconds will come after this.
# This Expires line is commented out for now,
# so that pre-2020a zic implementations do not reject this file.
-#Expires 2020 Dec 28 00:00:00
+#Expires 2021 Jun 28 00:00:00
# POSIX timestamps for the data in this file:
#updated 1467936000 (2016-07-08 00:00:00 UTC)
-#expires 1609113600 (2020-12-28 00:00:00 UTC)
+#expires 1624838400 (2021-06-28 00:00:00 UTC)
-# Updated through IERS Bulletin C59
-# File expires on: 28 December 2020
+# Updated through IERS Bulletin C60
+# File expires on: 28 June 2021
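The commented-out #expires timestamp and the human-readable expiry date above are meant to agree; a one-line check (not part of the patch) confirms the arithmetic with java.time:

    import java.time.Instant;

    public class ExpiryCheck {
        public static void main(String[] args) {
            // 1624838400 is the POSIX timestamp on the "#expires" line above;
            // this prints 2021-06-28T00:00:00Z, matching the IERS Bulletin C60 expiry date.
            System.out.println(Instant.ofEpochSecond(1624838400L));
        }
    }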
diff --git a/make/data/tzdata/northamerica b/make/data/tzdata/northamerica
index 60c7addef09..9a70e313c78 100644
--- a/make/data/tzdata/northamerica
+++ b/make/data/tzdata/northamerica
@@ -193,7 +193,7 @@
# U.S. government action. So even though the "US" rules have changed
# in the latest release, other countries won't be affected.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule US 1918 1919 - Mar lastSun 2:00 1:00 D
Rule US 1918 1919 - Oct lastSun 2:00 0 S
Rule US 1942 only - Feb 9 2:00 1:00 W # War
@@ -370,7 +370,7 @@ Zone PST8PDT -8:00 US P%sT
# Eastern time (i.e., -4:56:01.6) just before the 1883 switch. Round to the
# nearest second.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
+# Rule NAME FROM TO - IN ON AT SAVE LETTER
Rule NYC 1920 only - Mar lastSun 2:00 1:00 D
Rule NYC 1920 only - Oct lastSun 2:00 0 S
Rule NYC 1921 1966 - Apr lastSun 2:00 1:00 D
@@ -454,7 +454,7 @@ Zone America/New_York -4:56:02 - LMT 1883 Nov 18 12:03:58
# The Tennessean 2007-05-11, republished 2015-04-06.
# https://www.tennessean.com/story/insider/extras/2015/04/06/archives-seigenthaler-for-100-years-the-tennessean-had-it-covered/25348545/
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
+# Rule NAME FROM TO - IN ON AT SAVE LETTER
Rule Chicago 1920 only - Jun 13 2:00 1:00 D
Rule Chicago 1920 1921 - Oct lastSun 2:00 0 S
Rule Chicago 1921 only - Mar lastSun 2:00 1:00 D
@@ -523,7 +523,7 @@ Zone America/North_Dakota/Beulah -6:47:07 - LMT 1883 Nov 18 12:12:53
# El Paso Times. 2018-10-24 06:40 -06.
# https://www.elpasotimes.com/story/news/local/el-paso/2018/10/24/el-pasoans-were-time-rebels-fought-stay-mountain-zone/1744509002/
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
+# Rule NAME FROM TO - IN ON AT SAVE LETTER
Rule Denver 1920 1921 - Mar lastSun 2:00 1:00 D
Rule Denver 1920 only - Oct lastSun 2:00 0 S
Rule Denver 1921 only - May 22 2:00 0 S
@@ -576,7 +576,7 @@ Zone America/Denver -6:59:56 - LMT 1883 Nov 18 12:00:04
# https://repository.uchastings.edu/cgi/viewcontent.cgi?article=1501&context=ca_ballot_props
# https://repository.uchastings.edu/cgi/viewcontent.cgi?article=1636&context=ca_ballot_props
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
+# Rule NAME FROM TO - IN ON AT SAVE LETTER
Rule CA 1948 only - Mar 14 2:01 1:00 D
Rule CA 1949 only - Jan 1 2:00 0 S
Rule CA 1950 1966 - Apr lastSun 1:00 1:00 D
@@ -934,7 +934,7 @@ Zone America/Boise -7:44:49 - LMT 1883 Nov 18 12:15:11
# going to switch from Central to Eastern Time on March 11, 2007....
# http://www.indystar.com/apps/pbcs.dll/article?AID=/20070207/LOCAL190108/702070524/0/LOCAL
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
+# Rule NAME FROM TO - IN ON AT SAVE LETTER
Rule Indianapolis 1941 only - Jun 22 2:00 1:00 D
Rule Indianapolis 1941 1954 - Sep lastSun 2:00 0 S
Rule Indianapolis 1946 1954 - Apr lastSun 2:00 1:00 D
@@ -953,7 +953,7 @@ Zone America/Indiana/Indianapolis -5:44:38 - LMT 1883 Nov 18 12:15:22
#
# Eastern Crawford County, Indiana, left its clocks alone in 1974,
# as well as from 1976 through 2005.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
+# Rule NAME FROM TO - IN ON AT SAVE LETTER
Rule Marengo 1951 only - Apr lastSun 2:00 1:00 D
Rule Marengo 1951 only - Sep lastSun 2:00 0 S
Rule Marengo 1954 1960 - Apr lastSun 2:00 1:00 D
@@ -972,7 +972,7 @@ Zone America/Indiana/Marengo -5:45:23 - LMT 1883 Nov 18 12:14:37
# Daviess, Dubois, Knox, and Martin Counties, Indiana,
# switched from eastern to central time in April 2006, then switched back
# in November 2007.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
+# Rule NAME FROM TO - IN ON AT SAVE LETTER
Rule Vincennes 1946 only - Apr lastSun 2:00 1:00 D
Rule Vincennes 1946 only - Sep lastSun 2:00 0 S
Rule Vincennes 1953 1954 - Apr lastSun 2:00 1:00 D
@@ -997,7 +997,7 @@ Zone America/Indiana/Vincennes -5:50:07 - LMT 1883 Nov 18 12:09:53
# The Indianapolis News, Friday 27 October 1967 states that Perry County
# returned to CST. It went again to EST on 27 April 1969, as documented by the
# Indianapolis star of Saturday 26 April.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
+# Rule NAME FROM TO - IN ON AT SAVE LETTER
Rule Perry 1955 only - May 1 0:00 1:00 D
Rule Perry 1955 1960 - Sep lastSun 2:00 0 S
Rule Perry 1956 1963 - Apr lastSun 2:00 1:00 D
@@ -1014,7 +1014,7 @@ Zone America/Indiana/Tell_City -5:47:03 - LMT 1883 Nov 18 12:12:57
#
# Pike County, Indiana moved from central to eastern time in 1977,
# then switched back in 2006, then switched back again in 2007.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
+# Rule NAME FROM TO - IN ON AT SAVE LETTER
Rule Pike 1955 only - May 1 0:00 1:00 D
Rule Pike 1955 1960 - Sep lastSun 2:00 0 S
Rule Pike 1956 1964 - Apr lastSun 2:00 1:00 D
@@ -1035,7 +1035,7 @@ Zone America/Indiana/Petersburg -5:49:07 - LMT 1883 Nov 18 12:10:53
# An article on page A3 of the Sunday, 1991-10-27 Washington Post
# notes that Starke County switched from Central time to Eastern time as of
# 1991-10-27.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
+# Rule NAME FROM TO - IN ON AT SAVE LETTER
Rule Starke 1947 1961 - Apr lastSun 2:00 1:00 D
Rule Starke 1947 1954 - Sep lastSun 2:00 0 S
Rule Starke 1955 1956 - Oct lastSun 2:00 0 S
@@ -1052,7 +1052,7 @@ Zone America/Indiana/Knox -5:46:30 - LMT 1883 Nov 18 12:13:30
#
# Pulaski County, Indiana, switched from eastern to central time in
# April 2006 and then switched back in March 2007.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
+# Rule NAME FROM TO - IN ON AT SAVE LETTER
Rule Pulaski 1946 1960 - Apr lastSun 2:00 1:00 D
Rule Pulaski 1946 1954 - Sep lastSun 2:00 0 S
Rule Pulaski 1955 1956 - Oct lastSun 2:00 0 S
@@ -1094,7 +1094,7 @@ Zone America/Indiana/Vevay -5:40:16 - LMT 1883 Nov 18 12:19:44
#
# Part of Kentucky left its clocks alone in 1974.
# This also includes Clark, Floyd, and Harrison counties in Indiana.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
+# Rule NAME FROM TO - IN ON AT SAVE LETTER
Rule Louisville 1921 only - May 1 2:00 1:00 D
Rule Louisville 1921 only - Sep 1 2:00 0 S
Rule Louisville 1941 only - Apr lastSun 2:00 1:00 D
@@ -1208,7 +1208,7 @@ Zone America/Kentucky/Monticello -5:39:24 - LMT 1883 Nov 18 12:20:36
# election Michigan voters narrowly repealed DST, effective 1969.
#
# Most of Michigan observed DST from 1973 on, but was a bit late in 1975.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
+# Rule NAME FROM TO - IN ON AT SAVE LETTER
Rule Detroit 1948 only - Apr lastSun 2:00 1:00 D
Rule Detroit 1948 only - Sep lastSun 2:00 0 S
# Zone NAME STDOFF RULES FORMAT [UNTIL]
@@ -1225,7 +1225,7 @@ Zone America/Detroit -5:32:11 - LMT 1905
#
# Dickinson, Gogebic, Iron, and Menominee Counties, Michigan,
# switched from EST to CST/CDT in 1973.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER
+# Rule NAME FROM TO - IN ON AT SAVE LETTER
Rule Menominee 1946 only - Apr lastSun 2:00 1:00 D
Rule Menominee 1946 only - Sep lastSun 2:00 0 S
Rule Menominee 1966 only - Apr lastSun 2:00 1:00 D
@@ -1395,7 +1395,7 @@ Zone America/Menominee -5:50:27 - LMT 1885 Sep 18 12:00
# Oct 31, to Oct 27, 1918 (and Sunday is a more likely transition day
# than Thursday) in all Canadian rulesets.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Canada 1918 only - Apr 14 2:00 1:00 D
Rule Canada 1918 only - Oct 27 2:00 0 S
Rule Canada 1942 only - Feb 9 2:00 1:00 W # War
@@ -1418,7 +1418,7 @@ Rule Canada 2007 max - Nov Sun>=1 2:00 0 S
# that follows the rules is the southeast corner, including Port Hope
# Simpson and Mary's Harbour, but excluding, say, Black Tickle.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule StJohns 1917 only - Apr 8 2:00 1:00 D
Rule StJohns 1917 only - Sep 17 2:00 0 S
# Whitman gives 1919 Apr 5 and 1920 Apr 5; go with Shanks & Pottenger.
@@ -1520,7 +1520,7 @@ Zone America/Goose_Bay -4:01:40 - LMT 1884 # Happy Valley-Goose Bay
# bill say that it is "accommodating the customs and practices" of those
# regions, which suggests that they have always been in-line with Halifax.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Halifax 1916 only - Apr 1 0:00 1:00 D
Rule Halifax 1916 only - Oct 1 0:00 0 S
Rule Halifax 1920 only - May 9 0:00 1:00 D
@@ -1586,7 +1586,7 @@ Zone America/Glace_Bay -3:59:48 - LMT 1902 Jun 15
# clear that this was the case since at least 1993.
# For now, assume it started in 1993.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Moncton 1933 1935 - Jun Sun>=8 1:00 1:00 D
Rule Moncton 1933 1935 - Sep Sun>=8 1:00 0 S
Rule Moncton 1936 1938 - Jun Sun>=1 1:00 1:00 D
@@ -1795,7 +1795,7 @@ Zone America/Blanc-Sablon -3:48:28 - LMT 1884
# With some exceptions, the use of daylight saving may be said to be limited
# to those cities and towns lying between Quebec city and Windsor, Ont.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Toronto 1919 only - Mar 30 23:30 1:00 D
Rule Toronto 1919 only - Oct 26 0:00 0 S
Rule Toronto 1920 only - May 2 2:00 1:00 D
@@ -1893,7 +1893,7 @@ Zone America/Atikokan -6:06:28 - LMT 1895
# starting 1966. Since 02:00s is clearly correct for 1967 on, assume
# it was also 02:00s in 1966.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Winn 1916 only - Apr 23 0:00 1:00 D
Rule Winn 1916 only - Sep 17 0:00 0 S
Rule Winn 1918 only - Apr 14 2:00 1:00 D
@@ -1984,7 +1984,7 @@ Zone America/Winnipeg -6:28:36 - LMT 1887 Jul 16
# long and rather painful to read.
# http://www.qp.gov.sk.ca/documents/English/Statutes/Statutes/T14.pdf
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Regina 1918 only - Apr 14 2:00 1:00 D
Rule Regina 1918 only - Oct 27 2:00 0 S
Rule Regina 1930 1934 - May Sun>=1 0:00 1:00 D
@@ -2034,7 +2034,7 @@ Zone America/Swift_Current -7:11:20 - LMT 1905 Sep
# Boyer JP. Forcing Choice: The Risky Reward of Referendums. Dundum. 2017.
# ISBN 978-1459739123.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Edm 1918 1919 - Apr Sun>=8 2:00 1:00 D
Rule Edm 1918 only - Oct 27 2:00 0 S
Rule Edm 1919 only - May 27 2:00 0 S
@@ -2143,7 +2143,7 @@ Zone America/Edmonton -7:33:52 - LMT 1906 Sep
# https://searcharchives.vancouver.ca/daylight-saving-1918-starts-again-july-7-1941-start-d-s-sept-27-end-of-d-s-1941
# We have no further details, so omit them for now.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Vanc 1918 only - Apr 14 2:00 1:00 D
Rule Vanc 1918 only - Oct 27 2:00 0 S
Rule Vanc 1942 only - Feb 9 2:00 1:00 W # War
@@ -2472,7 +2472,19 @@ Zone America/Creston -7:46:04 - LMT 1884
# consistency with nearby Dawson Creek, Creston, and Fort Nelson.
# https://yukon.ca/en/news/yukon-end-seasonal-time-change
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# From Andrew G. Smith (2020-09-24):
+# Yukon has completed its regulatory change to be on UTC -7 year-round....
+# http://www.gov.yk.ca/legislation/regs/oic2020_125.pdf
+# What we have done is re-defined Yukon Standard Time, as we are
+# authorized to do under section 33 of our Interpretation Act:
+# http://www.gov.yk.ca/legislation/acts/interpretation_c.pdf
+#
+# From Paul Eggert (2020-09-24):
+# tzdb uses the obsolete YST abbreviation for standard time in Yukon through
+# about 1970, and uses PST for standard time in Yukon since then. Consistent
+# with that, use MST for -07, the new standard time in Yukon effective Nov. 1.
+
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule NT_YK 1918 only - Apr 14 2:00 1:00 D
Rule NT_YK 1918 only - Oct 27 2:00 0 S
Rule NT_YK 1919 only - May 25 2:00 1:00 D
@@ -2526,12 +2538,12 @@ Zone America/Inuvik 0 - -00 1953 # Inuvik founded
Zone America/Whitehorse -9:00:12 - LMT 1900 Aug 20
-9:00 NT_YK Y%sT 1967 May 28 0:00
-8:00 NT_YK P%sT 1980
- -8:00 Canada P%sT 2020 Mar 8 2:00
+ -8:00 Canada P%sT 2020 Nov 1
-7:00 - MST
Zone America/Dawson -9:17:40 - LMT 1900 Aug 20
-9:00 NT_YK Y%sT 1973 Oct 28 0:00
-8:00 NT_YK P%sT 1980
- -8:00 Canada P%sT 2020 Mar 8 2:00
+ -8:00 Canada P%sT 2020 Nov 1
-7:00 - MST
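A short java.time check (not part of the patch) for the new Whitehorse/Dawson behaviour; it assumes the running JDK bundles 2020b-or-later tzdata:

    import java.time.LocalDateTime;
    import java.time.ZoneId;
    import java.time.ZonedDateTime;

    public class YukonOffsets {
        public static void main(String[] args) {
            // Winter 2020 (before the 2020 Nov 1 cut-over) should print -08:00,
            // while both 2021 dates should print -07:00: Yukon now keeps MST year-round.
            ZoneId zone = ZoneId.of("America/Whitehorse");
            for (String d : new String[] {"2020-02-01T12:00", "2021-02-01T12:00", "2021-07-01T12:00"}) {
                ZonedDateTime zdt = ZonedDateTime.of(LocalDateTime.parse(d), zone);
                System.out.println(d + " -> " + zdt.getOffset());
            }
        }
    }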
@@ -2746,7 +2758,7 @@ Zone America/Dawson -9:17:40 - LMT 1900 Aug 20
# 5- The islands, reefs and keys shall take their timezone from the
# longitude they are located at.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Mexico 1939 only - Feb 5 0:00 1:00 D
Rule Mexico 1939 only - Jun 25 0:00 0 S
Rule Mexico 1940 only - Dec 9 0:00 1:00 D
@@ -2951,7 +2963,7 @@ Zone America/Tijuana -7:48:04 - LMT 1922 Jan 1 0:11:56
# rules to sync with the U.S. starting in 2007....
# http://www.jonesbahamas.com/?c=45&a=10412
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Bahamas 1964 1975 - Oct lastSun 2:00 0 S
Rule Bahamas 1964 1975 - Apr lastSun 2:00 1:00 D
# Zone NAME STDOFF RULES FORMAT [UNTIL]
@@ -2963,7 +2975,7 @@ Zone America/Nassau -5:09:30 - LMT 1912 Mar 2
# For 1899 Milne gives -3:58:29.2; round that.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Barb 1977 only - Jun 12 2:00 1:00 D
Rule Barb 1977 1978 - Oct Sun>=1 2:00 0 S
Rule Barb 1978 1980 - Apr Sun>=15 2:00 1:00 D
@@ -2976,7 +2988,7 @@ Zone America/Barbados -3:58:29 - LMT 1924 # Bridgetown
# Belize
# Whitman entirely disagrees with Shanks; go with Shanks & Pottenger.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Belize 1918 1942 - Oct Sun>=2 0:00 0:30 -0530
Rule Belize 1919 1943 - Feb Sun>=9 0:00 0 CST
Rule Belize 1973 only - Dec 5 0:00 1:00 CDT
@@ -3013,7 +3025,7 @@ Zone Atlantic/Bermuda -4:19:18 - LMT 1930 Jan 1 2:00 # Hamilton
# Milne gives -5:36:13.3 as San José mean time; round to nearest.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule CR 1979 1980 - Feb lastSun 0:00 1:00 D
Rule CR 1979 1980 - Jun Sun>=1 0:00 0 S
Rule CR 1991 1992 - Jan Sat>=15 0:00 1:00 D
@@ -3187,7 +3199,7 @@ Zone America/Costa_Rica -5:36:13 - LMT 1890 # San José
# From Paul Eggert (2012-11-03):
# For now, assume the future rule is first Sunday in November.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Cuba 1928 only - Jun 10 0:00 1:00 D
Rule Cuba 1928 only - Oct 10 0:00 0 S
Rule Cuba 1940 1942 - Jun Sun>=1 0:00 1:00 D
@@ -3256,7 +3268,7 @@ Zone America/Havana -5:29:28 - LMT 1890
# decided to revert.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule DR 1966 only - Oct 30 0:00 1:00 EDT
Rule DR 1967 only - Feb 28 0:00 0 EST
Rule DR 1969 1973 - Oct lastSun 0:00 0:30 -0430
@@ -3273,7 +3285,7 @@ Zone America/Santo_Domingo -4:39:36 - LMT 1890
# El Salvador
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Salv 1987 1988 - May Sun>=1 0:00 1:00 D
Rule Salv 1987 1988 - Sep lastSun 0:00 0 S
# There are too many San Salvadors elsewhere, so use America/El_Salvador
@@ -3302,7 +3314,7 @@ Zone America/El_Salvador -5:56:48 - LMT 1921 # San Salvador
# (2006-04-19), says DST ends at 24:00. See
# http://www.sieca.org.gt/Sitio_publico/Energeticos/Doc/Medidas/Cambio_Horario_Nac_190406.pdf
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Guat 1973 only - Nov 25 0:00 1:00 D
Rule Guat 1974 only - Feb 24 0:00 0 S
Rule Guat 1983 only - May 21 0:00 1:00 D
@@ -3383,7 +3395,7 @@ Zone America/Guatemala -6:02:04 - LMT 1918 Oct 5
# I have not been able to find a more authoritative source:
# https://www.haitilibre.com/en/news-20319-haiti-notices-time-change-in-haiti.html
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Haiti 1983 only - May 8 0:00 1:00 D
Rule Haiti 1984 1987 - Apr lastSun 0:00 1:00 D
Rule Haiti 1983 1987 - Oct lastSun 0:00 0 S
@@ -3431,7 +3443,7 @@ Zone America/Port-au-Prince -4:49:20 - LMT 1890
# http://www.laprensahn.com/pais_nota.php?id04962=7386
# So it seems that Honduras will not enter DST this year....
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Hond 1987 1988 - May Sun>=1 0:00 1:00 D
Rule Hond 1987 1988 - Sep lastSun 0:00 0 S
Rule Hond 2006 only - May Sun>=1 0:00 1:00 D
@@ -3522,7 +3534,7 @@ Zone America/Martinique -4:04:20 - LMT 1890 # Fort-de-France
# The natural sun time is restored in all the national territory, in that the
# time is returned one hour at 01:00 am of October 1 of 2006.
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Nic 1979 1980 - Mar Sun>=16 0:00 1:00 D
Rule Nic 1979 1980 - Jun Mon>=23 0:00 0 S
Rule Nic 2005 only - Apr 10 0:00 1:00 D
diff --git a/make/data/tzdata/pacificnew b/make/data/tzdata/pacificnew
deleted file mode 100644
index f19a876372c..00000000000
--- a/make/data/tzdata/pacificnew
+++ /dev/null
@@ -1,52 +0,0 @@
-#
-# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-#
-# This code is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License version 2 only, as
-# published by the Free Software Foundation. Oracle designates this
-# particular file as subject to the "Classpath" exception as provided
-# by Oracle in the LICENSE file that accompanied this code.
-#
-# This code is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-# version 2 for more details (a copy is included in the LICENSE file that
-# accompanied this code).
-#
-# You should have received a copy of the GNU General Public License version
-# 2 along with this work; if not, write to the Free Software Foundation,
-# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-# or visit www.oracle.com if you need additional information or have any
-# questions.
-#
-# tzdb data for proposed US election time (this file is obsolete)
-
-# This file is in the public domain, so clarified as of
-# 2009-05-17 by Arthur David Olson.
-
-# From Arthur David Olson (1989-04-05):
-# On 1989-04-05, the U. S. House of Representatives passed (238-154) a bill
-# establishing "Pacific Presidential Election Time"; it was not acted on
-# by the Senate or signed into law by the President.
-# You might want to change the "PE" (Presidential Election) below to
-# "Q" (Quadrennial) to maintain three-character zone abbreviations.
-# If you're really conservative, you might want to change it to "D".
-# Avoid "L" (Leap Year), which won't be true in 2100.
-
-# If Presidential Election Time is ever established, replace "XXXX" below
-# with the year the law takes effect and uncomment the "##" lines.
-
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
-## Rule Twilite XXXX max - Apr Sun>=1 2:00 1:00 D
-## Rule Twilite XXXX max uspres Oct lastSun 2:00 1:00 PE
-## Rule Twilite XXXX max uspres Nov Sun>=7 2:00 0 S
-## Rule Twilite XXXX max nonpres Oct lastSun 2:00 0 S
-
-# Zone NAME STDOFF RULES/SAVE FORMAT [UNTIL]
-## Zone America/Los_Angeles-PET -8:00 US P%sT XXXX
-## -8:00 Twilite P%sT
-
-# For now...
-Link America/Los_Angeles US/Pacific-New ##
diff --git a/make/data/tzdata/southamerica b/make/data/tzdata/southamerica
index 51795f7621b..566dabfadb4 100644
--- a/make/data/tzdata/southamerica
+++ b/make/data/tzdata/southamerica
@@ -71,7 +71,7 @@
# I am sending modifications to the Argentine time zone table...
# AR was chosen because they are the ISO letters that represent Argentina.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Arg 1930 only - Dec 1 0:00 1:00 -
Rule Arg 1931 only - Apr 1 0:00 0 -
Rule Arg 1931 only - Oct 15 0:00 1:00 -
@@ -792,7 +792,7 @@ Zone America/La_Paz -4:32:36 - LMT 1890
# From Paul Eggert (2013-10-17):
# For now, assume western Amazonas will change as well.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
# Decree 20,466 (1931-10-01)
# Decree 21,896 (1932-01-10)
Rule Brazil 1931 only - Oct 3 11:00 1:00 -
@@ -1281,7 +1281,7 @@ Zone America/Rio_Branco -4:31:12 - LMT 1914
# For now, assume that they will not revert,
# since they have extended the expiration date once already.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Chile 1927 1931 - Sep 1 0:00 1:00 -
Rule Chile 1928 1932 - Apr 1 0:00 0 -
Rule Chile 1968 only - Nov 3 4:00u 1:00 -
@@ -1381,7 +1381,7 @@ Zone Antarctica/Palmer 0 - -00 1965
# Milne gives 4:56:16.4 for Bogotá time in 1899; round to nearest. He writes,
# "A variation of fifteen minutes in the public clocks of Bogota is not rare."
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule CO 1992 only - May 3 0:00 1:00 -
Rule CO 1993 only - Apr 4 0:00 0 -
# Zone NAME STDOFF RULES FORMAT [UNTIL]
@@ -1441,7 +1441,7 @@ Link America/Curacao America/Kralendijk # Caribbean Netherlands
# (Not one step back), the clocks went back in 1993 and the experiment was not
# repeated. For now, assume transitions were at 00:00 local time country-wide.
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Ecuador 1992 only - Nov 28 0:00 1:00 -
Rule Ecuador 1993 only - Feb 5 0:00 0 -
#
@@ -1535,7 +1535,7 @@ Zone Pacific/Galapagos -5:58:24 - LMT 1931 # Puerto Baquerizo Moreno
# For now we will assume permanent -03 for the Falklands
# until advised differently (to apply for 2012 and beyond, after the 2011
# experiment was apparently successful.)
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Falk 1937 1938 - Sep lastSun 0:00 1:00 -
Rule Falk 1938 1942 - Mar Sun>=19 0:00 0 -
Rule Falk 1939 only - Oct 1 0:00 1:00 -
@@ -1581,7 +1581,7 @@ Zone America/Guyana -3:52:40 - LMT 1915 Mar # Georgetown
# No time of the day is established for the adjustment, so people normally
# adjust their clocks at 0 hour of the given dates.
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Para 1975 1988 - Oct 1 0:00 1:00 -
Rule Para 1975 1978 - Mar 1 0:00 0 -
Rule Para 1979 1991 - Apr 1 0:00 0 -
@@ -1674,7 +1674,7 @@ Zone America/Asuncion -3:50:40 - LMT 1890
# From Paul Eggert (2006-03-22):
# Shanks & Pottenger don't have this transition. Assume 1986 was like 1987.
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Peru 1938 only - Jan 1 0:00 1:00 -
Rule Peru 1938 only - Apr 1 0:00 0 -
Rule Peru 1938 1939 - Sep lastSun 0:00 1:00 -
@@ -1770,7 +1770,7 @@ Link America/Port_of_Spain America/Tortola # Virgin Islands (UK)
# https://www.impo.com.uy/diariooficial/1926/03/10/2
# https://www.impo.com.uy/diariooficial/1926/03/18/2
#
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
+# Rule NAME FROM TO - IN ON AT SAVE LETTER/S
Rule Uruguay 1923 1925 - Oct 1 0:00 0:30 -
Rule Uruguay 1924 1926 - Apr 1 0:00 0 -
# From Tim Parenti (2018-02-15):
diff --git a/make/data/tzdata/systemv b/make/data/tzdata/systemv
deleted file mode 100644
index 9525ec47171..00000000000
--- a/make/data/tzdata/systemv
+++ /dev/null
@@ -1,62 +0,0 @@
-#
-# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
-#
-# This code is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License version 2 only, as
-# published by the Free Software Foundation. Oracle designates this
-# particular file as subject to the "Classpath" exception as provided
-# by Oracle in the LICENSE file that accompanied this code.
-#
-# This code is distributed in the hope that it will be useful, but WITHOUT
-# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-# version 2 for more details (a copy is included in the LICENSE file that
-# accompanied this code).
-#
-# You should have received a copy of the GNU General Public License version
-# 2 along with this work; if not, write to the Free Software Foundation,
-# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
-# or visit www.oracle.com if you need additional information or have any
-# questions.
-#
-# tzdb data for System V rules (this file is obsolete)
-
-# This file is in the public domain, so clarified as of
-# 2009-05-17 by Arthur David Olson.
-
-# Old rules, should the need arise.
-# No attempt is made to handle Newfoundland, since it cannot be expressed
-# using the System V "TZ" scheme (half-hour offset), or anything outside
-# North America (no support for non-standard DST start/end dates), nor
-# the changes in the DST rules in the US after 1976 (which occurred after
-# the old rules were written).
-#
-# If you need the old rules, uncomment ## lines.
-# Compile this *without* leap second correction for true conformance.
-
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S
-Rule SystemV min 1973 - Apr lastSun 2:00 1:00 D
-Rule SystemV min 1973 - Oct lastSun 2:00 0 S
-Rule SystemV 1974 only - Jan 6 2:00 1:00 D
-Rule SystemV 1974 only - Nov lastSun 2:00 0 S
-Rule SystemV 1975 only - Feb 23 2:00 1:00 D
-Rule SystemV 1975 only - Oct lastSun 2:00 0 S
-Rule SystemV 1976 max - Apr lastSun 2:00 1:00 D
-Rule SystemV 1976 max - Oct lastSun 2:00 0 S
-
-# Zone NAME STDOFF RULES/SAVE FORMAT [UNTIL]
-## Zone SystemV/AST4ADT -4:00 SystemV A%sT
-## Zone SystemV/EST5EDT -5:00 SystemV E%sT
-## Zone SystemV/CST6CDT -6:00 SystemV C%sT
-## Zone SystemV/MST7MDT -7:00 SystemV M%sT
-## Zone SystemV/PST8PDT -8:00 SystemV P%sT
-## Zone SystemV/YST9YDT -9:00 SystemV Y%sT
-## Zone SystemV/AST4 -4:00 - AST
-## Zone SystemV/EST5 -5:00 - EST
-## Zone SystemV/CST6 -6:00 - CST
-## Zone SystemV/MST7 -7:00 - MST
-## Zone SystemV/PST8 -8:00 - PST
-## Zone SystemV/YST9 -9:00 - YST
-## Zone SystemV/HST10 -10:00 - HST
diff --git a/make/devkit/createJMHBundle.sh b/make/devkit/createJMHBundle.sh
index b56950c41ec..b460ee75311 100644
--- a/make/devkit/createJMHBundle.sh
+++ b/make/devkit/createJMHBundle.sh
@@ -26,7 +26,7 @@
# Create a bundle in the build directory, containing what's needed to
# build and run JMH microbenchmarks from the OpenJDK build.
-JMH_VERSION=1.21
+JMH_VERSION=1.26
COMMONS_MATH3_VERSION=3.2
JOPT_SIMPLE_VERSION=4.6
diff --git a/make/devkit/createMacosxDevkit.sh b/make/devkit/createMacosxDevkit.sh
index 2a7dfe2037b..cd105823366 100644
--- a/make/devkit/createMacosxDevkit.sh
+++ b/make/devkit/createMacosxDevkit.sh
@@ -91,7 +91,6 @@ EXCLUDE_DIRS=" \
Platforms/AppleTVSimulator.platform \
Platforms/iPhoneSimulator.platform \
Platforms/WatchSimulator.platform \
- Contents/SharedFrameworks/LLDB.framework \
Contents/SharedFrameworks/ModelIO.framework \
Contents/SharedFrameworks/XCSUI.framework \
Contents/SharedFrameworks/SceneKit.framework \
diff --git a/make/devkit/createWindowsDevkit2017.sh b/make/devkit/createWindowsDevkit2017.sh
index 91227259bdf..42c13251293 100644
--- a/make/devkit/createWindowsDevkit2017.sh
+++ b/make/devkit/createWindowsDevkit2017.sh
@@ -138,8 +138,8 @@ cp -r "$VS_INSTALL_DIR/$REDIST_SUBDIR/x86" $DEVKIT_ROOT/VC/redist/
cp $DEVKIT_ROOT/VC/redist/x86/$MSVCR_DLL $DEVKIT_ROOT/VC/bin/x86
cp $DEVKIT_ROOT/VC/redist/x86/$MSVCP_DLL $DEVKIT_ROOT/VC/bin/x86
cp $DEVKIT_ROOT/VC/redist/x64/$MSVCR_DLL $DEVKIT_ROOT/VC/bin/x64
-cp $DEVKIT_ROOT/VC/redist/x64/$MSVCR_DLL $DEVKIT_ROOT/VC/bin/x64
-cp $DEVKIT_ROOT/VC/redist/arm64/$MSVCP_DLL $DEVKIT_ROOT/VC/bin/arm64
+cp $DEVKIT_ROOT/VC/redist/x64/$MSVCP_DLL $DEVKIT_ROOT/VC/bin/x64
+cp $DEVKIT_ROOT/VC/redist/arm64/$MSVCR_DLL $DEVKIT_ROOT/VC/bin/arm64
cp $DEVKIT_ROOT/VC/redist/arm64/$MSVCP_DLL $DEVKIT_ROOT/VC/bin/arm64
################################################################################
diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk
index 733658d5d8b..fb7d48f1e27 100644
--- a/make/hotspot/gensrc/GensrcAdlc.gmk
+++ b/make/hotspot/gensrc/GensrcAdlc.gmk
@@ -138,6 +138,7 @@ ifeq ($(call check-jvm-feature, compiler2), true)
ifeq ($(HOTSPOT_TARGET_CPU_ARCH), aarch64)
AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \
+ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_neon.ad \
$d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_sve.ad \
)))
endif
diff --git a/make/hotspot/gensrc/GensrcJvmti.gmk b/make/hotspot/gensrc/GensrcJvmti.gmk
index 312c8bc737a..b31a6f52292 100644
--- a/make/hotspot/gensrc/GensrcJvmti.gmk
+++ b/make/hotspot/gensrc/GensrcJvmti.gmk
@@ -106,17 +106,6 @@ $(eval $(call SetupJvmtiGeneration, jvmti.h, jvmtiH.xsl, \
$(eval $(call SetupJvmtiGeneration, jvmti.html, jvmti.xsl, \
-PARAM majorversion $(VERSION_FEATURE)))
-JVMTI_BC_SRCDIR := $(TOPDIR)/src/hotspot/share/interpreter/zero
-
-ifeq ($(call check-jvm-feature, zero), true)
- $(eval $(call SetupXslTransform, bytecodeInterpreterWithChecks.cpp, \
- XML_FILE := $(JVMTI_BC_SRCDIR)/bytecodeInterpreterWithChecks.xml, \
- XSL_FILE := $(JVMTI_BC_SRCDIR)/bytecodeInterpreterWithChecks.xsl, \
- OUTPUT_DIR := $(JVMTI_OUTPUTDIR), \
- DEPS := $(JVMTI_BC_SRCDIR)/bytecodeInterpreter.cpp, \
- ))
-endif
-
################################################################################
# Copy jvmti.h to include dir
diff --git a/make/hotspot/lib/CompileJvm.gmk b/make/hotspot/lib/CompileJvm.gmk
index 441c09a3853..65edd047571 100644
--- a/make/hotspot/lib/CompileJvm.gmk
+++ b/make/hotspot/lib/CompileJvm.gmk
@@ -91,11 +91,11 @@ DISABLED_WARNINGS_clang := tautological-compare \
undefined-var-template sometimes-uninitialized unknown-pragmas \
delete-non-virtual-dtor missing-braces char-subscripts \
ignored-qualifiers missing-field-initializers mismatched-tags \
- shift-negative-value
+ shift-negative-value misleading-indentation
DISABLED_WARNINGS_xlc := tautological-compare shift-negative-value
-DISABLED_WARNINGS_microsoft := 4100 4127 4201 4244 4291 4351 \
+DISABLED_WARNINGS_microsoft := 4100 4127 4146 4201 4244 4291 4351 \
4511 4512 4514 4624 4996
################################################################################
diff --git a/make/hotspot/lib/JvmFeatures.gmk b/make/hotspot/lib/JvmFeatures.gmk
index 3647806e1d7..d96d006c5fc 100644
--- a/make/hotspot/lib/JvmFeatures.gmk
+++ b/make/hotspot/lib/JvmFeatures.gmk
@@ -126,6 +126,7 @@ ifneq ($(call check-jvm-feature, cds), true)
dynamicArchive.cpp \
filemap.cpp \
heapShared.cpp \
+ lambdaFormInvokers.cpp \
metaspaceShared.cpp \
metaspaceShared_$(HOTSPOT_TARGET_CPU).cpp \
metaspaceShared_$(HOTSPOT_TARGET_CPU_ARCH).cpp \
diff --git a/make/hotspot/symbols/symbols-aix b/make/hotspot/symbols/symbols-aix
index 0efd2dba97f..92703573a5f 100644
--- a/make/hotspot/symbols/symbols-aix
+++ b/make/hotspot/symbols/symbols-aix
@@ -21,7 +21,7 @@
# questions.
#
-JVM_handle_linux_signal
+JVM_handle_aix_signal
numa_error
numa_warn
sysThreadAvailableStackWithSlack
diff --git a/make/hotspot/symbols/symbols-unix b/make/hotspot/symbols/symbols-unix
index 97aa40b970b..1781d84ab94 100644
--- a/make/hotspot/symbols/symbols-unix
+++ b/make/hotspot/symbols/symbols-unix
@@ -143,14 +143,15 @@ JVM_InternString
JVM_Interrupt
JVM_InvokeMethod
JVM_IsArrayClass
-JVM_IsDynamicDumpingEnabled
-JVM_IsSharingEnabled
+JVM_IsCDSDumpingEnabled
JVM_IsConstructorIx
+JVM_IsDumpingClassList
JVM_IsHiddenClass
JVM_IsInterface
JVM_IsPrimitiveClass
JVM_IsRecord
JVM_IsSameClassPackage
+JVM_IsSharingEnabled
JVM_IsSupportedJNIVersion
JVM_IsThreadAlive
JVM_IsVMGeneratedMethodIx
@@ -158,6 +159,7 @@ JVM_LatestUserDefinedLoader
JVM_LoadLibrary
JVM_LookupDefineClass
JVM_LookupLambdaProxyClassFromArchive
+JVM_LogLambdaFormInvoker
JVM_MaxMemory
JVM_MaxObjectInspectionAge
JVM_MonitorNotify
@@ -169,11 +171,13 @@ JVM_NativePath
JVM_NewArray
JVM_NewInstanceFromConstructor
JVM_NewMultiArray
+JVM_PhantomReferenceRefersTo
JVM_RaiseSignal
JVM_RawMonitorCreate
JVM_RawMonitorDestroy
JVM_RawMonitorEnter
JVM_RawMonitorExit
+JVM_ReferenceRefersTo
JVM_RegisterLambdaProxyClassForArchiving
JVM_RegisterSignal
JVM_ReleaseUTF
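The added JVM_ReferenceRefersTo and JVM_PhantomReferenceRefersTo exports appear to back the Reference.refersTo API introduced in JDK 16; a minimal usage sketch (not part of the patch) of what that API enables:

    import java.lang.ref.WeakReference;

    public class RefersToDemo {
        public static void main(String[] args) {
            Object referent = new Object();
            WeakReference<Object> ref = new WeakReference<>(referent);
            // refersTo tests the referent without strengthening it the way get() does,
            // which is the only option for phantom references, whose get() is always null.
            System.out.println(ref.refersTo(referent)); // true
            System.out.println(ref.refersTo(null));     // false while the referent is uncleared
        }
    }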
diff --git a/make/jdk/src/classes/build/tools/blacklistedcertsconverter/BlacklistedCertsConverter.java b/make/jdk/src/classes/build/tools/blacklistedcertsconverter/BlacklistedCertsConverter.java
index 54c60eb43d1..653a1db10dd 100644
--- a/make/jdk/src/classes/build/tools/blacklistedcertsconverter/BlacklistedCertsConverter.java
+++ b/make/jdk/src/classes/build/tools/blacklistedcertsconverter/BlacklistedCertsConverter.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -25,14 +25,24 @@
package build.tools.blacklistedcertsconverter;
+import java.io.IOException;
+import java.math.BigInteger;
import java.security.MessageDigest;
+import java.security.PublicKey;
import java.security.cert.Certificate;
import java.security.cert.CertificateFactory;
import java.security.cert.X509Certificate;
+import java.security.interfaces.ECPublicKey;
+import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collection;
+import java.util.List;
import java.util.Set;
import java.util.TreeSet;
+import sun.security.util.DerInputStream;
+import sun.security.util.DerOutputStream;
+import sun.security.util.DerValue;
/**
* Converts blacklisted.certs.pem from System.in to blacklisted.certs in
@@ -75,8 +85,8 @@ public static void main(String[] args) throws Exception {
// Output sorted so that it's easy to locate an entry.
Set<String> fingerprints = new TreeSet<>();
for (Certificate cert: certs) {
- fingerprints.add(
- getCertificateFingerPrint(mdAlg, (X509Certificate)cert));
+ fingerprints.addAll(
+ getCertificateFingerPrints(mdAlg, (X509Certificate)cert));
}
for (String s: fingerprints) {
@@ -97,17 +107,90 @@ private static void byte2hex(byte b, StringBuffer buf) {
}
/**
- * Gets the requested finger print of the certificate.
+ * Computes the possible fingerprints of the certificate.
*/
- private static String getCertificateFingerPrint(
+ private static List<String> getCertificateFingerPrints(
String mdAlg, X509Certificate cert) throws Exception {
- byte[] encCertInfo = cert.getEncoded();
- MessageDigest md = MessageDigest.getInstance(mdAlg);
- byte[] digest = md.digest(encCertInfo);
- StringBuffer buf = new StringBuffer();
- for (int i = 0; i < digest.length; i++) {
- byte2hex(digest[i], buf);
+ List<String> fingerprints = new ArrayList<>();
+ for (byte[] encoding : altEncodings(cert)) {
+ MessageDigest md = MessageDigest.getInstance(mdAlg);
+ byte[] digest = md.digest(encoding);
+ StringBuffer buf = new StringBuffer();
+ for (int i = 0; i < digest.length; i++) {
+ byte2hex(digest[i], buf);
+ }
+ fingerprints.add(buf.toString());
+ }
+ return fingerprints;
+ }
+
+ private static List<byte[]> altEncodings(X509Certificate c)
+ throws Exception {
+ List<byte[]> result = new ArrayList<>();
+
+ DerValue d = new DerValue(c.getEncoded());
+ DerValue[] seq = new DerValue[3];
+ // tbsCertificate
+ seq[0] = d.data.getDerValue();
+ // signatureAlgorithm
+ seq[1] = d.data.getDerValue();
+ // signature
+ seq[2] = d.data.getDerValue();
+
+ List<DerValue> algIds = Arrays.asList(seq[1], altAlgId(seq[1]));
+
+ List<DerValue> sigs;
+ PublicKey p = c.getPublicKey();
+ if (p instanceof ECPublicKey) {
+ ECPublicKey ep = (ECPublicKey) p;
+ BigInteger mod = ep.getParams().getOrder();
+ sigs = Arrays.asList(seq[2], altSig(mod, seq[2]));
+ } else {
+ sigs = Arrays.asList(seq[2]);
+ }
+
+ for (DerValue algId : algIds) {
+ for (DerValue sig : sigs) {
+ DerOutputStream tmp = new DerOutputStream();
+ tmp.putDerValue(seq[0]);
+ tmp.putDerValue(algId);
+ tmp.putDerValue(sig);
+ DerOutputStream tmp2 = new DerOutputStream();
+ tmp2.write(DerValue.tag_Sequence, tmp);
+ result.add(tmp2.toByteArray());
+ }
+ }
+ return result;
+ }
+
+ private static DerValue altSig(BigInteger mod, DerValue sig)
+ throws IOException {
+ byte[] sigBits = sig.getBitString();
+ DerInputStream in =
+ new DerInputStream(sigBits, 0, sigBits.length, false);
+ DerValue[] values = in.getSequence(2);
+ BigInteger r = values[0].getBigInteger();
+ BigInteger s = values[1].getBigInteger();
+ BigInteger s2 = s.negate().mod(mod);
+ DerOutputStream out = new DerOutputStream();
+ out.putInteger(r);
+ out.putInteger(s2);
+ DerOutputStream tmp = new DerOutputStream();
+ tmp.putBitString(new DerValue(DerValue.tag_Sequence,
+ out.toByteArray()).toByteArray());
+ return new DerValue(tmp.toByteArray());
+ }
+
+ private static DerValue altAlgId(DerValue algId) throws IOException {
+ DerInputStream in = algId.toDerInputStream();
+ DerOutputStream bytes = new DerOutputStream();
+ bytes.putOID(in.getOID());
+ // encode parameters as NULL if not present or omit if NULL
+ if (in.available() == 0) {
+ bytes.putNull();
}
- return buf.toString();
+ DerOutputStream tmp = new DerOutputStream();
+ tmp.write(DerValue.tag_Sequence, bytes);
+ return new DerValue(tmp.toByteArray());
}
}
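The rewritten converter hashes several re-encodings of each certificate: the AlgorithmIdentifier with and without an explicit NULL parameters field, and, for EC keys, the signature with s replaced by n - s (both (r, s) and (r, n - s) verify under ECDSA), since each re-encoding denotes the same blacklisted certificate but produces a different fingerprint. A standalone sketch of the per-encoding digest step only, with "cert.pem" as a placeholder input path:

    import java.io.FileInputStream;
    import java.security.MessageDigest;
    import java.security.cert.CertificateFactory;
    import java.security.cert.X509Certificate;

    public class Fingerprint {
        public static void main(String[] args) throws Exception {
            // Digest one certificate encoding the same way the converter digests
            // each alternative encoding it generates ("cert.pem" is a placeholder).
            try (FileInputStream in = new FileInputStream("cert.pem")) {
                X509Certificate cert = (X509Certificate)
                        CertificateFactory.getInstance("X.509").generateCertificate(in);
                byte[] digest = MessageDigest.getInstance("SHA-256").digest(cert.getEncoded());
                StringBuilder hex = new StringBuilder();
                for (byte b : digest) {
                    hex.append(String.format("%02X", b));
                }
                System.out.println(hex);
            }
        }
    }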
diff --git a/make/jdk/src/classes/build/tools/spp/Spp.java b/make/jdk/src/classes/build/tools/spp/Spp.java
index 6921c65667b..2a0cb57bc39 100644
--- a/make/jdk/src/classes/build/tools/spp/Spp.java
+++ b/make/jdk/src/classes/build/tools/spp/Spp.java
@@ -106,7 +106,7 @@ public static void main(String args[]) throws Exception {
static final String LNSEP = System.getProperty("line.separator");
static final String KEY = "([a-zA-Z0-9]+)";
static final String VAR = "([a-zA-Z0-9_\\-]+)";
- static final String TEXT = "([a-zA-Z0-9&;,.<>/#() \\?\\[\\]\\$]+)"; // $ -- hack embedded $var$
+ static final String TEXT = "([\\p{Print}&&[^{#:}]]+)";
static final int GN_NOT = 1;
static final int GN_KEY = 2;
@@ -140,6 +140,10 @@ void append(StringBuffer buf, String ln,
}
}
}
+ if (repl == null) {
+ System.err.println("Error: undefined variable in line " + ln);
+ System.exit(-1);
+ }
vardef.appendReplacement(buf, repl);
}
vardef.appendTail(buf);
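The new TEXT pattern swaps the hand-maintained allow-list for a character-class intersection: any printable character except spp's own delimiters '{', '#', ':' and '}'. The added repl == null check makes an undefined variable a hard error with an explicit message instead of failing later in appendReplacement. A quick illustration of the new class (not part of the patch):

    import java.util.regex.Pattern;

    public class TextPatternDemo {
        public static void main(String[] args) {
            // Same character class as the new TEXT group: printable characters
            // minus the '{', '#', ':' and '}' delimiters used by spp markup.
            Pattern text = Pattern.compile("([\\p{Print}&&[^{#:}]]+)");
            System.out.println(text.matcher("foo && $bar[0] <ok>").matches()); // true
            System.out.println(text.matcher("foo:bar").matches());             // false, ':' is excluded
        }
    }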
diff --git a/make/modules/java.base/Copy.gmk b/make/modules/java.base/Copy.gmk
index 9071f4e6e37..040b7588ba1 100644
--- a/make/modules/java.base/Copy.gmk
+++ b/make/modules/java.base/Copy.gmk
@@ -182,12 +182,16 @@ endif
################################################################################
-$(eval $(call SetupCopyFiles, COPY_NET_PROPERTIES, \
- FILES := $(TOPDIR)/src/java.base/share/conf/net.properties, \
- DEST := $(CONF_DST_DIR), \
-))
+NET_PROPERTIES_SRCS := $(TOPDIR)/src/java.base/share/conf/net.properties \
+ $(TOPDIR)/src/java.base/$(OPENJDK_TARGET_OS_TYPE)/conf/net.properties
+
+NET_PROPERTIES_DST := $(CONF_DST_DIR)/net.properties
+
+$(NET_PROPERTIES_DST): $(NET_PROPERTIES_SRCS)
+ $(call MakeTargetDir)
+ $(CAT) $(NET_PROPERTIES_SRCS) > $@
-TARGETS += $(COPY_NET_PROPERTIES)
+TARGETS += $(NET_PROPERTIES_DST)
ifeq ($(call isTargetOs, linux), true)
$(eval $(call SetupCopyFiles, COPY_SDP_CONF, \
diff --git a/make/modules/java.base/gendata/GendataTZDB.gmk b/make/modules/java.base/gendata/GendataTZDB.gmk
index 54e6582d81d..1352178694f 100644
--- a/make/modules/java.base/gendata/GendataTZDB.gmk
+++ b/make/modules/java.base/gendata/GendataTZDB.gmk
@@ -29,7 +29,7 @@ GENDATA_TZDB :=
# Time zone data file creation
#
TZDATA_DIR := $(TOPDIR)/make/data/tzdata
-TZDATA_TZFILE := africa antarctica asia australasia europe northamerica pacificnew southamerica backward etcetera gmt jdk11_backward
+TZDATA_TZFILE := africa antarctica asia australasia europe northamerica southamerica backward etcetera gmt jdk11_backward
TZDATA_TZFILES := $(addprefix $(TZDATA_DIR)/,$(TZDATA_TZFILE))
GENDATA_TZDB_DAT := $(SUPPORT_OUTPUTDIR)/modules_libs/$(MODULE)/tzdb.dat
diff --git a/make/modules/java.base/lib/CoreLibraries.gmk b/make/modules/java.base/lib/CoreLibraries.gmk
index f2b94fe717e..1d5fede2aa8 100644
--- a/make/modules/java.base/lib/CoreLibraries.gmk
+++ b/make/modules/java.base/lib/CoreLibraries.gmk
@@ -49,7 +49,7 @@ $(eval $(call SetupNativeCompilation, BUILD_LIBFDLIBM, \
CFLAGS_windows_debug := -DLOGGING, \
CFLAGS_aix := -qfloat=nomaf, \
DISABLED_WARNINGS_gcc := sign-compare misleading-indentation array-bounds, \
- DISABLED_WARNINGS_clang := sign-compare, \
+ DISABLED_WARNINGS_clang := sign-compare misleading-indentation, \
DISABLED_WARNINGS_microsoft := 4146 4244 4018, \
ARFLAGS := $(ARFLAGS), \
OBJECT_DIR := $(SUPPORT_OUTPUTDIR)/native/$(MODULE)/libfdlibm, \
diff --git a/make/modules/java.desktop/lib/Awt2dLibraries.gmk b/make/modules/java.desktop/lib/Awt2dLibraries.gmk
index 7fbd1049f89..3203378d00a 100644
--- a/make/modules/java.desktop/lib/Awt2dLibraries.gmk
+++ b/make/modules/java.desktop/lib/Awt2dLibraries.gmk
@@ -435,7 +435,6 @@ endif
ifeq ($(USE_EXTERNAL_HARFBUZZ), true)
LIBHARFBUZZ_LIBS := $(HARFBUZZ_LIBS)
else
- HARFBUZZ_CFLAGS := -DHAVE_OT -DHAVE_FALLBACK -DHAVE_UCDN -DHAVE_ROUND
# This is better than adding EXPORT_ALL_SYMBOLS
ifneq ($(filter $(TOOLCHAIN_TYPE), gcc clang), )
@@ -493,7 +492,7 @@ else
maybe-uninitialized class-memaccess, \
DISABLED_WARNINGS_clang := unused-value incompatible-pointer-types \
tautological-constant-out-of-range-compare int-to-pointer-cast \
- undef missing-field-initializers, \
+ undef missing-field-initializers range-loop-analysis, \
DISABLED_WARNINGS_microsoft := 4267 4244 4090 4146 4334 4819 4101 4068 4805 4138, \
LDFLAGS := $(LDFLAGS_JDKLIB) \
$(call SET_SHARED_LIBRARY_ORIGIN), \
diff --git a/make/modules/jdk.javadoc/Gendata.gmk b/make/modules/jdk.javadoc/Gendata.gmk
index 5b4485808c7..0ee146a1e21 100644
--- a/make/modules/jdk.javadoc/Gendata.gmk
+++ b/make/modules/jdk.javadoc/Gendata.gmk
@@ -54,7 +54,7 @@ $(eval $(call SetupJavaCompilation, COMPILE_CREATE_SYMBOLS, \
SRC := $(TOPDIR)/make/langtools/src/classes \
$(TOPDIR)/src/jdk.jdeps/share/classes, \
INCLUDES := build/tools/symbolgenerator com/sun/tools/classfile, \
- BIN := $(BUILDTOOLS_OUTPUTDIR)/create_symbols, \
+ BIN := $(BUILDTOOLS_OUTPUTDIR)/create_symbols_javadoc, \
DISABLED_WARNINGS := options, \
JAVAC_FLAGS := \
$(INTERIM_LANGTOOLS_ARGS) \
@@ -71,7 +71,7 @@ $(SUPPORT_OUTPUTDIR)/javadoc-symbols/symbols: \
$(ECHO) Creating javadoc element list
$(JAVA_SMALL) $(INTERIM_LANGTOOLS_ARGS) \
$(COMPILECREATESYMBOLS_ADD_EXPORTS) \
- -classpath $(BUILDTOOLS_OUTPUTDIR)/create_symbols \
+ -classpath $(BUILDTOOLS_OUTPUTDIR)/create_symbols_javadoc \
build.tools.symbolgenerator.CreateSymbols \
build-javadoc-data \
$(CT_DATA_DESCRIPTION) \
@@ -79,7 +79,7 @@ $(SUPPORT_OUTPUTDIR)/javadoc-symbols/symbols: \
11
$(JAVA_SMALL) $(INTERIM_LANGTOOLS_ARGS) \
$(COMPILECREATESYMBOLS_ADD_EXPORTS) \
- -classpath $(BUILDTOOLS_OUTPUTDIR)/create_symbols \
+ -classpath $(BUILDTOOLS_OUTPUTDIR)/create_symbols_javadoc \
build.tools.symbolgenerator.JavadocElementList \
$(JDK_OUTPUTDIR)/modules/jdk.javadoc/jdk/javadoc/internal/doclets/toolkit/resources/releases/element-list-$(JDK_SOURCE_TARGET_VERSION).txt \
$(JAVADOC_MODULESOURCEPATH) \
diff --git a/make/modules/jdk.incubator.jpackage/Gensrc.gmk b/make/modules/jdk.jpackage/Gensrc.gmk
similarity index 93%
rename from make/modules/jdk.incubator.jpackage/Gensrc.gmk
rename to make/modules/jdk.jpackage/Gensrc.gmk
index 5948a80f120..6f3e8b08119 100644
--- a/make/modules/jdk.incubator.jpackage/Gensrc.gmk
+++ b/make/modules/jdk.jpackage/Gensrc.gmk
@@ -31,7 +31,7 @@ include GensrcCommonJdk.gmk
ifeq ($(call isTargetOs, macosx), true)
ENTITLEMENTS_SRC_FILE := $(TOPDIR)/make/data/macosxsigning/java.plist
ENTITLEMENTS_TARGET_FILE := \
- $(SUPPORT_OUTPUTDIR)/gensrc/$(MODULE)/jdk/incubator/jpackage/internal/resources/entitlements.plist
+ $(SUPPORT_OUTPUTDIR)/gensrc/$(MODULE)/jdk/jpackage/internal/resources/entitlements.plist
$(ENTITLEMENTS_TARGET_FILE): $(ENTITLEMENTS_SRC_FILE)
$(call install-file)
diff --git a/make/modules/jdk.incubator.jpackage/Launcher.gmk b/make/modules/jdk.jpackage/Launcher.gmk
similarity index 95%
rename from make/modules/jdk.incubator.jpackage/Launcher.gmk
rename to make/modules/jdk.jpackage/Launcher.gmk
index 7a25dae733c..8d553d5c107 100644
--- a/make/modules/jdk.incubator.jpackage/Launcher.gmk
+++ b/make/modules/jdk.jpackage/Launcher.gmk
@@ -26,5 +26,5 @@
include LauncherCommon.gmk
$(eval $(call SetupBuildLauncher, jpackage, \
- MAIN_CLASS := jdk.incubator.jpackage.main.Main, \
+ MAIN_CLASS := jdk.jpackage.main.Main, \
))
diff --git a/make/modules/jdk.incubator.jpackage/Lib.gmk b/make/modules/jdk.jpackage/Lib.gmk
similarity index 91%
rename from make/modules/jdk.incubator.jpackage/Lib.gmk
rename to make/modules/jdk.jpackage/Lib.gmk
index 7ffef99afe4..7dfb70be5a6 100644
--- a/make/modules/jdk.incubator.jpackage/Lib.gmk
+++ b/make/modules/jdk.jpackage/Lib.gmk
@@ -29,8 +29,8 @@ include LibCommon.gmk
JPACKAGE_APPLAUNCHER_SRC := \
- $(call FindSrcDirsForComponent, jdk.incubator.jpackage, applauncher) \
- $(call FindSrcDirsForComponent, jdk.incubator.jpackage, common)
+ $(call FindSrcDirsForComponent, jdk.jpackage, applauncher) \
+ $(call FindSrcDirsForComponent, jdk.jpackage, common)
ifeq ($(call isTargetOs, windows), true)
@@ -42,7 +42,7 @@ else
endif
-JPACKAGE_OUTPUT_DIR := $(JDK_OUTPUTDIR)/modules/$(MODULE)/jdk/incubator/jpackage/internal/resources
+JPACKAGE_OUTPUT_DIR := $(JDK_OUTPUTDIR)/modules/$(MODULE)/jdk/jpackage/internal/resources
JPACKAGE_CXXFLAGS_windows := -EHsc -DUNICODE -D_UNICODE
# Output app launcher executable in resources dir, and symbols in the object dir
@@ -73,7 +73,7 @@ ifeq ($(call isTargetOs, windows), true)
$(eval $(call SetupJdkLibrary, BUILD_LIB_JPACKAGE, \
NAME := jpackage, \
OPTIMIZATION := LOW, \
- EXTRA_SRC := jdk.incubator.jpackage:common, \
+ EXTRA_SRC := jdk.jpackage:common, \
CXXFLAGS := $(CXXFLAGS_JDKLIB) $(JPACKAGE_CXXFLAGS_windows), \
LDFLAGS := $(LDFLAGS_JDKLIB) $(LDFLAGS_CXX_JDK) \
$(call SET_SHARED_LIBRARY_ORIGIN), \
@@ -99,8 +99,8 @@ ifeq ($(call isTargetOs, windows), true)
TARGETS += $(BUILD_LIB_WIXHELPER)
JPACKAGE_MSIWRAPPER_SRC := \
- $(call FindSrcDirsForComponent, jdk.incubator.jpackage, msiwrapper) \
- $(call FindSrcDirsForComponent, jdk.incubator.jpackage, common)
+ $(call FindSrcDirsForComponent, jdk.jpackage, msiwrapper) \
+ $(call FindSrcDirsForComponent, jdk.jpackage, common)
# Build exe installer wrapper for msi installer
$(eval $(call SetupJdkExecutable, BUILD_JPACKAGE_MSIWRAPPER, \
diff --git a/make/scripts/compare.sh b/make/scripts/compare.sh
index 25630199a21..5d0e846e755 100644
--- a/make/scripts/compare.sh
+++ b/make/scripts/compare.sh
@@ -696,7 +696,7 @@ compare_bin_file() {
# pdb files.
PDB_DIRS="$(ls -d \
{$OTHER,$THIS}/support/modules_{cmds,libs}/{*,*/*} \
- {$OTHER,$THIS}/support/native/jdk.incubator.jpackage/* \
+ {$OTHER,$THIS}/support/native/jdk.jpackage/* \
)"
export _NT_SYMBOL_PATH="$(echo $PDB_DIRS | tr ' ' ';')"
fi
diff --git a/make/test/BuildMicrobenchmark.gmk b/make/test/BuildMicrobenchmark.gmk
index 3bbbea47b8e..55e5026eb38 100644
--- a/make/test/BuildMicrobenchmark.gmk
+++ b/make/test/BuildMicrobenchmark.gmk
@@ -90,11 +90,10 @@ $(eval $(call SetupJavaCompilation, BUILD_JDK_MICROBENCHMARK, \
TARGET_RELEASE := $(TARGET_RELEASE_NEWJDK_UPGRADED), \
SMALL_JAVA := false, \
CLASSPATH := $(MICROBENCHMARK_CLASSPATH), \
- DISABLED_WARNINGS := processing rawtypes cast serial preview, \
+ DISABLED_WARNINGS := processing rawtypes cast serial, \
SRC := $(MICROBENCHMARK_SRC), \
BIN := $(MICROBENCHMARK_CLASSES), \
JAVA_FLAGS := --add-modules jdk.unsupported --limit-modules java.management, \
- JAVAC_FLAGS := --enable-preview, \
))
$(BUILD_JDK_MICROBENCHMARK): $(JMH_COMPILE_JARS)
diff --git a/src/demo/share/java2d/J2DBench/src/j2dbench/tests/cmm/CMMTests.java b/src/demo/share/java2d/J2DBench/src/j2dbench/tests/cmm/CMMTests.java
index 210970f6469..3c0f936358c 100644
--- a/src/demo/share/java2d/J2DBench/src/j2dbench/tests/cmm/CMMTests.java
+++ b/src/demo/share/java2d/J2DBench/src/j2dbench/tests/cmm/CMMTests.java
@@ -73,14 +73,16 @@ public static void init() {
ColorSpace.CS_sRGB,
ColorSpace.CS_GRAY,
ColorSpace.CS_LINEAR_RGB,
- ColorSpace.CS_CIEXYZ
+ ColorSpace.CS_CIEXYZ,
+ ColorSpace.CS_PYCC
};
String[] csNames = new String[]{
"CS_sRGB",
"CS_GRAY",
"CS_LINEAR_RGB",
- "CS_CIEXYZ"
+ "CS_CIEXYZ",
+ "CS_PYCC"
};
csList = new Option.IntList(cmmOptRoot,
diff --git a/src/demo/share/jfc/Notepad/Notepad.java b/src/demo/share/jfc/Notepad/Notepad.java
index 3ebe3f07d22..cb4552f94cf 100644
--- a/src/demo/share/jfc/Notepad/Notepad.java
+++ b/src/demo/share/jfc/Notepad/Notepad.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -60,7 +60,7 @@
* @author Timothy Prinzing
*/
@SuppressWarnings("serial")
-class Notepad extends JPanel {
+public class Notepad extends JPanel {
protected static Properties properties;
private static ResourceBundle resources;
diff --git a/src/hotspot/cpu/aarch64/aarch64-asmtest.py b/src/hotspot/cpu/aarch64/aarch64-asmtest.py
index 104104b09a9..615fe5e045f 100644
--- a/src/hotspot/cpu/aarch64/aarch64-asmtest.py
+++ b/src/hotspot/cpu/aarch64/aarch64-asmtest.py
@@ -1,4 +1,7 @@
+import os
import random
+import subprocess
+import sys
AARCH64_AS = "as"
AARCH64_OBJDUMP = "objdump"
@@ -129,6 +132,8 @@ class OperandFactory:
_modes = {'x' : GeneralRegister,
'w' : GeneralRegister,
+ 'b' : FloatRegister,
+ 'h' : FloatRegister,
's' : FloatRegister,
'd' : FloatRegister,
'z' : FloatZero,
@@ -198,16 +203,16 @@ def __init__(self, name, mode):
self.isFloat = (mode == 'd') | (mode == 's')
if self.isFloat:
self.isWord = mode != 'd'
- self.asmRegPrefix = ["d", "s"][self.isWord]
+ self.asmRegPrefix = ["d", "s"][self.isWord]
else:
self.isWord = mode != 'x'
self.asmRegPrefix = ["x", "w"][self.isWord]
-
+
def name(self):
return self._name + (self.mode if self.mode != 'x' else '')
-
+
def aname(self):
- return (self._name+mode if (mode == 'b' or mode == 'h')
+ return (self._name+mode if (mode == 'b' or mode == 'h')
else self._name)
class ThreeRegInstruction(Instruction):
@@ -220,17 +225,17 @@ def generate(self):
def cstr(self):
return (super(ThreeRegInstruction, self).cstr()
- + ('%s, %s, %s'
+ + ('%s, %s, %s'
% (self.reg[0],
self.reg[1], self.reg[2])))
-
+
def astr(self):
prefix = self.asmRegPrefix
return (super(ThreeRegInstruction, self).astr()
- + ('%s, %s, %s'
+ + ('%s, %s, %s'
% (self.reg[0].astr(prefix),
self.reg[1].astr(prefix), self.reg[2].astr(prefix))))
-
+
class FourRegInstruction(ThreeRegInstruction):
def generate(self):
@@ -241,12 +246,12 @@ def generate(self):
def cstr(self):
return (super(FourRegInstruction, self).cstr()
+ (', %s' % self.reg[3]))
-
+
def astr(self):
prefix = self.asmRegPrefix
return (super(FourRegInstruction, self).astr()
+ (', %s' % self.reg[3].astr(prefix)))
-
+
class TwoRegInstruction(Instruction):
def generate(self):
@@ -261,17 +266,17 @@ def cstr(self):
def astr(self):
prefix = self.asmRegPrefix
return (super(TwoRegInstruction, self).astr()
- + ('%s, %s'
+ + ('%s, %s'
% (self.reg[0].astr(prefix),
self.reg[1].astr(prefix))))
-
+
class TwoRegImmedInstruction(TwoRegInstruction):
def generate(self):
super(TwoRegImmedInstruction, self).generate()
self.immed = random.randint(0, 1<<11 -1)
return self
-
+
def cstr(self):
return (super(TwoRegImmedInstruction, self).cstr()
+ ', %su' % self.immed)
@@ -301,9 +306,9 @@ def generate(self):
self.kind = ShiftKind().generate()
self.distance = random.randint(0, (1<<5)-1 if self.isWord else (1<<6)-1)
return self
-
+
def cstr(self):
- return ('%s, Assembler::%s, %s);'
+ return ('%s, Assembler::%s, %s);'
% (ThreeRegInstruction.cstr(self),
self.kind.cstr(), self.distance))
@@ -314,9 +319,9 @@ def astr(self):
self.distance))
class AddSubCarryOp(ThreeRegInstruction):
-
+
def cstr(self):
- return ('%s);'
+ return ('%s);'
% (ThreeRegInstruction.cstr(self)))
class AddSubExtendedOp(ThreeRegInstruction):
@@ -332,76 +337,75 @@ def generate(self):
def cstr(self):
return (super(AddSubExtendedOp, self).cstr()
- + (", ext::" + AddSubExtendedOp.optNames[self.option]
+ + (", ext::" + AddSubExtendedOp.optNames[self.option]
+ ", " + str(self.amount) + ");"))
-
+
def astr(self):
return (super(AddSubExtendedOp, self).astr()
- + (", " + AddSubExtendedOp.optNames[self.option]
+ + (", " + AddSubExtendedOp.optNames[self.option]
+ " #" + str(self.amount)))
class AddSubImmOp(TwoRegImmedInstruction):
def cstr(self):
return super(AddSubImmOp, self).cstr() + ");"
-
+
class LogicalImmOp(AddSubImmOp):
# These tables are legal immediate logical operands
immediates32 \
- = [0x1, 0x3f, 0x1f0, 0x7e0,
- 0x1c00, 0x3ff0, 0x8000, 0x1e000,
- 0x3e000, 0x78000, 0xe0000, 0x100000,
- 0x1fffe0, 0x3fe000, 0x780000, 0x7ffff8,
- 0xff8000, 0x1800180, 0x1fffc00, 0x3c003c0,
- 0x3ffff00, 0x7c00000, 0x7fffe00, 0xf000f00,
- 0xfffe000, 0x18181818, 0x1ffc0000, 0x1ffffffe,
- 0x3f003f00, 0x3fffe000, 0x60006000, 0x7f807f80,
- 0x7ffffc00, 0x800001ff, 0x803fffff, 0x9f9f9f9f,
- 0xc0000fff, 0xc0c0c0c0, 0xe0000000, 0xe003e003,
- 0xe3ffffff, 0xf0000fff, 0xf0f0f0f0, 0xf80000ff,
- 0xf83ff83f, 0xfc00007f, 0xfc1fffff, 0xfe0001ff,
- 0xfe3fffff, 0xff003fff, 0xff800003, 0xff87ff87,
- 0xffc00fff, 0xffe0000f, 0xffefffef, 0xfff1fff1,
- 0xfff83fff, 0xfffc0fff, 0xfffe0fff, 0xffff3fff,
- 0xffffc007, 0xffffe1ff, 0xfffff80f, 0xfffffe07,
+ = [0x1, 0x3f, 0x1f0, 0x7e0,
+ 0x1c00, 0x3ff0, 0x8000, 0x1e000,
+ 0x3e000, 0x78000, 0xe0000, 0x100000,
+ 0x1fffe0, 0x3fe000, 0x780000, 0x7ffff8,
+ 0xff8000, 0x1800180, 0x1fffc00, 0x3c003c0,
+ 0x3ffff00, 0x7c00000, 0x7fffe00, 0xf000f00,
+ 0xfffe000, 0x18181818, 0x1ffc0000, 0x1ffffffe,
+ 0x3f003f00, 0x3fffe000, 0x60006000, 0x7f807f80,
+ 0x7ffffc00, 0x800001ff, 0x803fffff, 0x9f9f9f9f,
+ 0xc0000fff, 0xc0c0c0c0, 0xe0000000, 0xe003e003,
+ 0xe3ffffff, 0xf0000fff, 0xf0f0f0f0, 0xf80000ff,
+ 0xf83ff83f, 0xfc00007f, 0xfc1fffff, 0xfe0001ff,
+ 0xfe3fffff, 0xff003fff, 0xff800003, 0xff87ff87,
+ 0xffc00fff, 0xffe0000f, 0xffefffef, 0xfff1fff1,
+ 0xfff83fff, 0xfffc0fff, 0xfffe0fff, 0xffff3fff,
+ 0xffffc007, 0xffffe1ff, 0xfffff80f, 0xfffffe07,
0xffffffbf, 0xfffffffd]
immediates \
- = [0x1, 0x1f80, 0x3fff0, 0x3ffffc,
- 0x3fe0000, 0x1ffc0000, 0xf8000000, 0x3ffffc000,
- 0xffffffe00, 0x3ffffff800, 0xffffc00000, 0x3f000000000,
- 0x7fffffff800, 0x1fe000001fe0, 0x3ffffff80000, 0xc00000000000,
- 0x1ffc000000000, 0x3ffff0003ffff, 0x7ffffffe00000, 0xfffffffffc000,
- 0x1ffffffffffc00, 0x3fffffffffff00, 0x7ffffffffffc00, 0xffffffffff8000,
- 0x1ffffffff800000, 0x3fffffc03fffffc, 0x7fffc0000000000, 0xff80ff80ff80ff8,
- 0x1c00000000000000, 0x1fffffffffff0000, 0x3fffff803fffff80, 0x7fc000007fc00000,
- 0x8000000000000000, 0x803fffff803fffff, 0xc000007fc000007f, 0xe00000000000ffff,
- 0xe3ffffffffffffff, 0xf007f007f007f007, 0xf80003ffffffffff, 0xfc000003fc000003,
- 0xfe000000007fffff, 0xff00000000007fff, 0xff800000000003ff, 0xffc00000000000ff,
- 0xffe00000000003ff, 0xfff0000000003fff, 0xfff80000001fffff, 0xfffc0000fffc0000,
- 0xfffe003fffffffff, 0xffff3fffffffffff, 0xffffc0000007ffff, 0xffffe01fffffe01f,
- 0xfffff800000007ff, 0xfffffc0fffffffff, 0xffffff00003fffff, 0xffffffc0000007ff,
- 0xfffffff0000001ff, 0xfffffffc00003fff, 0xffffffff07ffffff, 0xffffffffe003ffff,
+ = [0x1, 0x1f80, 0x3fff0, 0x3ffffc,
+ 0x3fe0000, 0x1ffc0000, 0xf8000000, 0x3ffffc000,
+ 0xffffffe00, 0x3ffffff800, 0xffffc00000, 0x3f000000000,
+ 0x7fffffff800, 0x1fe000001fe0, 0x3ffffff80000, 0xc00000000000,
+ 0x1ffc000000000, 0x3ffff0003ffff, 0x7ffffffe00000, 0xfffffffffc000,
+ 0x1ffffffffffc00, 0x3fffffffffff00, 0x7ffffffffffc00, 0xffffffffff8000,
+ 0x1ffffffff800000, 0x3fffffc03fffffc, 0x7fffc0000000000, 0xff80ff80ff80ff8,
+ 0x1c00000000000000, 0x1fffffffffff0000, 0x3fffff803fffff80, 0x7fc000007fc00000,
+ 0x8000000000000000, 0x803fffff803fffff, 0xc000007fc000007f, 0xe00000000000ffff,
+ 0xe3ffffffffffffff, 0xf007f007f007f007, 0xf80003ffffffffff, 0xfc000003fc000003,
+ 0xfe000000007fffff, 0xff00000000007fff, 0xff800000000003ff, 0xffc00000000000ff,
+ 0xffe00000000003ff, 0xfff0000000003fff, 0xfff80000001fffff, 0xfffc0000fffc0000,
+ 0xfffe003fffffffff, 0xffff3fffffffffff, 0xffffc0000007ffff, 0xffffe01fffffe01f,
+ 0xfffff800000007ff, 0xfffffc0fffffffff, 0xffffff00003fffff, 0xffffffc0000007ff,
+ 0xfffffff0000001ff, 0xfffffffc00003fff, 0xffffffff07ffffff, 0xffffffffe003ffff,
0xfffffffffc01ffff, 0xffffffffffc00003, 0xfffffffffffc000f, 0xffffffffffffe07f]
def generate(self):
AddSubImmOp.generate(self)
self.immed = \
self.immediates32[random.randint(0, len(self.immediates32)-1)] \
- if self.isWord \
- else \
- self.immediates[random.randint(0, len(self.immediates)-1)]
-
+ if self.isWord else \
+ self.immediates[random.randint(0, len(self.immediates)-1)]
+
return self
-
+
def astr(self):
return (super(TwoRegImmedInstruction, self).astr()
+ ', #0x%x' % self.immed)
def cstr(self):
return super(AddSubImmOp, self).cstr() + "ll);"
-
+
class MultiOp():
def multipleForms(self):
@@ -422,9 +426,9 @@ def astr(self):
return Instruction.astr(self) + "%s"
class RegAndAbsOp(MultiOp, Instruction):
-
+
def multipleForms(self):
- if self.name() == "adrp":
+ if self.name() == "adrp":
# We can only test one form of adrp because anything other
# than "adrp ." requires relocs in the assembler output
return 1
@@ -434,11 +438,11 @@ def generate(self):
Instruction.generate(self)
self.reg = GeneralRegister().generate()
return self
-
+
def cstr(self):
if self.name() == "adrp":
return "__ _adrp(" + "%s, %s);" % (self.reg, "%s")
- return (super(RegAndAbsOp, self).cstr()
+ return (super(RegAndAbsOp, self).cstr()
+ "%s, %s);" % (self.reg, "%s"))
def astr(self):
@@ -446,14 +450,14 @@ def astr(self):
+ self.reg.astr(self.asmRegPrefix) + ", %s")
class RegImmAbsOp(RegAndAbsOp):
-
+
def cstr(self):
return (Instruction.cstr(self)
+ "%s, %s, %s);" % (self.reg, self.immed, "%s"))
def astr(self):
return (Instruction.astr(self)
- + ("%s, #%s, %s"
+ + ("%s, #%s, %s"
% (self.reg.astr(self.asmRegPrefix), self.immed, "%s")))
def generate(self):
@@ -462,7 +466,7 @@ def generate(self):
return self
class MoveWideImmOp(RegImmAbsOp):
-
+
def multipleForms(self):
return 0
@@ -472,8 +476,8 @@ def cstr(self):
def astr(self):
return (Instruction.astr(self)
- + ("%s, #%s, lsl %s"
- % (self.reg.astr(self.asmRegPrefix),
+ + ("%s, #%s, lsl %s"
+ % (self.reg.astr(self.asmRegPrefix),
self.immed, self.shift)))
def generate(self):
@@ -486,7 +490,7 @@ def generate(self):
return self
class BitfieldOp(TwoRegInstruction):
-
+
def cstr(self):
return (Instruction.cstr(self)
+ ("%s, %s, %s, %s);"
@@ -513,16 +517,16 @@ def generate(self):
def cstr(self):
return (ThreeRegInstruction.cstr(self)
+ (", %s);" % self.lsb))
-
+
def astr(self):
return (ThreeRegInstruction.astr(self)
+ (", #%s" % self.lsb))
-
+
class CondBranchOp(MultiOp, Instruction):
def cstr(self):
return "__ br(Assembler::" + self.name() + ", %s);"
-
+
def astr(self):
return "b." + self.name() + "\t%s"
@@ -530,10 +534,10 @@ class ImmOp(Instruction):
def cstr(self):
return "%s%s);" % (Instruction.cstr(self), self.immed)
-
+
def astr(self):
return Instruction.astr(self) + "#" + str(self.immed)
-
+
def generate(self):
self.immed = random.randint(0, 1<<16 -1)
return self
@@ -542,6 +546,8 @@ class Op(Instruction):
def cstr(self):
return Instruction.cstr(self) + ");"
+ def astr(self):
+ return self.aname()
class SystemOp(Instruction):
@@ -573,11 +579,11 @@ def generate(self):
return self
def cstr(self):
- return (super(ConditionalCompareOp, self).cstr() + ", "
+ return (super(ConditionalCompareOp, self).cstr() + ", "
+ "Assembler::" + conditionCodes[self.cond] + ");")
def astr(self):
- return (super(ConditionalCompareOp, self).astr() +
+ return (super(ConditionalCompareOp, self).astr() +
", " + conditionCodes[self.cond])
class ConditionalCompareImmedOp(Instruction):
@@ -596,33 +602,33 @@ def cstr(self):
+ "Assembler::" + conditionCodes[self.cond] + ");")
def astr(self):
- return (Instruction.astr(self)
- + self.reg.astr(self.asmRegPrefix)
+ return (Instruction.astr(self)
+ + self.reg.astr(self.asmRegPrefix)
+ ", #" + str(self.immed)
+ ", #" + str(self.immed2)
+ ", " + conditionCodes[self.cond])
class TwoRegOp(TwoRegInstruction):
-
+
def cstr(self):
return TwoRegInstruction.cstr(self) + ");"
class ThreeRegOp(ThreeRegInstruction):
-
+
def cstr(self):
return ThreeRegInstruction.cstr(self) + ");"
class FourRegMulOp(FourRegInstruction):
-
+
def cstr(self):
return FourRegInstruction.cstr(self) + ");"
def astr(self):
isMaddsub = self.name().startswith("madd") | self.name().startswith("msub")
midPrefix = self.asmRegPrefix if isMaddsub else "w"
- return (Instruction.astr(self)
- + self.reg[0].astr(self.asmRegPrefix)
- + ", " + self.reg[1].astr(midPrefix)
+ return (Instruction.astr(self)
+ + self.reg[0].astr(self.asmRegPrefix)
+ + ", " + self.reg[1].astr(midPrefix)
+ ", " + self.reg[2].astr(midPrefix)
+ ", " + self.reg[3].astr(self.asmRegPrefix))
@@ -638,8 +644,8 @@ def cstr(self):
+ "Assembler::" + conditionCodes[self.cond] + ");")
def astr(self):
- return (ThreeRegInstruction.astr(self)
- + ", " + conditionCodes[self.cond])
+ return (ThreeRegInstruction.astr(self)
+ + ", " + conditionCodes[self.cond])
class LoadStoreExclusiveOp(InstructionWithModes):
@@ -651,7 +657,7 @@ def astr(self):
result = self.aname() + '\t'
regs = list(self.regs)
index = regs.pop() # The last reg is the index register
- prefix = ('x' if (self.mode == 'x')
+ prefix = ('x' if (self.mode == 'x')
& ((self.name().startswith("ld"))
| (self.name().startswith("stlr"))) # Ewww :-(
else 'w')
@@ -698,17 +704,17 @@ def aname(self):
return self._name
class Address(object):
-
+
base_plus_unscaled_offset, pre, post, base_plus_reg, \
base_plus_scaled_offset, pcrel, post_reg, base_only = range(8)
- kinds = ["base_plus_unscaled_offset", "pre", "post", "base_plus_reg",
+ kinds = ["base_plus_unscaled_offset", "pre", "post", "base_plus_reg",
"base_plus_scaled_offset", "pcrel", "post_reg", "base_only"]
extend_kinds = ["uxtw", "lsl", "sxtw", "sxtx"]
@classmethod
def kindToStr(cls, i):
return cls.kinds[i]
-
+
def generate(self, kind, shift_distance):
self.kind = kind
self.base = GeneralRegister().generate()
@@ -738,7 +744,7 @@ def __str__(self):
Address.pcrel: "",
Address.base_plus_reg: "Address(%s, %s, Address::%s(%s))" \
% (self.base, self.index, self.extend_kind, self.shift_distance),
- Address.base_plus_scaled_offset:
+ Address.base_plus_scaled_offset:
"Address(%s, %s)" % (self.base, self.offset) } [self.kind]
if (self.kind == Address.pcrel):
result = ["__ pc()", "back", "forth"][self.offset]
@@ -758,7 +764,7 @@ def astr(self, prefix):
Address.base_only: "[%s]" % (self.base.astr(prefix)),
Address.pcrel: "",
Address.base_plus_reg: "[%s, %s, %s #%s]" \
- % (self.base.astr(prefix), self.index.astr(extend_prefix),
+ % (self.base.astr(prefix), self.index.astr(extend_prefix),
self.extend_kind, self.shift_distance),
Address.base_plus_scaled_offset: \
"[%s, %s]" \
@@ -767,7 +773,7 @@ def astr(self, prefix):
if (self.kind == Address.pcrel):
result = [".", "back", "forth"][self.offset]
return result
-
+
class LoadStoreOp(InstructionWithModes):
def __init__(self, args):
@@ -822,14 +828,14 @@ def aname(self):
class LoadStorePairOp(InstructionWithModes):
numRegs = 2
-
+
def __init__(self, args):
name, self.asmname, self.kind, mode = args
InstructionWithModes.__init__(self, name, mode)
self.offset = random.randint(-1<<4, 1<<4-1) << 4
-
+
def generate(self):
- self.reg = [OperandFactory.create(self.mode).generate()
+ self.reg = [OperandFactory.create(self.mode).generate()
for i in range(self.numRegs)]
self.base = OperandFactory.create('x').generate()
kindStr = Address.kindToStr(self.kind);
@@ -846,8 +852,8 @@ def astr(self):
address = ["[%s, #%s]", "[%s, #%s]!", "[%s], #%s"][self.kind]
address = address % (self.base.astr('x'), self.offset)
result = "%s\t%s, %s, %s" \
- % (self.asmname,
- self.reg[0].astr(self.asmRegPrefix),
+ % (self.asmname,
+ self.reg[0].astr(self.asmRegPrefix),
self.reg[1].astr(self.asmRegPrefix), address)
return result
@@ -875,7 +881,7 @@ def __init__(self, args):
Instruction.__init__(self, name)
def generate(self):
- self.reg = [OperandFactory.create(self.modes[i]).generate()
+ self.reg = [OperandFactory.create(self.modes[i]).generate()
for i in range(self.numRegs)]
return self
@@ -884,7 +890,7 @@ def cstr(self):
return (formatStr
% tuple([Instruction.cstr(self)] +
[str(self.reg[i]) for i in range(self.numRegs)])) # Yowza
-
+
def astr(self):
formatStr = "%s%s" + ''.join([", %s" for i in range(1, self.numRegs)])
return (formatStr
@@ -985,7 +991,7 @@ def astr(self):
moreReg +
[str(self.reg[2]) + self._width.astr()])
-class LdStSIMDOp(Instruction):
+class LdStNEONOp(Instruction):
def __init__(self, args):
self._name, self.regnum, self.arrangement, self.addresskind = args
@@ -1004,7 +1010,7 @@ def generate(self):
return self
def cstr(self):
- buf = super(LdStSIMDOp, self).cstr() + str(self._firstSIMDreg)
+ buf = super(LdStNEONOp, self).cstr() + str(self._firstSIMDreg)
current = self._firstSIMDreg
for cnt in range(1, self.regnum):
buf = '%s, %s' % (buf, current.nextReg())
@@ -1022,6 +1028,57 @@ def astr(self):
def aname(self):
return self._name
+class NEONReduceInstruction(Instruction):
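+ # Across-lanes reduce instructions (addv, smaxv, fminv, ...); the source
+ # SIMD register is the one following the destination register.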
+ def __init__(self, args):
+ self._name, self.insname, self.arrangement = args
+
+ def generate(self):
+ current = FloatRegister().generate()
+ self.dstSIMDreg = current
+ self.srcSIMDreg = current.nextReg()
+ return self
+
+ def cstr(self):
+ buf = Instruction.cstr(self) + str(self.dstSIMDreg)
+ buf = '%s, __ T%s, %s);' % (buf, self.arrangement, self.srcSIMDreg)
+ return buf
+
+ def astr(self):
+ buf = '%s\t%s' % (self.insname, self.dstSIMDreg.astr(self.arrangement[-1].lower()))
+ buf = '%s, %s.%s' % (buf, self.srcSIMDreg, self.arrangement)
+ return buf
+
+ def aname(self):
+ return self._name
+
+class CommonNEONInstruction(Instruction):
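+ # Base class for element-wise NEON instructions; subclasses define numRegs
+ # as the number of consecutive SIMD registers the instruction uses.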
+ def __init__(self, args):
+ self._name, self.insname, self.arrangement = args
+
+ def generate(self):
+ self._firstSIMDreg = FloatRegister().generate()
+ return self
+
+ def cstr(self):
+ buf = Instruction.cstr(self) + str(self._firstSIMDreg)
+ buf = '%s, __ T%s' % (buf, self.arrangement)
+ current = self._firstSIMDreg
+ for cnt in range(1, self.numRegs):
+ buf = '%s, %s' % (buf, current.nextReg())
+ current = current.nextReg()
+ return '%s);' % (buf)
+
+ def astr(self):
+ buf = '%s\t%s.%s' % (self.insname, self._firstSIMDreg, self.arrangement)
+ current = self._firstSIMDreg
+ for cnt in range(1, self.numRegs):
+ buf = '%s, %s.%s' % (buf, current.nextReg(), self.arrangement)
+ current = current.nextReg()
+ return buf
+
+ def aname(self):
+ return self._name
+
class SHA512SIMDOp(Instruction):
def generate(self):
@@ -1053,6 +1110,44 @@ def astr(self):
+ ('\t%s, %s, %s.2D' % (self.reg[0].astr("q"),
self.reg[1].astr("q"), self.reg[2].astr("v"))))
+class SHA3SIMDOp(Instruction):
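+ # SHA3 instructions: eor3/bcax take four vector registers, rax1 takes
+ # three, and xar takes three plus a 6-bit rotate immediate.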
+
+ def generate(self):
+ if ((self._name == 'eor3') or (self._name == 'bcax')):
+ self.reg = [FloatRegister().generate(), FloatRegister().generate(),
+ FloatRegister().generate(), FloatRegister().generate()]
+ else:
+ self.reg = [FloatRegister().generate(), FloatRegister().generate(),
+ FloatRegister().generate()]
+ if (self._name == 'xar'):
+ self.imm6 = random.randint(0, 63)
+ return self
+
+ def cstr(self):
+ if ((self._name == 'eor3') or (self._name == 'bcax')):
+ return (super(SHA3SIMDOp, self).cstr()
+ + ('%s, __ T16B, %s, %s, %s);' % (self.reg[0], self.reg[1], self.reg[2], self.reg[3])))
+ elif (self._name == 'rax1'):
+ return (super(SHA3SIMDOp, self).cstr()
+ + ('%s, __ T2D, %s, %s);' % (self.reg[0], self.reg[1], self.reg[2])))
+ else:
+ return (super(SHA3SIMDOp, self).cstr()
+ + ('%s, __ T2D, %s, %s, %s);' % (self.reg[0], self.reg[1], self.reg[2], self.imm6)))
+
+ def astr(self):
+ if ((self._name == 'eor3') or (self._name == 'bcax')):
+ return (super(SHA3SIMDOp, self).astr()
+ + ('\t%s.16B, %s.16B, %s.16B, %s.16B' % (self.reg[0].astr("v"), self.reg[1].astr("v"),
+ self.reg[2].astr("v"), self.reg[3].astr("v"))))
+ elif (self._name == 'rax1'):
+ return (super(SHA3SIMDOp, self).astr()
+ + ('\t%s.2D, %s.2D, %s.2D') % (self.reg[0].astr("v"), self.reg[1].astr("v"),
+ self.reg[2].astr("v")))
+ else:
+ return (super(SHA3SIMDOp, self).astr()
+ + ('\t%s.2D, %s.2D, %s.2D, #%s') % (self.reg[0].astr("v"), self.reg[1].astr("v"),
+ self.reg[2].astr("v"), self.imm6))
+
class LSEOp(Instruction):
def __init__(self, args):
self._name, self.asmname, self.size, self.suffix = args
@@ -1097,6 +1192,12 @@ def aname(self):
def cname(self):
return self._cname
+class TwoRegNEONOp(CommonNEONInstruction):
+ numRegs = 2
+
+class ThreeRegNEONOp(TwoRegNEONOp):
+ numRegs = 3
+
class SpecialCases(Instruction):
def __init__(self, data):
self._name = data[0]
@@ -1129,6 +1230,7 @@ def generate(kind, names):
outfile = open("aarch64ops.s", "w")
+# Keep the random seed fixed to minimize changes to the generated assembler test code
random.seed(0)
print "// BEGIN Generated code -- do not edit"
@@ -1139,18 +1241,18 @@ def generate(kind, names):
outfile.write("back:\n")
-generate (ArithOp,
+generate (ArithOp,
[ "add", "sub", "adds", "subs",
"addw", "subw", "addsw", "subsw",
"and", "orr", "eor", "ands",
- "andw", "orrw", "eorw", "andsw",
- "bic", "orn", "eon", "bics",
+ "andw", "orrw", "eorw", "andsw",
+ "bic", "orn", "eon", "bics",
"bicw", "ornw", "eonw", "bicsw" ])
-generate (AddSubImmOp,
+generate (AddSubImmOp,
[ "addw", "addsw", "subw", "subsw",
"add", "adds", "sub", "subs"])
-generate (LogicalImmOp,
+generate (LogicalImmOp,
[ "andw", "orrw", "eorw", "andsw",
"and", "orr", "eor", "ands"])
@@ -1191,26 +1293,26 @@ def generate(kind, names):
["stxp", mode, 4], ["stlxp", mode, 4]])
for kind in range(6):
- print "\n// " + Address.kindToStr(kind),
+ sys.stdout.write("\n// " + Address.kindToStr(kind))
if kind != Address.pcrel:
- generate (LoadStoreOp,
- [["str", "str", kind, "x"], ["str", "str", kind, "w"],
+ generate (LoadStoreOp,
+ [["str", "str", kind, "x"], ["str", "str", kind, "w"],
["str", "strb", kind, "b"], ["str", "strh", kind, "h"],
- ["ldr", "ldr", kind, "x"], ["ldr", "ldr", kind, "w"],
+ ["ldr", "ldr", kind, "x"], ["ldr", "ldr", kind, "w"],
["ldr", "ldrb", kind, "b"], ["ldr", "ldrh", kind, "h"],
- ["ldrsb", "ldrsb", kind, "x"], ["ldrsh", "ldrsh", kind, "x"],
+ ["ldrsb", "ldrsb", kind, "x"], ["ldrsh", "ldrsh", kind, "x"],
["ldrsh", "ldrsh", kind, "w"], ["ldrsw", "ldrsw", kind, "x"],
- ["ldr", "ldr", kind, "d"], ["ldr", "ldr", kind, "s"],
- ["str", "str", kind, "d"], ["str", "str", kind, "s"],
+ ["ldr", "ldr", kind, "d"], ["ldr", "ldr", kind, "s"],
+ ["str", "str", kind, "d"], ["str", "str", kind, "s"],
])
else:
- generate (LoadStoreOp,
+ generate (LoadStoreOp,
[["ldr", "ldr", kind, "x"], ["ldr", "ldr", kind, "w"]])
-
+
for kind in (Address.base_plus_unscaled_offset, Address.pcrel, Address.base_plus_reg, \
Address.base_plus_scaled_offset):
- generate (LoadStoreOp,
+ generate (LoadStoreOp,
[["prfm", "prfm\tPLDL1KEEP,", kind, "x"]])
generate(AddSubCarryOp, ["adcw", "adcsw", "sbcw", "sbcsw", "adc", "adcs", "sbc", "sbcs"])
@@ -1219,32 +1321,32 @@ def generate(kind, names):
generate(ConditionalCompareOp, ["ccmnw", "ccmpw", "ccmn", "ccmp"])
generate(ConditionalCompareImmedOp, ["ccmnw", "ccmpw", "ccmn", "ccmp"])
-generate(ConditionalSelectOp,
+generate(ConditionalSelectOp,
["cselw", "csincw", "csinvw", "csnegw", "csel", "csinc", "csinv", "csneg"])
-generate(TwoRegOp,
- ["rbitw", "rev16w", "revw", "clzw", "clsw", "rbit",
+generate(TwoRegOp,
+ ["rbitw", "rev16w", "revw", "clzw", "clsw", "rbit",
"rev16", "rev32", "rev", "clz", "cls"])
-generate(ThreeRegOp,
- ["udivw", "sdivw", "lslvw", "lsrvw", "asrvw", "rorvw", "udiv", "sdiv",
+generate(ThreeRegOp,
+ ["udivw", "sdivw", "lslvw", "lsrvw", "asrvw", "rorvw", "udiv", "sdiv",
"lslv", "lsrv", "asrv", "rorv", "umulh", "smulh"])
-generate(FourRegMulOp,
+generate(FourRegMulOp,
["maddw", "msubw", "madd", "msub", "smaddl", "smsubl", "umaddl", "umsubl"])
-generate(ThreeRegFloatOp,
- [["fmuls", "sss"], ["fdivs", "sss"], ["fadds", "sss"], ["fsubs", "sss"],
+generate(ThreeRegFloatOp,
+ [["fmuls", "sss"], ["fdivs", "sss"], ["fadds", "sss"], ["fsubs", "sss"],
["fmuls", "sss"],
- ["fmuld", "ddd"], ["fdivd", "ddd"], ["faddd", "ddd"], ["fsubd", "ddd"],
+ ["fmuld", "ddd"], ["fdivd", "ddd"], ["faddd", "ddd"], ["fsubd", "ddd"],
["fmuld", "ddd"]])
-generate(FourRegFloatOp,
- [["fmadds", "ssss"], ["fmsubs", "ssss"], ["fnmadds", "ssss"], ["fnmadds", "ssss"],
+generate(FourRegFloatOp,
+ [["fmadds", "ssss"], ["fmsubs", "ssss"], ["fnmadds", "ssss"], ["fnmadds", "ssss"],
["fmaddd", "dddd"], ["fmsubd", "dddd"], ["fnmaddd", "dddd"], ["fnmaddd", "dddd"],])
-generate(TwoRegFloatOp,
- [["fmovs", "ss"], ["fabss", "ss"], ["fnegs", "ss"], ["fsqrts", "ss"],
+generate(TwoRegFloatOp,
+ [["fmovs", "ss"], ["fabss", "ss"], ["fnegs", "ss"], ["fsqrts", "ss"],
["fcvts", "ds"],
- ["fmovd", "dd"], ["fabsd", "dd"], ["fnegd", "dd"], ["fsqrtd", "dd"],
+ ["fmovd", "dd"], ["fabsd", "dd"], ["fnegd", "dd"], ["fsqrtd", "dd"],
["fcvtd", "sd"],
])
@@ -1255,18 +1357,18 @@ def generate(kind, names):
["fmovs", "fmov", "ws"], ["fmovd", "fmov", "xd"],
["fmovs", "fmov", "sw"], ["fmovd", "fmov", "dx"]])
-generate(TwoRegFloatOp, [["fcmps", "ss"], ["fcmpd", "dd"],
+generate(TwoRegFloatOp, [["fcmps", "ss"], ["fcmpd", "dd"],
["fcmps", "sz"], ["fcmpd", "dz"]])
for kind in range(3):
generate(LoadStorePairOp, [["stp", "stp", kind, "w"], ["ldp", "ldp", kind, "w"],
- ["ldpsw", "ldpsw", kind, "x"],
+ ["ldpsw", "ldpsw", kind, "x"],
["stp", "stp", kind, "x"], ["ldp", "ldp", kind, "x"]
])
generate(LoadStorePairOp, [["stnp", "stnp", 0, "w"], ["ldnp", "ldnp", 0, "w"],
["stnp", "stnp", 0, "x"], ["ldnp", "ldnp", 0, "x"]])
-generate(LdStSIMDOp, [["ld1", 1, "8B", Address.base_only],
+generate(LdStNEONOp, [["ld1", 1, "8B", Address.base_only],
["ld1", 2, "16B", Address.post],
["ld1", 3, "1D", Address.post_reg],
["ld1", 4, "8H", Address.post],
@@ -1290,7 +1392,92 @@ def generate(kind, names):
["ld4r", 4, "2S", Address.post_reg],
])
-generate(SHA512SIMDOp, ["sha512h", "sha512h2", "sha512su0", "sha512su1"])
+generate(NEONReduceInstruction,
+ [["addv", "addv", "8B"], ["addv", "addv", "16B"],
+ ["addv", "addv", "4H"], ["addv", "addv", "8H"],
+ ["addv", "addv", "4S"],
+ ["smaxv", "smaxv", "8B"], ["smaxv", "smaxv", "16B"],
+ ["smaxv", "smaxv", "4H"], ["smaxv", "smaxv", "8H"],
+ ["smaxv", "smaxv", "4S"], ["fmaxv", "fmaxv", "4S"],
+ ["sminv", "sminv", "8B"], ["sminv", "sminv", "16B"],
+ ["sminv", "sminv", "4H"], ["sminv", "sminv", "8H"],
+ ["sminv", "sminv", "4S"], ["fminv", "fminv", "4S"],
+ ])
+
+generate(TwoRegNEONOp,
+ [["absr", "abs", "8B"], ["absr", "abs", "16B"],
+ ["absr", "abs", "4H"], ["absr", "abs", "8H"],
+ ["absr", "abs", "2S"], ["absr", "abs", "4S"],
+ ["absr", "abs", "2D"],
+ ["fabs", "fabs", "2S"], ["fabs", "fabs", "4S"],
+ ["fabs", "fabs", "2D"],
+ ["fneg", "fneg", "2S"], ["fneg", "fneg", "4S"],
+ ["fneg", "fneg", "2D"],
+ ["fsqrt", "fsqrt", "2S"], ["fsqrt", "fsqrt", "4S"],
+ ["fsqrt", "fsqrt", "2D"],
+ ["notr", "not", "8B"], ["notr", "not", "16B"],
+ ])
+
+generate(ThreeRegNEONOp,
+ [["andr", "and", "8B"], ["andr", "and", "16B"],
+ ["orr", "orr", "8B"], ["orr", "orr", "16B"],
+ ["eor", "eor", "8B"], ["eor", "eor", "16B"],
+ ["addv", "add", "8B"], ["addv", "add", "16B"],
+ ["addv", "add", "4H"], ["addv", "add", "8H"],
+ ["addv", "add", "2S"], ["addv", "add", "4S"],
+ ["addv", "add", "2D"],
+ ["fadd", "fadd", "2S"], ["fadd", "fadd", "4S"],
+ ["fadd", "fadd", "2D"],
+ ["subv", "sub", "8B"], ["subv", "sub", "16B"],
+ ["subv", "sub", "4H"], ["subv", "sub", "8H"],
+ ["subv", "sub", "2S"], ["subv", "sub", "4S"],
+ ["subv", "sub", "2D"],
+ ["fsub", "fsub", "2S"], ["fsub", "fsub", "4S"],
+ ["fsub", "fsub", "2D"],
+ ["mulv", "mul", "8B"], ["mulv", "mul", "16B"],
+ ["mulv", "mul", "4H"], ["mulv", "mul", "8H"],
+ ["mulv", "mul", "2S"], ["mulv", "mul", "4S"],
+ ["fmul", "fmul", "2S"], ["fmul", "fmul", "4S"],
+ ["fmul", "fmul", "2D"],
+ ["mlav", "mla", "4H"], ["mlav", "mla", "8H"],
+ ["mlav", "mla", "2S"], ["mlav", "mla", "4S"],
+ ["fmla", "fmla", "2S"], ["fmla", "fmla", "4S"],
+ ["fmla", "fmla", "2D"],
+ ["mlsv", "mls", "4H"], ["mlsv", "mls", "8H"],
+ ["mlsv", "mls", "2S"], ["mlsv", "mls", "4S"],
+ ["fmls", "fmls", "2S"], ["fmls", "fmls", "4S"],
+ ["fmls", "fmls", "2D"],
+ ["fdiv", "fdiv", "2S"], ["fdiv", "fdiv", "4S"],
+ ["fdiv", "fdiv", "2D"],
+ ["maxv", "smax", "8B"], ["maxv", "smax", "16B"],
+ ["maxv", "smax", "4H"], ["maxv", "smax", "8H"],
+ ["maxv", "smax", "2S"], ["maxv", "smax", "4S"],
+ ["fmax", "fmax", "2S"], ["fmax", "fmax", "4S"],
+ ["fmax", "fmax", "2D"],
+ ["minv", "smin", "8B"], ["minv", "smin", "16B"],
+ ["minv", "smin", "4H"], ["minv", "smin", "8H"],
+ ["minv", "smin", "2S"], ["minv", "smin", "4S"],
+ ["fmin", "fmin", "2S"], ["fmin", "fmin", "4S"],
+ ["fmin", "fmin", "2D"],
+ ["cmeq", "cmeq", "8B"], ["cmeq", "cmeq", "16B"],
+ ["cmeq", "cmeq", "4H"], ["cmeq", "cmeq", "8H"],
+ ["cmeq", "cmeq", "2S"], ["cmeq", "cmeq", "4S"],
+ ["cmeq", "cmeq", "2D"],
+ ["fcmeq", "fcmeq", "2S"], ["fcmeq", "fcmeq", "4S"],
+ ["fcmeq", "fcmeq", "2D"],
+ ["cmgt", "cmgt", "8B"], ["cmgt", "cmgt", "16B"],
+ ["cmgt", "cmgt", "4H"], ["cmgt", "cmgt", "8H"],
+ ["cmgt", "cmgt", "2S"], ["cmgt", "cmgt", "4S"],
+ ["cmgt", "cmgt", "2D"],
+ ["fcmgt", "fcmgt", "2S"], ["fcmgt", "fcmgt", "4S"],
+ ["fcmgt", "fcmgt", "2D"],
+ ["cmge", "cmge", "8B"], ["cmge", "cmge", "16B"],
+ ["cmge", "cmge", "4H"], ["cmge", "cmge", "8H"],
+ ["cmge", "cmge", "2S"], ["cmge", "cmge", "4S"],
+ ["cmge", "cmge", "2D"],
+ ["fcmge", "fcmge", "2S"], ["fcmge", "fcmge", "4S"],
+ ["fcmge", "fcmge", "2D"],
+ ])
generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);", "ccmn\txzr, xzr, #3, LE"],
["ccmnw", "__ ccmnw(zr, zr, 5u, Assembler::EQ);", "ccmn\twzr, wzr, #5, EQ"],
@@ -1344,9 +1531,9 @@ def generate(kind, names):
])
print "\n// FloatImmediateOp"
-for float in ("2.0", "2.125", "4.0", "4.25", "8.0", "8.5", "16.0", "17.0", "0.125",
- "0.1328125", "0.25", "0.265625", "0.5", "0.53125", "1.0", "1.0625",
- "-2.0", "-2.125", "-4.0", "-4.25", "-8.0", "-8.5", "-16.0", "-17.0",
+for float in ("2.0", "2.125", "4.0", "4.25", "8.0", "8.5", "16.0", "17.0", "0.125",
+ "0.1328125", "0.25", "0.265625", "0.5", "0.53125", "1.0", "1.0625",
+ "-2.0", "-2.125", "-4.0", "-4.25", "-8.0", "-8.5", "-16.0", "-17.0",
"-0.125", "-0.1328125", "-0.25", "-0.265625", "-0.5", "-0.53125", "-1.0", "-1.0625"):
astr = "fmov d0, #" + float
cstr = "__ fmovd(v0, " + float + ");"
@@ -1366,6 +1553,11 @@ def generate(kind, names):
["ldumin", "ldumin", size, suffix],
["ldumax", "ldumax", size, suffix]]);
+# ARMv8.2A
+generate(SHA3SIMDOp, ["bcax", "eor3", "rax1", "xar"])
+
+generate(SHA512SIMDOp, ["sha512h", "sha512h2", "sha512su0", "sha512su1"])
+
generate(SVEVectorOp, [["add", "ZZZ"],
["sub", "ZZZ"],
["fadd", "ZZZ"],
@@ -1414,16 +1606,11 @@ def generate(kind, names):
outfile.close()
-import subprocess
-import sys
-
-# compile for sve with 8.1 and sha2 because of lse atomics and sha512 crypto extension.
-subprocess.check_call([AARCH64_AS, "-march=armv8.1-a+sha2+sve", "aarch64ops.s", "-o", "aarch64ops.o"])
+# Compile for SVE with armv8.2-a and sha3 because of the SHA3 crypto extension.
+subprocess.check_call([AARCH64_AS, "-march=armv8.2-a+sha3+sve", "aarch64ops.s", "-o", "aarch64ops.o"])
print
-print "/*",
-sys.stdout.flush()
-subprocess.check_call([AARCH64_OBJDUMP, "-d", "aarch64ops.o"])
+print "/*"
print "*/"
subprocess.check_call([AARCH64_OBJCOPY, "-O", "binary", "-j", ".text", "aarch64ops.o", "aarch64ops.bin"])
@@ -1444,4 +1631,7 @@ def generate(kind, names):
print "\n };"
print "// END Generated code -- do not edit"
+infile.close()
+for f in ["aarch64ops.s", "aarch64ops.o", "aarch64ops.bin"]:
+ os.remove(f)
diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad
index ede4040491e..ff82cd08cc1 100644
--- a/src/hotspot/cpu/aarch64/aarch64.ad
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
@@ -614,9 +614,7 @@ alloc_class chunk3(RFLAGS);
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
-// 2) reg_class compiler_method_reg ( /* as def'd in frame section */ )
-// 2) reg_class interpreter_method_reg ( /* as def'd in frame section */ )
-// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
+// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for all 32 bit general purpose registers
@@ -1755,7 +1753,9 @@ int MachCallDynamicJavaNode::ret_addr_offset()
int MachCallRuntimeNode::ret_addr_offset() {
// for generated stubs the call will be
- // far_call(addr)
+ // bl(addr)
+ // or with far branches
+ // bl(trampoline_stub)
// for real runtime callouts it will be six instructions
// see aarch64_enc_java_to_runtime
// adr(rscratch2, retaddr)
@@ -1764,7 +1764,7 @@ int MachCallRuntimeNode::ret_addr_offset() {
// blr(rscratch1)
CodeBlob *cb = CodeCache::find_blob(_entry_point);
if (cb) {
- return MacroAssembler::far_branch_size();
+ return 1 * NativeInstruction::instruction_size;
} else {
return 6 * NativeInstruction::instruction_size;
}
@@ -1966,9 +1966,10 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const {
}
if (do_polling() && C->is_method_compilation()) {
- st->print("# touch polling page\n\t");
- st->print("ldr rscratch1, [rthread],#polling_page_offset\n\t");
- st->print("ldr zr, [rscratch1]");
+ st->print("# test polling word\n\t");
+ st->print("ldr rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset()));
+ st->print("cmp sp, rscratch1\n\t");
+ st->print("bhi #slow_path");
}
}
#endif
@@ -1985,7 +1986,13 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
}
if (do_polling() && C->is_method_compilation()) {
- __ fetch_and_read_polling_page(rscratch1, relocInfo::poll_return_type);
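+ // Test the thread-local polling word at return; the slow path branches
+ // to a stub registered in the safepoint poll table.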
+ Label dummy_label;
+ Label* code_stub = &dummy_label;
+ if (!C->output()->in_scratch_emit_size()) {
+ code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset());
+ }
+ __ relocate(relocInfo::poll_return_type);
+ __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */);
}
}
@@ -2403,6 +2410,12 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
break;
case Op_MulVL:
return false;
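+ // Shuffle and rearrange are only supported for vectors of at least 4 elements.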
+ case Op_VectorLoadShuffle:
+ case Op_VectorRearrange:
+ if (vlen < 4) {
+ return false;
+ }
+ break;
default:
break;
}
@@ -2414,6 +2427,10 @@ const bool Matcher::has_predicated_vectors(void) {
return UseSVE > 0;
}
+bool Matcher::supports_vector_variable_shifts(void) {
+ return true;
+}
+
const int Matcher::float_pressure(int default_pressure_threshold) {
return default_pressure_threshold;
}
@@ -2459,11 +2476,18 @@ const int Matcher::min_vector_size(const BasicType bt) {
if ((UseSVE > 0) && (MaxVectorSize >= 16)) {
// Currently vector length less than SVE vector register size is not supported.
return max_size;
- } else {
- // For the moment limit the vector size to 8 bytes with NEON.
+ } else { // NEON
+ // Limit the vector size to 8 bytes
int size = 8 / type2aelembytes(bt);
+ if (bt == T_BYTE) {
+ // To support Vector API shuffle/rearrange.
+ size = 4;
+ } else if (bt == T_BOOLEAN) {
+ // To support Vector API load/store mask.
+ size = 2;
+ }
if (size < 2) size = 2;
- return size;
+ return MIN2(size,max_size);
}
}
@@ -2482,6 +2506,9 @@ const uint Matcher::vector_ideal_reg(int len) {
return Op_VecA;
}
switch(len) {
+ // For 16-bit/32-bit mask vectors, reuse VecD.
+ case 2:
+ case 4:
case 8: return Op_VecD;
case 16: return Op_VecX;
}
@@ -2581,11 +2608,6 @@ const bool Matcher::rematerialize_float_constants = false;
// C code as the Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;
-// No-op on amd64
-void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
- Unimplemented();
-}
-
// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;
@@ -3124,6 +3146,12 @@ encode %{
// END Non-volatile memory access
// Vector loads and stores
+ enc_class aarch64_enc_ldrvH(vecD dst, memory mem) %{
+ FloatRegister dst_reg = as_FloatRegister($dst$$reg);
+ loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::H,
+ $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+ %}
+
enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{
FloatRegister dst_reg = as_FloatRegister($dst$$reg);
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S,
@@ -3142,6 +3170,12 @@ encode %{
$mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
%}
+ enc_class aarch64_enc_strvH(vecD src, memory mem) %{
+ FloatRegister src_reg = as_FloatRegister($src$$reg);
+ loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::H,
+ $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
+ %}
+
enc_class aarch64_enc_strvS(vecD src, memory mem) %{
FloatRegister src_reg = as_FloatRegister($src$$reg);
loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S,
@@ -3733,12 +3767,19 @@ encode %{
if (!_method) {
// A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
+ if (call == NULL) {
+ ciEnv::current()->record_failure("CodeCache is full");
+ return;
+ }
} else {
int method_index = resolved_method_index(cbuf);
RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
: static_call_Relocation::spec(method_index);
call = __ trampoline_call(Address(addr, rspec), &cbuf);
-
+ if (call == NULL) {
+ ciEnv::current()->record_failure("CodeCache is full");
+ return;
+ }
// Emit stub for static call
address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
if (stub == NULL) {
@@ -3746,10 +3787,8 @@ encode %{
return;
}
}
- if (call == NULL) {
- ciEnv::current()->record_failure("CodeCache is full");
- return;
- } else if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
+
+ if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) {
// Only non uncommon_trap calls need to reinitialize ptrue.
if (uncommon_trap_request() == 0) {
__ reinitialize_ptrue();
@@ -4051,9 +4090,6 @@ frame %{
// Inline Cache Register or Method for I2C.
inline_cache_reg(R12);
- // Method Register when calling interpreter.
- interpreter_method_reg(R12);
-
// Number of stack slots consumed by locking an object
sync_stack_slots(2);
@@ -4245,6 +4281,26 @@ operand immI_31()
interface(CONST_INTER);
%}
+operand immI_2()
+%{
+ predicate(n->get_int() == 2);
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
+operand immI_4()
+%{
+ predicate(n->get_int() == 4);
+ match(ConI);
+
+ op_cost(0);
+ format %{ %}
+ interface(CONST_INTER);
+%}
+
operand immI_8()
%{
predicate(n->get_int() == 8);
@@ -5621,16 +5677,6 @@ operand inline_cache_RegP(iRegP reg)
interface(REG_INTER);
%}
-operand interpreter_method_RegP(iRegP reg)
-%{
- constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_reg
- match(reg);
- match(iRegPNoSp);
- op_cost(0);
- format %{ %}
- interface(REG_INTER);
-%}
-
// Thread Register
operand thread_RegP(iRegP reg)
%{
@@ -11215,6 +11261,7 @@ instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{
%}
// BEGIN This section of the file is automatically generated. Do not edit --------------
+// This section is generated from aarch64_ad.m4
// This pattern is automatically generated from aarch64_ad.m4
@@ -14685,7 +14732,11 @@ instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlag
format %{ "ClearArray $cnt, $base" %}
ins_encode %{
- __ zero_words($base$$Register, $cnt$$Register);
+ address tpc = __ zero_words($base$$Register, $cnt$$Register);
+ if (tpc == NULL) {
+ ciEnv::current()->record_failure("CodeCache is full");
+ return;
+ }
%}
ins_pipe(pipe_class_memory);
@@ -15963,8 +16014,8 @@ instruct CallStaticJavaDirect(method meth)
format %{ "call,static $meth \t// ==> " %}
- ins_encode( aarch64_enc_java_static_call(meth),
- aarch64_enc_call_epilog );
+ ins_encode(aarch64_enc_java_static_call(meth),
+ aarch64_enc_call_epilog);
ins_pipe(pipe_class_call);
%}
@@ -15982,8 +16033,8 @@ instruct CallDynamicJavaDirect(method meth)
format %{ "CALL,dynamic $meth \t// ==> " %}
- ins_encode( aarch64_enc_java_dynamic_call(meth),
- aarch64_enc_call_epilog );
+ ins_encode(aarch64_enc_java_dynamic_call(meth),
+ aarch64_enc_call_epilog);
ins_pipe(pipe_class_call);
%}
@@ -16369,15 +16420,16 @@ instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2,
ins_pipe(pipe_class_memory);
%}
-instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
+instruct string_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
iRegINoSp tmp3, rFlagsReg cr)
%{
match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U);
effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
- format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %}
+ format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %}
ins_encode %{
__ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
@@ -16387,6 +16439,25 @@ instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
ins_pipe(pipe_class_memory);
%}
+instruct stringL_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch,
+ iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2,
+ iRegINoSp tmp3, rFlagsReg cr)
+%{
+ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L);
+ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch,
+ TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr);
+
+ format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result" %}
+
+ ins_encode %{
+ __ stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register,
+ $result$$Register, $tmp1$$Register, $tmp2$$Register,
+ $tmp3$$Register);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
iRegI_R0 result, rFlagsReg cr)
%{
@@ -16429,10 +16500,14 @@ instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
format %{ "Array Equals $ary1,ary2 -> $result // KILL $tmp" %}
ins_encode %{
- __ arrays_equals($ary1$$Register, $ary2$$Register,
- $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
- $result$$Register, $tmp$$Register, 1);
- %}
+ address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register,
+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
+ $result$$Register, $tmp$$Register, 1);
+ if (tpc == NULL) {
+ ciEnv::current()->record_failure("CodeCache is full");
+ return;
+ }
+ %}
ins_pipe(pipe_class_memory);
%}
@@ -16446,9 +16521,13 @@ instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
format %{ "Array Equals $ary1,ary2 -> $result // KILL $tmp" %}
ins_encode %{
- __ arrays_equals($ary1$$Register, $ary2$$Register,
- $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
- $result$$Register, $tmp$$Register, 2);
+ address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register,
+ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register,
+ $result$$Register, $tmp$$Register, 2);
+ if (tpc == NULL) {
+ ciEnv::current()->record_failure("CodeCache is full");
+ return;
+ }
%}
ins_pipe(pipe_class_memory);
%}
@@ -16459,7 +16538,11 @@ instruct has_negatives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg c
effect(USE_KILL ary1, USE_KILL len, KILL cr);
format %{ "has negatives byte[] $ary1,$len -> $result" %}
ins_encode %{
- __ has_negatives($ary1$$Register, $len$$Register, $result$$Register);
+ address tpc = __ has_negatives($ary1$$Register, $len$$Register, $result$$Register);
+ if (tpc == NULL) {
+ ciEnv::current()->record_failure("CodeCache is full");
+ return;
+ }
%}
ins_pipe( pipe_slow );
%}
@@ -16492,8 +16575,13 @@ instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len
format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
ins_encode %{
- __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
- $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register);
+ address tpc = __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
+ $tmp1$$FloatRegister, $tmp2$$FloatRegister,
+ $tmp3$$FloatRegister, $tmp4$$Register);
+ if (tpc == NULL) {
+ ciEnv::current()->record_failure("CodeCache is full");
+ return;
+ }
%}
ins_pipe(pipe_class_memory);
%}
@@ -16821,6 +16909,7 @@ instruct replicate2D(vecX dst, vRegD src)
instruct reduce_add2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp, iRegINoSp tmp2)
%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
match(Set dst (AddReductionVI isrc vsrc));
ins_cost(INSN_COST);
effect(TEMP tmp, TEMP tmp2);
@@ -16840,6 +16929,7 @@ instruct reduce_add2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp,
instruct reduce_add4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iRegINoSp itmp)
%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
match(Set dst (AddReductionVI isrc vsrc));
ins_cost(INSN_COST);
effect(TEMP vtmp, TEMP itmp);
@@ -16858,6 +16948,7 @@ instruct reduce_add4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iReg
instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
match(Set dst (MulReductionVI isrc vsrc));
ins_cost(INSN_COST);
effect(TEMP tmp, TEMP dst);
@@ -16877,6 +16968,7 @@ instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iRegINoSp itmp)
%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
match(Set dst (MulReductionVI isrc vsrc));
ins_cost(INSN_COST);
effect(TEMP vtmp, TEMP itmp, TEMP dst);
@@ -17958,8 +18050,7 @@ instruct vabs2F(vecD dst, vecD src)
ins_cost(INSN_COST * 3);
format %{ "fabs $dst,$src\t# vector (2S)" %}
ins_encode %{
- __ fabs(as_FloatRegister($dst$$reg), __ T2S,
- as_FloatRegister($src$$reg));
+ __ fabs(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg));
%}
ins_pipe(vunop_fp64);
%}
@@ -17971,8 +18062,7 @@ instruct vabs4F(vecX dst, vecX src)
ins_cost(INSN_COST * 3);
format %{ "fabs $dst,$src\t# vector (4S)" %}
ins_encode %{
- __ fabs(as_FloatRegister($dst$$reg), __ T4S,
- as_FloatRegister($src$$reg));
+ __ fabs(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg));
%}
ins_pipe(vunop_fp128);
%}
@@ -17984,8 +18074,7 @@ instruct vabs2D(vecX dst, vecX src)
ins_cost(INSN_COST * 3);
format %{ "fabs $dst,$src\t# vector (2D)" %}
ins_encode %{
- __ fabs(as_FloatRegister($dst$$reg), __ T2D,
- as_FloatRegister($src$$reg));
+ __ fabs(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg));
%}
ins_pipe(vunop_fp128);
%}
@@ -18126,7 +18215,8 @@ instruct vxor16B(vecX dst, vecX src1, vecX src2)
// ------------------------------ Shift ---------------------------------------
instruct vshiftcnt8B(vecD dst, iRegIorL2I cnt) %{
- predicate(n->as_Vector()->length_in_bytes() == 8);
+ predicate(n->as_Vector()->length_in_bytes() == 4 ||
+ n->as_Vector()->length_in_bytes() == 8);
match(Set dst (LShiftCntV cnt));
match(Set dst (RShiftCntV cnt));
format %{ "dup $dst, $cnt\t# shift count vector (8B)" %}
@@ -18834,6 +18924,216 @@ instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{
ins_pipe(vshift128_imm);
%}
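+// Shift-right-and-accumulate: AddV(dst, (U)RShiftV(src, cnt)) is matched to a single ssra/usra.
+// Arithmetic shift counts >= the element size are clamped to (size - 1); over-sized logical
+// shifts contribute nothing to the accumulation.
+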
+instruct vsraa8B_imm(vecD dst, vecD src, immI shift) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (AddVB dst (RShiftVB src (RShiftCntV shift))));
+ ins_cost(INSN_COST);
+ format %{ "ssra $dst, $src, $shift\t# vector (8B)" %}
+ ins_encode %{
+ int sh = (int)$shift$$constant;
+ if (sh >= 8) sh = 7;
+ __ ssra(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($src$$reg), sh);
+ %}
+ ins_pipe(vshift64_imm);
+%}
+
+instruct vsraa16B_imm(vecX dst, vecX src, immI shift) %{
+ predicate(n->as_Vector()->length() == 16);
+ match(Set dst (AddVB dst (RShiftVB src (RShiftCntV shift))));
+ ins_cost(INSN_COST);
+ format %{ "ssra $dst, $src, $shift\t# vector (16B)" %}
+ ins_encode %{
+ int sh = (int)$shift$$constant;
+ if (sh >= 8) sh = 7;
+ __ ssra(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src$$reg), sh);
+ %}
+ ins_pipe(vshift128_imm);
+%}
+
+instruct vsraa4S_imm(vecD dst, vecD src, immI shift) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (AddVS dst (RShiftVS src (RShiftCntV shift))));
+ ins_cost(INSN_COST);
+ format %{ "ssra $dst, $src, $shift\t# vector (4H)" %}
+ ins_encode %{
+ int sh = (int)$shift$$constant;
+ if (sh >= 16) sh = 15;
+ __ ssra(as_FloatRegister($dst$$reg), __ T4H,
+ as_FloatRegister($src$$reg), sh);
+ %}
+ ins_pipe(vshift64_imm);
+%}
+
+instruct vsraa8S_imm(vecX dst, vecX src, immI shift) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (AddVS dst (RShiftVS src (RShiftCntV shift))));
+ ins_cost(INSN_COST);
+ format %{ "ssra $dst, $src, $shift\t# vector (8H)" %}
+ ins_encode %{
+ int sh = (int)$shift$$constant;
+ if (sh >= 16) sh = 15;
+ __ ssra(as_FloatRegister($dst$$reg), __ T8H,
+ as_FloatRegister($src$$reg), sh);
+ %}
+ ins_pipe(vshift128_imm);
+%}
+
+instruct vsraa2I_imm(vecD dst, vecD src, immI shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (AddVI dst (RShiftVI src (RShiftCntV shift))));
+ ins_cost(INSN_COST);
+ format %{ "ssra $dst, $src, $shift\t# vector (2S)" %}
+ ins_encode %{
+ __ ssra(as_FloatRegister($dst$$reg), __ T2S,
+ as_FloatRegister($src$$reg),
+ (int)$shift$$constant);
+ %}
+ ins_pipe(vshift64_imm);
+%}
+
+instruct vsraa4I_imm(vecX dst, vecX src, immI shift) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (AddVI dst (RShiftVI src (RShiftCntV shift))));
+ ins_cost(INSN_COST);
+ format %{ "ssra $dst, $src, $shift\t# vector (4S)" %}
+ ins_encode %{
+ __ ssra(as_FloatRegister($dst$$reg), __ T4S,
+ as_FloatRegister($src$$reg),
+ (int)$shift$$constant);
+ %}
+ ins_pipe(vshift128_imm);
+%}
+
+instruct vsraa2L_imm(vecX dst, vecX src, immI shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (AddVL dst (RShiftVL src (RShiftCntV shift))));
+ ins_cost(INSN_COST);
+ format %{ "ssra $dst, $src, $shift\t# vector (2D)" %}
+ ins_encode %{
+ __ ssra(as_FloatRegister($dst$$reg), __ T2D,
+ as_FloatRegister($src$$reg),
+ (int)$shift$$constant);
+ %}
+ ins_pipe(vshift128_imm);
+%}
+
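+// Unsigned variants: dst[i] += src[i] >>> shift, emitted as usra. A logical
+// shift by the element size or more yields zero, so in that case the rules
+// below skip the accumulate (leaving $dst unchanged) and merely zero $src.
+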
+instruct vsrla8B_imm(vecD dst, vecD src, immI shift) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (AddVB dst (URShiftVB src (RShiftCntV shift))));
+ ins_cost(INSN_COST);
+ format %{ "usra $dst, $src, $shift\t# vector (8B)" %}
+ ins_encode %{
+ int sh = (int)$shift$$constant;
+ if (sh >= 8) {
+ __ eor(as_FloatRegister($src$$reg), __ T8B,
+ as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ } else {
+ __ usra(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($src$$reg), sh);
+ }
+ %}
+ ins_pipe(vshift64_imm);
+%}
+
+instruct vsrla16B_imm(vecX dst, vecX src, immI shift) %{
+ predicate(n->as_Vector()->length() == 16);
+ match(Set dst (AddVB dst (URShiftVB src (RShiftCntV shift))));
+ ins_cost(INSN_COST);
+ format %{ "usra $dst, $src, $shift\t# vector (16B)" %}
+ ins_encode %{
+ int sh = (int)$shift$$constant;
+ if (sh >= 8) {
+ __ eor(as_FloatRegister($src$$reg), __ T16B,
+ as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ } else {
+ __ usra(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src$$reg), sh);
+ }
+ %}
+ ins_pipe(vshift128_imm);
+%}
+
+instruct vsrla4S_imm(vecD dst, vecD src, immI shift) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (AddVS dst (URShiftVS src (RShiftCntV shift))));
+ ins_cost(INSN_COST);
+ format %{ "usra $dst, $src, $shift\t# vector (4H)" %}
+ ins_encode %{
+ int sh = (int)$shift$$constant;
+ if (sh >= 16) {
+ __ eor(as_FloatRegister($src$$reg), __ T8B,
+ as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ } else {
+ __ usra(as_FloatRegister($dst$$reg), __ T4H,
+ as_FloatRegister($src$$reg), sh);
+ }
+ %}
+ ins_pipe(vshift64_imm);
+%}
+
+instruct vsrla8S_imm(vecX dst, vecX src, immI shift) %{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (AddVS dst (URShiftVS src (RShiftCntV shift))));
+ ins_cost(INSN_COST);
+ format %{ "usra $dst, $src, $shift\t# vector (8H)" %}
+ ins_encode %{
+ int sh = (int)$shift$$constant;
+ if (sh >= 16) {
+ __ eor(as_FloatRegister($src$$reg), __ T16B,
+ as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ } else {
+ __ usra(as_FloatRegister($dst$$reg), __ T8H,
+ as_FloatRegister($src$$reg), sh);
+ }
+ %}
+ ins_pipe(vshift128_imm);
+%}
+
+instruct vsrla2I_imm(vecD dst, vecD src, immI shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (AddVI dst (URShiftVI src (RShiftCntV shift))));
+ ins_cost(INSN_COST);
+ format %{ "usra $dst, $src, $shift\t# vector (2S)" %}
+ ins_encode %{
+ __ usra(as_FloatRegister($dst$$reg), __ T2S,
+ as_FloatRegister($src$$reg),
+ (int)$shift$$constant);
+ %}
+ ins_pipe(vshift64_imm);
+%}
+
+instruct vsrla4I_imm(vecX dst, vecX src, immI shift) %{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (AddVI dst (URShiftVI src (RShiftCntV shift))));
+ ins_cost(INSN_COST);
+ format %{ "usra $dst, $src, $shift\t# vector (4S)" %}
+ ins_encode %{
+ __ usra(as_FloatRegister($dst$$reg), __ T4S,
+ as_FloatRegister($src$$reg),
+ (int)$shift$$constant);
+ %}
+ ins_pipe(vshift128_imm);
+%}
+
+instruct vsrla2L_imm(vecX dst, vecX src, immI shift) %{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (AddVL dst (URShiftVL src (RShiftCntV shift))));
+ ins_cost(INSN_COST);
+ format %{ "usra $dst, $src, $shift\t# vector (2D)" %}
+ ins_encode %{
+ __ usra(as_FloatRegister($dst$$reg), __ T2D,
+ as_FloatRegister($src$$reg),
+ (int)$shift$$constant);
+ %}
+ ins_pipe(vshift128_imm);
+%}
+
instruct vmax2F(vecD dst, vecD src1, vecD src2)
%{
predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
@@ -18950,12 +19250,12 @@ instruct vpopcount4I(vecX dst, vecX src) %{
"uaddlp $dst, $dst\t# vector (8H)"
%}
ins_encode %{
- __ cnt(as_FloatRegister($dst$$reg), __ T16B,
- as_FloatRegister($src$$reg));
- __ uaddlp(as_FloatRegister($dst$$reg), __ T16B,
- as_FloatRegister($dst$$reg));
- __ uaddlp(as_FloatRegister($dst$$reg), __ T8H,
- as_FloatRegister($dst$$reg));
+ __ cnt(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src$$reg));
+ __ uaddlp(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($dst$$reg));
+ __ uaddlp(as_FloatRegister($dst$$reg), __ T8H,
+ as_FloatRegister($dst$$reg));
%}
ins_pipe(pipe_class_default);
%}
@@ -18969,12 +19269,12 @@ instruct vpopcount2I(vecD dst, vecD src) %{
"uaddlp $dst, $dst\t# vector (4H)"
%}
ins_encode %{
- __ cnt(as_FloatRegister($dst$$reg), __ T8B,
- as_FloatRegister($src$$reg));
- __ uaddlp(as_FloatRegister($dst$$reg), __ T8B,
- as_FloatRegister($dst$$reg));
- __ uaddlp(as_FloatRegister($dst$$reg), __ T4H,
- as_FloatRegister($dst$$reg));
+ __ cnt(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($src$$reg));
+ __ uaddlp(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($dst$$reg));
+ __ uaddlp(as_FloatRegister($dst$$reg), __ T4H,
+ as_FloatRegister($dst$$reg));
%}
ins_pipe(pipe_class_default);
%}
diff --git a/src/hotspot/cpu/aarch64/aarch64_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_ad.m4
index 5893f451459..ac1b6dfec65 100644
--- a/src/hotspot/cpu/aarch64/aarch64_ad.m4
+++ b/src/hotspot/cpu/aarch64/aarch64_ad.m4
@@ -1,4 +1,4 @@
-dnl Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
+dnl Copyright (c) 2019, 2020, Red Hat Inc. All rights reserved.
dnl DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
dnl
dnl This code is free software; you can redistribute it and/or modify it
@@ -19,10 +19,14 @@ dnl Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
dnl or visit www.oracle.com if you need additional information or have any
dnl questions.
dnl
-dnl
-dnl Process this file with m4 aarch64_ad.m4 to generate the arithmetic
-dnl and shift patterns patterns used in aarch64.ad.
dnl
+dnl Process this file with m4 aarch64_ad.m4 to generate instructions used in
+dnl aarch64.ad:
+dnl 1. the arithmetic patterns
+dnl 2. the shift patterns
+dnl
+// BEGIN This section of the file is automatically generated. Do not edit --------------
+// This section is generated from aarch64_ad.m4
dnl
define(`ORL2I', `ifelse($1,I,orL2I)')
dnl
diff --git a/src/hotspot/cpu/aarch64/aarch64_neon.ad b/src/hotspot/cpu/aarch64/aarch64_neon.ad
new file mode 100644
index 00000000000..33b1a869cc3
--- /dev/null
+++ b/src/hotspot/cpu/aarch64/aarch64_neon.ad
@@ -0,0 +1,3456 @@
+// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2020, Arm Limited. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+
+// This file is automatically generated by running "m4 aarch64_neon_ad.m4". Do not edit ----
+
+// AArch64 NEON Architecture Description File
+
+// ====================VECTOR INSTRUCTIONS==================================
+
+// ------------------------------ Load/store/reinterpret -----------------------
+
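+// In these rules, vecD operands live in 64-bit (D) SIMD registers and vecX
+// operands in 128-bit (Q) SIMD registers. The reinterpret rules further down
+// are register copies, or no-ops when source and destination already share a
+// register of the same size.
+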
+// Load vector (16 bits)
+instruct loadV2(vecD dst, memory mem)
+%{
+ predicate(n->as_LoadVector()->memory_size() == 2);
+ match(Set dst (LoadVector mem));
+ ins_cost(4 * INSN_COST);
+ format %{ "ldrh $dst,$mem\t# vector (16 bits)" %}
+ ins_encode( aarch64_enc_ldrvH(dst, mem) );
+ ins_pipe(vload_reg_mem64);
+%}
+
+// Store Vector (16 bits)
+instruct storeV2(vecD src, memory mem)
+%{
+ predicate(n->as_StoreVector()->memory_size() == 2);
+ match(Set mem (StoreVector mem src));
+ ins_cost(4 * INSN_COST);
+ format %{ "strh $mem,$src\t# vector (16 bits)" %}
+ ins_encode( aarch64_enc_strvH(src, mem) );
+ ins_pipe(vstore_reg_mem64);
+%}
+
+instruct reinterpretD(vecD dst)
+%{
+ predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 &&
+ n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8);
+ match(Set dst (VectorReinterpret dst));
+ ins_cost(0);
+ format %{ " # reinterpret $dst" %}
+ ins_encode %{
+ // empty
+ %}
+ ins_pipe(pipe_class_empty);
+%}
+
+instruct reinterpretX(vecX dst)
+%{
+ predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 &&
+ n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16);
+ match(Set dst (VectorReinterpret dst));
+ ins_cost(0);
+ format %{ " # reinterpret $dst" %}
+ ins_encode %{
+ // empty
+ %}
+ ins_pipe(pipe_class_empty);
+%}
+
+instruct reinterpretD2X(vecX dst, vecD src)
+%{
+ predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 &&
+ n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8);
+ match(Set dst (VectorReinterpret src));
+ ins_cost(INSN_COST);
+ format %{ " # reinterpret $dst,$src" %}
+ ins_encode %{
+ // If register is the same, then move is not needed.
+ if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
+ __ orr(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ }
+ %}
+ ins_pipe(vlogical64);
+%}
+
+instruct reinterpretX2D(vecD dst, vecX src)
+%{
+ predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 &&
+ n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16);
+ match(Set dst (VectorReinterpret src));
+ ins_cost(INSN_COST);
+ format %{ " # reinterpret $dst,$src" %}
+ ins_encode %{
+ // If register is the same, then move is not needed.
+ if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
+ __ orr(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ }
+ %}
+ ins_pipe(vlogical64);
+%}
+
+// ------------------------------ Vector cast -------------------------------
+
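+// Widening casts sign-extend the low half with sxtl; narrowing casts truncate
+// with xtn; integer-to-FP casts finish with scvtfv, and FP width changes use
+// fcvtl/fcvtn. Casts that need more than one step chain these instructions
+// and use pipe_slow.
+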
+instruct vcvt4Bto4S(vecD dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorCastB2X src));
+ format %{ "sxtl $dst, T8H, $src, T8B\t# convert 4B to 4S vector" %}
+ ins_encode %{
+ __ sxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vcvt8Bto8S(vecX dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorCastB2X src));
+ format %{ "sxtl $dst, T8H, $src, T8B\t# convert 8B to 8S vector" %}
+ ins_encode %{
+ __ sxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vcvt4Sto4B(vecD dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorCastS2X src));
+ format %{ "xtn $dst, T8B, $src, T8H\t# convert 4S to 4B vector" %}
+ ins_encode %{
+ __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg), __ T8H);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vcvt8Sto8B(vecD dst, vecX src)
+%{
+ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorCastS2X src));
+ format %{ "xtn $dst, T8B, $src, T8H\t# convert 8S to 8B vector" %}
+ ins_encode %{
+ __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg), __ T8H);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vcvt4Sto4I(vecX dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (VectorCastS2X src));
+ format %{ "sxtl $dst, T4S, $src, T4H\t# convert 4S to 4I vector" %}
+ ins_encode %{
+ __ sxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg), __ T4H);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vcvt4Ito4S(vecD dst, vecX src)
+%{
+ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorCastI2X src));
+ format %{ "xtn $dst, T4H, $src, T4S\t# convert 4I to 4S vector" %}
+ ins_encode %{
+ __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($src$$reg), __ T4S);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vcvt2Ito2L(vecX dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst (VectorCastI2X src));
+ format %{ "sxtl $dst, T2D, $src, T2S\t# convert 2I to 2L vector" %}
+ ins_encode %{
+ __ sxtl(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg), __ T2S);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vcvt2Lto2I(vecD dst, vecX src)
+%{
+ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (VectorCastL2X src));
+ format %{ "xtn $dst, T2S, $src, T2D\t# convert 2L to 2I vector" %}
+ ins_encode %{
+ __ xtn(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg), __ T2D);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vcvt4Bto4I(vecX dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (VectorCastB2X src));
+ format %{ "sxtl $dst, T8H, $src, T8B\n\t"
+ "sxtl $dst, T4S, $dst, T4H\t# convert 4B to 4I vector"
+ %}
+ ins_encode %{
+ __ sxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
+ __ sxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vcvt4Ito4B(vecD dst, vecX src)
+%{
+ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorCastI2X src));
+ format %{ "xtn $dst, T4H, $src, T4S\n\t"
+ "xtn $dst, T8B, $dst, T8H\t# convert 4I to 4B vector"
+ %}
+ ins_encode %{
+ __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($src$$reg), __ T4S);
+ __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg), __ T8H);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vcvt4Bto4F(vecX dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorCastB2X src));
+ format %{ "sxtl $dst, T8H, $src, T8B\n\t"
+ "sxtl $dst, T4S, $dst, T4H\n\t"
+ "scvtfv T4S, $dst, $dst\t# convert 4B to 4F vector"
+ %}
+ ins_encode %{
+ __ sxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
+ __ sxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
+ __ scvtfv(__ T4S, as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vcvt4Sto4F(vecX dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorCastS2X src));
+ format %{ "sxtl $dst, T4S, $src, T4H\n\t"
+ "scvtfv T4S, $dst, $dst\t# convert 4S to 4F vector"
+ %}
+ ins_encode %{
+ __ sxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg), __ T4H);
+ __ scvtfv(__ T4S, as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vcvt2Ito2D(vecX dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+ match(Set dst (VectorCastI2X src));
+ format %{ "sxtl $dst, T2D, $src, T2S\n\t"
+ "scvtfv T2D, $dst, $dst\t# convert 2I to 2D vector"
+ %}
+ ins_encode %{
+ __ sxtl(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg), __ T2S);
+ __ scvtfv(__ T2D, as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vcvt2Ito2F(vecD dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorCastI2X src));
+ format %{ "scvtfv T2S, $dst, $src\t# convert 2I to 2F vector" %}
+ ins_encode %{
+ __ scvtfv(__ T2S, as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vcvt4Ito4F(vecX dst, vecX src)
+%{
+ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorCastI2X src));
+ format %{ "scvtfv T4S, $dst, $src\t# convert 4I to 4F vector" %}
+ ins_encode %{
+ __ scvtfv(__ T4S, as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vcvt2Lto2D(vecX dst, vecX src)
+%{
+ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+ match(Set dst (VectorCastL2X src));
+ format %{ "scvtfv T2D, $dst, $src\t# convert 2L to 2D vector" %}
+ ins_encode %{
+ __ scvtfv(__ T2D, as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vcvt2Fto2D(vecX dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+ match(Set dst (VectorCastF2X src));
+ format %{ "fcvtl $dst, T2D, $src, T2S\t# convert 2F to 2D vector" %}
+ ins_encode %{
+ __ fcvtl(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg), __ T2S);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vcvt2Dto2F(vecD dst, vecX src)
+%{
+ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorCastD2X src));
+ format %{ "fcvtn $dst, T2S, $src, T2D\t# convert 2D to 2F vector" %}
+ ins_encode %{
+ __ fcvtn(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg), __ T2D);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vcvt2Lto2F(vecD dst, vecX src)
+%{
+ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorCastL2X src));
+ format %{ "scvtfv T2D, $dst, $src\n\t"
+ "fcvtn $dst, T2S, $dst, T2D\t# convert 2L to 2F vector"
+ %}
+ ins_encode %{
+ __ scvtfv(__ T2D, as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
+ __ fcvtn(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($dst$$reg), __ T2D);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// ------------------------------ Reduction -------------------------------
+
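+// Reduction rules fold a scalar input (isrc) and all lanes of a vector input
+// (vsrc) into a scalar result, e.g. (illustrative Vector API shape)
+// ByteVector.reduceLanes(VectorOperators.ADD). Byte/short results are
+// re-sign-extended (sxtb/sxth) so the scalar stays within the lane type's range.
+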
+instruct reduce_add8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (AddReductionVI isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "addv $tmp, T8B, $vsrc\n\t"
+ "smov $dst, $tmp, B, 0\n\t"
+ "addw $dst, $dst, $isrc\n\t"
+ "sxtb $dst, $dst\t# add reduction8B"
+ %}
+ ins_encode %{
+ __ addv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($vsrc$$reg));
+ __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
+ __ addw($dst$$Register, $dst$$Register, $isrc$$Register);
+ __ sxtb($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_add16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (AddReductionVI isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "addv $tmp, T16B, $vsrc\n\t"
+ "smov $dst, $tmp, B, 0\n\t"
+ "addw $dst, $dst, $isrc\n\t"
+ "sxtb $dst, $dst\t# add reduction16B"
+ %}
+ ins_encode %{
+ __ addv(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($vsrc$$reg));
+ __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
+ __ addw($dst$$Register, $dst$$Register, $isrc$$Register);
+ __ sxtb($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_add4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (AddReductionVI isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "addv $tmp, T4H, $vsrc\n\t"
+ "smov $dst, $tmp, H, 0\n\t"
+ "addw $dst, $dst, $isrc\n\t"
+ "sxth $dst, $dst\t# add reduction4S"
+ %}
+ ins_encode %{
+ __ addv(as_FloatRegister($tmp$$reg), __ T4H, as_FloatRegister($vsrc$$reg));
+ __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ H, 0);
+ __ addw($dst$$Register, $dst$$Register, $isrc$$Register);
+ __ sxth($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_add8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (AddReductionVI isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "addv $tmp, T8H, $vsrc\n\t"
+ "smov $dst, $tmp, H, 0\n\t"
+ "addw $dst, $dst, $isrc\n\t"
+ "sxth $dst, $dst\t# add reduction8S"
+ %}
+ ins_encode %{
+ __ addv(as_FloatRegister($tmp$$reg), __ T8H, as_FloatRegister($vsrc$$reg));
+ __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ H, 0);
+ __ addw($dst$$Register, $dst$$Register, $isrc$$Register);
+ __ sxth($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_add2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, vecX tmp)
+%{
+ match(Set dst (AddReductionVL isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "addpd $tmp, $vsrc\n\t"
+ "umov $dst, $tmp, D, 0\n\t"
+ "add $dst, $isrc, $dst\t# add reduction2L"
+ %}
+ ins_encode %{
+ __ addpd(as_FloatRegister($tmp$$reg), as_FloatRegister($vsrc$$reg));
+ __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ D, 0);
+ __ add($dst$$Register, $isrc$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
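+// Multiply reductions have no single across-lanes instruction: the rules below
+// repeatedly fold the upper half of the vector onto the lower half (ins + mulv)
+// until two lanes remain, then move those lanes to general registers and
+// multiply them together with the scalar input.
+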
+instruct reduce_mul8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD vtmp1, vecD vtmp2, iRegINoSp itmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (MulReductionVI isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp);
+ format %{ "ins $vtmp1, S, $vsrc, 0, 1\n\t"
+ "mulv $vtmp1, T8B, $vtmp1, $vsrc\n\t"
+ "ins $vtmp2, H, $vtmp1, 0, 1\n\t"
+ "mulv $vtmp2, T8B, $vtmp2, $vtmp1\n\t"
+ "umov $itmp, $vtmp2, B, 0\n\t"
+ "mulw $dst, $itmp, $isrc\n\t"
+ "sxtb $dst, $dst\n\t"
+ "umov $itmp, $vtmp2, B, 1\n\t"
+ "mulw $dst, $itmp, $dst\n\t"
+ "sxtb $dst, $dst\t# mul reduction8B"
+ %}
+ ins_encode %{
+ __ ins(as_FloatRegister($vtmp1$$reg), __ S,
+ as_FloatRegister($vsrc$$reg), 0, 1);
+ __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B,
+ as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg));
+ __ ins(as_FloatRegister($vtmp2$$reg), __ H,
+ as_FloatRegister($vtmp1$$reg), 0, 1);
+ __ mulv(as_FloatRegister($vtmp2$$reg), __ T8B,
+ as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
+ __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 0);
+ __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register);
+ __ sxtb($dst$$Register, $dst$$Register);
+ __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 1);
+ __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
+ __ sxtb($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_mul16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp1, vecX vtmp2, iRegINoSp itmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (MulReductionVI isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp);
+ format %{ "ins $vtmp1, D, $vsrc, 0, 1\n\t"
+ "mulv $vtmp1, T8B, $vtmp1, $vsrc\n\t"
+ "ins $vtmp2, S, $vtmp1, 0, 1\n\t"
+ "mulv $vtmp1, T8B, $vtmp2, $vtmp1\n\t"
+ "ins $vtmp2, H, $vtmp1, 0, 1\n\t"
+ "mulv $vtmp2, T8B, $vtmp2, $vtmp1\n\t"
+ "umov $itmp, $vtmp2, B, 0\n\t"
+ "mulw $dst, $itmp, $isrc\n\t"
+ "sxtb $dst, $dst\n\t"
+ "umov $itmp, $vtmp2, B, 1\n\t"
+ "mulw $dst, $itmp, $dst\n\t"
+ "sxtb $dst, $dst\t# mul reduction16B"
+ %}
+ ins_encode %{
+ __ ins(as_FloatRegister($vtmp1$$reg), __ D,
+ as_FloatRegister($vsrc$$reg), 0, 1);
+ __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B,
+ as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg));
+ __ ins(as_FloatRegister($vtmp2$$reg), __ S,
+ as_FloatRegister($vtmp1$$reg), 0, 1);
+ __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B,
+ as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
+ __ ins(as_FloatRegister($vtmp2$$reg), __ H,
+ as_FloatRegister($vtmp1$$reg), 0, 1);
+ __ mulv(as_FloatRegister($vtmp2$$reg), __ T8B,
+ as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
+ __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 0);
+ __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register);
+ __ sxtb($dst$$Register, $dst$$Register);
+ __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 1);
+ __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
+ __ sxtb($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_mul4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD vtmp, iRegINoSp itmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (MulReductionVI isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP vtmp, TEMP itmp);
+ format %{ "ins $vtmp, S, $vsrc, 0, 1\n\t"
+ "mulv $vtmp, T4H, $vtmp, $vsrc\n\t"
+ "umov $itmp, $vtmp, H, 0\n\t"
+ "mulw $dst, $itmp, $isrc\n\t"
+ "sxth $dst, $dst\n\t"
+ "umov $itmp, $vtmp, H, 1\n\t"
+ "mulw $dst, $itmp, $dst\n\t"
+ "sxth $dst, $dst\t# mul reduction4S"
+ %}
+ ins_encode %{
+ __ ins(as_FloatRegister($vtmp$$reg), __ S,
+ as_FloatRegister($vsrc$$reg), 0, 1);
+ __ mulv(as_FloatRegister($vtmp$$reg), __ T4H,
+ as_FloatRegister($vtmp$$reg), as_FloatRegister($vsrc$$reg));
+ __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ H, 0);
+ __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register);
+ __ sxth($dst$$Register, $dst$$Register);
+ __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ H, 1);
+ __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
+ __ sxth($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_mul8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp1, vecX vtmp2, iRegINoSp itmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (MulReductionVI isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp);
+ format %{ "ins $vtmp1, D, $vsrc, 0, 1\n\t"
+ "mulv $vtmp1, T4H, $vtmp1, $vsrc\n\t"
+ "ins $vtmp2, S, $vtmp1, 0, 1\n\t"
+ "mulv $vtmp2, T4H, $vtmp2, $vtmp1\n\t"
+ "umov $itmp, $vtmp2, H, 0\n\t"
+ "mulw $dst, $itmp, $isrc\n\t"
+ "sxth $dst, $dst\n\t"
+ "umov $itmp, $vtmp2, H, 1\n\t"
+ "mulw $dst, $itmp, $dst\n\t"
+ "sxth $dst, $dst\t# mul reduction8S"
+ %}
+ ins_encode %{
+ __ ins(as_FloatRegister($vtmp1$$reg), __ D,
+ as_FloatRegister($vsrc$$reg), 0, 1);
+ __ mulv(as_FloatRegister($vtmp1$$reg), __ T4H,
+ as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg));
+ __ ins(as_FloatRegister($vtmp2$$reg), __ S,
+ as_FloatRegister($vtmp1$$reg), 0, 1);
+ __ mulv(as_FloatRegister($vtmp2$$reg), __ T4H,
+ as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
+ __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ H, 0);
+ __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register);
+ __ sxth($dst$$Register, $dst$$Register);
+ __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ H, 1);
+ __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
+ __ sxth($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_mul2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp)
+%{
+ match(Set dst (MulReductionVL isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, D, 0\n\t"
+ "mul $dst, $isrc, $tmp\n\t"
+ "umov $tmp, $vsrc, D, 1\n\t"
+ "mul $dst, $dst, $tmp\t# mul reduction2L"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
+ __ mul($dst$$Register, $isrc$$Register, $tmp$$Register);
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
+ __ mul($dst$$Register, $dst$$Register, $tmp$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
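+// Integer min/max reductions use the across-lanes smaxv/sminv, move lane 0 to
+// a general register, and csel it against the scalar input. The 2I forms first
+// dup the 64-bit source across a 128-bit temp so all four S lanes are valid;
+// 2L has no across-lanes form and is handled entirely with umov/cmp/csel.
+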
+instruct reduce_max8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (MaxReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "smaxv $tmp, T8B, $vsrc\n\t"
+ "smov $dst, $tmp, B, 0\n\t"
+ "cmpw $dst, $isrc\n\t"
+ "cselw $dst, $dst, $isrc GT\t# max reduction8B"
+ %}
+ ins_encode %{
+ __ smaxv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($vsrc$$reg));
+ __ smov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ B, 0);
+ __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
+ __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::GT);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_max16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (MaxReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "smaxv $tmp, T16B, $vsrc\n\t"
+ "smov $dst, $tmp, B, 0\n\t"
+ "cmpw $dst, $isrc\n\t"
+ "cselw $dst, $dst, $isrc GT\t# max reduction16B"
+ %}
+ ins_encode %{
+ __ smaxv(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($vsrc$$reg));
+ __ smov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ B, 0);
+ __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
+ __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::GT);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_max4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (MaxReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "smaxv $tmp, T4H, $vsrc\n\t"
+ "smov $dst, $tmp, H, 0\n\t"
+ "cmpw $dst, $isrc\n\t"
+ "cselw $dst, $dst, $isrc GT\t# max reduction4S"
+ %}
+ ins_encode %{
+ __ smaxv(as_FloatRegister($tmp$$reg), __ T4H, as_FloatRegister($vsrc$$reg));
+ __ smov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ H, 0);
+ __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
+ __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::GT);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_max8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (MaxReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "smaxv $tmp, T8H, $vsrc\n\t"
+ "smov $dst, $tmp, H, 0\n\t"
+ "cmpw $dst, $isrc\n\t"
+ "cselw $dst, $dst, $isrc GT\t# max reduction8S"
+ %}
+ ins_encode %{
+ __ smaxv(as_FloatRegister($tmp$$reg), __ T8H, as_FloatRegister($vsrc$$reg));
+ __ smov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ H, 0);
+ __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
+ __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::GT);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_max4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (MaxReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "smaxv $tmp, T4S, $vsrc\n\t"
+ "umov $dst, $tmp, S, 0\n\t"
+ "cmpw $dst, $isrc\n\t"
+ "cselw $dst, $dst, $isrc GT\t# max reduction4I"
+ %}
+ ins_encode %{
+ __ smaxv(as_FloatRegister($tmp$$reg), __ T4S, as_FloatRegister($vsrc$$reg));
+ __ umov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ S, 0);
+ __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
+ __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::GT);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_min8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (MinReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "sminv $tmp, T8B, $vsrc\n\t"
+ "smov $dst, $tmp, B, 0\n\t"
+ "cmpw $dst, $isrc\n\t"
+ "cselw $dst, $dst, $isrc LT\t# min reduction8B"
+ %}
+ ins_encode %{
+ __ sminv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($vsrc$$reg));
+ __ smov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ B, 0);
+ __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
+ __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::LT);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_min16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (MinReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "sminv $tmp, T16B, $vsrc\n\t"
+ "smov $dst, $tmp, B, 0\n\t"
+ "cmpw $dst, $isrc\n\t"
+ "cselw $dst, $dst, $isrc LT\t# min reduction16B"
+ %}
+ ins_encode %{
+ __ sminv(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($vsrc$$reg));
+ __ smov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ B, 0);
+ __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
+ __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::LT);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_min4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (MinReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "sminv $tmp, T4H, $vsrc\n\t"
+ "smov $dst, $tmp, H, 0\n\t"
+ "cmpw $dst, $isrc\n\t"
+ "cselw $dst, $dst, $isrc LT\t# min reduction4S"
+ %}
+ ins_encode %{
+ __ sminv(as_FloatRegister($tmp$$reg), __ T4H, as_FloatRegister($vsrc$$reg));
+ __ smov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ H, 0);
+ __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
+ __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::LT);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_min8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (MinReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "sminv $tmp, T8H, $vsrc\n\t"
+ "smov $dst, $tmp, H, 0\n\t"
+ "cmpw $dst, $isrc\n\t"
+ "cselw $dst, $dst, $isrc LT\t# min reduction8S"
+ %}
+ ins_encode %{
+ __ sminv(as_FloatRegister($tmp$$reg), __ T8H, as_FloatRegister($vsrc$$reg));
+ __ smov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ H, 0);
+ __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
+ __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::LT);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_min4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (MinReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "sminv $tmp, T4S, $vsrc\n\t"
+ "umov $dst, $tmp, S, 0\n\t"
+ "cmpw $dst, $isrc\n\t"
+ "cselw $dst, $dst, $isrc LT\t# min reduction4I"
+ %}
+ ins_encode %{
+ __ sminv(as_FloatRegister($tmp$$reg), __ T4S, as_FloatRegister($vsrc$$reg));
+ __ umov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ S, 0);
+ __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
+ __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::LT);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_max2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecX tmp, rFlagsReg cr)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (MaxReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "dup $tmp, T2D, $vsrc\n\t"
+ "smaxv $tmp, T4S, $tmp\n\t"
+ "umov $dst, $tmp, S, 0\n\t"
+ "cmpw $dst, $isrc\n\t"
+ "cselw $dst, $dst, $isrc GT\t# max reduction2I"
+ %}
+ ins_encode %{
+ __ dup(as_FloatRegister($tmp$$reg), __ T2D, as_FloatRegister($vsrc$$reg));
+ __ smaxv(as_FloatRegister($tmp$$reg), __ T4S, as_FloatRegister($tmp$$reg));
+ __ umov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ S, 0);
+ __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
+ __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::GT);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_min2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecX tmp, rFlagsReg cr)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (MinReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "dup $tmp, T2D, $vsrc\n\t"
+ "sminv $tmp, T4S, $tmp\n\t"
+ "umov $dst, $tmp, S, 0\n\t"
+ "cmpw $dst, $isrc\n\t"
+ "cselw $dst, $dst, $isrc LT\t# min reduction2I"
+ %}
+ ins_encode %{
+ __ dup(as_FloatRegister($tmp$$reg), __ T2D, as_FloatRegister($vsrc$$reg));
+ __ sminv(as_FloatRegister($tmp$$reg), __ T4S, as_FloatRegister($tmp$$reg));
+ __ umov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ S, 0);
+ __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
+ __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::LT);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_max2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp, rFlagsReg cr)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst (MaxReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "umov $tmp, $vsrc, D, 0\n\t"
+ "cmp $isrc,$tmp\n\t"
+ "csel $dst, $isrc, $tmp GT\n\t"
+ "umov $tmp, $vsrc, D, 1\n\t"
+ "cmp $dst, $tmp\n\t"
+ "csel $dst, $dst, $tmp GT\t# max reduction2L"
+ %}
+ ins_encode %{
+ __ umov(as_Register($tmp$$reg), as_FloatRegister($vsrc$$reg), __ D, 0);
+ __ cmp(as_Register($isrc$$reg), as_Register($tmp$$reg));
+ __ csel(as_Register($dst$$reg), as_Register($isrc$$reg), as_Register($tmp$$reg), Assembler::GT);
+ __ umov(as_Register($tmp$$reg), as_FloatRegister($vsrc$$reg), __ D, 1);
+ __ cmp(as_Register($dst$$reg), as_Register($tmp$$reg));
+ __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($tmp$$reg), Assembler::GT);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_min2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp, rFlagsReg cr)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst (MinReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "umov $tmp, $vsrc, D, 0\n\t"
+ "cmp $isrc,$tmp\n\t"
+ "csel $dst, $isrc, $tmp LT\n\t"
+ "umov $tmp, $vsrc, D, 1\n\t"
+ "cmp $dst, $tmp\n\t"
+ "csel $dst, $dst, $tmp LT\t# min reduction2L"
+ %}
+ ins_encode %{
+ __ umov(as_Register($tmp$$reg), as_FloatRegister($vsrc$$reg), __ D, 0);
+ __ cmp(as_Register($isrc$$reg), as_Register($tmp$$reg));
+ __ csel(as_Register($dst$$reg), as_Register($isrc$$reg), as_Register($tmp$$reg), Assembler::LT);
+ __ umov(as_Register($tmp$$reg), as_FloatRegister($vsrc$$reg), __ D, 1);
+ __ cmp(as_Register($dst$$reg), as_Register($tmp$$reg));
+ __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($tmp$$reg), Assembler::LT);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
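+// Bitwise and/or/xor reductions are computed in general registers: the vector
+// is extracted with umov, the two halves are combined, then folded with
+// shifted operands (LSR #32/#16/#8) down to one lane before being merged with
+// the scalar input and, for byte/short lanes, sign-extended.
+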
+instruct reduce_and8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (AndReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, S, 0\n\t"
+ "umov $dst, $vsrc, S, 1\n\t"
+ "andw $dst, $dst, $tmp\n\t"
+ "andw $dst, $dst, $dst, LSR #16\n\t"
+ "andw $dst, $dst, $dst, LSR #8\n\t"
+ "andw $dst, $isrc, $dst\n\t"
+ "sxtb $dst, $dst\t# and reduction8B"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
+ __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
+ __ andw($dst$$Register, $dst$$Register, $tmp$$Register);
+ __ andw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
+ __ andw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8);
+ __ andw($dst$$Register, $isrc$$Register, $dst$$Register);
+ __ sxtb($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_orr8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (OrReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, S, 0\n\t"
+ "umov $dst, $vsrc, S, 1\n\t"
+ "orrw $dst, $dst, $tmp\n\t"
+ "orrw $dst, $dst, $dst, LSR #16\n\t"
+ "orrw $dst, $dst, $dst, LSR #8\n\t"
+ "orrw $dst, $isrc, $dst\n\t"
+ "sxtb $dst, $dst\t# orr reduction8B"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
+ __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
+ __ orrw($dst$$Register, $dst$$Register, $tmp$$Register);
+ __ orrw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
+ __ orrw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8);
+ __ orrw($dst$$Register, $isrc$$Register, $dst$$Register);
+ __ sxtb($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_eor8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (XorReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, S, 0\n\t"
+ "umov $dst, $vsrc, S, 1\n\t"
+ "eorw $dst, $dst, $tmp\n\t"
+ "eorw $dst, $dst, $dst, LSR #16\n\t"
+ "eorw $dst, $dst, $dst, LSR #8\n\t"
+ "eorw $dst, $isrc, $dst\n\t"
+ "sxtb $dst, $dst\t# eor reduction8B"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
+ __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
+ __ eorw($dst$$Register, $dst$$Register, $tmp$$Register);
+ __ eorw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
+ __ eorw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8);
+ __ eorw($dst$$Register, $isrc$$Register, $dst$$Register);
+ __ sxtb($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_and16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (AndReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, D, 0\n\t"
+ "umov $dst, $vsrc, D, 1\n\t"
+ "andr $dst, $dst, $tmp\n\t"
+ "andr $dst, $dst, $dst, LSR #32\n\t"
+ "andw $dst, $dst, $dst, LSR #16\n\t"
+ "andw $dst, $dst, $dst, LSR #8\n\t"
+ "andw $dst, $isrc, $dst\n\t"
+ "sxtb $dst, $dst\t# and reduction16B"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
+ __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
+ __ andr($dst$$Register, $dst$$Register, $tmp$$Register);
+ __ andr($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);
+ __ andw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
+ __ andw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8);
+ __ andw($dst$$Register, $isrc$$Register, $dst$$Register);
+ __ sxtb($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_orr16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (OrReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, D, 0\n\t"
+ "umov $dst, $vsrc, D, 1\n\t"
+ "orr $dst, $dst, $tmp\n\t"
+ "orr $dst, $dst, $dst, LSR #32\n\t"
+ "orrw $dst, $dst, $dst, LSR #16\n\t"
+ "orrw $dst, $dst, $dst, LSR #8\n\t"
+ "orrw $dst, $isrc, $dst\n\t"
+ "sxtb $dst, $dst\t# orr reduction16B"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
+ __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
+ __ orr ($dst$$Register, $dst$$Register, $tmp$$Register);
+ __ orr ($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);
+ __ orrw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
+ __ orrw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8);
+ __ orrw($dst$$Register, $isrc$$Register, $dst$$Register);
+ __ sxtb($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_eor16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (XorReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, D, 0\n\t"
+ "umov $dst, $vsrc, D, 1\n\t"
+ "eor $dst, $dst, $tmp\n\t"
+ "eor $dst, $dst, $dst, LSR #32\n\t"
+ "eorw $dst, $dst, $dst, LSR #16\n\t"
+ "eorw $dst, $dst, $dst, LSR #8\n\t"
+ "eorw $dst, $isrc, $dst\n\t"
+ "sxtb $dst, $dst\t# eor reduction16B"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
+ __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
+ __ eor ($dst$$Register, $dst$$Register, $tmp$$Register);
+ __ eor ($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);
+ __ eorw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
+ __ eorw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8);
+ __ eorw($dst$$Register, $isrc$$Register, $dst$$Register);
+ __ sxtb($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_and4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (AndReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, S, 0\n\t"
+ "umov $dst, $vsrc, S, 1\n\t"
+ "andw $dst, $dst, $tmp\n\t"
+ "andw $dst, $dst, $dst, LSR #16\n\t"
+ "andw $dst, $isrc, $dst\n\t"
+ "sxth $dst, $dst\t# and reduction4S"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
+ __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
+ __ andw($dst$$Register, $dst$$Register, $tmp$$Register);
+ __ andw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
+ __ andw($dst$$Register, $isrc$$Register, $dst$$Register);
+ __ sxth($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_orr4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (OrReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, S, 0\n\t"
+ "umov $dst, $vsrc, S, 1\n\t"
+ "orrw $dst, $dst, $tmp\n\t"
+ "orrw $dst, $dst, $dst, LSR #16\n\t"
+ "orrw $dst, $isrc, $dst\n\t"
+ "sxth $dst, $dst\t# orr reduction4S"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
+ __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
+ __ orrw($dst$$Register, $dst$$Register, $tmp$$Register);
+ __ orrw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
+ __ orrw($dst$$Register, $isrc$$Register, $dst$$Register);
+ __ sxth($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_eor4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (XorReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, S, 0\n\t"
+ "umov $dst, $vsrc, S, 1\n\t"
+ "eorw $dst, $dst, $tmp\n\t"
+ "eorw $dst, $dst, $dst, LSR #16\n\t"
+ "eorw $dst, $isrc, $dst\n\t"
+ "sxth $dst, $dst\t# eor reduction4S"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
+ __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
+ __ eorw($dst$$Register, $dst$$Register, $tmp$$Register);
+ __ eorw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
+ __ eorw($dst$$Register, $isrc$$Register, $dst$$Register);
+ __ sxth($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_and8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (AndReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, D, 0\n\t"
+ "umov $dst, $vsrc, D, 1\n\t"
+ "andr $dst, $dst, $tmp\n\t"
+ "andr $dst, $dst, $dst, LSR #32\n\t"
+ "andw $dst, $dst, $dst, LSR #16\n\t"
+ "andw $dst, $isrc, $dst\n\t"
+ "sxth $dst, $dst\t# and reduction8S"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
+ __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
+ __ andr($dst$$Register, $dst$$Register, $tmp$$Register);
+ __ andr($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);
+ __ andw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
+ __ andw($dst$$Register, $isrc$$Register, $dst$$Register);
+ __ sxth($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_orr8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (OrReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, D, 0\n\t"
+ "umov $dst, $vsrc, D, 1\n\t"
+ "orr $dst, $dst, $tmp\n\t"
+ "orr $dst, $dst, $dst, LSR #32\n\t"
+ "orrw $dst, $dst, $dst, LSR #16\n\t"
+ "orrw $dst, $isrc, $dst\n\t"
+ "sxth $dst, $dst\t# orr reduction8S"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
+ __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
+ __ orr ($dst$$Register, $dst$$Register, $tmp$$Register);
+ __ orr ($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);
+ __ orrw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
+ __ orrw($dst$$Register, $isrc$$Register, $dst$$Register);
+ __ sxth($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_eor8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (XorReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, D, 0\n\t"
+ "umov $dst, $vsrc, D, 1\n\t"
+ "eor $dst, $dst, $tmp\n\t"
+ "eor $dst, $dst, $dst, LSR #32\n\t"
+ "eorw $dst, $dst, $dst, LSR #16\n\t"
+ "eorw $dst, $isrc, $dst\n\t"
+ "sxth $dst, $dst\t# eor reduction8S"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
+ __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
+ __ eor ($dst$$Register, $dst$$Register, $tmp$$Register);
+ __ eor ($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);
+ __ eorw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
+ __ eorw($dst$$Register, $isrc$$Register, $dst$$Register);
+ __ sxth($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_and2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (AndReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, S, 0\n\t"
+ "andw $dst, $tmp, $isrc\n\t"
+ "umov $tmp, $vsrc, S, 1\n\t"
+ "andw $dst, $tmp, $dst\t# and reduction2I"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
+ __ andw($dst$$Register, $tmp$$Register, $isrc$$Register);
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
+ __ andw($dst$$Register, $tmp$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_orr2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (OrReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, S, 0\n\t"
+ "orrw $dst, $tmp, $isrc\n\t"
+ "umov $tmp, $vsrc, S, 1\n\t"
+ "orrw $dst, $tmp, $dst\t# orr reduction2I"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
+ __ orrw($dst$$Register, $tmp$$Register, $isrc$$Register);
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
+ __ orrw($dst$$Register, $tmp$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_eor2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (XorReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, S, 0\n\t"
+ "eorw $dst, $tmp, $isrc\n\t"
+ "umov $tmp, $vsrc, S, 1\n\t"
+ "eorw $dst, $tmp, $dst\t# eor reduction2I"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
+ __ eorw($dst$$Register, $tmp$$Register, $isrc$$Register);
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
+ __ eorw($dst$$Register, $tmp$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_and4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (AndReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, D, 0\n\t"
+ "umov $dst, $vsrc, D, 1\n\t"
+ "andr $dst, $dst, $tmp\n\t"
+ "andr $dst, $dst, $dst, LSR #32\n\t"
+ "andw $dst, $isrc, $dst\t# and reduction4I"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
+ __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
+ __ andr($dst$$Register, $dst$$Register, $tmp$$Register);
+ __ andr($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);
+ __ andw($dst$$Register, $isrc$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_orr4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (OrReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, D, 0\n\t"
+ "umov $dst, $vsrc, D, 1\n\t"
+ "orr $dst, $dst, $tmp\n\t"
+ "orr $dst, $dst, $dst, LSR #32\n\t"
+ "orrw $dst, $isrc, $dst\t# orr reduction4I"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
+ __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
+ __ orr ($dst$$Register, $dst$$Register, $tmp$$Register);
+ __ orr ($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);
+ __ orrw($dst$$Register, $isrc$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_eor4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (XorReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, D, 0\n\t"
+ "umov $dst, $vsrc, D, 1\n\t"
+ "eor $dst, $dst, $tmp\n\t"
+ "eor $dst, $dst, $dst, LSR #32\n\t"
+ "eorw $dst, $isrc, $dst\t# eor reduction4I"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
+ __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
+ __ eor ($dst$$Register, $dst$$Register, $tmp$$Register);
+ __ eor ($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);
+ __ eorw($dst$$Register, $isrc$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_and2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst (AndReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, D, 0\n\t"
+ "andr $dst, $isrc, $tmp\n\t"
+ "umov $tmp, $vsrc, D, 1\n\t"
+ "andr $dst, $dst, $tmp\t# and reduction2L"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
+ __ andr($dst$$Register, $isrc$$Register, $tmp$$Register);
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
+ __ andr($dst$$Register, $dst$$Register, $tmp$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_orr2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst (OrReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, D, 0\n\t"
+ "orr $dst, $isrc, $tmp\n\t"
+ "umov $tmp, $vsrc, D, 1\n\t"
+ "orr $dst, $dst, $tmp\t# orr reduction2L"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
+ __ orr ($dst$$Register, $isrc$$Register, $tmp$$Register);
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
+ __ orr ($dst$$Register, $dst$$Register, $tmp$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_eor2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst (XorReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, D, 0\n\t"
+ "eor $dst, $isrc, $tmp\n\t"
+ "umov $tmp, $vsrc, D, 1\n\t"
+ "eor $dst, $dst, $tmp\t# eor reduction2L"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
+ __ eor ($dst$$Register, $isrc$$Register, $tmp$$Register);
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
+ __ eor ($dst$$Register, $dst$$Register, $tmp$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// ------------------------------ Vector insert ---------------------------------
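+// Each insert pattern first copies $src into $dst with a self-orr (a plain
+// register move; the integral forms skip it when the two registers already
+// match), then writes the scalar into lane $idx: "mov" from a general
+// register for integral element types, "ins" from an FP register for
+// float/double.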
+
+instruct insert8B(vecD dst, vecD src, iRegIorL2I val, immI idx)
+%{
+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorInsert (Binary src val) idx));
+ ins_cost(INSN_COST);
+ format %{ "orr $dst, T8B, $src, $src\n\t"
+ "mov $dst, T8B, $idx, $val\t# insert into vector(8B)" %}
+ ins_encode %{
+ if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
+ __ orr(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
+ }
+ __ mov(as_FloatRegister($dst$$reg), __ T8B, $idx$$constant, $val$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct insert16B(vecX dst, vecX src, iRegIorL2I val, immI idx)
+%{
+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorInsert (Binary src val) idx));
+ ins_cost(INSN_COST);
+ format %{ "orr $dst, T16B, $src, $src\n\t"
+ "mov $dst, T16B, $idx, $val\t# insert into vector(16B)" %}
+ ins_encode %{
+ if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
+ __ orr(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
+ }
+ __ mov(as_FloatRegister($dst$$reg), __ T16B, $idx$$constant, $val$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct insert4S(vecD dst, vecD src, iRegIorL2I val, immI idx)
+%{
+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorInsert (Binary src val) idx));
+ ins_cost(INSN_COST);
+ format %{ "orr $dst, T8B, $src, $src\n\t"
+ "mov $dst, T4H, $idx, $val\t# insert into vector(4S)" %}
+ ins_encode %{
+ if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
+ __ orr(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
+ }
+ __ mov(as_FloatRegister($dst$$reg), __ T4H, $idx$$constant, $val$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct insert8S(vecX dst, vecX src, iRegIorL2I val, immI idx)
+%{
+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorInsert (Binary src val) idx));
+ ins_cost(INSN_COST);
+ format %{ "orr $dst, T16B, $src, $src\n\t"
+ "mov $dst, T8H, $idx, $val\t# insert into vector(8S)" %}
+ ins_encode %{
+ if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
+ __ orr(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
+ }
+ __ mov(as_FloatRegister($dst$$reg), __ T8H, $idx$$constant, $val$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct insert2I(vecD dst, vecD src, iRegIorL2I val, immI idx)
+%{
+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (VectorInsert (Binary src val) idx));
+ ins_cost(INSN_COST);
+ format %{ "orr $dst, T8B, $src, $src\n\t"
+ "mov $dst, T2S, $idx, $val\t# insert into vector(2I)" %}
+ ins_encode %{
+ if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
+ __ orr(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
+ }
+ __ mov(as_FloatRegister($dst$$reg), __ T2S, $idx$$constant, $val$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct insert4I(vecX dst, vecX src, iRegIorL2I val, immI idx)
+%{
+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (VectorInsert (Binary src val) idx));
+ ins_cost(INSN_COST);
+ format %{ "orr $dst, T16B, $src, $src\n\t"
+ "mov $dst, T4S, $idx, $val\t# insert into vector(4I)" %}
+ ins_encode %{
+ if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
+ __ orr(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
+ }
+ __ mov(as_FloatRegister($dst$$reg), __ T4S, $idx$$constant, $val$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct insert2L(vecX dst, vecX src, iRegL val, immI idx)
+%{
+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst (VectorInsert (Binary src val) idx));
+ ins_cost(INSN_COST);
+ format %{ "orr $dst, T16B, $src, $src\n\t"
+ "mov $dst, T2D, $idx, $val\t# insert into vector(2L)" %}
+ ins_encode %{
+ if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
+ __ orr(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
+ }
+ __ mov(as_FloatRegister($dst$$reg), __ T2D, $idx$$constant, $val$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct insert2F(vecD dst, vecD src, vRegF val, immI idx)
+%{
+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorInsert (Binary src val) idx));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst);
+ format %{ "orr $dst, T8B, $src, $src\n\t"
+ "ins $dst, S, $val, $idx, 0\t# insert into vector(2F)" %}
+ ins_encode %{
+ __ orr(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
+ __ ins(as_FloatRegister($dst$$reg), __ S,
+ as_FloatRegister($val$$reg), $idx$$constant, 0);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct insert4F(vecX dst, vecX src, vRegF val, immI idx)
+%{
+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorInsert (Binary src val) idx));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst);
+ format %{ "orr $dst, T16B, $src, $src\n\t"
+ "ins $dst, S, $val, $idx, 0\t# insert into vector(4F)" %}
+ ins_encode %{
+ __ orr(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
+ __ ins(as_FloatRegister($dst$$reg), __ S,
+ as_FloatRegister($val$$reg), $idx$$constant, 0);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct insert2D(vecX dst, vecX src, vRegD val, immI idx)
+%{
+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+ match(Set dst (VectorInsert (Binary src val) idx));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst);
+ format %{ "orr $dst, T16B, $src, $src\n\t"
+ "ins $dst, D, $val, $idx, 0\t# insert into vector(2D)" %}
+ ins_encode %{
+ __ orr(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
+ __ ins(as_FloatRegister($dst$$reg), __ D,
+ as_FloatRegister($val$$reg), $idx$$constant, 0);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// ------------------------------ Vector extract ---------------------------------
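+// Byte and short lanes are read with "smov", which sign-extends into the
+// destination GPR; int and long lanes use "umov". The float/double forms use
+// "ins" to copy lane $idx of $src into lane 0 of the scalar FP register $dst.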
+
+instruct extract8B(iRegINoSp dst, vecD src, immI idx)
+%{
+ predicate(n->in(1)->bottom_type()->is_vect()->length() == 8);
+ match(Set dst (ExtractB src idx));
+ ins_cost(INSN_COST);
+ format %{ "smov $dst, $src, B, $idx\t# extract from vector(8B)" %}
+ ins_encode %{
+ __ smov($dst$$Register, as_FloatRegister($src$$reg), __ B, $idx$$constant);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct extract16B(iRegINoSp dst, vecX src, immI idx)
+%{
+ predicate(n->in(1)->bottom_type()->is_vect()->length() == 16);
+ match(Set dst (ExtractB src idx));
+ ins_cost(INSN_COST);
+ format %{ "smov $dst, $src, B, $idx\t# extract from vector(16B)" %}
+ ins_encode %{
+ __ smov($dst$$Register, as_FloatRegister($src$$reg), __ B, $idx$$constant);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct extract4S(iRegINoSp dst, vecD src, immI idx)
+%{
+ predicate(n->in(1)->bottom_type()->is_vect()->length() == 4);
+ match(Set dst (ExtractS src idx));
+ ins_cost(INSN_COST);
+ format %{ "smov $dst, $src, H, $idx\t# extract from vector(4S)" %}
+ ins_encode %{
+ __ smov($dst$$Register, as_FloatRegister($src$$reg), __ H, $idx$$constant);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct extract8S(iRegINoSp dst, vecX src, immI idx)
+%{
+ predicate(n->in(1)->bottom_type()->is_vect()->length() == 8);
+ match(Set dst (ExtractS src idx));
+ ins_cost(INSN_COST);
+ format %{ "smov $dst, $src, H, $idx\t# extract from vector(8S)" %}
+ ins_encode %{
+ __ smov($dst$$Register, as_FloatRegister($src$$reg), __ H, $idx$$constant);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct extract2I(iRegINoSp dst, vecD src, immI idx)
+%{
+ predicate(n->in(1)->bottom_type()->is_vect()->length() == 2);
+ match(Set dst (ExtractI src idx));
+ ins_cost(INSN_COST);
+ format %{ "umov $dst, $src, S, $idx\t# extract from vector(2I)" %}
+ ins_encode %{
+ __ umov($dst$$Register, as_FloatRegister($src$$reg), __ S, $idx$$constant);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct extract4I(iRegINoSp dst, vecX src, immI idx)
+%{
+ predicate(n->in(1)->bottom_type()->is_vect()->length() == 4);
+ match(Set dst (ExtractI src idx));
+ ins_cost(INSN_COST);
+ format %{ "umov $dst, $src, S, $idx\t# extract from vector(4I)" %}
+ ins_encode %{
+ __ umov($dst$$Register, as_FloatRegister($src$$reg), __ S, $idx$$constant);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct extract2L(iRegLNoSp dst, vecX src, immI idx)
+%{
+ predicate(n->in(1)->bottom_type()->is_vect()->length() == 2);
+ match(Set dst (ExtractL src idx));
+ ins_cost(INSN_COST);
+ format %{ "umov $dst, $src, D, $idx\t# extract from vector(2L)" %}
+ ins_encode %{
+ __ umov($dst$$Register, as_FloatRegister($src$$reg), __ D, $idx$$constant);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct extract2F(vRegF dst, vecD src, immI idx)
+%{
+ predicate(n->in(1)->bottom_type()->is_vect()->length() == 2);
+ match(Set dst (ExtractF src idx));
+ ins_cost(INSN_COST);
+ format %{ "ins $dst, S, $src, 0, $idx\t# extract from vector(2F)" %}
+ ins_encode %{
+ __ ins(as_FloatRegister($dst$$reg), __ S,
+ as_FloatRegister($src$$reg), 0, $idx$$constant);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct extract4F(vRegF dst, vecX src, immI idx)
+%{
+ predicate(n->in(1)->bottom_type()->is_vect()->length() == 4);
+ match(Set dst (ExtractF src idx));
+ ins_cost(INSN_COST);
+ format %{ "ins $dst, S, $src, 0, $idx\t# extract from vector(4F)" %}
+ ins_encode %{
+ __ ins(as_FloatRegister($dst$$reg), __ S,
+ as_FloatRegister($src$$reg), 0, $idx$$constant);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct extract2D(vRegD dst, vecX src, immI idx)
+%{
+ predicate(n->in(1)->bottom_type()->is_vect()->length() == 2);
+ match(Set dst (ExtractD src idx));
+ ins_cost(INSN_COST);
+ format %{ "ins $dst, D, $src, 0, $idx\t# extract from vector(2D)" %}
+ ins_encode %{
+ __ ins(as_FloatRegister($dst$$reg), __ D,
+ as_FloatRegister($src$$reg), 0, $idx$$constant);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+// ------------------------------ Vector comparison ---------------------------------
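+// Integer lanes use cmeq/cmgt/cmge and floating-point lanes fcmeq/fcmgt/fcmge.
+// The $cond operand is matched but not used by the encodings: the predicate
+// already pins the BoolTest condition. ne is synthesized as eq followed by a
+// bitwise not of the mask; lt and le reuse gt and ge with the sources swapped.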
+
+instruct vcmeq8B(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 8 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmeq $dst, $src1, $src2\t# vector cmp (8B)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmeq(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop64);
+%}
+
+instruct vcmeq16B(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 16 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmeq $dst, $src1, $src2\t# vector cmp (16B)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmeq(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmeq4S(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmeq $dst, $src1, $src2\t# vector cmp (4S)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmeq(as_FloatRegister($dst$$reg), __ T4H,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop64);
+%}
+
+instruct vcmeq8S(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 8 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmeq $dst, $src1, $src2\t# vector cmp (8S)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmeq(as_FloatRegister($dst$$reg), __ T8H,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmeq2I(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmeq $dst, $src1, $src2\t# vector cmp (2I)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmeq(as_FloatRegister($dst$$reg), __ T2S,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop64);
+%}
+
+instruct vcmeq4I(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmeq $dst, $src1, $src2\t# vector cmp (4I)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmeq(as_FloatRegister($dst$$reg), __ T4S,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmeq2L(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmeq $dst, $src1, $src2\t# vector cmp (2L)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmeq(as_FloatRegister($dst$$reg), __ T2D,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmeq2F(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "fcmeq $dst, $src1, $src2\t# vector cmp (2F)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ fcmeq(as_FloatRegister($dst$$reg), __ T2S,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop64);
+%}
+
+instruct vcmeq4F(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "fcmeq $dst, $src1, $src2\t# vector cmp (4F)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ fcmeq(as_FloatRegister($dst$$reg), __ T4S,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmeq2D(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "fcmeq $dst, $src1, $src2\t# vector cmp (2D)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ fcmeq(as_FloatRegister($dst$$reg), __ T2D,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmgt8B(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 8 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmgt $dst, $src1, $src2\t# vector cmp (8B)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmgt(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop64);
+%}
+
+instruct vcmgt16B(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 16 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmgt $dst, $src1, $src2\t# vector cmp (16B)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmgt(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmgt4S(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmgt $dst, $src1, $src2\t# vector cmp (4S)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmgt(as_FloatRegister($dst$$reg), __ T4H,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop64);
+%}
+
+instruct vcmgt8S(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 8 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmgt $dst, $src1, $src2\t# vector cmp (8S)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmgt(as_FloatRegister($dst$$reg), __ T8H,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmgt2I(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmgt $dst, $src1, $src2\t# vector cmp (2I)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmgt(as_FloatRegister($dst$$reg), __ T2S,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop64);
+%}
+
+instruct vcmgt4I(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmgt $dst, $src1, $src2\t# vector cmp (4I)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmgt(as_FloatRegister($dst$$reg), __ T4S,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmgt2L(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmgt $dst, $src1, $src2\t# vector cmp (2L)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmgt(as_FloatRegister($dst$$reg), __ T2D,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmgt2F(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "fcmgt $dst, $src1, $src2\t# vector cmp (2F)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ fcmgt(as_FloatRegister($dst$$reg), __ T2S,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop64);
+%}
+
+instruct vcmgt4F(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "fcmgt $dst, $src1, $src2\t# vector cmp (4F)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ fcmgt(as_FloatRegister($dst$$reg), __ T4S,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmgt2D(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "fcmgt $dst, $src1, $src2\t# vector cmp (2D)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ fcmgt(as_FloatRegister($dst$$reg), __ T2D,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmge8B(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 8 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmge $dst, $src1, $src2\t# vector cmp (8B)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmge(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop64);
+%}
+
+instruct vcmge16B(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 16 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmge $dst, $src1, $src2\t# vector cmp (16B)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmge(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmge4S(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmge $dst, $src1, $src2\t# vector cmp (4S)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmge(as_FloatRegister($dst$$reg), __ T4H,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop64);
+%}
+
+instruct vcmge8S(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 8 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmge $dst, $src1, $src2\t# vector cmp (8S)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmge(as_FloatRegister($dst$$reg), __ T8H,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmge2I(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmge $dst, $src1, $src2\t# vector cmp (2I)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmge(as_FloatRegister($dst$$reg), __ T2S,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop64);
+%}
+
+instruct vcmge4I(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmge $dst, $src1, $src2\t# vector cmp (4I)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmge(as_FloatRegister($dst$$reg), __ T4S,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmge2L(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmge $dst, $src1, $src2\t# vector cmp (2L)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmge(as_FloatRegister($dst$$reg), __ T2D,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmge2F(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "fcmge $dst, $src1, $src2\t# vector cmp (2F)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ fcmge(as_FloatRegister($dst$$reg), __ T2S,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop64);
+%}
+
+instruct vcmge4F(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "fcmge $dst, $src1, $src2\t# vector cmp (4F)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ fcmge(as_FloatRegister($dst$$reg), __ T4S,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmge2D(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "fcmge $dst, $src1, $src2\t# vector cmp (2D)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ fcmge(as_FloatRegister($dst$$reg), __ T2D,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmne8B(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 8 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmeq $dst, $src1, $src2\n\t# vector cmp (8B)"
+ "not $dst, $dst\t" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmeq(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ __ notr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vcmne16B(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 16 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmeq $dst, $src1, $src2\n\t# vector cmp (16B)"
+ "not $dst, $dst\t" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmeq(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ __ notr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vcmne4S(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmeq $dst, $src1, $src2\n\t# vector cmp (4S)"
+ "not $dst, $dst\t" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmeq(as_FloatRegister($dst$$reg), __ T4H,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ __ notr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vcmne8S(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 8 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmeq $dst, $src1, $src2\n\t# vector cmp (8S)"
+ "not $dst, $dst\t" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmeq(as_FloatRegister($dst$$reg), __ T8H,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ __ notr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vcmne2I(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmeq $dst, $src1, $src2\n\t# vector cmp (2I)"
+ "not $dst, $dst\t" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmeq(as_FloatRegister($dst$$reg), __ T2S,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ __ notr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vcmne4I(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmeq $dst, $src1, $src2\n\t# vector cmp (4I)"
+ "not $dst, $dst\t" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmeq(as_FloatRegister($dst$$reg), __ T4S,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ __ notr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vcmne2L(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmeq $dst, $src1, $src2\n\t# vector cmp (2L)"
+ "not $dst, $dst\t" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmeq(as_FloatRegister($dst$$reg), __ T2D,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ __ notr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vcmne2F(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "fcmeq $dst, $src1, $src2\n\t# vector cmp (2F)"
+ "not $dst, $dst\t" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ fcmeq(as_FloatRegister($dst$$reg), __ T2S,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ __ notr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vcmne4F(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "fcmeq $dst, $src1, $src2\n\t# vector cmp (4F)"
+ "not $dst, $dst\t" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ fcmeq(as_FloatRegister($dst$$reg), __ T4S,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ __ notr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vcmne2D(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "fcmeq $dst, $src1, $src2\n\t# vector cmp (2D)"
+ "not $dst, $dst\t" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ fcmeq(as_FloatRegister($dst$$reg), __ T2D,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ __ notr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct vcmlt8B(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 8 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmgt $dst, $src2, $src1\t# vector cmp (8B)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmgt(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(vdop64);
+%}
+
+instruct vcmlt16B(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 16 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmgt $dst, $src2, $src1\t# vector cmp (16B)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmgt(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmlt4S(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmgt $dst, $src2, $src1\t# vector cmp (4S)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmgt(as_FloatRegister($dst$$reg), __ T4H,
+ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(vdop64);
+%}
+
+instruct vcmlt8S(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 8 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmgt $dst, $src2, $src1\t# vector cmp (8S)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmgt(as_FloatRegister($dst$$reg), __ T8H,
+ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmlt2I(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmgt $dst, $src2, $src1\t# vector cmp (2I)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmgt(as_FloatRegister($dst$$reg), __ T2S,
+ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(vdop64);
+%}
+
+instruct vcmlt4I(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmgt $dst, $src2, $src1\t# vector cmp (4I)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmgt(as_FloatRegister($dst$$reg), __ T4S,
+ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmlt2L(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmgt $dst, $src2, $src1\t# vector cmp (2L)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmgt(as_FloatRegister($dst$$reg), __ T2D,
+ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmlt2F(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "fcmgt $dst, $src2, $src1\t# vector cmp (2F)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ fcmgt(as_FloatRegister($dst$$reg), __ T2S,
+ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(vdop64);
+%}
+
+instruct vcmlt4F(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "fcmgt $dst, $src2, $src1\t# vector cmp (4F)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ fcmgt(as_FloatRegister($dst$$reg), __ T4S,
+ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmlt2D(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "fcmgt $dst, $src2, $src1\t# vector cmp (2D)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ fcmgt(as_FloatRegister($dst$$reg), __ T2D,
+ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmle8B(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 8 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmge $dst, $src2, $src1\t# vector cmp (8B)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmge(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(vdop64);
+%}
+
+instruct vcmle16B(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 16 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmge $dst, $src2, $src1\t# vector cmp (16B)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmge(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmle4S(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmge $dst, $src2, $src1\t# vector cmp (4S)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmge(as_FloatRegister($dst$$reg), __ T4H,
+ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(vdop64);
+%}
+
+instruct vcmle8S(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 8 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmge $dst, $src2, $src1\t# vector cmp (8S)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmge(as_FloatRegister($dst$$reg), __ T8H,
+ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmle2I(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmge $dst, $src2, $src1\t# vector cmp (2I)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmge(as_FloatRegister($dst$$reg), __ T2S,
+ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(vdop64);
+%}
+
+instruct vcmle4I(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmge $dst, $src2, $src1\t# vector cmp (4I)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmge(as_FloatRegister($dst$$reg), __ T4S,
+ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmle2L(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "cmge $dst, $src2, $src1\t# vector cmp (2L)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ cmge(as_FloatRegister($dst$$reg), __ T2D,
+ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmle2F(vecD dst, vecD src1, vecD src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "fcmge $dst, $src2, $src1\t# vector cmp (2F)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ fcmge(as_FloatRegister($dst$$reg), __ T2S,
+ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(vdop64);
+%}
+
+instruct vcmle4F(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "fcmge $dst, $src2, $src1\t# vector cmp (4F)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ fcmge(as_FloatRegister($dst$$reg), __ T4S,
+ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vcmle2D(vecX dst, vecX src1, vecX src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::le &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "fcmge $dst, $src2, $src1\t# vector cmp (2D)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ fcmge(as_FloatRegister($dst$$reg), __ T2D,
+ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+// ------------------------------ Vector mul -----------------------------------
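+// NEON provides no 64-bit element multiply, so MulVL is expanded lane by
+// lane: umov each pair of lanes into GPRs, multiply there, and mov the
+// results back into the destination vector.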
+
+instruct vmul2L(vecX dst, vecX src1, vecX src2, iRegLNoSp tmp1, iRegLNoSp tmp2)
+%{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (MulVL src1 src2));
+ ins_cost(INSN_COST);
+ effect(TEMP tmp1, TEMP tmp2);
+ format %{ "umov $tmp1, $src1, D, 0\n\t"
+ "umov $tmp2, $src2, D, 0\n\t"
+ "mul $tmp2, $tmp2, $tmp1\n\t"
+ "mov $dst, T2D, 0, $tmp2\t# insert into vector(2L)\n\t"
+ "umov $tmp1, $src1, D, 1\n\t"
+ "umov $tmp2, $src2, D, 1\n\t"
+ "mul $tmp2, $tmp2, $tmp1\n\t"
+ "mov $dst, T2D, 1, $tmp2\t# insert into vector(2L)\n\t"
+ %}
+ ins_encode %{
+ __ umov($tmp1$$Register, as_FloatRegister($src1$$reg), __ D, 0);
+ __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ D, 0);
+ __ mul(as_Register($tmp2$$reg), as_Register($tmp2$$reg), as_Register($tmp1$$reg));
+ __ mov(as_FloatRegister($dst$$reg), __ T2D, 0, $tmp2$$Register);
+ __ umov($tmp1$$Register, as_FloatRegister($src1$$reg), __ D, 1);
+ __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ D, 1);
+ __ mul(as_Register($tmp2$$reg), as_Register($tmp2$$reg), as_Register($tmp1$$reg));
+ __ mov(as_FloatRegister($dst$$reg), __ T2D, 1, $tmp2$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// --------------------------------- Vector not --------------------------------
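+// Vector not is matched as an xor with a replicated -1 and lowered to a
+// single "not" over the 8B/16B arrangement.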
+
+instruct vnot2I(vecD dst, vecD src, immI_M1 m1)
+%{
+ predicate(n->as_Vector()->length_in_bytes() == 8);
+ match(Set dst (XorV src (ReplicateB m1)));
+ match(Set dst (XorV src (ReplicateS m1)));
+ match(Set dst (XorV src (ReplicateI m1)));
+ ins_cost(INSN_COST);
+ format %{ "not $dst, $src\t# vector (8B)" %}
+ ins_encode %{
+ __ notr(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vnot4I(vecX dst, vecX src, immI_M1 m1)
+%{
+ predicate(n->as_Vector()->length_in_bytes() == 16);
+ match(Set dst (XorV src (ReplicateB m1)));
+ match(Set dst (XorV src (ReplicateS m1)));
+ match(Set dst (XorV src (ReplicateI m1)));
+ ins_cost(INSN_COST);
+ format %{ "not $dst, $src\t# vector (16B)" %}
+ ins_encode %{
+ __ notr(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct vnot2L(vecX dst, vecX src, immL_M1 m1)
+%{
+ predicate(n->as_Vector()->length_in_bytes() == 16);
+ match(Set dst (XorV src (ReplicateL m1)));
+ ins_cost(INSN_COST);
+ format %{ "not $dst, $src\t# vector (16B)" %}
+ ins_encode %{
+ __ notr(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+// ------------------------------ Vector max/min -------------------------------
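+// Byte/short/int lanes map directly onto smax/smin (maxv/minv below). There
+// is no 64-bit NEON smax/smin, so the 2L forms build a cmgt mask and select
+// between the sources with bsl.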
+
+instruct vmax8B(vecD dst, vecD src1, vecD src2)
+%{
+ predicate((n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (MaxV src1 src2));
+ ins_cost(INSN_COST);
+ format %{ "maxv $dst, $src1, $src2\t# vector (8B)" %}
+ ins_encode %{
+ __ maxv(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop64);
+%}
+
+instruct vmax16B(vecX dst, vecX src1, vecX src2)
+%{
+ predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (MaxV src1 src2));
+ ins_cost(INSN_COST);
+ format %{ "maxv $dst, $src1, $src2\t# vector (16B)" %}
+ ins_encode %{
+ __ maxv(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vmax4S(vecD dst, vecD src1, vecD src2)
+%{
+ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (MaxV src1 src2));
+ ins_cost(INSN_COST);
+ format %{ "maxv $dst, $src1, $src2\t# vector (4S)" %}
+ ins_encode %{
+ __ maxv(as_FloatRegister($dst$$reg), __ T4H,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop64);
+%}
+
+instruct vmax8S(vecX dst, vecX src1, vecX src2)
+%{
+ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (MaxV src1 src2));
+ ins_cost(INSN_COST);
+ format %{ "maxv $dst, $src1, $src2\t# vector (8S)" %}
+ ins_encode %{
+ __ maxv(as_FloatRegister($dst$$reg), __ T8H,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vmax2I(vecD dst, vecD src1, vecD src2)
+%{
+ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (MaxV src1 src2));
+ ins_cost(INSN_COST);
+ format %{ "maxv $dst, $src1, $src2\t# vector (2I)" %}
+ ins_encode %{
+ __ maxv(as_FloatRegister($dst$$reg), __ T2S,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop64);
+%}
+
+instruct vmax4I(vecX dst, vecX src1, vecX src2)
+%{
+ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (MaxV src1 src2));
+ ins_cost(INSN_COST);
+ format %{ "maxv $dst, $src1, $src2\t# vector (4I)" %}
+ ins_encode %{
+ __ maxv(as_FloatRegister($dst$$reg), __ T4S,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vmin8B(vecD dst, vecD src1, vecD src2)
+%{
+ predicate((n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (MinV src1 src2));
+ ins_cost(INSN_COST);
+ format %{ "minv $dst, $src1, $src2\t# vector (8B)" %}
+ ins_encode %{
+ __ minv(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop64);
+%}
+
+instruct vmin16B(vecX dst, vecX src1, vecX src2)
+%{
+ predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (MinV src1 src2));
+ ins_cost(INSN_COST);
+ format %{ "minv $dst, $src1, $src2\t# vector (16B)" %}
+ ins_encode %{
+ __ minv(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vmin4S(vecD dst, vecD src1, vecD src2)
+%{
+ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (MinV src1 src2));
+ ins_cost(INSN_COST);
+ format %{ "minv $dst, $src1, $src2\t# vector (4S)" %}
+ ins_encode %{
+ __ minv(as_FloatRegister($dst$$reg), __ T4H,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop64);
+%}
+
+instruct vmin8S(vecX dst, vecX src1, vecX src2)
+%{
+ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (MinV src1 src2));
+ ins_cost(INSN_COST);
+ format %{ "minv $dst, $src1, $src2\t# vector (8S)" %}
+ ins_encode %{
+ __ minv(as_FloatRegister($dst$$reg), __ T8H,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vmin2I(vecD dst, vecD src1, vecD src2)
+%{
+ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (MinV src1 src2));
+ ins_cost(INSN_COST);
+ format %{ "minv $dst, $src1, $src2\t# vector (2I)" %}
+ ins_encode %{
+ __ minv(as_FloatRegister($dst$$reg), __ T2S,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop64);
+%}
+
+instruct vmin4I(vecX dst, vecX src1, vecX src2)
+%{
+ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst (MinV src1 src2));
+ ins_cost(INSN_COST);
+ format %{ "minv $dst, $src1, $src2\t# vector (4I)" %}
+ ins_encode %{
+ __ minv(as_FloatRegister($dst$$reg), __ T4S,
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+
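+// NEON smax/smin have no 2D arrangement, so 2L max/min are composed from a
+// signed compare (cmgt), which yields an all-ones/all-zeros mask per lane,
+// followed by bsl to select the larger (or smaller) element.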
+instruct vmax2L(vecX dst, vecX src1, vecX src2)
+%{
+ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst (MaxV src1 src2));
+ ins_cost(INSN_COST);
+ effect(TEMP dst);
+ format %{ "cmgt $dst, $src1, $src2\t# vector (2L)\n\t"
+ "bsl $dst, $src1, $src2\t# vector (16B)" %}
+ ins_encode %{
+ __ cmgt(as_FloatRegister($dst$$reg), __ T2D,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ __ bsl(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+instruct vmin2L(vecX dst, vecX src1, vecX src2)
+%{
+ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst (MinV src1 src2));
+ ins_cost(INSN_COST);
+ effect(TEMP dst);
+ format %{ "cmgt $dst, $src1, $src2\t# vector (2L)\n\t"
+ "bsl $dst, $src2, $src1\t# vector (16B)" %}
+ ins_encode %{
+ __ cmgt(as_FloatRegister($dst$$reg), __ T2D,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ __ bsl(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(vdop128);
+%}
+
+// --------------------------------- blend (bsl) ----------------------------
+
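+// For VectorBlend, dst is pre-loaded with the lane mask. bsl keeps the bits of
+// src2 where the corresponding mask bit is set and the bits of src1 elsewhere,
+// i.e. each result lane is (mask ? src2 : src1).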
+instruct vbsl8B(vecD dst, vecD src1, vecD src2)
+%{
+ predicate(n->as_Vector()->length_in_bytes() == 8);
+ match(Set dst (VectorBlend (Binary src1 src2) dst));
+ ins_cost(INSN_COST);
+ format %{ "bsl $dst, $src2, $src1\t# vector (8B)" %}
+ ins_encode %{
+ __ bsl(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(vlogical64);
+%}
+
+instruct vbsl16B(vecX dst, vecX src1, vecX src2)
+%{
+ predicate(n->as_Vector()->length_in_bytes() == 16);
+ match(Set dst (VectorBlend (Binary src1 src2) dst));
+ ins_cost(INSN_COST);
+ format %{ "bsl $dst, $src2, $src1\t# vector (16B)" %}
+ ins_encode %{
+ __ bsl(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(vlogical128);
+%}
+
+// --------------------------------- Load/store Mask ----------------------------
+
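+// VectorLoadMask turns a vector of booleans (one byte per lane, 0 or 1) into a
+// lane mask (0 or -1) by negating each element, widening to the lane size where
+// needed. VectorStoreMask is the inverse: narrow to bytes and negate back to 0/1.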
+instruct loadmask8B(vecD dst, vecD src )
+%{
+ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorLoadMask src ));
+ ins_cost(INSN_COST);
+ format %{ "negr $dst, $src\t# load mask (8B to 8B)" %}
+ ins_encode %{
+ __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct loadmask16B(vecX dst, vecX src )
+%{
+ predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorLoadMask src ));
+ ins_cost(INSN_COST);
+ format %{ "negr $dst, $src\t# load mask (16B to 16B)" %}
+ ins_encode %{
+ __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct storemask8B(vecD dst, vecD src , immI_1 size)
+%{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (VectorStoreMask src size));
+ ins_cost(INSN_COST);
+ format %{ "negr $dst, $src\t# store mask (8B to 8B)" %}
+ ins_encode %{
+ __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct storemask16B(vecX dst, vecX src , immI_1 size)
+%{
+ predicate(n->as_Vector()->length() == 16);
+ match(Set dst (VectorStoreMask src size));
+ ins_cost(INSN_COST);
+ format %{ "negr $dst, $src\t# store mask (16B to 16B)" %}
+ ins_encode %{
+ __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct loadmask4S(vecD dst, vecD src )
+%{
+ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorLoadMask src ));
+ ins_cost(INSN_COST);
+ format %{ "uxtl $dst, $src\n\t"
+ "negr $dst, $dst\t# load mask (4B to 4H)" %}
+ ins_encode %{
+ __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
+ __ negr(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct loadmask8S(vecX dst, vecD src )
+%{
+ predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorLoadMask src ));
+ ins_cost(INSN_COST);
+ format %{ "uxtl $dst, $src\n\t"
+ "negr $dst, $dst\t# load mask (8B to 8H)" %}
+ ins_encode %{
+ __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
+ __ negr(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct storemask4S(vecD dst, vecD src , immI_2 size)
+%{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (VectorStoreMask src size));
+ ins_cost(INSN_COST);
+ format %{ "xtn $dst, $src\n\t"
+ "negr $dst, $dst\t# store mask (4H to 4B)" %}
+ ins_encode %{
+ __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg), __ T8H);
+ __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct storemask8S(vecD dst, vecX src , immI_2 size)
+%{
+ predicate(n->as_Vector()->length() == 8);
+ match(Set dst (VectorStoreMask src size));
+ ins_cost(INSN_COST);
+ format %{ "xtn $dst, $src\n\t"
+ "negr $dst, $dst\t# store mask (8H to 8B)" %}
+ ins_encode %{
+ __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg), __ T8H);
+ __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct loadmask2I(vecD dst, vecD src )
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
+ n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
+ match(Set dst (VectorLoadMask src ));
+ ins_cost(INSN_COST);
+ format %{ "uxtl $dst, $src\t# 2B to 2H\n\t"
+ "uxtl $dst, $dst\t# 2H to 2S\n\t"
+ "negr $dst, $dst\t# load mask (2B to 2S)" %}
+ ins_encode %{
+ __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
+ __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
+ __ negr(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct loadmask4I(vecX dst, vecD src )
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
+ n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
+ match(Set dst (VectorLoadMask src ));
+ ins_cost(INSN_COST);
+ format %{ "uxtl $dst, $src\t# 4B to 4H\n\t"
+ "uxtl $dst, $dst\t# 4H to 4S\n\t"
+ "negr $dst, $dst\t# load mask (4B to 4S)" %}
+ ins_encode %{
+ __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
+ __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
+ __ negr(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct storemask2I(vecD dst, vecD src , immI_4 size)
+%{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (VectorStoreMask src size));
+ ins_cost(INSN_COST);
+ format %{ "xtn $dst, $src\t# 2S to 2H\n\t"
+ "xtn $dst, $dst\t# 2H to 2B\n\t"
+ "negr $dst, $dst\t# store mask (2S to 2B)" %}
+ ins_encode %{
+ __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($src$$reg), __ T4S);
+ __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg), __ T8H);
+ __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct storemask4I(vecD dst, vecX src , immI_4 size)
+%{
+ predicate(n->as_Vector()->length() == 4);
+ match(Set dst (VectorStoreMask src size));
+ ins_cost(INSN_COST);
+ format %{ "xtn $dst, $src\t# 4S to 4H\n\t"
+ "xtn $dst, $dst\t# 4H to 4B\n\t"
+ "negr $dst, $dst\t# store mask (4S to 4B)" %}
+ ins_encode %{
+ __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($src$$reg), __ T4S);
+ __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg), __ T8H);
+ __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct loadmask2L(vecX dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
+ n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
+ match(Set dst (VectorLoadMask src));
+ ins_cost(INSN_COST);
+ format %{ "uxtl $dst, $src\t# 2B to 2S\n\t"
+ "uxtl $dst, $dst\t# 2S to 2I\n\t"
+ "uxtl $dst, $dst\t# 2I to 2L\n\t"
+ "neg $dst, $dst\t# load mask (2B to 2L)" %}
+ ins_encode %{
+ __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
+ __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
+ __ uxtl(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($dst$$reg), __ T2S);
+ __ negr(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct storemask2L(vecD dst, vecX src, immI_8 size)
+%{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (VectorStoreMask src size));
+ ins_cost(INSN_COST);
+ format %{ "xtn $dst, $src\t# 2L to 2I\n\t"
+ "xtn $dst, $dst\t# 2I to 2S\n\t"
+ "xtn $dst, $dst\t# 2S to 2B\n\t"
+ "neg $dst, $dst\t# store mask (2L to 2B)" %}
+ ins_encode %{
+ __ xtn(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg), __ T2D);
+ __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($dst$$reg), __ T4S);
+ __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg), __ T8H);
+ __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+//-------------------------------- LOAD_IOTA_INDICES----------------------------------
+
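+// VectorLoadConst loads the iota index sequence (bytes 0, 1, 2, ...) from the
+// stub-generated constant table at StubRoutines::aarch64::vector_iota_indices().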
+instruct loadcon8B(vecD dst, immI0 src)
+%{
+ predicate((n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4 ||
+ n->as_Vector()->length() == 8) &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorLoadConst src));
+ ins_cost(INSN_COST);
+ format %{ "ldr $dst, CONSTANT_MEMORY\t# load iota indices" %}
+ ins_encode %{
+ __ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices()));
+ __ ldrd(as_FloatRegister($dst$$reg), rscratch1);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+instruct loadcon16B(vecX dst, immI0 src)
+%{
+ predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorLoadConst src));
+ ins_cost(INSN_COST);
+ format %{ "ldr $dst, CONSTANT_MEMORY\t# load iota indices" %}
+ ins_encode %{
+ __ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices()));
+ __ ldrq(as_FloatRegister($dst$$reg), rscratch1);
+ %}
+ ins_pipe(pipe_class_memory);
+%}
+
+//-------------------------------- LOAD_SHUFFLE ----------------------------------
+
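+// VectorLoadShuffle converts byte shuffle indices to the element width of the
+// destination: a plain move for byte vectors, and uxtl widening of the byte
+// indices for short/int vectors.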
+instruct loadshuffle8B(vecD dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 8 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorLoadShuffle src));
+ ins_cost(INSN_COST);
+ format %{ "mov $dst, $src\t# get 8B shuffle" %}
+ ins_encode %{
+ __ orr(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct loadshuffle16B(vecX dst, vecX src)
+%{
+ predicate(n->as_Vector()->length() == 16 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorLoadShuffle src));
+ ins_cost(INSN_COST);
+ format %{ "mov $dst, $src\t# get 16B shuffle" %}
+ ins_encode %{
+ __ orr(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct loadshuffle4S(vecD dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorLoadShuffle src));
+ ins_cost(INSN_COST);
+ format %{ "uxtl $dst, $src\t# 4B to 4H" %}
+ ins_encode %{
+ __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct loadshuffle8S(vecX dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 8 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorLoadShuffle src));
+ ins_cost(INSN_COST);
+ format %{ "uxtl $dst, $src\t# 8B to 8H" %}
+ ins_encode %{
+ __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
+ %}
+ ins_pipe(pipe_class_default);
+%}
+
+instruct loadshuffle4I(vecX dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
+ n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
+ match(Set dst (VectorLoadShuffle src));
+ ins_cost(INSN_COST);
+ format %{ "uxtl $dst, $src\t# 4B to 4H \n\t"
+ "uxtl $dst, $dst\t# 4H to 4S" %}
+ ins_encode %{
+ __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
+ __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+//-------------------------------- Rearrange -------------------------------------
+// Here is an example that rearranges a NEON vector with 4 ints:
+// Rearrange V1 int[a0, a1, a2, a3] to V2 int[a2, a3, a0, a1]
+// 1. Get the indices of V1 and store them as Vi byte[0, 1, 2, 3].
+// 2. Convert Vi byte[0, 1, 2, 3] to the indices of V2 and also store them as Vi byte[2, 3, 0, 1].
+// 3. Unsigned extend Long Vi from byte[2, 3, 0, 1] to int[2, 3, 0, 1].
+// 4. Multiply Vi int[2, 3, 0, 1] with constant int[0x04040404, 0x04040404, 0x04040404, 0x04040404]
+// and get tbl base Vm int[0x08080808, 0x0c0c0c0c, 0x00000000, 0x04040404].
+// 5. Add Vm with constant int[0x03020100, 0x03020100, 0x03020100, 0x03020100]
+// and get tbl index Vm int[0x0b0a0908, 0x0f0e0d0c, 0x03020100, 0x07060504]
+// 6. Use Vm as index register, and use V1 as table register.
+// Then get V2 as the result by tbl NEON instructions.
+// Notes:
+// Step 1 matches VectorLoadConst.
+// Step 3 matches VectorLoadShuffle.
+// Step 4, 5, 6 match VectorRearrange.
+// For VectorRearrange short/int, such a complex calculation is required because
+// the NEON tbl instruction only supports byte tables, so for short/int we need
+// to look up 2/4 bytes as a group. For VectorRearrange long, we use bsl to
+// implement rearrange.
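+// As an illustrative sketch only (assuming the incubating jdk.incubator.vector
+// API; not part of this file), Java source of roughly the following shape is
+// what ends up being matched by the VectorLoadConst/VectorLoadShuffle/
+// VectorRearrange rules below:
+//
+//   var species = IntVector.SPECIES_128;                          // 4 ints
+//   var v1 = IntVector.fromArray(species, a, 0);                  // [a0, a1, a2, a3]
+//   var shuffle = VectorShuffle.fromValues(species, 2, 3, 0, 1);
+//   var v2 = v1.rearrange(shuffle);                               // [a2, a3, a0, a1]
+//   v2.intoArray(b, 0);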
+
+instruct rearrange8B(vecD dst, vecD src, vecD shuffle)
+%{
+ predicate(n->as_Vector()->length() == 8 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorRearrange src shuffle));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst);
+ format %{ "tbl $dst, {$dst}, $shuffle\t# rearrange 8B" %}
+ ins_encode %{
+ __ tbl(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($src$$reg), 1, as_FloatRegister($shuffle$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct rearrange16B(vecX dst, vecX src, vecX shuffle)
+%{
+ predicate(n->as_Vector()->length() == 16 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorRearrange src shuffle));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst);
+ format %{ "tbl $dst, {$dst}, $shuffle\t# rearrange 16B" %}
+ ins_encode %{
+ __ tbl(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src$$reg), 1, as_FloatRegister($shuffle$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct rearrange4S(vecD dst, vecD src, vecD shuffle, vecD tmp0, vecD tmp1)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorRearrange src shuffle));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp0, TEMP tmp1);
+ format %{ "mov $tmp0, CONSTANT\t# constant 0x0202020202020202\n\t"
+ "mov $tmp1, CONSTANT\t# constant 0x0100010001000100\n\t"
+ "mulv $dst, T4H, $shuffle, $tmp0\n\t"
+ "addv $dst, T8B, $dst, $tmp1\n\t"
+ "tbl $dst, {$src}, $dst\t# rearrange 4S" %}
+ ins_encode %{
+ __ mov(as_FloatRegister($tmp0$$reg), __ T8B, 0x02);
+ __ mov(as_FloatRegister($tmp1$$reg), __ T4H, 0x0100);
+ __ mulv(as_FloatRegister($dst$$reg), __ T4H,
+ as_FloatRegister($shuffle$$reg), as_FloatRegister($tmp0$$reg));
+ __ addv(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($dst$$reg), as_FloatRegister($tmp1$$reg));
+ __ tbl(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($src$$reg), 1, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct rearrange8S(vecX dst, vecX src, vecX shuffle, vecX tmp0, vecX tmp1)
+%{
+ predicate(n->as_Vector()->length() == 8 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorRearrange src shuffle));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp0, TEMP tmp1);
+ format %{ "mov $tmp0, CONSTANT\t# constant 0x0202020202020202\n\t"
+ "mov $tmp1, CONSTANT\t# constant 0x0100010001000100\n\t"
+ "mulv $dst, T8H, $shuffle, $tmp0\n\t"
+ "addv $dst, T16B, $dst, $tmp1\n\t"
+ "tbl $dst, {$src}, $dst\t# rearrange 8S" %}
+ ins_encode %{
+ __ mov(as_FloatRegister($tmp0$$reg), __ T16B, 0x02);
+ __ mov(as_FloatRegister($tmp1$$reg), __ T8H, 0x0100);
+ __ mulv(as_FloatRegister($dst$$reg), __ T8H,
+ as_FloatRegister($shuffle$$reg), as_FloatRegister($tmp0$$reg));
+ __ addv(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($dst$$reg), as_FloatRegister($tmp1$$reg));
+ __ tbl(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src$$reg), 1, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct rearrange4I(vecX dst, vecX src, vecX shuffle, vecX tmp0, vecX tmp1)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
+ n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
+ match(Set dst (VectorRearrange src shuffle));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp0, TEMP tmp1);
+ format %{ "mov $tmp0, CONSTANT\t# constant 0x0404040404040404\n\t"
+ "mov $tmp1, CONSTANT\t# constant 0x0302010003020100\n\t"
+ "mulv $dst, T8H, $shuffle, $tmp0\n\t"
+ "addv $dst, T16B, $dst, $tmp1\n\t"
+ "tbl $dst, {$src}, $dst\t# rearrange 4I" %}
+ ins_encode %{
+ __ mov(as_FloatRegister($tmp0$$reg), __ T16B, 0x04);
+ __ mov(as_FloatRegister($tmp1$$reg), __ T4S, 0x03020100);
+ __ mulv(as_FloatRegister($dst$$reg), __ T4S,
+ as_FloatRegister($shuffle$$reg), as_FloatRegister($tmp0$$reg));
+ __ addv(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($dst$$reg), as_FloatRegister($tmp1$$reg));
+ __ tbl(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src$$reg), 1, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+//-------------------------------- Anytrue/alltrue -----------------------------
+
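+// anytrue sums all mask bytes into one lane with addv; the result is non-zero
+// iff at least one lane of the mask is set. alltrue ands the mask with the
+// all-true second operand, inverts it and sums the bytes; the result is zero
+// iff every lane was set.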
+instruct anytrue_in_mask8B(iRegINoSp dst, vecD src1, vecD src2, vecD tmp, rFlagsReg cr)
+%{
+ predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
+ match(Set dst (VectorTest src1 src2 ));
+ ins_cost(INSN_COST);
+ effect(TEMP tmp, KILL cr);
+ format %{ "addv $tmp, T8B, $src1\t# src1 and src2 are the same\n\t"
+ "umov $dst, $tmp, B, 0\n\t"
+ "cmp $dst, 0\n\t"
+ "cset $dst" %}
+ ins_encode %{
+ __ addv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($src1$$reg));
+ __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
+ __ cmpw($dst$$Register, zr);
+ __ csetw($dst$$Register, Assembler::NE);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct anytrue_in_mask16B(iRegINoSp dst, vecX src1, vecX src2, vecX tmp, rFlagsReg cr)
+%{
+ predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
+ match(Set dst (VectorTest src1 src2 ));
+ ins_cost(INSN_COST);
+ effect(TEMP tmp, KILL cr);
+ format %{ "addv $tmp, T16B, $src1\t# src1 and src2 are the same\n\t"
+ "umov $dst, $tmp, B, 0\n\t"
+ "cmp $dst, 0\n\t"
+ "cset $dst" %}
+ ins_encode %{
+ __ addv(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($src1$$reg));
+ __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
+ __ cmpw($dst$$Register, zr);
+ __ csetw($dst$$Register, Assembler::NE);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct alltrue_in_mask8B(iRegINoSp dst, vecD src1, vecD src2, vecD tmp, rFlagsReg cr)
+%{
+ predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
+ match(Set dst (VectorTest src1 src2 ));
+ ins_cost(INSN_COST);
+ effect(TEMP tmp, KILL cr);
+ format %{ "andr $tmp, T8B, $src1, $src2\t# src2 is maskAllTrue\n\t"
+ "notr $tmp, T8B, $tmp\n\t"
+ "addv $tmp, T8B, $tmp\n\t"
+ "umov $dst, $tmp, B, 0\n\t"
+ "cmp $dst, 0\n\t"
+ "cset $dst" %}
+ ins_encode %{
+ __ andr(as_FloatRegister($tmp$$reg), __ T8B,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ __ notr(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($tmp$$reg));
+ __ addv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($tmp$$reg));
+ __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
+ __ cmpw($dst$$Register, zr);
+ __ csetw($dst$$Register, Assembler::EQ);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct alltrue_in_mask16B(iRegINoSp dst, vecX src1, vecX src2, vecX tmp, rFlagsReg cr)
+%{
+ predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
+ match(Set dst (VectorTest src1 src2 ));
+ ins_cost(INSN_COST);
+ effect(TEMP tmp, KILL cr);
+ format %{ "andr $tmp, T16B, $src1, $src2\t# src2 is maskAllTrue\n\t"
+ "notr $tmp, T16B, $tmp\n\t"
+ "addv $tmp, T16B, $tmp\n\t"
+ "umov $dst, $tmp, B, 0\n\t"
+ "cmp $dst, 0\n\t"
+ "cset $dst" %}
+ ins_encode %{
+ __ andr(as_FloatRegister($tmp$$reg), __ T16B,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ __ notr(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($tmp$$reg));
+ __ addv(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($tmp$$reg));
+ __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
+ __ cmpw($dst$$Register, zr);
+ __ csetw($dst$$Register, Assembler::EQ);
+ %}
+ ins_pipe(pipe_slow);
+%}
diff --git a/src/hotspot/cpu/aarch64/aarch64_neon_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_neon_ad.m4
new file mode 100644
index 00000000000..0b1dc5cb7c6
--- /dev/null
+++ b/src/hotspot/cpu/aarch64/aarch64_neon_ad.m4
@@ -0,0 +1,1424 @@
+// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2020, Arm Limited. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+
+dnl Generate the warning
+// This file is automatically generated by running "m4 aarch64_neon_ad.m4". Do not edit ----
+dnl
+
+// AArch64 NEON Architecture Description File
+
+dnl
+define(`ORL2I', `ifelse($1,I,orL2I)')dnl
+dnl
+define(`error', `__program__:__file__:__line__: Invalid argument ``$1''m4exit(`1')')dnl
+dnl
+define(`iTYPE2SIMD',
+`ifelse($1, `B', `B',
+ $1, `S', `H',
+ $1, `I', `S',
+ $1, `L', `D',
+ `error($1)')')dnl
+dnl
+define(`fTYPE2SIMD',
+`ifelse($1, `F', `S',
+ $1, `D', `D',
+ `error($1)')')dnl
+dnl
+define(`TYPE2DATATYPE',
+`ifelse($1, `B', `BYTE',
+ $1, `S', `SHORT',
+ $1, `I', `INT',
+ $1, `L', `LONG',
+ $1, `F', `FLOAT',
+ $1, `D', `DOUBLE',
+ `error($1)')')dnl
+dnl
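+dnl iTYPE2SIMD maps an integer element type letter to its NEON arrangement
+dnl suffix (e.g. S -> H, I -> S), fTYPE2SIMD does the same for float/double,
+dnl and TYPE2DATATYPE maps an element type letter to the T_* BasicType name
+dnl used in predicates.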
+// ====================VECTOR INSTRUCTIONS==================================
+
+// ------------------------------ Load/store/reinterpret -----------------------
+
+// Load vector (16 bits)
+instruct loadV2(vecD dst, memory mem)
+%{
+ predicate(n->as_LoadVector()->memory_size() == 2);
+ match(Set dst (LoadVector mem));
+ ins_cost(4 * INSN_COST);
+ format %{ "ldrh $dst,$mem\t# vector (16 bits)" %}
+ ins_encode( aarch64_enc_ldrvH(dst, mem) );
+ ins_pipe(vload_reg_mem64);
+%}
+
+// Store Vector (16 bits)
+instruct storeV2(vecD src, memory mem)
+%{
+ predicate(n->as_StoreVector()->memory_size() == 2);
+ match(Set mem (StoreVector mem src));
+ ins_cost(4 * INSN_COST);
+ format %{ "strh $mem,$src\t# vector (16 bits)" %}
+ ins_encode( aarch64_enc_strvH(src, mem) );
+ ins_pipe(vstore_reg_mem64);
+%}
+dnl
+define(`REINTERPRET', `
+instruct reinterpret$1`'(vec$1 dst)
+%{
+ predicate(n->bottom_type()->is_vect()->length_in_bytes() == $2 &&
+ n->in(1)->bottom_type()->is_vect()->length_in_bytes() == $2);
+ match(Set dst (VectorReinterpret dst));
+ ins_cost(0);
+ format %{ " # reinterpret $dst" %}
+ ins_encode %{
+ // empty
+ %}
+ ins_pipe(pipe_class_empty);
+%}')dnl
+dnl $1 $2
+REINTERPRET(D, 8)
+REINTERPRET(X, 16)
+dnl
+define(`REINTERPRET_X', `
+instruct reinterpret$1`'2$2`'(vec$2 dst, vec$1 src)
+%{
+ predicate(n->bottom_type()->is_vect()->length_in_bytes() == $3 &&
+ n->in(1)->bottom_type()->is_vect()->length_in_bytes() == $4);
+ match(Set dst (VectorReinterpret src));
+ ins_cost(INSN_COST);
+ format %{ " # reinterpret $dst,$src" %}
+ ins_encode %{
+ // If the source and destination registers are the same, the move is not needed.
+ if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
+ __ orr(as_FloatRegister($dst$$reg), __ T8B,
+ as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ }
+ %}
+ ins_pipe(vlogical64);
+%}')dnl
+dnl $1 $2 $3 $4
+REINTERPRET_X(D, X, 16, 8)
+REINTERPRET_X(X, D, 8, 16)
+dnl
+
+// ------------------------------ Vector cast -------------------------------
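+// Integer widening/narrowing casts use sxtl/xtn, int-to-float casts use scvtfv
+// (with sxtl widening first where the source is narrower), and float/double
+// casts use fcvtl/fcvtn.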
+dnl
+define(`VECTOR_CAST_I2I', `
+instruct vcvt$1$2to$1$3`'(vec$4 dst, vec$5 src)
+%{
+ predicate(n->as_Vector()->length() == $1 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
+ match(Set dst (VectorCast$2`'2X src));
+ format %{ "$6 $dst, T$8, $src, T$7\t# convert $1$2 to $1$3 vector" %}
+ ins_encode %{
+ __ $6(as_FloatRegister($dst$$reg), __ T$8, as_FloatRegister($src$$reg), __ T$7);
+ %}
+ ins_pipe(pipe_class_default);
+%}')dnl
+dnl $1 $2 $3 $4 $5 $6 $7 $8
+VECTOR_CAST_I2I(4, B, S, D, D, sxtl, 8B, 8H)
+VECTOR_CAST_I2I(8, B, S, X, D, sxtl, 8B, 8H)
+VECTOR_CAST_I2I(4, S, B, D, D, xtn, 8H, 8B)
+VECTOR_CAST_I2I(8, S, B, D, X, xtn, 8H, 8B)
+VECTOR_CAST_I2I(4, S, I, X, D, sxtl, 4H, 4S)
+VECTOR_CAST_I2I(4, I, S, D, X, xtn, 4S, 4H)
+VECTOR_CAST_I2I(2, I, L, X, D, sxtl, 2S, 2D)
+VECTOR_CAST_I2I(2, L, I, D, X, xtn, 2D, 2S)
+dnl
+define(`VECTOR_CAST_B2I', `
+instruct vcvt4$1to4$2`'(vec$3 dst, vec$4 src)
+%{
+ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
+ match(Set dst (VectorCast$1`'2X src));
+ format %{ "$5 $dst, T$7, $src, T$6\n\t"
+ "$5 $dst, T$9, $dst, T$8\t# convert 4$1 to 4$2 vector"
+ %}
+ ins_encode %{
+ __ $5(as_FloatRegister($dst$$reg), __ T$7, as_FloatRegister($src$$reg), __ T$6);
+ __ $5(as_FloatRegister($dst$$reg), __ T$9, as_FloatRegister($dst$$reg), __ T$8);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3 $4 $5 $6 $7 $8 $9
+VECTOR_CAST_B2I(B, I, X, D, sxtl, 8B, 8H, 4H, 4S)
+VECTOR_CAST_B2I(I, B, D, X, xtn, 4S, 4H, 8H, 8B)
+
+instruct vcvt4Bto4F(vecX dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorCastB2X src));
+ format %{ "sxtl $dst, T8H, $src, T8B\n\t"
+ "sxtl $dst, T4S, $dst, T4H\n\t"
+ "scvtfv T4S, $dst, $dst\t# convert 4B to 4F vector"
+ %}
+ ins_encode %{
+ __ sxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
+ __ sxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
+ __ scvtfv(__ T4S, as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+dnl
+define(`VECTOR_CAST_I2F_L', `
+instruct vcvt$1$2to$1$3`'(vecX dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == $1 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
+ match(Set dst (VectorCast$2`'2X src));
+ format %{ "sxtl $dst, T$5, $src, T$4\n\t"
+ "scvtfv T$5, $dst, $dst\t# convert $1$2 to $1$3 vector"
+ %}
+ ins_encode %{
+ __ sxtl(as_FloatRegister($dst$$reg), __ T$5, as_FloatRegister($src$$reg), __ T$4);
+ __ scvtfv(__ T$5, as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3 $4 $5
+VECTOR_CAST_I2F_L(4, S, F, 4H, 4S)
+VECTOR_CAST_I2F_L(2, I, D, 2S, 2D)
+dnl
+define(`VECTOR_CAST_I2F', `
+instruct vcvt$1$2to$1$3`'(vec$4 dst, vec$4 src)
+%{
+ predicate(n->as_Vector()->length() == $1 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
+ match(Set dst (VectorCast$2`'2X src));
+ format %{ "scvtfv T$5, $dst, $src\t# convert $1$2 to $1$3 vector" %}
+ ins_encode %{
+ __ scvtfv(__ T$5, as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_class_default);
+%}')dnl
+dnl $1 $2 $3 $4 $5
+VECTOR_CAST_I2F(2, I, F, D, 2S)
+VECTOR_CAST_I2F(4, I, F, X, 4S)
+VECTOR_CAST_I2F(2, L, D, X, 2D)
+dnl
+define(`VECTOR_CAST_F2F', `
+instruct vcvt2$1to2$2`'(vec$3 dst, vec$4 src)
+%{
+ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
+ match(Set dst (VectorCast$1`'2X src));
+ format %{ "$5 $dst, T$7, $src, T$6\t# convert 2$1 to 2$2 vector" %}
+ ins_encode %{
+ __ $5(as_FloatRegister($dst$$reg), __ T$7, as_FloatRegister($src$$reg), __ T$6);
+ %}
+ ins_pipe(pipe_class_default);
+%}')dnl
+dnl $1 $2 $3 $4 $5 $6 $7
+VECTOR_CAST_F2F(F, D, X, D, fcvtl, 2S, 2D)
+VECTOR_CAST_F2F(D, F, D, X, fcvtn, 2D, 2S)
+dnl
+
+instruct vcvt2Lto2F(vecD dst, vecX src)
+%{
+ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT);
+ match(Set dst (VectorCastL2X src));
+ format %{ "scvtfv T2D, $dst, $src\n\t"
+ "fcvtn $dst, T2S, $dst, T2D\t# convert 2L to 2F vector"
+ %}
+ ins_encode %{
+ __ scvtfv(__ T2D, as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg));
+ __ fcvtn(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($dst$$reg), __ T2D);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// ------------------------------ Reduction -------------------------------
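+// Integer add reductions use the across-lanes addv (addpd for 2L), then fold in
+// the scalar input. There is no across-lanes multiply, so multiply reductions
+// repeatedly halve the vector with ins/mulv and finish with scalar multiplies.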
+dnl
+define(`REDUCE_ADD_BORS', `
+instruct reduce_add$1$2`'(iRegINoSp dst, iRegIorL2I isrc, vec$3 vsrc, vec$3 tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
+ match(Set dst (AddReductionVI isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "addv $tmp, T$1`'iTYPE2SIMD($2), $vsrc\n\t"
+ "smov $dst, $tmp, iTYPE2SIMD($2), 0\n\t"
+ "addw $dst, $dst, $isrc\n\t"
+ "sxt$4 $dst, $dst\t# add reduction$1$2"
+ %}
+ ins_encode %{
+ __ addv(as_FloatRegister($tmp$$reg), __ T$1`'iTYPE2SIMD($2), as_FloatRegister($vsrc$$reg));
+ __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ iTYPE2SIMD($2), 0);
+ __ addw($dst$$Register, $dst$$Register, $isrc$$Register);
+ __ sxt$4($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3 $4
+REDUCE_ADD_BORS(8, B, D, b)
+REDUCE_ADD_BORS(16, B, X, b)
+REDUCE_ADD_BORS(4, S, D, h)
+REDUCE_ADD_BORS(8, S, X, h)
+dnl
+
+instruct reduce_add2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, vecX tmp)
+%{
+ match(Set dst (AddReductionVL isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "addpd $tmp, $vsrc\n\t"
+ "umov $dst, $tmp, D, 0\n\t"
+ "add $dst, $isrc, $dst\t# add reduction2L"
+ %}
+ ins_encode %{
+ __ addpd(as_FloatRegister($tmp$$reg), as_FloatRegister($vsrc$$reg));
+ __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ D, 0);
+ __ add($dst$$Register, $isrc$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_mul8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD vtmp1, vecD vtmp2, iRegINoSp itmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (MulReductionVI isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp);
+ format %{ "ins $vtmp1, S, $vsrc, 0, 1\n\t"
+ "mulv $vtmp1, T8B, $vtmp1, $vsrc\n\t"
+ "ins $vtmp2, H, $vtmp1, 0, 1\n\t"
+ "mulv $vtmp2, T8B, $vtmp2, $vtmp1\n\t"
+ "umov $itmp, $vtmp2, B, 0\n\t"
+ "mulw $dst, $itmp, $isrc\n\t"
+ "sxtb $dst, $dst\n\t"
+ "umov $itmp, $vtmp2, B, 1\n\t"
+ "mulw $dst, $itmp, $dst\n\t"
+ "sxtb $dst, $dst\t# mul reduction8B"
+ %}
+ ins_encode %{
+ __ ins(as_FloatRegister($vtmp1$$reg), __ S,
+ as_FloatRegister($vsrc$$reg), 0, 1);
+ __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B,
+ as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg));
+ __ ins(as_FloatRegister($vtmp2$$reg), __ H,
+ as_FloatRegister($vtmp1$$reg), 0, 1);
+ __ mulv(as_FloatRegister($vtmp2$$reg), __ T8B,
+ as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
+ __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 0);
+ __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register);
+ __ sxtb($dst$$Register, $dst$$Register);
+ __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 1);
+ __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
+ __ sxtb($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_mul16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp1, vecX vtmp2, iRegINoSp itmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (MulReductionVI isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp);
+ format %{ "ins $vtmp1, D, $vsrc, 0, 1\n\t"
+ "mulv $vtmp1, T8B, $vtmp1, $vsrc\n\t"
+ "ins $vtmp2, S, $vtmp1, 0, 1\n\t"
+ "mulv $vtmp1, T8B, $vtmp2, $vtmp1\n\t"
+ "ins $vtmp2, H, $vtmp1, 0, 1\n\t"
+ "mulv $vtmp2, T8B, $vtmp2, $vtmp1\n\t"
+ "umov $itmp, $vtmp2, B, 0\n\t"
+ "mulw $dst, $itmp, $isrc\n\t"
+ "sxtb $dst, $dst\n\t"
+ "umov $itmp, $vtmp2, B, 1\n\t"
+ "mulw $dst, $itmp, $dst\n\t"
+ "sxtb $dst, $dst\t# mul reduction16B"
+ %}
+ ins_encode %{
+ __ ins(as_FloatRegister($vtmp1$$reg), __ D,
+ as_FloatRegister($vsrc$$reg), 0, 1);
+ __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B,
+ as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg));
+ __ ins(as_FloatRegister($vtmp2$$reg), __ S,
+ as_FloatRegister($vtmp1$$reg), 0, 1);
+ __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B,
+ as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
+ __ ins(as_FloatRegister($vtmp2$$reg), __ H,
+ as_FloatRegister($vtmp1$$reg), 0, 1);
+ __ mulv(as_FloatRegister($vtmp2$$reg), __ T8B,
+ as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
+ __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 0);
+ __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register);
+ __ sxtb($dst$$Register, $dst$$Register);
+ __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 1);
+ __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
+ __ sxtb($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_mul4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD vtmp, iRegINoSp itmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (MulReductionVI isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP vtmp, TEMP itmp);
+ format %{ "ins $vtmp, S, $vsrc, 0, 1\n\t"
+ "mulv $vtmp, T4H, $vtmp, $vsrc\n\t"
+ "umov $itmp, $vtmp, H, 0\n\t"
+ "mulw $dst, $itmp, $isrc\n\t"
+ "sxth $dst, $dst\n\t"
+ "umov $itmp, $vtmp, H, 1\n\t"
+ "mulw $dst, $itmp, $dst\n\t"
+ "sxth $dst, $dst\t# mul reduction4S"
+ %}
+ ins_encode %{
+ __ ins(as_FloatRegister($vtmp$$reg), __ S,
+ as_FloatRegister($vsrc$$reg), 0, 1);
+ __ mulv(as_FloatRegister($vtmp$$reg), __ T4H,
+ as_FloatRegister($vtmp$$reg), as_FloatRegister($vsrc$$reg));
+ __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ H, 0);
+ __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register);
+ __ sxth($dst$$Register, $dst$$Register);
+ __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ H, 1);
+ __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
+ __ sxth($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_mul8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp1, vecX vtmp2, iRegINoSp itmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (MulReductionVI isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp);
+ format %{ "ins $vtmp1, D, $vsrc, 0, 1\n\t"
+ "mulv $vtmp1, T4H, $vtmp1, $vsrc\n\t"
+ "ins $vtmp2, S, $vtmp1, 0, 1\n\t"
+ "mulv $vtmp2, T4H, $vtmp2, $vtmp1\n\t"
+ "umov $itmp, $vtmp2, H, 0\n\t"
+ "mulw $dst, $itmp, $isrc\n\t"
+ "sxth $dst, $dst\n\t"
+ "umov $itmp, $vtmp2, H, 1\n\t"
+ "mulw $dst, $itmp, $dst\n\t"
+ "sxth $dst, $dst\t# mul reduction8S"
+ %}
+ ins_encode %{
+ __ ins(as_FloatRegister($vtmp1$$reg), __ D,
+ as_FloatRegister($vsrc$$reg), 0, 1);
+ __ mulv(as_FloatRegister($vtmp1$$reg), __ T4H,
+ as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg));
+ __ ins(as_FloatRegister($vtmp2$$reg), __ S,
+ as_FloatRegister($vtmp1$$reg), 0, 1);
+ __ mulv(as_FloatRegister($vtmp2$$reg), __ T4H,
+ as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg));
+ __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ H, 0);
+ __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register);
+ __ sxth($dst$$Register, $dst$$Register);
+ __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ H, 1);
+ __ mulw($dst$$Register, $itmp$$Register, $dst$$Register);
+ __ sxth($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_mul2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp)
+%{
+ match(Set dst (MulReductionVL isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, D, 0\n\t"
+ "mul $dst, $isrc, $tmp\n\t"
+ "umov $tmp, $vsrc, D, 1\n\t"
+ "mul $dst, $dst, $tmp\t# mul reduction2L"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
+ __ mul($dst$$Register, $isrc$$Register, $tmp$$Register);
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
+ __ mul($dst$$Register, $dst$$Register, $tmp$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+dnl
+define(`REDUCE_MAX_MIN_INT', `
+instruct reduce_$1$2$3`'(iRegINoSp dst, iRegIorL2I isrc, vec$4 vsrc, vec$4 tmp, rFlagsReg cr)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
+ match(Set dst ($5ReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "s$1v $tmp, T$2`'iTYPE2SIMD($3), $vsrc\n\t"
+ "$6mov $dst, $tmp, iTYPE2SIMD($3), 0\n\t"
+ "cmpw $dst, $isrc\n\t"
+ "cselw $dst, $dst, $isrc $7\t# $1 reduction$2$3"
+ %}
+ ins_encode %{
+ __ s$1v(as_FloatRegister($tmp$$reg), __ T$2`'iTYPE2SIMD($3), as_FloatRegister($vsrc$$reg));
+ __ $6mov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ iTYPE2SIMD($3), 0);
+ __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
+ __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::$7);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3 $4 $5 $6 $7
+REDUCE_MAX_MIN_INT(max, 8, B, D, Max, s, GT)
+REDUCE_MAX_MIN_INT(max, 16, B, X, Max, s, GT)
+REDUCE_MAX_MIN_INT(max, 4, S, D, Max, s, GT)
+REDUCE_MAX_MIN_INT(max, 8, S, X, Max, s, GT)
+REDUCE_MAX_MIN_INT(max, 4, I, X, Max, u, GT)
+REDUCE_MAX_MIN_INT(min, 8, B, D, Min, s, LT)
+REDUCE_MAX_MIN_INT(min, 16, B, X, Min, s, LT)
+REDUCE_MAX_MIN_INT(min, 4, S, D, Min, s, LT)
+REDUCE_MAX_MIN_INT(min, 8, S, X, Min, s, LT)
+REDUCE_MAX_MIN_INT(min, 4, I, X, Min, u, LT)
+dnl
+define(`REDUCE_MAX_MIN_2I', `
+instruct reduce_$1`'2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecX tmp, rFlagsReg cr)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst ($2ReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "dup $tmp, T2D, $vsrc\n\t"
+ "s$1v $tmp, T4S, $tmp\n\t"
+ "umov $dst, $tmp, S, 0\n\t"
+ "cmpw $dst, $isrc\n\t"
+ "cselw $dst, $dst, $isrc $3\t# $1 reduction2I"
+ %}
+ ins_encode %{
+ __ dup(as_FloatRegister($tmp$$reg), __ T2D, as_FloatRegister($vsrc$$reg));
+ __ s$1v(as_FloatRegister($tmp$$reg), __ T4S, as_FloatRegister($tmp$$reg));
+ __ umov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ S, 0);
+ __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg));
+ __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::$3);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3
+REDUCE_MAX_MIN_2I(max, Max, GT)
+REDUCE_MAX_MIN_2I(min, Min, LT)
+dnl
+define(`REDUCE_MAX_MIN_2L', `
+instruct reduce_$1`'2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp, rFlagsReg cr)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst ($2ReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp, KILL cr);
+ format %{ "umov $tmp, $vsrc, D, 0\n\t"
+ "cmp $isrc,$tmp\n\t"
+ "csel $dst, $isrc, $tmp $3\n\t"
+ "umov $tmp, $vsrc, D, 1\n\t"
+ "cmp $dst, $tmp\n\t"
+ "csel $dst, $dst, $tmp $3\t# $1 reduction2L"
+ %}
+ ins_encode %{
+ __ umov(as_Register($tmp$$reg), as_FloatRegister($vsrc$$reg), __ D, 0);
+ __ cmp(as_Register($isrc$$reg), as_Register($tmp$$reg));
+ __ csel(as_Register($dst$$reg), as_Register($isrc$$reg), as_Register($tmp$$reg), Assembler::$3);
+ __ umov(as_Register($tmp$$reg), as_FloatRegister($vsrc$$reg), __ D, 1);
+ __ cmp(as_Register($dst$$reg), as_Register($tmp$$reg));
+ __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($tmp$$reg), Assembler::$3);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3
+REDUCE_MAX_MIN_2L(max, Max, GT)
+REDUCE_MAX_MIN_2L(min, Min, LT)
+dnl
+define(`REDUCE_LOGIC_OP_8B', `
+instruct reduce_$1`'8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst ($2ReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, S, 0\n\t"
+ "umov $dst, $vsrc, S, 1\n\t"
+ "$1w $dst, $dst, $tmp\n\t"
+ "$1w $dst, $dst, $dst, LSR #16\n\t"
+ "$1w $dst, $dst, $dst, LSR #8\n\t"
+ "$1w $dst, $isrc, $dst\n\t"
+ "sxtb $dst, $dst\t# $1 reduction8B"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
+ __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
+ __ $1w($dst$$Register, $dst$$Register, $tmp$$Register);
+ __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
+ __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8);
+ __ $1w($dst$$Register, $isrc$$Register, $dst$$Register);
+ __ sxtb($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2
+REDUCE_LOGIC_OP_8B(and, And)
+REDUCE_LOGIC_OP_8B(orr, Or)
+REDUCE_LOGIC_OP_8B(eor, Xor)
+define(`REDUCE_LOGIC_OP_16B', `
+instruct reduce_$1`'16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst ($2ReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, D, 0\n\t"
+ "umov $dst, $vsrc, D, 1\n\t"
+ "$3 $dst, $dst, $tmp\n\t"
+ "$3 $dst, $dst, $dst, LSR #32\n\t"
+ "$1w $dst, $dst, $dst, LSR #16\n\t"
+ "$1w $dst, $dst, $dst, LSR #8\n\t"
+ "$1w $dst, $isrc, $dst\n\t"
+ "sxtb $dst, $dst\t# $1 reduction16B"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
+ __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
+ __ $3($dst$$Register, $dst$$Register, $tmp$$Register);
+ __ $3($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);
+ __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
+ __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8);
+ __ $1w($dst$$Register, $isrc$$Register, $dst$$Register);
+ __ sxtb($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3
+REDUCE_LOGIC_OP_16B(and, And, andr)
+REDUCE_LOGIC_OP_16B(orr, Or, orr )
+REDUCE_LOGIC_OP_16B(eor, Xor, eor )
+dnl
+define(`REDUCE_LOGIC_OP_4S', `
+instruct reduce_$1`'4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst ($2ReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, S, 0\n\t"
+ "umov $dst, $vsrc, S, 1\n\t"
+ "$1w $dst, $dst, $tmp\n\t"
+ "$1w $dst, $dst, $dst, LSR #16\n\t"
+ "$1w $dst, $isrc, $dst\n\t"
+ "sxth $dst, $dst\t# $1 reduction4S"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
+ __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
+ __ $1w($dst$$Register, $dst$$Register, $tmp$$Register);
+ __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
+ __ $1w($dst$$Register, $isrc$$Register, $dst$$Register);
+ __ sxth($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2
+REDUCE_LOGIC_OP_4S(and, And)
+REDUCE_LOGIC_OP_4S(orr, Or)
+REDUCE_LOGIC_OP_4S(eor, Xor)
+dnl
+define(`REDUCE_LOGIC_OP_8S', `
+instruct reduce_$1`'8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst ($2ReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, D, 0\n\t"
+ "umov $dst, $vsrc, D, 1\n\t"
+ "$3 $dst, $dst, $tmp\n\t"
+ "$3 $dst, $dst, $dst, LSR #32\n\t"
+ "$1w $dst, $dst, $dst, LSR #16\n\t"
+ "$1w $dst, $isrc, $dst\n\t"
+ "sxth $dst, $dst\t# $1 reduction8S"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
+ __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
+ __ $3($dst$$Register, $dst$$Register, $tmp$$Register);
+ __ $3($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);
+ __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16);
+ __ $1w($dst$$Register, $isrc$$Register, $dst$$Register);
+ __ sxth($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3
+REDUCE_LOGIC_OP_8S(and, And, andr)
+REDUCE_LOGIC_OP_8S(orr, Or, orr )
+REDUCE_LOGIC_OP_8S(eor, Xor, eor )
+dnl
+define(`REDUCE_LOGIC_OP_2I', `
+instruct reduce_$1`'2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst ($2ReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, S, 0\n\t"
+ "$1w $dst, $tmp, $isrc\n\t"
+ "umov $tmp, $vsrc, S, 1\n\t"
+ "$1w $dst, $tmp, $dst\t# $1 reduction2I"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0);
+ __ $1w($dst$$Register, $tmp$$Register, $isrc$$Register);
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 1);
+ __ $1w($dst$$Register, $tmp$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2
+REDUCE_LOGIC_OP_2I(and, And)
+REDUCE_LOGIC_OP_2I(orr, Or)
+REDUCE_LOGIC_OP_2I(eor, Xor)
+dnl
+define(`REDUCE_LOGIC_OP_4I', `
+instruct reduce_$1`'4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
+ match(Set dst ($2ReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, D, 0\n\t"
+ "umov $dst, $vsrc, D, 1\n\t"
+ "$3 $dst, $dst, $tmp\n\t"
+ "$3 $dst, $dst, $dst, LSR #32\n\t"
+ "$1w $dst, $isrc, $dst\t# $1 reduction4I"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
+ __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
+ __ $3($dst$$Register, $dst$$Register, $tmp$$Register);
+ __ $3($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32);
+ __ $1w($dst$$Register, $isrc$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3
+REDUCE_LOGIC_OP_4I(and, And, andr)
+REDUCE_LOGIC_OP_4I(orr, Or, orr )
+REDUCE_LOGIC_OP_4I(eor, Xor, eor )
+dnl
+define(`REDUCE_LOGIC_OP_2L', `
+instruct reduce_$1`'2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp)
+%{
+ predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst ($2ReductionV isrc vsrc));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp);
+ format %{ "umov $tmp, $vsrc, D, 0\n\t"
+ "$3 $dst, $isrc, $tmp\n\t"
+ "umov $tmp, $vsrc, D, 1\n\t"
+ "$3 $dst, $dst, $tmp\t# $1 reduction2L"
+ %}
+ ins_encode %{
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0);
+ __ $3($dst$$Register, $isrc$$Register, $tmp$$Register);
+ __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 1);
+ __ $3($dst$$Register, $dst$$Register, $tmp$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3
+REDUCE_LOGIC_OP_2L(and, And, andr)
+REDUCE_LOGIC_OP_2L(orr, Or, orr )
+REDUCE_LOGIC_OP_2L(eor, Xor, eor )
+dnl
+
+// ------------------------------ Vector insert ---------------------------------
+define(`VECTOR_INSERT_I', `
+instruct insert$1$2`'(vec$3 dst, vec$3 src, iReg$4`'ORL2I($4) val, immI idx)
+%{
+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
+ match(Set dst (VectorInsert (Binary src val) idx));
+ ins_cost(INSN_COST);
+ format %{ "orr $dst, T$5, $src, $src\n\t"
+ "mov $dst, T$1`'iTYPE2SIMD($2), $idx, $val\t# insert into vector($1$2)" %}
+ ins_encode %{
+ if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) {
+ __ orr(as_FloatRegister($dst$$reg), __ T$5,
+ as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
+ }
+ __ mov(as_FloatRegister($dst$$reg), __ T$1`'iTYPE2SIMD($2), $idx$$constant, $val$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3 $4 $5
+VECTOR_INSERT_I(8, B, D, I, 8B)
+VECTOR_INSERT_I(16, B, X, I, 16B)
+VECTOR_INSERT_I(4, S, D, I, 8B)
+VECTOR_INSERT_I(8, S, X, I, 16B)
+VECTOR_INSERT_I(2, I, D, I, 8B)
+VECTOR_INSERT_I(4, I, X, I, 16B)
+VECTOR_INSERT_I(2, L, X, L, 16B)
+dnl
+define(`VECTOR_INSERT_F', `
+instruct insert$1`'(vec$2 dst, vec$2 src, vReg$3 val, immI idx)
+%{
+ predicate(n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
+ match(Set dst (VectorInsert (Binary src val) idx));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst);
+ format %{ "orr $dst, T$4, $src, $src\n\t"
+ "ins $dst, $5, $val, $idx, 0\t# insert into vector($1)" %}
+ ins_encode %{
+ __ orr(as_FloatRegister($dst$$reg), __ T$4,
+ as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
+ __ ins(as_FloatRegister($dst$$reg), __ $5,
+ as_FloatRegister($val$$reg), $idx$$constant, 0);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3 $4 $5
+VECTOR_INSERT_F(2F, D, F, 8B, S)
+VECTOR_INSERT_F(4F, X, F, 16B, S)
+VECTOR_INSERT_F(2D, X, D, 16B, D)
+dnl
+
+// ------------------------------ Vector extract ---------------------------------
+define(`VECTOR_EXTRACT_I', `
+instruct extract$1$2`'(iReg$3NoSp dst, vec$4 src, immI idx)
+%{
+ predicate(n->in(1)->bottom_type()->is_vect()->length() == $1);
+ match(Set dst (Extract$2 src idx));
+ ins_cost(INSN_COST);
+ format %{ "$5mov $dst, $src, $6, $idx\t# extract from vector($1$2)" %}
+ ins_encode %{
+ __ $5mov($dst$$Register, as_FloatRegister($src$$reg), __ $6, $idx$$constant);
+ %}
+ ins_pipe(pipe_class_default);
+%}')dnl
+dnl $1 $2 $3 $4 $5 $6
+VECTOR_EXTRACT_I(8, B, I, D, s, B)
+VECTOR_EXTRACT_I(16, B, I, X, s, B)
+VECTOR_EXTRACT_I(4, S, I, D, s, H)
+VECTOR_EXTRACT_I(8, S, I, X, s, H)
+VECTOR_EXTRACT_I(2, I, I, D, u, S)
+VECTOR_EXTRACT_I(4, I, I, X, u, S)
+VECTOR_EXTRACT_I(2, L, L, X, u, D)
+dnl
+define(`VECTOR_EXTRACT_F', `
+instruct extract$1$2`'(vReg$2 dst, vec$3 src, immI idx)
+%{
+ predicate(n->in(1)->bottom_type()->is_vect()->length() == $1);
+ match(Set dst (Extract$2 src idx));
+ ins_cost(INSN_COST);
+ format %{ "ins $dst, $4, $src, 0, $idx\t# extract from vector($1$2)" %}
+ ins_encode %{
+ __ ins(as_FloatRegister($dst$$reg), __ $4,
+ as_FloatRegister($src$$reg), 0, $idx$$constant);
+ %}
+ ins_pipe(pipe_class_default);
+%}')dnl
+dnl $1 $2 $3 $4
+VECTOR_EXTRACT_F(2, F, D, S)
+VECTOR_EXTRACT_F(4, F, X, S)
+VECTOR_EXTRACT_F(2, D, X, D)
+dnl
+
+// ------------------------------ Vector comparison ---------------------------------
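+// eq/gt/ge map directly to NEON cmeq/cmgt/cmge (fcmeq/fcmgt/fcmge for
+// float/double); ne is cmeq followed by not, and lt/le are formed by swapping
+// the operands of cmgt/cmge.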
+define(`VECTOR_CMP_EQ_GT_GE', `
+instruct vcm$1$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == $2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::$1 &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "$6cm$1 $dst, $src1, $src2\t# vector cmp ($2$3)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ $6cm$1(as_FloatRegister($dst$$reg), __ T$2$5,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop$7);
+%}')dnl
+dnl $1 $2 $3 $4 $5 $6 $7
+VECTOR_CMP_EQ_GT_GE(eq, 8, B, D, B, , 64)
+VECTOR_CMP_EQ_GT_GE(eq, 16,B, X, B, , 128)
+VECTOR_CMP_EQ_GT_GE(eq, 4, S, D, H, , 64)
+VECTOR_CMP_EQ_GT_GE(eq, 8, S, X, H, , 128)
+VECTOR_CMP_EQ_GT_GE(eq, 2, I, D, S, , 64)
+VECTOR_CMP_EQ_GT_GE(eq, 4, I, X, S, , 128)
+VECTOR_CMP_EQ_GT_GE(eq, 2, L, X, D, , 128)
+VECTOR_CMP_EQ_GT_GE(eq, 2, F, D, S, f, 64)
+VECTOR_CMP_EQ_GT_GE(eq, 4, F, X, S, f, 128)
+VECTOR_CMP_EQ_GT_GE(eq, 2, D, X, D, f, 128)
+VECTOR_CMP_EQ_GT_GE(gt, 8, B, D, B, , 64)
+VECTOR_CMP_EQ_GT_GE(gt, 16,B, X, B, , 128)
+VECTOR_CMP_EQ_GT_GE(gt, 4, S, D, H, , 64)
+VECTOR_CMP_EQ_GT_GE(gt, 8, S, X, H, , 128)
+VECTOR_CMP_EQ_GT_GE(gt, 2, I, D, S, , 64)
+VECTOR_CMP_EQ_GT_GE(gt, 4, I, X, S, , 128)
+VECTOR_CMP_EQ_GT_GE(gt, 2, L, X, D, , 128)
+VECTOR_CMP_EQ_GT_GE(gt, 2, F, D, S, f, 64)
+VECTOR_CMP_EQ_GT_GE(gt, 4, F, X, S, f, 128)
+VECTOR_CMP_EQ_GT_GE(gt, 2, D, X, D, f, 128)
+VECTOR_CMP_EQ_GT_GE(ge, 8, B, D, B, , 64)
+VECTOR_CMP_EQ_GT_GE(ge, 16,B, X, B, , 128)
+VECTOR_CMP_EQ_GT_GE(ge, 4, S, D, H, , 64)
+VECTOR_CMP_EQ_GT_GE(ge, 8, S, X, H, , 128)
+VECTOR_CMP_EQ_GT_GE(ge, 2, I, D, S, , 64)
+VECTOR_CMP_EQ_GT_GE(ge, 4, I, X, S, , 128)
+VECTOR_CMP_EQ_GT_GE(ge, 2, L, X, D, , 128)
+VECTOR_CMP_EQ_GT_GE(ge, 2, F, D, S, f, 64)
+VECTOR_CMP_EQ_GT_GE(ge, 4, F, X, S, f, 128)
+VECTOR_CMP_EQ_GT_GE(ge, 2, D, X, D, f, 128)
+dnl
+define(`VECTOR_CMP_NE', `
+instruct vcmne$1$2`'(vec$3 dst, vec$3 src1, vec$3 src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == $1 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2));
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "$5cmeq $dst, $src1, $src2\n\t# vector cmp ($1$2)"
+ "not $dst, $dst\t" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ $5cmeq(as_FloatRegister($dst$$reg), __ T$1$4,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ __ notr(as_FloatRegister($dst$$reg), __ T$6, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3 $4 $5 $6
+VECTOR_CMP_NE(8, B, D, B, , 8B)
+VECTOR_CMP_NE(16,B, X, B, , 16B)
+VECTOR_CMP_NE(4, S, D, H, , 8B)
+VECTOR_CMP_NE(8, S, X, H, , 16B)
+VECTOR_CMP_NE(2, I, D, S, , 8B)
+VECTOR_CMP_NE(4, I, X, S, , 16B)
+VECTOR_CMP_NE(2, L, X, D, , 16B)
+VECTOR_CMP_NE(2, F, D, S, f, 8B)
+VECTOR_CMP_NE(4, F, X, S, f, 16B)
+VECTOR_CMP_NE(2, D, X, D, f, 16B)
+dnl
+define(`VECTOR_CMP_LT_LE', `
+instruct vcm$1$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2, immI cond)
+%{
+ predicate(n->as_Vector()->length() == $2 &&
+ n->as_VectorMaskCmp()->get_predicate() == BoolTest::$1 &&
+ n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3));
+ match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
+ format %{ "$6cm$7 $dst, $src2, $src1\t# vector cmp ($2$3)" %}
+ ins_cost(INSN_COST);
+ ins_encode %{
+ __ $6cm$7(as_FloatRegister($dst$$reg), __ T$2$5,
+ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(vdop$8);
+%}')dnl
+dnl $1 $2 $3 $4 $5 $6 $7 $8
+VECTOR_CMP_LT_LE(lt, 8, B, D, B, , gt, 64)
+VECTOR_CMP_LT_LE(lt, 16,B, X, B, , gt, 128)
+VECTOR_CMP_LT_LE(lt, 4, S, D, H, , gt, 64)
+VECTOR_CMP_LT_LE(lt, 8, S, X, H, , gt, 128)
+VECTOR_CMP_LT_LE(lt, 2, I, D, S, , gt, 64)
+VECTOR_CMP_LT_LE(lt, 4, I, X, S, , gt, 128)
+VECTOR_CMP_LT_LE(lt, 2, L, X, D, , gt, 128)
+VECTOR_CMP_LT_LE(lt, 2, F, D, S, f, gt, 64)
+VECTOR_CMP_LT_LE(lt, 4, F, X, S, f, gt, 128)
+VECTOR_CMP_LT_LE(lt, 2, D, X, D, f, gt, 128)
+VECTOR_CMP_LT_LE(le, 8, B, D, B, , ge, 64)
+VECTOR_CMP_LT_LE(le, 16,B, X, B, , ge, 128)
+VECTOR_CMP_LT_LE(le, 4, S, D, H, , ge, 64)
+VECTOR_CMP_LT_LE(le, 8, S, X, H, , ge, 128)
+VECTOR_CMP_LT_LE(le, 2, I, D, S, , ge, 64)
+VECTOR_CMP_LT_LE(le, 4, I, X, S, , ge, 128)
+VECTOR_CMP_LT_LE(le, 2, L, X, D, , ge, 128)
+VECTOR_CMP_LT_LE(le, 2, F, D, S, f, ge, 64)
+VECTOR_CMP_LT_LE(le, 4, F, X, S, f, ge, 128)
+VECTOR_CMP_LT_LE(le, 2, D, X, D, f, ge, 128)
+dnl
+
+// ------------------------------ Vector mul -----------------------------------
+
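+// Note: NEON integer mul has no 64-bit (2D) arrangement, so MulVL for two
+// longs goes through general registers: each lane is extracted with umov,
+// multiplied with a scalar mul, and inserted back with mov.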
+instruct vmul2L(vecX dst, vecX src1, vecX src2, iRegLNoSp tmp1, iRegLNoSp tmp2)
+%{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (MulVL src1 src2));
+ ins_cost(INSN_COST);
+ effect(TEMP tmp1, TEMP tmp2);
+ format %{ "umov $tmp1, $src1, D, 0\n\t"
+ "umov $tmp2, $src2, D, 0\n\t"
+ "mul $tmp2, $tmp2, $tmp1\n\t"
+ "mov $dst, T2D, 0, $tmp2\t# insert into vector(2L)\n\t"
+ "umov $tmp1, $src1, D, 1\n\t"
+ "umov $tmp2, $src2, D, 1\n\t"
+ "mul $tmp2, $tmp2, $tmp1\n\t"
+ "mov $dst, T2D, 1, $tmp2\t# insert into vector(2L)\n\t"
+ %}
+ ins_encode %{
+ __ umov($tmp1$$Register, as_FloatRegister($src1$$reg), __ D, 0);
+ __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ D, 0);
+ __ mul(as_Register($tmp2$$reg), as_Register($tmp2$$reg), as_Register($tmp1$$reg));
+ __ mov(as_FloatRegister($dst$$reg), __ T2D, 0, $tmp2$$Register);
+ __ umov($tmp1$$Register, as_FloatRegister($src1$$reg), __ D, 1);
+ __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ D, 1);
+ __ mul(as_Register($tmp2$$reg), as_Register($tmp2$$reg), as_Register($tmp1$$reg));
+ __ mov(as_FloatRegister($dst$$reg), __ T2D, 1, $tmp2$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+// --------------------------------- Vector not --------------------------------
+dnl
+define(`MATCH_RULE', `ifelse($1, I,
+`match(Set dst (XorV src (ReplicateB m1)));
+ match(Set dst (XorV src (ReplicateS m1)));
+ match(Set dst (XorV src (ReplicateI m1)));',
+`match(Set dst (XorV src (ReplicateL m1)));')')dnl
+dnl
+define(`VECTOR_NOT', `
+instruct vnot$1$2`'(vec$3 dst, vec$3 src, imm$2_M1 m1)
+%{
+ predicate(n->as_Vector()->length_in_bytes() == $4);
+ MATCH_RULE($2)
+ ins_cost(INSN_COST);
+ format %{ "not $dst, $src\t# vector ($5)" %}
+ ins_encode %{
+ __ notr(as_FloatRegister($dst$$reg), __ T$5,
+ as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_class_default);
+%}')dnl
+dnl $1 $2 $3 $4 $5
+VECTOR_NOT(2, I, D, 8, 8B)
+VECTOR_NOT(4, I, X, 16, 16B)
+VECTOR_NOT(2, L, X, 16, 16B)
+undefine(MATCH_RULE)
+dnl
+// ------------------------------ Vector max/min -------------------------------
+dnl
+define(`PREDICATE', `ifelse($1, 8B,
+`predicate((n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);',
+`predicate(n->as_Vector()->length() == $2 && n->bottom_type()->is_vect()->element_basic_type() == T_$3);')')dnl
+dnl
+define(`VECTOR_MAX_MIN_INT', `
+instruct v$1$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2)
+%{
+ PREDICATE(`$2$3', $2, TYPE2DATATYPE($3))
+ match(Set dst ($5V src1 src2));
+ ins_cost(INSN_COST);
+ format %{ "$1v $dst, $src1, $src2\t# vector ($2$3)" %}
+ ins_encode %{
+ __ $1v(as_FloatRegister($dst$$reg), __ T$2`'iTYPE2SIMD($3),
+ as_FloatRegister($src1$$reg),
+ as_FloatRegister($src2$$reg));
+ %}
+ ins_pipe(vdop$6);
+%}')dnl
+dnl $1 $2 $3 $4 $5 $6
+VECTOR_MAX_MIN_INT(max, 8, B, D, Max, 64)
+VECTOR_MAX_MIN_INT(max, 16, B, X, Max, 128)
+VECTOR_MAX_MIN_INT(max, 4, S, D, Max, 64)
+VECTOR_MAX_MIN_INT(max, 8, S, X, Max, 128)
+VECTOR_MAX_MIN_INT(max, 2, I, D, Max, 64)
+VECTOR_MAX_MIN_INT(max, 4, I, X, Max, 128)
+VECTOR_MAX_MIN_INT(min, 8, B, D, Min, 64)
+VECTOR_MAX_MIN_INT(min, 16, B, X, Min, 128)
+VECTOR_MAX_MIN_INT(min, 4, S, D, Min, 64)
+VECTOR_MAX_MIN_INT(min, 8, S, X, Min, 128)
+VECTOR_MAX_MIN_INT(min, 2, I, D, Min, 64)
+VECTOR_MAX_MIN_INT(min, 4, I, X, Min, 128)
+undefine(PREDICATE)
+dnl
+define(`VECTOR_MAX_MIN_LONG', `
+instruct v$1`'2L`'(vecX dst, vecX src1, vecX src2)
+%{
+ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG);
+ match(Set dst ($2V src1 src2));
+ ins_cost(INSN_COST);
+ effect(TEMP dst);
+ format %{ "cmgt $dst, $src1, $src2\t# vector (2L)\n\t"
+ "bsl $dst, $$3, $$4\t# vector (16B)" %}
+ ins_encode %{
+ __ cmgt(as_FloatRegister($dst$$reg), __ T2D,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ __ bsl(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($$3$$reg), as_FloatRegister($$4$$reg));
+ %}
+ ins_pipe(vdop128);
+%}')dnl
+dnl $1 $2 $3 $4
+VECTOR_MAX_MIN_LONG(max, Max, src1, src2)
+VECTOR_MAX_MIN_LONG(min, Min, src2, src1)
+dnl
+
+// --------------------------------- blend (bsl) ----------------------------
+dnl
+define(`VECTOR_BSL', `
+instruct vbsl$1B`'(vec$2 dst, vec$2 src1, vec$2 src2)
+%{
+ predicate(n->as_Vector()->length_in_bytes() == $1);
+ match(Set dst (VectorBlend (Binary src1 src2) dst));
+ ins_cost(INSN_COST);
+ format %{ "bsl $dst, $src2, $src1\t# vector ($1B)" %}
+ ins_encode %{
+ __ bsl(as_FloatRegister($dst$$reg), __ T$1B,
+ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg));
+ %}
+ ins_pipe(vlogical$3);
+%}')dnl
+dnl $1 $2 $3
+VECTOR_BSL(8, D, 64)
+VECTOR_BSL(16, X, 128)
+dnl
+
+// --------------------------------- Load/store Mask ----------------------------
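+// A vector mask is 0/1 boolean bytes in its byte form and all-zeros/all-ones
+// lanes in its in-register form. VectorLoadMask widens the 0/1 bytes to the
+// element size and negates them (1 -> -1, all bits set); VectorStoreMask
+// narrows the lanes back and negates them to recover 0/1 bytes.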
+dnl
+define(`PREDICATE', `ifelse($1, load,
+`predicate(n->as_Vector()->length() == $2 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);',
+`predicate(n->as_Vector()->length() == $2);')')dnl
+dnl
+define(`VECTOR_LOAD_STORE_MASK_B', `
+instruct $1mask$2B`'(vec$3 dst, vec$3 src $5 $6)
+%{
+ PREDICATE($1, $2)
+ match(Set dst (Vector$4Mask src $6));
+ ins_cost(INSN_COST);
+ format %{ "negr $dst, $src\t# $1 mask ($2B to $2B)" %}
+ ins_encode %{
+ __ negr(as_FloatRegister($dst$$reg), __ T$2B, as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_class_default);
+%}')dnl
+dnl $1 $2 $3 $4 $5 $6
+VECTOR_LOAD_STORE_MASK_B(load, 8, D, Load)
+VECTOR_LOAD_STORE_MASK_B(load, 16, X, Load)
+VECTOR_LOAD_STORE_MASK_B(store, 8, D, Store, `, immI_1', size)
+VECTOR_LOAD_STORE_MASK_B(store, 16, X, Store, `, immI_1', size)
+undefine(PREDICATE)dnl
+dnl
+define(`PREDICATE', `ifelse($1, load,
+`predicate(n->as_Vector()->length() == $2 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);',
+`predicate(n->as_Vector()->length() == $2);')')dnl
+dnl
+define(`VECTOR_LOAD_STORE_MASK_S', `
+instruct $1mask$2S`'(vec$3 dst, vec$4 src $9 $10)
+%{
+ PREDICATE($1, $2)
+ match(Set dst (Vector$5Mask src $10));
+ ins_cost(INSN_COST);
+ format %{ "$6 $dst, $src\n\t"
+ "negr $dst, $dst\t# $1 mask ($2$7 to $2$8)" %}
+ ins_encode %{
+ __ $6(as_FloatRegister($dst$$reg), __ T8$8, as_FloatRegister($src$$reg), __ T8$7);
+ __ negr(as_FloatRegister($dst$$reg), __ T8$8, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3 $4 $5 $6 $7 $8 $9 $10
+VECTOR_LOAD_STORE_MASK_S(load, 4, D, D, Load, uxtl, B, H)
+VECTOR_LOAD_STORE_MASK_S(load, 8, X, D, Load, uxtl, B, H)
+VECTOR_LOAD_STORE_MASK_S(store, 4, D, D, Store, xtn, H, B, `, immI_2', size)
+VECTOR_LOAD_STORE_MASK_S(store, 8, D, X, Store, xtn, H, B, `, immI_2', size)
+undefine(PREDICATE)dnl
+dnl
+define(`PREDICATE', `ifelse($1, load,
+`predicate(n->as_Vector()->length() == $2 &&
+ (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
+ n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));',
+`predicate(n->as_Vector()->length() == $2);')')dnl
+dnl
+define(`VECTOR_LOAD_STORE_MASK_I', `
+instruct $1mask$2I`'(vec$3 dst, vec$4 src $12 $13)
+%{
+ PREDICATE($1, $2)
+ match(Set dst (Vector$5Mask src $13));
+ ins_cost(INSN_COST);
+ format %{ "$6 $dst, $src\t# $2$7 to $2$8\n\t"
+ "$6 $dst, $dst\t# $2$8 to $2$9\n\t"
+ "negr $dst, $dst\t# $1 mask ($2$7 to $2$9)" %}
+ ins_encode %{
+ __ $6(as_FloatRegister($dst$$reg), __ T$10$8, as_FloatRegister($src$$reg), __ T$10$7);
+ __ $6(as_FloatRegister($dst$$reg), __ T$11$9, as_FloatRegister($dst$$reg), __ T$11$8);
+ __ negr(as_FloatRegister($dst$$reg), __ T$11$9, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3 $4 $5 $6 $7 $8 $9 $10 $11 $12 $13
+VECTOR_LOAD_STORE_MASK_I(load, 2, D, D, Load, uxtl, B, H, S, 8, 4)
+VECTOR_LOAD_STORE_MASK_I(load, 4, X, D, Load, uxtl, B, H, S, 8, 4)
+VECTOR_LOAD_STORE_MASK_I(store, 2, D, D, Store, xtn, S, H, B, 4, 8, `, immI_4', size)
+VECTOR_LOAD_STORE_MASK_I(store, 4, D, X, Store, xtn, S, H, B, 4, 8, `, immI_4', size)
+undefine(PREDICATE)
+dnl
+instruct loadmask2L(vecX dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 2 &&
+ (n->bottom_type()->is_vect()->element_basic_type() == T_LONG ||
+ n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE));
+ match(Set dst (VectorLoadMask src));
+ ins_cost(INSN_COST);
+ format %{ "uxtl $dst, $src\t# 2B to 2S\n\t"
+ "uxtl $dst, $dst\t# 2S to 2I\n\t"
+ "uxtl $dst, $dst\t# 2I to 2L\n\t"
+ "neg $dst, $dst\t# load mask (2B to 2L)" %}
+ ins_encode %{
+ __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
+ __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
+ __ uxtl(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($dst$$reg), __ T2S);
+ __ negr(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct storemask2L(vecD dst, vecX src, immI_8 size)
+%{
+ predicate(n->as_Vector()->length() == 2);
+ match(Set dst (VectorStoreMask src size));
+ ins_cost(INSN_COST);
+ format %{ "xtn $dst, $src\t# 2L to 2I\n\t"
+ "xtn $dst, $dst\t# 2I to 2S\n\t"
+ "xtn $dst, $dst\t# 2S to 2B\n\t"
+ "neg $dst, $dst\t# store mask (2L to 2B)" %}
+ ins_encode %{
+ __ xtn(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg), __ T2D);
+ __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($dst$$reg), __ T4S);
+ __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg), __ T8H);
+ __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+//-------------------------------- LOAD_IOTA_INDICES----------------------------------
+dnl
+define(`PREDICATE', `ifelse($1, 8,
+`predicate((n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4 ||
+ n->as_Vector()->length() == 8) &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);',
+`predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);')')dnl
+dnl
+define(`VECTOR_LOAD_CON', `
+instruct loadcon$1B`'(vec$2 dst, immI0 src)
+%{
+ PREDICATE($1)
+ match(Set dst (VectorLoadConst src));
+ ins_cost(INSN_COST);
+ format %{ "ldr $dst, CONSTANT_MEMORY\t# load iota indices" %}
+ ins_encode %{
+ __ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices()));
+ __ ldr$3(as_FloatRegister($dst$$reg), rscratch1);
+ %}
+ ins_pipe(pipe_class_memory);
+%}')dnl
+dnl $1 $2 $3
+VECTOR_LOAD_CON(8, D, d)
+VECTOR_LOAD_CON(16, X, q)
+undefine(PREDICATE)
+dnl
+//-------------------------------- LOAD_SHUFFLE ----------------------------------
+dnl
+define(`VECTOR_LOAD_SHUFFLE_B', `
+instruct loadshuffle$1B`'(vec$2 dst, vec$2 src)
+%{
+ predicate(n->as_Vector()->length() == $1 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorLoadShuffle src));
+ ins_cost(INSN_COST);
+ format %{ "mov $dst, $src\t# get $1B shuffle" %}
+ ins_encode %{
+ __ orr(as_FloatRegister($dst$$reg), __ T$1B,
+ as_FloatRegister($src$$reg), as_FloatRegister($src$$reg));
+ %}
+ ins_pipe(pipe_class_default);
+%}')dnl
+dnl $1 $2
+VECTOR_LOAD_SHUFFLE_B(8, D)
+VECTOR_LOAD_SHUFFLE_B(16, X)
+dnl
+define(`VECTOR_LOAD_SHUFFLE_S', `
+instruct loadshuffle$1S`'(vec$2 dst, vec$3 src)
+%{
+ predicate(n->as_Vector()->length() == $1 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorLoadShuffle src));
+ ins_cost(INSN_COST);
+ format %{ "uxtl $dst, $src\t# $1B to $1H" %}
+ ins_encode %{
+ __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
+ %}
+ ins_pipe(pipe_class_default);
+%}')dnl
+dnl $1 $2 $3
+VECTOR_LOAD_SHUFFLE_S(4, D, D)
+VECTOR_LOAD_SHUFFLE_S(8, X, D)
+dnl
+
+instruct loadshuffle4I(vecX dst, vecD src)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
+ n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
+ match(Set dst (VectorLoadShuffle src));
+ ins_cost(INSN_COST);
+ format %{ "uxtl $dst, $src\t# 4B to 4H \n\t"
+ "uxtl $dst, $dst\t# 4H to 4S" %}
+ ins_encode %{
+ __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B);
+ __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+//-------------------------------- Rearrange -------------------------------------
+// Here is an example that rearranges a NEON vector with 4 ints:
+// Rearrange V1 int[a0, a1, a2, a3] to V2 int[a2, a3, a0, a1]
+// 1. Get the indices of V1 and store them as Vi byte[0, 1, 2, 3].
+// 2. Convert Vi byte[0, 1, 2, 3] to the indices of V2 and also store them as Vi byte[2, 3, 0, 1].
+// 3. Unsigned-extend Vi from byte[2, 3, 0, 1] to int[2, 3, 0, 1].
+// 4. Multiply Vi int[2, 3, 0, 1] with constant int[0x04040404, 0x04040404, 0x04040404, 0x04040404]
+// and get tbl base Vm int[0x08080808, 0x0c0c0c0c, 0x00000000, 0x04040404].
+// 5. Add Vm with constant int[0x03020100, 0x03020100, 0x03020100, 0x03020100]
+// and get tbl index Vm int[0x0b0a0908, 0x0f0e0d0c, 0x03020100, 0x07060504]
+// 6. Use Vm as index register, and use V1 as table register.
+// Then get V2 as the result by tbl NEON instructions.
+// Notes:
+// Step 1 matches VectorLoadConst.
+// Step 3 matches VectorLoadShuffle.
+// Step 4, 5, 6 match VectorRearrange.
+// For VectorRearrange short/int, such a complex calculation is required
+// because NEON tbl only supports a table of bytes, so for short/int we need
+// to look up 2/4 bytes as a group. For VectorRearrange long, we use bsl
+// to implement rearrange.
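+// For example, lane 0 of V2 should hold a2, i.e. source bytes 8..11
+// (2 * 4 + {0, 1, 2, 3}): multiplying the index 2 by 0x04040404 gives
+// 0x08080808, and adding 0x03020100 gives 0x0b0a0908, which is exactly
+// bytes {8, 9, 10, 11} in little-endian order, ready for tbl.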
+define(`VECTOR_REARRANGE_B', `
+instruct rearrange$1B`'(vec$2 dst, vec$2 src, vec$2 shuffle)
+%{
+ predicate(n->as_Vector()->length() == $1 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (VectorRearrange src shuffle));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst);
+ format %{ "tbl $dst, {$dst}, $shuffle\t# rearrange $1B" %}
+ ins_encode %{
+ __ tbl(as_FloatRegister($dst$$reg), __ T$1B,
+ as_FloatRegister($src$$reg), 1, as_FloatRegister($shuffle$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2
+VECTOR_REARRANGE_B(8, D)
+VECTOR_REARRANGE_B(16, X)
+dnl
+define(`VECTOR_REARRANGE_S', `
+instruct rearrange$1S`'(vec$2 dst, vec$2 src, vec$2 shuffle, vec$2 tmp0, vec$2 tmp1)
+%{
+ predicate(n->as_Vector()->length() == $1 &&
+ n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (VectorRearrange src shuffle));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp0, TEMP tmp1);
+ format %{ "mov $tmp0, CONSTANT\t# constant 0x0202020202020202\n\t"
+ "mov $tmp1, CONSTANT\t# constant 0x0100010001000100\n\t"
+ "mulv $dst, T$1H, $shuffle, $tmp0\n\t"
+ "addv $dst, T$3B, $dst, $tmp1\n\t"
+ "tbl $dst, {$src}, $dst\t# rearrange $1S" %}
+ ins_encode %{
+ __ mov(as_FloatRegister($tmp0$$reg), __ T$3B, 0x02);
+ __ mov(as_FloatRegister($tmp1$$reg), __ T$1H, 0x0100);
+ __ mulv(as_FloatRegister($dst$$reg), __ T$1H,
+ as_FloatRegister($shuffle$$reg), as_FloatRegister($tmp0$$reg));
+ __ addv(as_FloatRegister($dst$$reg), __ T$3B,
+ as_FloatRegister($dst$$reg), as_FloatRegister($tmp1$$reg));
+ __ tbl(as_FloatRegister($dst$$reg), __ T$3B,
+ as_FloatRegister($src$$reg), 1, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2 $3
+VECTOR_REARRANGE_S(4, D, 8)
+VECTOR_REARRANGE_S(8, X, 16)
+
+instruct rearrange4I(vecX dst, vecX src, vecX shuffle, vecX tmp0, vecX tmp1)
+%{
+ predicate(n->as_Vector()->length() == 4 &&
+ (n->bottom_type()->is_vect()->element_basic_type() == T_INT ||
+ n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));
+ match(Set dst (VectorRearrange src shuffle));
+ ins_cost(INSN_COST);
+ effect(TEMP_DEF dst, TEMP tmp0, TEMP tmp1);
+ format %{ "mov $tmp0, CONSTANT\t# constant 0x0404040404040404\n\t"
+ "mov $tmp1, CONSTANT\t# constant 0x0302010003020100\n\t"
+ "mulv $dst, T8H, $shuffle, $tmp0\n\t"
+ "addv $dst, T16B, $dst, $tmp1\n\t"
+ "tbl $dst, {$src}, $dst\t# rearrange 4I" %}
+ ins_encode %{
+ __ mov(as_FloatRegister($tmp0$$reg), __ T16B, 0x04);
+ __ mov(as_FloatRegister($tmp1$$reg), __ T4S, 0x03020100);
+ __ mulv(as_FloatRegister($dst$$reg), __ T4S,
+ as_FloatRegister($shuffle$$reg), as_FloatRegister($tmp0$$reg));
+ __ addv(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($dst$$reg), as_FloatRegister($tmp1$$reg));
+ __ tbl(as_FloatRegister($dst$$reg), __ T16B,
+ as_FloatRegister($src$$reg), 1, as_FloatRegister($dst$$reg));
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+//-------------------------------- Anytrue/alltrue -----------------------------
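+// anytrue: addv sums the mask bytes; the result is non-zero iff at least one
+// lane is set, since each set lane contributes 0xff and at most 16 lanes
+// cannot wrap the byte sum back to zero.
+// alltrue: and the mask with the all-true mask, invert, then sum; the result
+// is zero iff every lane was all-ones.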
+dnl
+define(`ANYTRUE_IN_MASK', `
+instruct anytrue_in_mask$1B`'(iRegINoSp dst, vec$2 src1, vec$2 src2, vec$2 tmp, rFlagsReg cr)
+%{
+ predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
+ match(Set dst (VectorTest src1 src2 ));
+ ins_cost(INSN_COST);
+ effect(TEMP tmp, KILL cr);
+ format %{ "addv $tmp, T$1B, $src1\t# src1 and src2 are the same\n\t"
+ "umov $dst, $tmp, B, 0\n\t"
+ "cmp $dst, 0\n\t"
+ "cset $dst" %}
+ ins_encode %{
+ __ addv(as_FloatRegister($tmp$$reg), __ T$1B, as_FloatRegister($src1$$reg));
+ __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
+ __ cmpw($dst$$Register, zr);
+ __ csetw($dst$$Register, Assembler::NE);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2
+ANYTRUE_IN_MASK(8, D)
+ANYTRUE_IN_MASK(16, X)
+dnl
+define(`ALLTRUE_IN_MASK', `
+instruct alltrue_in_mask$1B`'(iRegINoSp dst, vec$2 src1, vec$2 src2, vec$2 tmp, rFlagsReg cr)
+%{
+ predicate(static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
+ match(Set dst (VectorTest src1 src2 ));
+ ins_cost(INSN_COST);
+ effect(TEMP tmp, KILL cr);
+ format %{ "andr $tmp, T$1B, $src1, $src2\t# src2 is maskAllTrue\n\t"
+ "notr $tmp, T$1B, $tmp\n\t"
+ "addv $tmp, T$1B, $tmp\n\t"
+ "umov $dst, $tmp, B, 0\n\t"
+ "cmp $dst, 0\n\t"
+ "cset $dst" %}
+ ins_encode %{
+ __ andr(as_FloatRegister($tmp$$reg), __ T$1B,
+ as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg));
+ __ notr(as_FloatRegister($tmp$$reg), __ T$1B, as_FloatRegister($tmp$$reg));
+ __ addv(as_FloatRegister($tmp$$reg), __ T$1B, as_FloatRegister($tmp$$reg));
+ __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
+ __ cmpw($dst$$Register, zr);
+ __ csetw($dst$$Register, Assembler::EQ);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
+dnl $1 $2
+ALLTRUE_IN_MASK(8, D)
+ALLTRUE_IN_MASK(16, X)
+dnl
diff --git a/src/hotspot/cpu/aarch64/aarch64_sve.ad b/src/hotspot/cpu/aarch64/aarch64_sve.ad
index 90442c7b8b6..f34d4890c70 100644
--- a/src/hotspot/cpu/aarch64/aarch64_sve.ad
+++ b/src/hotspot/cpu/aarch64/aarch64_sve.ad
@@ -159,6 +159,31 @@ source %{
case Op_ExtractL:
case Op_ExtractS:
case Op_ExtractUB:
+ // Vector API specific
+ case Op_AndReductionV:
+ case Op_OrReductionV:
+ case Op_XorReductionV:
+ case Op_MaxReductionV:
+ case Op_MinReductionV:
+ case Op_LoadVectorGather:
+ case Op_StoreVectorScatter:
+ case Op_VectorBlend:
+ case Op_VectorCast:
+ case Op_VectorCastB2X:
+ case Op_VectorCastD2X:
+ case Op_VectorCastF2X:
+ case Op_VectorCastI2X:
+ case Op_VectorCastL2X:
+ case Op_VectorCastS2X:
+ case Op_VectorInsert:
+ case Op_VectorLoadConst:
+ case Op_VectorLoadMask:
+ case Op_VectorLoadShuffle:
+ case Op_VectorMaskCmp:
+ case Op_VectorRearrange:
+ case Op_VectorReinterpret:
+ case Op_VectorStoreMask:
+ case Op_VectorTest:
return false;
default:
return true;
@@ -846,9 +871,49 @@ instruct vpopcountI(vReg dst, vReg src) %{
// vector add reduction
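+// Note: for byte/short elements, sve_uaddv sums the active lanes into the
+// low 64 bits of tmp, smov sign-extends element 0 into the GPR, addw adds
+// the scalar input, and the trailing sxtb/sxth wraps the result back to
+// byte/short precision for the sub-int add reduction.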
+instruct reduce_addB(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
+ n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE);
+ match(Set dst (AddReductionVI src1 src2));
+ effect(TEMP_DEF dst, TEMP tmp);
+ ins_cost(SVE_COST);
+ format %{ "sve_uaddv $tmp, $src2\t# vector (sve) (B)\n\t"
+ "smov $dst, $tmp, B, 0\n\t"
+ "addw $dst, $dst, $src1\n\t"
+ "sxtb $dst, $dst\t # add reduction B" %}
+ ins_encode %{
+ __ sve_uaddv(as_FloatRegister($tmp$$reg), __ B,
+ ptrue, as_FloatRegister($src2$$reg));
+ __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0);
+ __ addw($dst$$Register, $dst$$Register, $src1$$Register);
+ __ sxtb($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
+instruct reduce_addS(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
+ n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT);
+ match(Set dst (AddReductionVI src1 src2));
+ effect(TEMP_DEF dst, TEMP tmp);
+ ins_cost(SVE_COST);
+ format %{ "sve_uaddv $tmp, $src2\t# vector (sve) (H)\n\t"
+ "smov $dst, $tmp, H, 0\n\t"
+ "addw $dst, $dst, $src1\n\t"
+ "sxth $dst, $dst\t # add reduction H" %}
+ ins_encode %{
+ __ sve_uaddv(as_FloatRegister($tmp$$reg), __ H,
+ ptrue, as_FloatRegister($src2$$reg));
+ __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ H, 0);
+ __ addw($dst$$Register, $dst$$Register, $src1$$Register);
+ __ sxth($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}
+
instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{
predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
- (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT));
+ n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT);
match(Set dst (AddReductionVI src1 src2));
effect(TEMP_DEF dst, TEMP tmp);
ins_cost(SVE_COST);
@@ -866,7 +931,7 @@ instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{
instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp) %{
predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
- (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG));
+ n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG);
match(Set dst (AddReductionVL src1 src2));
effect(TEMP_DEF dst, TEMP tmp);
ins_cost(SVE_COST);
@@ -1264,7 +1329,7 @@ instruct vlsrL(vReg dst, vReg shift) %{
instruct vasrB_imm(vReg dst, vReg src, immI shift) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
- match(Set dst (RShiftVB src shift));
+ match(Set dst (RShiftVB src (RShiftCntV shift)));
ins_cost(SVE_COST);
format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (B)" %}
ins_encode %{
@@ -1283,7 +1348,7 @@ instruct vasrB_imm(vReg dst, vReg src, immI shift) %{
instruct vasrS_imm(vReg dst, vReg src, immI shift) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
- match(Set dst (RShiftVS src shift));
+ match(Set dst (RShiftVS src (RShiftCntV shift)));
ins_cost(SVE_COST);
format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (H)" %}
ins_encode %{
@@ -1302,7 +1367,7 @@ instruct vasrS_imm(vReg dst, vReg src, immI shift) %{
instruct vasrI_imm(vReg dst, vReg src, immI shift) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
- match(Set dst (RShiftVI src shift));
+ match(Set dst (RShiftVI src (RShiftCntV shift)));
ins_cost(SVE_COST);
format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (S)" %}
ins_encode %{
@@ -1320,7 +1385,7 @@ instruct vasrI_imm(vReg dst, vReg src, immI shift) %{
instruct vasrL_imm(vReg dst, vReg src, immI shift) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
- match(Set dst (RShiftVL src shift));
+ match(Set dst (RShiftVL src (RShiftCntV shift)));
ins_cost(SVE_COST);
format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (D)" %}
ins_encode %{
@@ -1338,7 +1403,7 @@ instruct vasrL_imm(vReg dst, vReg src, immI shift) %{
instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
- match(Set dst (URShiftVB src shift));
+ match(Set dst (URShiftVB src (RShiftCntV shift)));
ins_cost(SVE_COST);
format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (B)" %}
ins_encode %{
@@ -1361,7 +1426,7 @@ instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{
instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
- match(Set dst (URShiftVS src shift));
+ match(Set dst (URShiftVS src (RShiftCntV shift)));
ins_cost(SVE_COST);
format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (H)" %}
ins_encode %{
@@ -1371,7 +1436,7 @@ instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{
as_FloatRegister($src$$reg));
return;
}
- if (con >= 8) {
+ if (con >= 16) {
__ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
as_FloatRegister($src$$reg));
return;
@@ -1384,7 +1449,7 @@ instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{
instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
- match(Set dst (URShiftVI src shift));
+ match(Set dst (URShiftVI src (RShiftCntV shift)));
ins_cost(SVE_COST);
format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (S)" %}
ins_encode %{
@@ -1402,7 +1467,7 @@ instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{
instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
- match(Set dst (URShiftVL src shift));
+ match(Set dst (URShiftVL src (RShiftCntV shift)));
ins_cost(SVE_COST);
format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (D)" %}
ins_encode %{
@@ -1420,7 +1485,7 @@ instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{
instruct vlslB_imm(vReg dst, vReg src, immI shift) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= 16);
- match(Set dst (LShiftVB src shift));
+ match(Set dst (LShiftVB src (LShiftCntV shift)));
ins_cost(SVE_COST);
format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (B)" %}
ins_encode %{
@@ -1438,12 +1503,12 @@ instruct vlslB_imm(vReg dst, vReg src, immI shift) %{
instruct vlslS_imm(vReg dst, vReg src, immI shift) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= 8);
- match(Set dst (LShiftVS src shift));
+ match(Set dst (LShiftVS src (LShiftCntV shift)));
ins_cost(SVE_COST);
format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (H)" %}
ins_encode %{
int con = (int)$shift$$constant;
- if (con >= 8) {
+ if (con >= 16) {
__ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
as_FloatRegister($src$$reg));
return;
@@ -1456,7 +1521,7 @@ instruct vlslS_imm(vReg dst, vReg src, immI shift) %{
instruct vlslI_imm(vReg dst, vReg src, immI shift) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
- match(Set dst (LShiftVI src shift));
+ match(Set dst (LShiftVI src (LShiftCntV shift)));
ins_cost(SVE_COST);
format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (S)" %}
ins_encode %{
@@ -1469,7 +1534,7 @@ instruct vlslI_imm(vReg dst, vReg src, immI shift) %{
instruct vlslL_imm(vReg dst, vReg src, immI shift) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= 2);
- match(Set dst (LShiftVL src shift));
+ match(Set dst (LShiftVL src (LShiftCntV shift)));
ins_cost(SVE_COST);
format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (D)" %}
ins_encode %{
diff --git a/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4
index 7bb76cc5941..7fe0861a717 100644
--- a/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4
+++ b/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4
@@ -146,6 +146,31 @@ source %{
case Op_ExtractL:
case Op_ExtractS:
case Op_ExtractUB:
+ // Vector API specific
+ case Op_AndReductionV:
+ case Op_OrReductionV:
+ case Op_XorReductionV:
+ case Op_MaxReductionV:
+ case Op_MinReductionV:
+ case Op_LoadVectorGather:
+ case Op_StoreVectorScatter:
+ case Op_VectorBlend:
+ case Op_VectorCast:
+ case Op_VectorCastB2X:
+ case Op_VectorCastD2X:
+ case Op_VectorCastF2X:
+ case Op_VectorCastI2X:
+ case Op_VectorCastL2X:
+ case Op_VectorCastS2X:
+ case Op_VectorInsert:
+ case Op_VectorLoadConst:
+ case Op_VectorLoadMask:
+ case Op_VectorLoadShuffle:
+ case Op_VectorMaskCmp:
+ case Op_VectorRearrange:
+ case Op_VectorReinterpret:
+ case Op_VectorStoreMask:
+ case Op_VectorTest:
return false;
default:
return true;
@@ -507,15 +532,38 @@ instruct vpopcountI(vReg dst, vReg src) %{
__ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg));
%}
ins_pipe(pipe_slow);
-%}
+%}dnl
+dnl
+dnl REDUCE_ADD_EXT($1, $2, $3, $4, $5, $6, $7 )
+dnl REDUCE_ADD_EXT(insn_name, op_name, reg_dst, reg_src, size, elem_type, insn1)
+define(`REDUCE_ADD_EXT', `
+instruct $1($3 dst, $4 src1, vReg src2, vRegD tmp) %{
+ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
+ n->in(2)->bottom_type()->is_vect()->element_basic_type() == $6);
+ match(Set dst ($2 src1 src2));
+ effect(TEMP_DEF dst, TEMP tmp);
+ ins_cost(SVE_COST);
+ format %{ "sve_uaddv $tmp, $src2\t# vector (sve) ($5)\n\t"
+ "smov $dst, $tmp, $5, 0\n\t"
+ "addw $dst, $dst, $src1\n\t"
+ "$7 $dst, $dst\t # add reduction $5" %}
+ ins_encode %{
+ __ sve_uaddv(as_FloatRegister($tmp$$reg), __ $5,
+ ptrue, as_FloatRegister($src2$$reg));
+ __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ $5, 0);
+ __ addw($dst$$Register, $dst$$Register, $src1$$Register);
+ __ $7($dst$$Register, $dst$$Register);
+ %}
+ ins_pipe(pipe_slow);
+%}')dnl
dnl
dnl REDUCE_ADD($1, $2, $3, $4, $5, $6, $7 )
dnl REDUCE_ADD(insn_name, op_name, reg_dst, reg_src, size, elem_type, insn1)
define(`REDUCE_ADD', `
instruct $1($3 dst, $4 src1, vReg src2, vRegD tmp) %{
predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
- ELEMENT_SHORT_CHAR($6, n->in(2)));
+ n->in(2)->bottom_type()->is_vect()->element_basic_type() == $6);
match(Set dst ($2 src1 src2));
effect(TEMP_DEF dst, TEMP tmp);
ins_cost(SVE_COST);
@@ -545,8 +593,10 @@ instruct $1($3 src1_dst, vReg src2) %{
%}
ins_pipe(pipe_slow);
%}')dnl
-dnl
+
// vector add reduction
+REDUCE_ADD_EXT(reduce_addB, AddReductionVI, iRegINoSp, iRegIorL2I, B, T_BYTE, sxtb)
+REDUCE_ADD_EXT(reduce_addS, AddReductionVI, iRegINoSp, iRegIorL2I, H, T_SHORT, sxth)
REDUCE_ADD(reduce_addI, AddReductionVI, iRegINoSp, iRegIorL2I, S, T_INT, addw)
REDUCE_ADD(reduce_addL, AddReductionVL, iRegLNoSp, iRegL, D, T_LONG, add)
REDUCE_ADDF(reduce_addF, AddReductionVF, vRegF, S)
@@ -677,14 +727,14 @@ instruct $1(vReg dst, vReg shift) %{
ins_pipe(pipe_slow);
%}')dnl
dnl
-dnl VSHIFT_IMM_UNPREDICATE($1, $2, $3, $4, $5 )
-dnl VSHIFT_IMM_UNPREDICATE(insn_name, op_name, size, min_vec_len, insn)
+dnl VSHIFT_IMM_UNPREDICATE($1, $2, $3, $4, $5, $6 )
+dnl VSHIFT_IMM_UNPREDICATE(insn_name, op_name, op_name2, size, min_vec_len, insn)
define(`VSHIFT_IMM_UNPREDICATE', `
instruct $1(vReg dst, vReg src, immI shift) %{
- predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
- match(Set dst ($2 src shift));
+ predicate(UseSVE > 0 && n->as_Vector()->length() >= $5);
+ match(Set dst ($2 src ($3 shift)));
ins_cost(SVE_COST);
- format %{ "$5 $dst, $src, $shift\t# vector (sve) ($3)" %}
+ format %{ "$6 $dst, $src, $shift\t# vector (sve) ($4)" %}
ins_encode %{
int con = (int)$shift$$constant;dnl
ifelse(eval(index(`$1', `vasr') == 0 || index(`$1', `vlsr') == 0), 1, `
@@ -693,16 +743,21 @@ ifelse(eval(index(`$1', `vasr') == 0 || index(`$1', `vlsr') == 0), 1, `
as_FloatRegister($src$$reg));
return;
}')dnl
-ifelse(eval(index(`$1', `vasr') == 0), 1, `ifelse(eval(index(`$3', `B') == 0), 1, `
- if (con >= 8) con = 7;')ifelse(eval(index(`$3', `H') == 0), 1, `
+ifelse(eval(index(`$1', `vasr') == 0), 1, `ifelse(eval(index(`$4', `B') == 0), 1, `
+ if (con >= 8) con = 7;')ifelse(eval(index(`$4', `H') == 0), 1, `
if (con >= 16) con = 15;')')dnl
-ifelse(eval((index(`$1', `vlsl') == 0 || index(`$1', `vlsr') == 0) && (index(`$3', `B') == 0 || index(`$3', `H') == 0)), 1, `
+ifelse(eval(index(`$1', `vlsl') == 0 || index(`$1', `vlsr') == 0), 1, `ifelse(eval(index(`$4', `B') == 0), 1, `
if (con >= 8) {
__ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
as_FloatRegister($src$$reg));
return;
- }')
- __ $5(as_FloatRegister($dst$$reg), __ $3,
+ }')ifelse(eval(index(`$4', `H') == 0), 1, `
+ if (con >= 16) {
+ __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
+ as_FloatRegister($src$$reg));
+ return;
+ }')')
+ __ $6(as_FloatRegister($dst$$reg), __ $4,
as_FloatRegister($src$$reg), con);
%}
ins_pipe(pipe_slow);
@@ -736,18 +791,18 @@ VSHIFT_TRUE_PREDICATE(vlsrB, URShiftVB, B, 16, sve_lsr)
VSHIFT_TRUE_PREDICATE(vlsrS, URShiftVS, H, 8, sve_lsr)
VSHIFT_TRUE_PREDICATE(vlsrI, URShiftVI, S, 4, sve_lsr)
VSHIFT_TRUE_PREDICATE(vlsrL, URShiftVL, D, 2, sve_lsr)
-VSHIFT_IMM_UNPREDICATE(vasrB_imm, RShiftVB, B, 16, sve_asr)
-VSHIFT_IMM_UNPREDICATE(vasrS_imm, RShiftVS, H, 8, sve_asr)
-VSHIFT_IMM_UNPREDICATE(vasrI_imm, RShiftVI, S, 4, sve_asr)
-VSHIFT_IMM_UNPREDICATE(vasrL_imm, RShiftVL, D, 2, sve_asr)
-VSHIFT_IMM_UNPREDICATE(vlsrB_imm, URShiftVB, B, 16, sve_lsr)
-VSHIFT_IMM_UNPREDICATE(vlsrS_imm, URShiftVS, H, 8, sve_lsr)
-VSHIFT_IMM_UNPREDICATE(vlsrI_imm, URShiftVI, S, 4, sve_lsr)
-VSHIFT_IMM_UNPREDICATE(vlsrL_imm, URShiftVL, D, 2, sve_lsr)
-VSHIFT_IMM_UNPREDICATE(vlslB_imm, LShiftVB, B, 16, sve_lsl)
-VSHIFT_IMM_UNPREDICATE(vlslS_imm, LShiftVS, H, 8, sve_lsl)
-VSHIFT_IMM_UNPREDICATE(vlslI_imm, LShiftVI, S, 4, sve_lsl)
-VSHIFT_IMM_UNPREDICATE(vlslL_imm, LShiftVL, D, 2, sve_lsl)
+VSHIFT_IMM_UNPREDICATE(vasrB_imm, RShiftVB, RShiftCntV, B, 16, sve_asr)
+VSHIFT_IMM_UNPREDICATE(vasrS_imm, RShiftVS, RShiftCntV, H, 8, sve_asr)
+VSHIFT_IMM_UNPREDICATE(vasrI_imm, RShiftVI, RShiftCntV, S, 4, sve_asr)
+VSHIFT_IMM_UNPREDICATE(vasrL_imm, RShiftVL, RShiftCntV, D, 2, sve_asr)
+VSHIFT_IMM_UNPREDICATE(vlsrB_imm, URShiftVB, RShiftCntV, B, 16, sve_lsr)
+VSHIFT_IMM_UNPREDICATE(vlsrS_imm, URShiftVS, RShiftCntV, H, 8, sve_lsr)
+VSHIFT_IMM_UNPREDICATE(vlsrI_imm, URShiftVI, RShiftCntV, S, 4, sve_lsr)
+VSHIFT_IMM_UNPREDICATE(vlsrL_imm, URShiftVL, RShiftCntV, D, 2, sve_lsr)
+VSHIFT_IMM_UNPREDICATE(vlslB_imm, LShiftVB, LShiftCntV, B, 16, sve_lsl)
+VSHIFT_IMM_UNPREDICATE(vlslS_imm, LShiftVS, LShiftCntV, H, 8, sve_lsl)
+VSHIFT_IMM_UNPREDICATE(vlslI_imm, LShiftVI, LShiftCntV, S, 4, sve_lsl)
+VSHIFT_IMM_UNPREDICATE(vlslL_imm, LShiftVL, LShiftCntV, D, 2, sve_lsl)
VSHIFT_COUNT(vshiftcntB, B, 16, T_BYTE)
VSHIFT_COUNT(vshiftcntS, H, 8, T_SHORT)
VSHIFT_COUNT(vshiftcntI, S, 4, T_INT)
diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp
index 29f63ba69a4..c7fac2836b7 100644
--- a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp
@@ -590,7 +590,7 @@ void entry(CodeBuffer *cb) {
__ stnp(r23, r29, Address(r12, 32)); // stnp x23, x29, [x12, #32]
__ ldnp(r0, r6, Address(r21, -80)); // ldnp x0, x6, [x21, #-80]
-// LdStSIMDOp
+// LdStNEONOp
__ ld1(v15, __ T8B, Address(r26)); // ld1 {v15.8B}, [x26]
__ ld1(v23, v24, __ T16B, Address(__ post(r11, 32))); // ld1 {v23.16B, v24.16B}, [x11], 32
__ ld1(v8, v9, v10, __ T1D, Address(__ post(r23, r7))); // ld1 {v8.1D, v9.1D, v10.1D}, [x23], x7
@@ -614,11 +614,146 @@ void entry(CodeBuffer *cb) {
__ ld4r(v0, v1, v2, v3, __ T4H, Address(__ post(r26, 8))); // ld4r {v0.4H, v1.4H, v2.4H, v3.4H}, [x26], 8
__ ld4r(v12, v13, v14, v15, __ T2S, Address(__ post(r25, r2))); // ld4r {v12.2S, v13.2S, v14.2S, v15.2S}, [x25], x2
-// SHA512SIMDOp
- __ sha512h(v22, __ T2D, v27, v4); // sha512h q22, q27, v4.2D
- __ sha512h2(v7, __ T2D, v6, v1); // sha512h2 q7, q6, v1.2D
- __ sha512su0(v26, __ T2D, v15); // sha512su0 v26.2D, v15.2D
- __ sha512su1(v2, __ T2D, v13, v13); // sha512su1 v2.2D, v13.2D, v13.2D
+// NEONReduceInstruction
+ __ addv(v22, __ T8B, v23); // addv b22, v23.8B
+ __ addv(v27, __ T16B, v28); // addv b27, v28.16B
+ __ addv(v4, __ T4H, v5); // addv h4, v5.4H
+ __ addv(v7, __ T8H, v8); // addv h7, v8.8H
+ __ addv(v6, __ T4S, v7); // addv s6, v7.4S
+ __ smaxv(v1, __ T8B, v2); // smaxv b1, v2.8B
+ __ smaxv(v26, __ T16B, v27); // smaxv b26, v27.16B
+ __ smaxv(v15, __ T4H, v16); // smaxv h15, v16.4H
+ __ smaxv(v2, __ T8H, v3); // smaxv h2, v3.8H
+ __ smaxv(v13, __ T4S, v14); // smaxv s13, v14.4S
+ __ fmaxv(v13, __ T4S, v14); // fmaxv s13, v14.4S
+ __ sminv(v24, __ T8B, v25); // sminv b24, v25.8B
+ __ sminv(v23, __ T16B, v24); // sminv b23, v24.16B
+ __ sminv(v4, __ T4H, v5); // sminv h4, v5.4H
+ __ sminv(v19, __ T8H, v20); // sminv h19, v20.8H
+ __ sminv(v15, __ T4S, v16); // sminv s15, v16.4S
+ __ fminv(v0, __ T4S, v1); // fminv s0, v1.4S
+
+// TwoRegNEONOp
+ __ absr(v4, __ T8B, v5); // abs v4.8B, v5.8B
+ __ absr(v20, __ T16B, v21); // abs v20.16B, v21.16B
+ __ absr(v11, __ T4H, v12); // abs v11.4H, v12.4H
+ __ absr(v29, __ T8H, v30); // abs v29.8H, v30.8H
+ __ absr(v15, __ T2S, v16); // abs v15.2S, v16.2S
+ __ absr(v21, __ T4S, v22); // abs v21.4S, v22.4S
+ __ absr(v4, __ T2D, v5); // abs v4.2D, v5.2D
+ __ fabs(v14, __ T2S, v15); // fabs v14.2S, v15.2S
+ __ fabs(v22, __ T4S, v23); // fabs v22.4S, v23.4S
+ __ fabs(v25, __ T2D, v26); // fabs v25.2D, v26.2D
+ __ fneg(v6, __ T2S, v7); // fneg v6.2S, v7.2S
+ __ fneg(v12, __ T4S, v13); // fneg v12.4S, v13.4S
+ __ fneg(v14, __ T2D, v15); // fneg v14.2D, v15.2D
+ __ fsqrt(v13, __ T2S, v14); // fsqrt v13.2S, v14.2S
+ __ fsqrt(v14, __ T4S, v15); // fsqrt v14.4S, v15.4S
+ __ fsqrt(v9, __ T2D, v10); // fsqrt v9.2D, v10.2D
+ __ notr(v25, __ T8B, v26); // not v25.8B, v26.8B
+ __ notr(v28, __ T16B, v29); // not v28.16B, v29.16B
+
+// ThreeRegNEONOp
+ __ andr(v10, __ T8B, v11, v12); // and v10.8B, v11.8B, v12.8B
+ __ andr(v19, __ T16B, v20, v21); // and v19.16B, v20.16B, v21.16B
+ __ orr(v11, __ T8B, v12, v13); // orr v11.8B, v12.8B, v13.8B
+ __ orr(v17, __ T16B, v18, v19); // orr v17.16B, v18.16B, v19.16B
+ __ eor(v21, __ T8B, v22, v23); // eor v21.8B, v22.8B, v23.8B
+ __ eor(v15, __ T16B, v16, v17); // eor v15.16B, v16.16B, v17.16B
+ __ addv(v20, __ T8B, v21, v22); // add v20.8B, v21.8B, v22.8B
+ __ addv(v23, __ T16B, v24, v25); // add v23.16B, v24.16B, v25.16B
+ __ addv(v26, __ T4H, v27, v28); // add v26.4H, v27.4H, v28.4H
+ __ addv(v5, __ T8H, v6, v7); // add v5.8H, v6.8H, v7.8H
+ __ addv(v6, __ T2S, v7, v8); // add v6.2S, v7.2S, v8.2S
+ __ addv(v15, __ T4S, v16, v17); // add v15.4S, v16.4S, v17.4S
+ __ addv(v15, __ T2D, v16, v17); // add v15.2D, v16.2D, v17.2D
+ __ fadd(v25, __ T2S, v26, v27); // fadd v25.2S, v26.2S, v27.2S
+ __ fadd(v16, __ T4S, v17, v18); // fadd v16.4S, v17.4S, v18.4S
+ __ fadd(v27, __ T2D, v28, v29); // fadd v27.2D, v28.2D, v29.2D
+ __ subv(v24, __ T8B, v25, v26); // sub v24.8B, v25.8B, v26.8B
+ __ subv(v15, __ T16B, v16, v17); // sub v15.16B, v16.16B, v17.16B
+ __ subv(v25, __ T4H, v26, v27); // sub v25.4H, v26.4H, v27.4H
+ __ subv(v14, __ T8H, v15, v16); // sub v14.8H, v15.8H, v16.8H
+ __ subv(v10, __ T2S, v11, v12); // sub v10.2S, v11.2S, v12.2S
+ __ subv(v13, __ T4S, v14, v15); // sub v13.4S, v14.4S, v15.4S
+ __ subv(v14, __ T2D, v15, v16); // sub v14.2D, v15.2D, v16.2D
+ __ fsub(v20, __ T2S, v21, v22); // fsub v20.2S, v21.2S, v22.2S
+ __ fsub(v1, __ T4S, v2, v3); // fsub v1.4S, v2.4S, v3.4S
+ __ fsub(v22, __ T2D, v23, v24); // fsub v22.2D, v23.2D, v24.2D
+ __ mulv(v30, __ T8B, v31, v0); // mul v30.8B, v31.8B, v0.8B
+ __ mulv(v14, __ T16B, v15, v16); // mul v14.16B, v15.16B, v16.16B
+ __ mulv(v2, __ T4H, v3, v4); // mul v2.4H, v3.4H, v4.4H
+ __ mulv(v6, __ T8H, v7, v8); // mul v6.8H, v7.8H, v8.8H
+ __ mulv(v3, __ T2S, v4, v5); // mul v3.2S, v4.2S, v5.2S
+ __ mulv(v7, __ T4S, v8, v9); // mul v7.4S, v8.4S, v9.4S
+ __ fmul(v24, __ T2S, v25, v26); // fmul v24.2S, v25.2S, v26.2S
+ __ fmul(v0, __ T4S, v1, v2); // fmul v0.4S, v1.4S, v2.4S
+ __ fmul(v27, __ T2D, v28, v29); // fmul v27.2D, v28.2D, v29.2D
+ __ mlav(v29, __ T4H, v30, v31); // mla v29.4H, v30.4H, v31.4H
+ __ mlav(v5, __ T8H, v6, v7); // mla v5.8H, v6.8H, v7.8H
+ __ mlav(v5, __ T2S, v6, v7); // mla v5.2S, v6.2S, v7.2S
+ __ mlav(v29, __ T4S, v30, v31); // mla v29.4S, v30.4S, v31.4S
+ __ fmla(v11, __ T2S, v12, v13); // fmla v11.2S, v12.2S, v13.2S
+ __ fmla(v25, __ T4S, v26, v27); // fmla v25.4S, v26.4S, v27.4S
+ __ fmla(v0, __ T2D, v1, v2); // fmla v0.2D, v1.2D, v2.2D
+ __ mlsv(v30, __ T4H, v31, v0); // mls v30.4H, v31.4H, v0.4H
+ __ mlsv(v0, __ T8H, v1, v2); // mls v0.8H, v1.8H, v2.8H
+ __ mlsv(v17, __ T2S, v18, v19); // mls v17.2S, v18.2S, v19.2S
+ __ mlsv(v28, __ T4S, v29, v30); // mls v28.4S, v29.4S, v30.4S
+ __ fmls(v25, __ T2S, v26, v27); // fmls v25.2S, v26.2S, v27.2S
+ __ fmls(v9, __ T4S, v10, v11); // fmls v9.4S, v10.4S, v11.4S
+ __ fmls(v25, __ T2D, v26, v27); // fmls v25.2D, v26.2D, v27.2D
+ __ fdiv(v12, __ T2S, v13, v14); // fdiv v12.2S, v13.2S, v14.2S
+ __ fdiv(v15, __ T4S, v16, v17); // fdiv v15.4S, v16.4S, v17.4S
+ __ fdiv(v11, __ T2D, v12, v13); // fdiv v11.2D, v12.2D, v13.2D
+ __ maxv(v10, __ T8B, v11, v12); // smax v10.8B, v11.8B, v12.8B
+ __ maxv(v17, __ T16B, v18, v19); // smax v17.16B, v18.16B, v19.16B
+ __ maxv(v24, __ T4H, v25, v26); // smax v24.4H, v25.4H, v26.4H
+ __ maxv(v21, __ T8H, v22, v23); // smax v21.8H, v22.8H, v23.8H
+ __ maxv(v23, __ T2S, v24, v25); // smax v23.2S, v24.2S, v25.2S
+ __ maxv(v0, __ T4S, v1, v2); // smax v0.4S, v1.4S, v2.4S
+ __ fmax(v16, __ T2S, v17, v18); // fmax v16.2S, v17.2S, v18.2S
+ __ fmax(v10, __ T4S, v11, v12); // fmax v10.4S, v11.4S, v12.4S
+ __ fmax(v6, __ T2D, v7, v8); // fmax v6.2D, v7.2D, v8.2D
+ __ minv(v28, __ T8B, v29, v30); // smin v28.8B, v29.8B, v30.8B
+ __ minv(v6, __ T16B, v7, v8); // smin v6.16B, v7.16B, v8.16B
+ __ minv(v5, __ T4H, v6, v7); // smin v5.4H, v6.4H, v7.4H
+ __ minv(v5, __ T8H, v6, v7); // smin v5.8H, v6.8H, v7.8H
+ __ minv(v20, __ T2S, v21, v22); // smin v20.2S, v21.2S, v22.2S
+ __ minv(v17, __ T4S, v18, v19); // smin v17.4S, v18.4S, v19.4S
+ __ fmin(v15, __ T2S, v16, v17); // fmin v15.2S, v16.2S, v17.2S
+ __ fmin(v17, __ T4S, v18, v19); // fmin v17.4S, v18.4S, v19.4S
+ __ fmin(v29, __ T2D, v30, v31); // fmin v29.2D, v30.2D, v31.2D
+ __ cmeq(v26, __ T8B, v27, v28); // cmeq v26.8B, v27.8B, v28.8B
+ __ cmeq(v28, __ T16B, v29, v30); // cmeq v28.16B, v29.16B, v30.16B
+ __ cmeq(v1, __ T4H, v2, v3); // cmeq v1.4H, v2.4H, v3.4H
+ __ cmeq(v27, __ T8H, v28, v29); // cmeq v27.8H, v28.8H, v29.8H
+ __ cmeq(v0, __ T2S, v1, v2); // cmeq v0.2S, v1.2S, v2.2S
+ __ cmeq(v20, __ T4S, v21, v22); // cmeq v20.4S, v21.4S, v22.4S
+ __ cmeq(v28, __ T2D, v29, v30); // cmeq v28.2D, v29.2D, v30.2D
+ __ fcmeq(v15, __ T2S, v16, v17); // fcmeq v15.2S, v16.2S, v17.2S
+ __ fcmeq(v12, __ T4S, v13, v14); // fcmeq v12.4S, v13.4S, v14.4S
+ __ fcmeq(v10, __ T2D, v11, v12); // fcmeq v10.2D, v11.2D, v12.2D
+ __ cmgt(v28, __ T8B, v29, v30); // cmgt v28.8B, v29.8B, v30.8B
+ __ cmgt(v28, __ T16B, v29, v30); // cmgt v28.16B, v29.16B, v30.16B
+ __ cmgt(v19, __ T4H, v20, v21); // cmgt v19.4H, v20.4H, v21.4H
+ __ cmgt(v22, __ T8H, v23, v24); // cmgt v22.8H, v23.8H, v24.8H
+ __ cmgt(v10, __ T2S, v11, v12); // cmgt v10.2S, v11.2S, v12.2S
+ __ cmgt(v4, __ T4S, v5, v6); // cmgt v4.4S, v5.4S, v6.4S
+ __ cmgt(v30, __ T2D, v31, v0); // cmgt v30.2D, v31.2D, v0.2D
+ __ fcmgt(v20, __ T2S, v21, v22); // fcmgt v20.2S, v21.2S, v22.2S
+ __ fcmgt(v8, __ T4S, v9, v10); // fcmgt v8.4S, v9.4S, v10.4S
+ __ fcmgt(v30, __ T2D, v31, v0); // fcmgt v30.2D, v31.2D, v0.2D
+ __ cmge(v17, __ T8B, v18, v19); // cmge v17.8B, v18.8B, v19.8B
+ __ cmge(v10, __ T16B, v11, v12); // cmge v10.16B, v11.16B, v12.16B
+ __ cmge(v27, __ T4H, v28, v29); // cmge v27.4H, v28.4H, v29.4H
+ __ cmge(v2, __ T8H, v3, v4); // cmge v2.8H, v3.8H, v4.8H
+ __ cmge(v24, __ T2S, v25, v26); // cmge v24.2S, v25.2S, v26.2S
+ __ cmge(v4, __ T4S, v5, v6); // cmge v4.4S, v5.4S, v6.4S
+ __ cmge(v3, __ T2D, v4, v5); // cmge v3.2D, v4.2D, v5.2D
+ __ fcmge(v8, __ T2S, v9, v10); // fcmge v8.2S, v9.2S, v10.2S
+ __ fcmge(v22, __ T4S, v23, v24); // fcmge v22.4S, v23.4S, v24.4S
+ __ fcmge(v17, __ T2D, v18, v19); // fcmge v17.2D, v18.2D, v19.2D
// SpecialCases
__ ccmn(zr, zr, 3u, Assembler::LE); // ccmn xzr, xzr, #3, LE
@@ -705,774 +840,160 @@ void entry(CodeBuffer *cb) {
__ fmovd(v0, -1.0625); // fmov d0, #-1.0625
// LSEOp
- __ swp(Assembler::xword, r24, r24, r4); // swp x24, x24, [x4]
- __ ldadd(Assembler::xword, r20, r16, r0); // ldadd x20, x16, [x0]
- __ ldbic(Assembler::xword, r4, r21, r11); // ldclr x4, x21, [x11]
- __ ldeor(Assembler::xword, r30, r16, r22); // ldeor x30, x16, [x22]
- __ ldorr(Assembler::xword, r4, r15, r23); // ldset x4, x15, [x23]
- __ ldsmin(Assembler::xword, r26, r6, r12); // ldsmin x26, x6, [x12]
- __ ldsmax(Assembler::xword, r15, r14, r15); // ldsmax x15, x14, [x15]
- __ ldumin(Assembler::xword, r9, r25, r29); // ldumin x9, x25, [x29]
- __ ldumax(Assembler::xword, r11, r20, r12); // ldumax x11, x20, [x12]
+ __ swp(Assembler::xword, r13, r5, r29); // swp x13, x5, [x29]
+ __ ldadd(Assembler::xword, r24, r21, r26); // ldadd x24, x21, [x26]
+ __ ldbic(Assembler::xword, r24, r3, r24); // ldclr x24, x3, [x24]
+ __ ldeor(Assembler::xword, r26, r23, r15); // ldeor x26, x23, [x15]
+ __ ldorr(Assembler::xword, r21, r3, r24); // ldset x21, x3, [x24]
+ __ ldsmin(Assembler::xword, r8, r25, r20); // ldsmin x8, x25, [x20]
+ __ ldsmax(Assembler::xword, r16, r17, r2); // ldsmax x16, x17, [x2]
+ __ ldumin(Assembler::xword, r1, r0, r24); // ldumin x1, x0, [x24]
+ __ ldumax(Assembler::xword, r4, r3, r12); // ldumax x4, x3, [x12]
// LSEOp
- __ swpa(Assembler::xword, r16, r22, r16); // swpa x16, x22, [x16]
- __ ldadda(Assembler::xword, r21, r24, r26); // ldadda x21, x24, [x26]
- __ ldbica(Assembler::xword, r6, r6, r16); // ldclra x6, x6, [x16]
- __ ldeora(Assembler::xword, r16, r25, r16); // ldeora x16, x25, [x16]
- __ ldorra(Assembler::xword, r28, r24, r16); // ldseta x28, x24, [x16]
- __ ldsmina(Assembler::xword, r26, r15, r10); // ldsmina x26, x15, [x10]
- __ ldsmaxa(Assembler::xword, r13, r14, r20); // ldsmaxa x13, x14, [x20]
- __ ldumina(Assembler::xword, r1, r23, r30); // ldumina x1, x23, [x30]
- __ ldumaxa(Assembler::xword, r14, r2, r6); // ldumaxa x14, x2, [x6]
+ __ swpa(Assembler::xword, zr, r28, r10); // swpa xzr, x28, [x10]
+ __ ldadda(Assembler::xword, r26, r2, r12); // ldadda x26, x2, [x12]
+ __ ldbica(Assembler::xword, r16, zr, r1); // ldclra x16, xzr, [x1]
+ __ ldeora(Assembler::xword, r13, r29, r0); // ldeora x13, x29, [x0]
+ __ ldorra(Assembler::xword, r19, r12, r17); // ldseta x19, x12, [x17]
+ __ ldsmina(Assembler::xword, r22, r13, r28); // ldsmina x22, x13, [x28]
+ __ ldsmaxa(Assembler::xword, r30, zr, r1); // ldsmaxa x30, xzr, [x1]
+ __ ldumina(Assembler::xword, r26, r28, r4); // ldumina x26, x28, [x4]
+ __ ldumaxa(Assembler::xword, r30, r4, r6); // ldumaxa x30, x4, [x6]
// LSEOp
- __ swpal(Assembler::xword, r3, r8, r25); // swpal x3, x8, [x25]
- __ ldaddal(Assembler::xword, r0, r27, r30); // ldaddal x0, x27, [x30]
- __ ldbical(Assembler::xword, r5, r5, r30); // ldclral x5, x5, [x30]
- __ ldeoral(Assembler::xword, r11, r25, r0); // ldeoral x11, x25, [x0]
- __ ldorral(Assembler::xword, zr, r0, r19); // ldsetal xzr, x0, [x19]
- __ ldsminal(Assembler::xword, r29, r26, r9); // ldsminal x29, x26, [x9]
- __ ldsmaxal(Assembler::xword, r26, r12, r15); // ldsmaxal x26, x12, [x15]
- __ lduminal(Assembler::xword, r11, r11, r15); // lduminal x11, x11, [x15]
- __ ldumaxal(Assembler::xword, r25, r22, r24); // ldumaxal x25, x22, [x24]
+ __ swpal(Assembler::xword, r30, r26, r15); // swpal x30, x26, [x15]
+ __ ldaddal(Assembler::xword, r9, r8, r12); // ldaddal x9, x8, [x12]
+ __ ldbical(Assembler::xword, r0, r20, r1); // ldclral x0, x20, [x1]
+ __ ldeoral(Assembler::xword, r24, r2, r0); // ldeoral x24, x2, [x0]
+ __ ldorral(Assembler::xword, r9, r24, r26); // ldsetal x9, x24, [x26]
+ __ ldsminal(Assembler::xword, r16, r30, r3); // ldsminal x16, x30, [x3]
+ __ ldsmaxal(Assembler::xword, r10, r23, r10); // ldsmaxal x10, x23, [x10]
+ __ lduminal(Assembler::xword, r4, r16, r2); // lduminal x4, x16, [x2]
+ __ ldumaxal(Assembler::xword, r11, r8, r10); // ldumaxal x11, x8, [x10]
// LSEOp
- __ swpl(Assembler::xword, r0, r17, r11); // swpl x0, x17, [x11]
- __ ldaddl(Assembler::xword, r6, r29, r6); // ldaddl x6, x29, [x6]
- __ ldbicl(Assembler::xword, r5, r5, r21); // ldclrl x5, x5, [x21]
- __ ldeorl(Assembler::xword, r19, r16, r15); // ldeorl x19, x16, [x15]
- __ ldorrl(Assembler::xword, r30, r27, r28); // ldsetl x30, x27, [x28]
- __ ldsminl(Assembler::xword, r1, r28, r1); // ldsminl x1, x28, [x1]
- __ ldsmaxl(Assembler::xword, r20, r29, r16); // ldsmaxl x20, x29, [x16]
- __ lduminl(Assembler::xword, r13, r10, r29); // lduminl x13, x10, [x29]
- __ ldumaxl(Assembler::xword, r29, r19, r22); // ldumaxl x29, x19, [x22]
+ __ swpl(Assembler::xword, r15, r17, r2); // swpl x15, x17, [x2]
+ __ ldaddl(Assembler::xword, r10, r12, r12); // ldaddl x10, x12, [x12]
+ __ ldbicl(Assembler::xword, r15, r13, r2); // ldclrl x15, x13, [x2]
+ __ ldeorl(Assembler::xword, r7, r20, r26); // ldeorl x7, x20, [x26]
+ __ ldorrl(Assembler::xword, r16, r4, r2); // ldsetl x16, x4, [x2]
+ __ ldsminl(Assembler::xword, r4, r12, r15); // ldsminl x4, x12, [x15]
+ __ ldsmaxl(Assembler::xword, r21, r16, r15); // ldsmaxl x21, x16, [x15]
+ __ lduminl(Assembler::xword, r11, r21, r23); // lduminl x11, x21, [x23]
+ __ ldumaxl(Assembler::xword, r12, r26, r23); // ldumaxl x12, x26, [x23]
// LSEOp
- __ swp(Assembler::word, r10, r4, sp); // swp w10, w4, [sp]
- __ ldadd(Assembler::word, r21, r8, sp); // ldadd w21, w8, [sp]
- __ ldbic(Assembler::word, r19, r10, r28); // ldclr w19, w10, [x28]
- __ ldeor(Assembler::word, r2, r25, r5); // ldeor w2, w25, [x5]
- __ ldorr(Assembler::word, r3, r8, r22); // ldset w3, w8, [x22]
- __ ldsmin(Assembler::word, r19, r13, r5); // ldsmin w19, w13, [x5]
- __ ldsmax(Assembler::word, r29, r24, r21); // ldsmax w29, w24, [x21]
- __ ldumin(Assembler::word, r26, r24, r3); // ldumin w26, w24, [x3]
- __ ldumax(Assembler::word, r24, r26, r23); // ldumax w24, w26, [x23]
+ __ swp(Assembler::word, r28, r14, r11); // swp w28, w14, [x11]
+ __ ldadd(Assembler::word, r24, r1, r12); // ldadd w24, w1, [x12]
+ __ ldbic(Assembler::word, zr, r10, r16); // ldclr wzr, w10, [x16]
+ __ ldeor(Assembler::word, r7, r2, r3); // ldeor w7, w2, [x3]
+ __ ldorr(Assembler::word, r13, r19, r17); // ldset w13, w19, [x17]
+ __ ldsmin(Assembler::word, r16, r3, r1); // ldsmin w16, w3, [x1]
+ __ ldsmax(Assembler::word, r11, r30, r5); // ldsmax w11, w30, [x5]
+ __ ldumin(Assembler::word, r8, r15, r29); // ldumin w8, w15, [x29]
+ __ ldumax(Assembler::word, r30, r0, r20); // ldumax w30, w0, [x20]
// LSEOp
- __ swpa(Assembler::word, r15, r21, r3); // swpa w15, w21, [x3]
- __ ldadda(Assembler::word, r24, r8, r25); // ldadda w24, w8, [x25]
- __ ldbica(Assembler::word, r20, r16, r17); // ldclra w20, w16, [x17]
- __ ldeora(Assembler::word, r2, r1, r0); // ldeora w2, w1, [x0]
- __ ldorra(Assembler::word, r24, r4, r3); // ldseta w24, w4, [x3]
- __ ldsmina(Assembler::word, r12, zr, r28); // ldsmina w12, wzr, [x28]
- __ ldsmaxa(Assembler::word, r10, r26, r2); // ldsmaxa w10, w26, [x2]
- __ ldumina(Assembler::word, r12, r16, sp); // ldumina w12, w16, [sp]
- __ ldumaxa(Assembler::word, r1, r13, r29); // ldumaxa w1, w13, [x29]
+ __ swpa(Assembler::word, r7, r20, r23); // swpa w7, w20, [x23]
+ __ ldadda(Assembler::word, r28, r21, r27); // ldadda w28, w21, [x27]
+ __ ldbica(Assembler::word, r25, r5, r1); // ldclra w25, w5, [x1]
+ __ ldeora(Assembler::word, r23, r16, sp); // ldeora w23, w16, [sp]
+ __ ldorra(Assembler::word, r5, r12, r9); // ldseta w5, w12, [x9]
+ __ ldsmina(Assembler::word, r28, r15, r29); // ldsmina w28, w15, [x29]
+ __ ldsmaxa(Assembler::word, r22, zr, r19); // ldsmaxa w22, wzr, [x19]
+ __ ldumina(Assembler::word, zr, r5, r14); // ldumina wzr, w5, [x14]
+ __ ldumaxa(Assembler::word, r16, zr, r15); // ldumaxa w16, wzr, [x15]
// LSEOp
- __ swpal(Assembler::word, r0, r19, r12); // swpal w0, w19, [x12]
- __ ldaddal(Assembler::word, r17, r22, r13); // ldaddal w17, w22, [x13]
- __ ldbical(Assembler::word, r28, r30, sp); // ldclral w28, w30, [sp]
- __ ldeoral(Assembler::word, r1, r26, r28); // ldeoral w1, w26, [x28]
- __ ldorral(Assembler::word, r4, r30, r4); // ldsetal w4, w30, [x4]
- __ ldsminal(Assembler::word, r6, r30, r26); // ldsminal w6, w30, [x26]
- __ ldsmaxal(Assembler::word, r16, r9, r8); // ldsmaxal w16, w9, [x8]
- __ lduminal(Assembler::word, r12, r0, r20); // lduminal w12, w0, [x20]
- __ ldumaxal(Assembler::word, r1, r24, r2); // ldumaxal w1, w24, [x2]
+ __ swpal(Assembler::word, r27, r20, r16); // swpal w27, w20, [x16]
+ __ ldaddal(Assembler::word, r12, r11, r9); // ldaddal w12, w11, [x9]
+ __ ldbical(Assembler::word, r6, r30, r17); // ldclral w6, w30, [x17]
+ __ ldeoral(Assembler::word, r27, r28, r30); // ldeoral w27, w28, [x30]
+ __ ldorral(Assembler::word, r7, r10, r20); // ldsetal w7, w10, [x20]
+ __ ldsminal(Assembler::word, r10, r4, r24); // ldsminal w10, w4, [x24]
+ __ ldsmaxal(Assembler::word, r17, r17, r22); // ldsmaxal w17, w17, [x22]
+ __ lduminal(Assembler::word, r3, r29, r15); // lduminal w3, w29, [x15]
+ __ ldumaxal(Assembler::word, r22, r19, r19); // ldumaxal w22, w19, [x19]
// LSEOp
- __ swpl(Assembler::word, r0, r9, r24); // swpl w0, w9, [x24]
- __ ldaddl(Assembler::word, r26, r16, r30); // ldaddl w26, w16, [x30]
- __ ldbicl(Assembler::word, r3, r10, r23); // ldclrl w3, w10, [x23]
- __ ldeorl(Assembler::word, r10, r4, r15); // ldeorl w10, w4, [x15]
- __ ldorrl(Assembler::word, r2, r11, r8); // ldsetl w2, w11, [x8]
- __ ldsminl(Assembler::word, r10, r15, r17); // ldsminl w10, w15, [x17]
- __ ldsmaxl(Assembler::word, r2, r10, r12); // ldsmaxl w2, w10, [x12]
- __ lduminl(Assembler::word, r12, r15, r13); // lduminl w12, w15, [x13]
- __ ldumaxl(Assembler::word, r2, r7, r20); // ldumaxl w2, w7, [x20]
+ __ swpl(Assembler::word, r22, r2, r15); // swpl w22, w2, [x15]
+ __ ldaddl(Assembler::word, r6, r12, r16); // ldaddl w6, w12, [x16]
+ __ ldbicl(Assembler::word, r11, r13, r23); // ldclrl w11, w13, [x23]
+ __ ldeorl(Assembler::word, r1, r30, r19); // ldeorl w1, w30, [x19]
+ __ ldorrl(Assembler::word, r5, r17, r2); // ldsetl w5, w17, [x2]
+ __ ldsminl(Assembler::word, r16, r22, r13); // ldsminl w16, w22, [x13]
+ __ ldsmaxl(Assembler::word, r10, r21, r29); // ldsmaxl w10, w21, [x29]
+ __ lduminl(Assembler::word, r27, r12, r27); // lduminl w27, w12, [x27]
+ __ ldumaxl(Assembler::word, r3, r1, sp); // ldumaxl w3, w1, [sp]
+
+// SHA3SIMDOp
+ __ bcax(v23, __ T16B, v19, v17, v9); // bcax v23.16B, v19.16B, v17.16B, v9.16B
+ __ eor3(v27, __ T16B, v26, v14, v6); // eor3 v27.16B, v26.16B, v14.16B, v6.16B
+ __ rax1(v20, __ T2D, v22, v30); // rax1 v20.2D, v22.2D, v30.2D
+ __ xar(v24, __ T2D, v2, v30, 54); // xar v24.2D, v2.2D, v30.2D, #54
+
+// SHA512SIMDOp
+ __ sha512h(v17, __ T2D, v10, v22); // sha512h q17, q10, v22.2D
+ __ sha512h2(v17, __ T2D, v2, v17); // sha512h2 q17, q2, v17.2D
+ __ sha512su0(v0, __ T2D, v24); // sha512su0 v0.2D, v24.2D
+ __ sha512su1(v25, __ T2D, v22, v2); // sha512su1 v25.2D, v22.2D, v2.2D
// SVEVectorOp
- __ sve_add(z25, __ B, z15, z4); // add z25.b, z15.b, z4.b
- __ sve_sub(z4, __ S, z11, z17); // sub z4.s, z11.s, z17.s
- __ sve_fadd(z16, __ D, z17, z10); // fadd z16.d, z17.d, z10.d
- __ sve_fmul(z22, __ D, z12, z25); // fmul z22.d, z12.d, z25.d
- __ sve_fsub(z28, __ D, z14, z10); // fsub z28.d, z14.d, z10.d
- __ sve_abs(z1, __ H, p3, z30); // abs z1.h, p3/m, z30.h
- __ sve_add(z15, __ B, p1, z2); // add z15.b, p1/m, z15.b, z2.b
- __ sve_asr(z13, __ S, p4, z16); // asr z13.s, p4/m, z13.s, z16.s
- __ sve_cnt(z3, __ D, p0, z11); // cnt z3.d, p0/m, z11.d
- __ sve_lsl(z5, __ D, p2, z14); // lsl z5.d, p2/m, z5.d, z14.d
- __ sve_lsr(z29, __ B, p0, z20); // lsr z29.b, p0/m, z29.b, z20.b
- __ sve_mul(z20, __ S, p5, z27); // mul z20.s, p5/m, z20.s, z27.s
- __ sve_neg(z26, __ B, p6, z4); // neg z26.b, p6/m, z4.b
- __ sve_not(z22, __ B, p4, z30); // not z22.b, p4/m, z30.b
- __ sve_smax(z11, __ H, p2, z27); // smax z11.h, p2/m, z11.h, z27.h
- __ sve_smin(z28, __ S, p5, z30); // smin z28.s, p5/m, z28.s, z30.s
- __ sve_sub(z30, __ S, p1, z13); // sub z30.s, p1/m, z30.s, z13.s
- __ sve_fabs(z30, __ D, p4, z26); // fabs z30.d, p4/m, z26.d
- __ sve_fadd(z15, __ S, p3, z11); // fadd z15.s, p3/m, z15.s, z11.s
- __ sve_fdiv(z6, __ D, p7, z16); // fdiv z6.d, p7/m, z6.d, z16.d
- __ sve_fmax(z27, __ S, p7, z7); // fmax z27.s, p7/m, z27.s, z7.s
- __ sve_fmin(z19, __ D, p2, z4); // fmin z19.d, p2/m, z19.d, z4.d
- __ sve_fmul(z17, __ S, p4, z22); // fmul z17.s, p4/m, z17.s, z22.s
- __ sve_fneg(z28, __ D, p3, z21); // fneg z28.d, p3/m, z21.d
- __ sve_frintm(z17, __ S, p5, z2); // frintm z17.s, p5/m, z2.s
- __ sve_frintn(z6, __ S, p3, z15); // frintn z6.s, p3/m, z15.s
- __ sve_frintp(z12, __ D, p5, z1); // frintp z12.d, p5/m, z1.d
- __ sve_fsqrt(z17, __ S, p1, z17); // fsqrt z17.s, p1/m, z17.s
- __ sve_fsub(z15, __ S, p5, z13); // fsub z15.s, p5/m, z15.s, z13.s
- __ sve_fmla(z20, __ D, p7, z27, z11); // fmla z20.d, p7/m, z27.d, z11.d
- __ sve_fmls(z3, __ D, p0, z30, z23); // fmls z3.d, p0/m, z30.d, z23.d
- __ sve_fnmla(z17, __ S, p2, z27, z26); // fnmla z17.s, p2/m, z27.s, z26.s
- __ sve_fnmls(z6, __ D, p5, z22, z30); // fnmls z6.d, p5/m, z22.d, z30.d
- __ sve_mla(z2, __ H, p7, z26, z17); // mla z2.h, p7/m, z26.h, z17.h
- __ sve_mls(z22, __ B, p4, z2, z17); // mls z22.b, p4/m, z2.b, z17.b
- __ sve_and(z24, z25, z22); // and z24.d, z25.d, z22.d
- __ sve_eor(z17, z12, z3); // eor z17.d, z12.d, z3.d
- __ sve_orr(z29, z28, z16); // orr z29.d, z28.d, z16.d
+ __ sve_add(z17, __ D, z12, z3); // add z17.d, z12.d, z3.d
+ __ sve_sub(z29, __ D, z28, z16); // sub z29.d, z28.d, z16.d
+ __ sve_fadd(z6, __ D, z9, z28); // fadd z6.d, z9.d, z28.d
+ __ sve_fmul(z7, __ S, z4, z7); // fmul z7.s, z4.s, z7.s
+ __ sve_fsub(z9, __ S, z22, z8); // fsub z9.s, z22.s, z8.s
+ __ sve_abs(z27, __ B, p5, z30); // abs z27.b, p5/m, z30.b
+ __ sve_add(z26, __ H, p0, z16); // add z26.h, p0/m, z26.h, z16.h
+ __ sve_asr(z3, __ D, p6, z8); // asr z3.d, p6/m, z3.d, z8.d
+ __ sve_cnt(z21, __ D, p6, z26); // cnt z21.d, p6/m, z26.d
+ __ sve_lsl(z22, __ B, p0, z4); // lsl z22.b, p0/m, z22.b, z4.b
+ __ sve_lsr(z17, __ H, p0, z3); // lsr z17.h, p0/m, z17.h, z3.h
+ __ sve_mul(z1, __ B, p2, z6); // mul z1.b, p2/m, z1.b, z6.b
+ __ sve_neg(z9, __ S, p7, z7); // neg z9.s, p7/m, z7.s
+ __ sve_not(z22, __ H, p5, z5); // not z22.h, p5/m, z5.h
+ __ sve_smax(z8, __ B, p4, z30); // smax z8.b, p4/m, z8.b, z30.b
+ __ sve_smin(z17, __ D, p0, z11); // smin z17.d, p0/m, z17.d, z11.d
+ __ sve_sub(z28, __ S, p0, z26); // sub z28.s, p0/m, z28.s, z26.s
+ __ sve_fabs(z28, __ D, p3, z13); // fabs z28.d, p3/m, z13.d
+ __ sve_fadd(z16, __ S, p6, z5); // fadd z16.s, p6/m, z16.s, z5.s
+ __ sve_fdiv(z13, __ S, p2, z15); // fdiv z13.s, p2/m, z13.s, z15.s
+ __ sve_fmax(z26, __ S, p5, z11); // fmax z26.s, p5/m, z26.s, z11.s
+ __ sve_fmin(z22, __ S, p4, z4); // fmin z22.s, p4/m, z22.s, z4.s
+ __ sve_fmul(z19, __ S, p4, z17); // fmul z19.s, p4/m, z19.s, z17.s
+ __ sve_fneg(z14, __ D, p3, z2); // fneg z14.d, p3/m, z2.d
+ __ sve_frintm(z3, __ S, p5, z23); // frintm z3.s, p5/m, z23.s
+ __ sve_frintn(z6, __ S, p1, z17); // frintn z6.s, p1/m, z17.s
+ __ sve_frintp(z27, __ S, p4, z16); // frintp z27.s, p4/m, z16.s
+ __ sve_fsqrt(z2, __ S, p7, z3); // fsqrt z2.s, p7/m, z3.s
+ __ sve_fsub(z6, __ S, p4, z19); // fsub z6.s, p4/m, z6.s, z19.s
+ __ sve_fmla(z12, __ D, p5, z8, z24); // fmla z12.d, p5/m, z8.d, z24.d
+ __ sve_fmls(z17, __ S, p0, z10, z23); // fmls z17.s, p0/m, z10.s, z23.s
+ __ sve_fnmla(z19, __ S, p7, z13, z16); // fnmla z19.s, p7/m, z13.s, z16.s
+ __ sve_fnmls(z0, __ D, p1, z14, z17); // fnmls z0.d, p1/m, z14.d, z17.d
+ __ sve_mla(z8, __ S, p2, z22, z20); // mla z8.s, p2/m, z22.s, z20.s
+ __ sve_mls(z27, __ S, p0, z3, z15); // mls z27.s, p0/m, z3.s, z15.s
+ __ sve_and(z20, z7, z4); // and z20.d, z7.d, z4.d
+ __ sve_eor(z7, z0, z8); // eor z7.d, z0.d, z8.d
+ __ sve_orr(z19, z22, z4); // orr z19.d, z22.d, z4.d
// SVEReductionOp
- __ sve_andv(v6, __ S, p2, z28); // andv s6, p2, z28.s
- __ sve_orv(v7, __ H, p1, z7); // orv h7, p1, z7.h
- __ sve_eorv(v9, __ B, p5, z8); // eorv b9, p5, z8.b
- __ sve_smaxv(v27, __ B, p5, z30); // smaxv b27, p5, z30.b
- __ sve_sminv(v26, __ H, p0, z16); // sminv h26, p0, z16.h
- __ sve_fminv(v3, __ D, p6, z8); // fminv d3, p6, z8.d
- __ sve_fmaxv(v21, __ D, p6, z26); // fmaxv d21, p6, z26.d
- __ sve_fadda(v22, __ S, p0, z4); // fadda s22, p0, s22, z4.s
- __ sve_uaddv(v17, __ H, p0, z3); // uaddv d17, p0, z3.h
+ __ sve_andv(v9, __ D, p5, z11); // andv d9, p5, z11.d
+ __ sve_orv(v5, __ H, p7, z16); // orv h5, p7, z16.h
+ __ sve_eorv(v22, __ H, p3, z1); // eorv h22, p3, z1.h
+ __ sve_smaxv(v8, __ D, p5, z16); // smaxv d8, p5, z16.d
+ __ sve_sminv(v15, __ S, p1, z4); // sminv s15, p1, z4.s
+ __ sve_fminv(v8, __ S, p1, z29); // fminv s8, p1, z29.s
+ __ sve_fmaxv(v28, __ D, p4, z29); // fmaxv d28, p4, z29.d
+ __ sve_fadda(v9, __ S, p3, z2); // fadda s9, p3, s9, z2.s
+ __ sve_uaddv(v28, __ B, p0, z7); // uaddv d28, p0, z7.b
__ bind(forth);
/*
-aarch64ops.o: file format elf64-littleaarch64
-
-
-Disassembly of section .text:
-
-0000000000000000 :
- 0: 8b0d82fa add x26, x23, x13, lsl #32
- 4: cb49970c sub x12, x24, x9, lsr #37
- 8: ab889dfc adds x28, x15, x8, asr #39
- c: eb9ee787 subs x7, x28, x30, asr #57
- 10: 0b9b3ec9 add w9, w22, w27, asr #15
- 14: 4b9179a3 sub w3, w13, w17, asr #30
- 18: 2b88474e adds w14, w26, w8, asr #17
- 1c: 6b8c56c0 subs w0, w22, w12, asr #21
- 20: 8a1a51e0 and x0, x15, x26, lsl #20
- 24: aa11f4ba orr x26, x5, x17, lsl #61
- 28: ca0281b8 eor x24, x13, x2, lsl #32
- 2c: ea918c7c ands x28, x3, x17, asr #35
- 30: 0a5d4a19 and w25, w16, w29, lsr #18
- 34: 2a4b262d orr w13, w17, w11, lsr #9
- 38: 4a513ca5 eor w5, w5, w17, lsr #15
- 3c: 6a9b6ae2 ands w2, w23, w27, asr #26
- 40: 8a70b79b bic x27, x28, x16, lsr #45
- 44: aaba9728 orn x8, x25, x26, asr #37
- 48: ca6dfe3d eon x29, x17, x13, lsr #63
- 4c: ea627f1c bics x28, x24, x2, lsr #31
- 50: 0aa70f53 bic w19, w26, w7, asr #3
- 54: 2aaa0f06 orn w6, w24, w10, asr #3
- 58: 4a6176a4 eon w4, w21, w1, lsr #29
- 5c: 6a604eb0 bics w16, w21, w0, lsr #19
- 60: 1105ed91 add w17, w12, #0x17b
- 64: 3100583e adds w30, w1, #0x16
- 68: 5101f8bd sub w29, w5, #0x7e
- 6c: 710f0306 subs w6, w24, #0x3c0
- 70: 9101a1a0 add x0, x13, #0x68
- 74: b10a5cc8 adds x8, x6, #0x297
- 78: d10810aa sub x10, x5, #0x204
- 7c: f10fd061 subs x1, x3, #0x3f4
- 80: 120cb166 and w6, w11, #0xfff1fff1
- 84: 321764bc orr w28, w5, #0xfffffe07
- 88: 52174681 eor w1, w20, #0x7fffe00
- 8c: 720c0227 ands w7, w17, #0x100000
- 90: 9241018e and x14, x12, #0x8000000000000000
- 94: b25a2969 orr x9, x11, #0x1ffc000000000
- 98: d278b411 eor x17, x0, #0x3fffffffffff00
- 9c: f26aad01 ands x1, x8, #0xffffffffffc00003
- a0: 14000000 b a0
- a4: 17ffffd7 b 0
- a8: 14000242 b 9b0
- ac: 94000000 bl ac
- b0: 97ffffd4 bl 0
- b4: 9400023f bl 9b0
- b8: 3400000a cbz w10, b8
- bc: 34fffa2a cbz w10, 0
- c0: 3400478a cbz w10, 9b0
- c4: 35000008 cbnz w8, c4
- c8: 35fff9c8 cbnz w8, 0
- cc: 35004728 cbnz w8, 9b0
- d0: b400000b cbz x11, d0
- d4: b4fff96b cbz x11, 0
- d8: b40046cb cbz x11, 9b0
- dc: b500001d cbnz x29, dc
- e0: b5fff91d cbnz x29, 0
- e4: b500467d cbnz x29, 9b0
- e8: 10000013 adr x19, e8
- ec: 10fff8b3 adr x19, 0
- f0: 10004613 adr x19, 9b0
- f4: 90000013 adrp x19, 0
- f8: 36300016 tbz w22, #6, f8
- fc: 3637f836 tbz w22, #6, 0
- 100: 36304596 tbz w22, #6, 9b0
- 104: 3758000c tbnz w12, #11, 104
- 108: 375ff7cc tbnz w12, #11, 0
- 10c: 3758452c tbnz w12, #11, 9b0
- 110: 128313a0 mov w0, #0xffffe762 // #-6302
- 114: 528a32c7 mov w7, #0x5196 // #20886
- 118: 7289173b movk w27, #0x48b9
- 11c: 92ab3acc mov x12, #0xffffffffa629ffff // #-1507196929
- 120: d2a0bf94 mov x20, #0x5fc0000 // #100401152
- 124: f2c285e8 movk x8, #0x142f, lsl #32
- 128: 9358722f sbfx x15, x17, #24, #5
- 12c: 330e652f bfxil w15, w9, #14, #12
- 130: 53067f3b lsr w27, w25, #6
- 134: 93577c53 sbfx x19, x2, #23, #9
- 138: b34a1aac bfi x12, x21, #54, #7
- 13c: d35a4016 ubfiz x22, x0, #38, #17
- 140: 13946c63 extr w3, w3, w20, #27
- 144: 93c3dbc8 extr x8, x30, x3, #54
- 148: 54000000 b.eq 148 // b.none
- 14c: 54fff5a0 b.eq 0 // b.none
- 150: 54004300 b.eq 9b0 // b.none
- 154: 54000001 b.ne 154 // b.any
- 158: 54fff541 b.ne 0 // b.any
- 15c: 540042a1 b.ne 9b0 // b.any
- 160: 54000002 b.cs 160 // b.hs, b.nlast
- 164: 54fff4e2 b.cs 0 // b.hs, b.nlast
- 168: 54004242 b.cs 9b0 // b.hs, b.nlast
- 16c: 54000002 b.cs 16c // b.hs, b.nlast
- 170: 54fff482 b.cs 0 // b.hs, b.nlast
- 174: 540041e2 b.cs 9b0 // b.hs, b.nlast
- 178: 54000003 b.cc 178 // b.lo, b.ul, b.last
- 17c: 54fff423 b.cc 0 // b.lo, b.ul, b.last
- 180: 54004183 b.cc 9b0 // b.lo, b.ul, b.last
- 184: 54000003 b.cc 184 // b.lo, b.ul, b.last
- 188: 54fff3c3 b.cc 0 // b.lo, b.ul, b.last
- 18c: 54004123 b.cc 9b0 // b.lo, b.ul, b.last
- 190: 54000004 b.mi 190 // b.first
- 194: 54fff364 b.mi 0 // b.first
- 198: 540040c4 b.mi 9b0 // b.first
- 19c: 54000005 b.pl 19c // b.nfrst
- 1a0: 54fff305 b.pl 0 // b.nfrst
- 1a4: 54004065 b.pl 9b0 // b.nfrst
- 1a8: 54000006 b.vs 1a8
- 1ac: 54fff2a6 b.vs 0
- 1b0: 54004006 b.vs 9b0
- 1b4: 54000007 b.vc 1b4
- 1b8: 54fff247 b.vc 0
- 1bc: 54003fa7 b.vc 9b0
- 1c0: 54000008 b.hi 1c0 // b.pmore
- 1c4: 54fff1e8 b.hi 0 // b.pmore
- 1c8: 54003f48 b.hi 9b0 // b.pmore
- 1cc: 54000009 b.ls 1cc // b.plast
- 1d0: 54fff189 b.ls 0 // b.plast
- 1d4: 54003ee9 b.ls 9b0 // b.plast
- 1d8: 5400000a b.ge 1d8 // b.tcont
- 1dc: 54fff12a b.ge 0 // b.tcont
- 1e0: 54003e8a b.ge 9b0 // b.tcont
- 1e4: 5400000b b.lt 1e4 // b.tstop
- 1e8: 54fff0cb b.lt 0 // b.tstop
- 1ec: 54003e2b b.lt 9b0 // b.tstop
- 1f0: 5400000c b.gt 1f0
- 1f4: 54fff06c b.gt 0
- 1f8: 54003dcc b.gt 9b0
- 1fc: 5400000d b.le 1fc
- 200: 54fff00d b.le 0
- 204: 54003d6d b.le 9b0
- 208: 5400000e b.al 208
- 20c: 54ffefae b.al 0
- 210: 54003d0e b.al 9b0
- 214: 5400000f b.nv 214
- 218: 54ffef4f b.nv 0
- 21c: 54003caf b.nv 9b0
- 220: d40658e1 svc #0x32c7
- 224: d4014d22 hvc #0xa69
- 228: d4046543 smc #0x232a
- 22c: d4273f60 brk #0x39fb
- 230: d44cad80 hlt #0x656c
- 234: d503201f nop
- 238: d69f03e0 eret
- 23c: d6bf03e0 drps
- 240: d5033fdf isb
- 244: d5033e9f dsb st
- 248: d50332bf dmb oshst
- 24c: d61f0200 br x16
- 250: d63f0280 blr x20
- 254: c80a7d1b stxr w10, x27, [x8]
- 258: c800fea1 stlxr w0, x1, [x21]
- 25c: c85f7fb1 ldxr x17, [x29]
- 260: c85fff9d ldaxr x29, [x28]
- 264: c89ffee1 stlr x1, [x23]
- 268: c8dffe95 ldar x21, [x20]
- 26c: 88167e7b stxr w22, w27, [x19]
- 270: 880bfcd0 stlxr w11, w16, [x6]
- 274: 885f7c11 ldxr w17, [x0]
- 278: 885ffd44 ldaxr w4, [x10]
- 27c: 889ffed8 stlr w24, [x22]
- 280: 88dffe6a ldar w10, [x19]
- 284: 48017fc5 stxrh w1, w5, [x30]
- 288: 4808fe2c stlxrh w8, w12, [x17]
- 28c: 485f7dc9 ldxrh w9, [x14]
- 290: 485ffc27 ldaxrh w7, [x1]
- 294: 489ffe05 stlrh w5, [x16]
- 298: 48dffd82 ldarh w2, [x12]
- 29c: 080a7c6c stxrb w10, w12, [x3]
- 2a0: 081cff4e stlxrb w28, w14, [x26]
- 2a4: 085f7d5e ldxrb w30, [x10]
- 2a8: 085ffeae ldaxrb w14, [x21]
- 2ac: 089ffd2d stlrb w13, [x9]
- 2b0: 08dfff76 ldarb w22, [x27]
- 2b4: c87f4d7c ldxp x28, x19, [x11]
- 2b8: c87fcc5e ldaxp x30, x19, [x2]
- 2bc: c8220417 stxp w2, x23, x1, [x0]
- 2c0: c82cb5f0 stlxp w12, x16, x13, [x15]
- 2c4: 887f55b1 ldxp w17, w21, [x13]
- 2c8: 887ff90b ldaxp w11, w30, [x8]
- 2cc: 88382c2d stxp w24, w13, w11, [x1]
- 2d0: 883aedb5 stlxp w26, w21, w27, [x13]
- 2d4: f819928b stur x11, [x20, #-103]
- 2d8: b803e21c stur w28, [x16, #62]
- 2dc: 381f713b sturb w27, [x9, #-9]
- 2e0: 781ce322 sturh w2, [x25, #-50]
- 2e4: f850f044 ldur x4, [x2, #-241]
- 2e8: b85e129e ldur w30, [x20, #-31]
- 2ec: 385e92f1 ldurb w17, [x23, #-23]
- 2f0: 785ff35d ldurh w29, [x26, #-1]
- 2f4: 39801921 ldrsb x1, [x9, #6]
- 2f8: 7881318b ldursh x11, [x12, #19]
- 2fc: 78dce02b ldursh w11, [x1, #-50]
- 300: b8829313 ldursw x19, [x24, #41]
- 304: fc45f318 ldur d24, [x24, #95]
- 308: bc5d50af ldur s15, [x5, #-43]
- 30c: fc001375 stur d21, [x27, #1]
- 310: bc1951b7 stur s23, [x13, #-107]
- 314: f8008c0b str x11, [x0, #8]!
- 318: b801dc03 str w3, [x0, #29]!
- 31c: 38009dcb strb w11, [x14, #9]!
- 320: 781fdf1d strh w29, [x24, #-3]!
- 324: f8570e2d ldr x13, [x17, #-144]!
- 328: b85faecc ldr w12, [x22, #-6]!
- 32c: 385f6d8d ldrb w13, [x12, #-10]!
- 330: 785ebea0 ldrh w0, [x21, #-21]!
- 334: 38804cf7 ldrsb x23, [x7, #4]!
- 338: 789cbce3 ldrsh x3, [x7, #-53]!
- 33c: 78df9cbc ldrsh w28, [x5, #-7]!
- 340: b89eed38 ldrsw x24, [x9, #-18]!
- 344: fc40cd6e ldr d14, [x11, #12]!
- 348: bc5bdd93 ldr s19, [x12, #-67]!
- 34c: fc103c14 str d20, [x0, #-253]!
- 350: bc040c08 str s8, [x0, #64]!
- 354: f81a2784 str x4, [x28], #-94
- 358: b81ca4ec str w12, [x7], #-54
- 35c: 381e855b strb w27, [x10], #-24
- 360: 7801b506 strh w6, [x8], #27
- 364: f853654e ldr x14, [x10], #-202
- 368: b85d74b0 ldr w16, [x5], #-41
- 36c: 384095c2 ldrb w2, [x14], #9
- 370: 785ec5bc ldrh w28, [x13], #-20
- 374: 389e15a9 ldrsb x9, [x13], #-31
- 378: 789dc703 ldrsh x3, [x24], #-36
- 37c: 78c06474 ldrsh w20, [x3], #6
- 380: b89ff667 ldrsw x7, [x19], #-1
- 384: fc57e51e ldr d30, [x8], #-130
- 388: bc4155f9 ldr s25, [x15], #21
- 38c: fc05a6ee str d14, [x23], #90
- 390: bc1df408 str s8, [x0], #-33
- 394: f835da2a str x10, [x17, w21, sxtw #3]
- 398: b836d9a4 str w4, [x13, w22, sxtw #2]
- 39c: 3833580d strb w13, [x0, w19, uxtw #0]
- 3a0: 7826cb6c strh w12, [x27, w6, sxtw]
- 3a4: f8706900 ldr x0, [x8, x16]
- 3a8: b87ae880 ldr w0, [x4, x26, sxtx]
- 3ac: 3865db2e ldrb w14, [x25, w5, sxtw #0]
- 3b0: 78714889 ldrh w9, [x4, w17, uxtw]
- 3b4: 38a7789b ldrsb x27, [x4, x7, lsl #0]
- 3b8: 78beca2f ldrsh x15, [x17, w30, sxtw]
- 3bc: 78f6c810 ldrsh w16, [x0, w22, sxtw]
- 3c0: b8bef956 ldrsw x22, [x10, x30, sxtx #2]
- 3c4: fc6afabd ldr d29, [x21, x10, sxtx #3]
- 3c8: bc734963 ldr s3, [x11, w19, uxtw]
- 3cc: fc3d5b8d str d13, [x28, w29, uxtw #3]
- 3d0: bc25fbb7 str s23, [x29, x5, sxtx #2]
- 3d4: f9189d05 str x5, [x8, #12600]
- 3d8: b91ecb1d str w29, [x24, #7880]
- 3dc: 39187a33 strb w19, [x17, #1566]
- 3e0: 791f226d strh w13, [x19, #3984]
- 3e4: f95aa2f3 ldr x19, [x23, #13632]
- 3e8: b9587bb7 ldr w23, [x29, #6264]
- 3ec: 395f7176 ldrb w22, [x11, #2012]
- 3f0: 795d9143 ldrh w3, [x10, #3784]
- 3f4: 399e7e08 ldrsb x8, [x16, #1951]
- 3f8: 799a2697 ldrsh x23, [x20, #3346]
- 3fc: 79df3422 ldrsh w2, [x1, #3994]
- 400: b99c2624 ldrsw x4, [x17, #7204]
- 404: fd5c2374 ldr d20, [x27, #14400]
- 408: bd5fa1d9 ldr s25, [x14, #8096]
- 40c: fd1d595a str d26, [x10, #15024]
- 410: bd1b1869 str s9, [x3, #6936]
- 414: 58002cfb ldr x27, 9b0
- 418: 1800000b ldr w11, 418
- 41c: f8945060 prfum pldl1keep, [x3, #-187]
- 420: d8000000 prfm pldl1keep, 420
- 424: f8ae6ba0 prfm pldl1keep, [x29, x14]
- 428: f99a0080 prfm pldl1keep, [x4, #13312]
- 42c: 1a070035 adc w21, w1, w7
- 430: 3a0700a8 adcs w8, w5, w7
- 434: 5a0e0367 sbc w7, w27, w14
- 438: 7a11009b sbcs w27, w4, w17
- 43c: 9a000380 adc x0, x28, x0
- 440: ba1e030c adcs x12, x24, x30
- 444: da0f0320 sbc x0, x25, x15
- 448: fa030301 sbcs x1, x24, x3
- 44c: 0b340b11 add w17, w24, w20, uxtb #2
- 450: 2b2a278d adds w13, w28, w10, uxth #1
- 454: cb22aa0f sub x15, x16, w2, sxth #2
- 458: 6b2d29bd subs w29, w13, w13, uxth #2
- 45c: 8b2cce8c add x12, x20, w12, sxtw #3
- 460: ab2b877e adds x30, x27, w11, sxtb #1
- 464: cb21c8ee sub x14, x7, w1, sxtw #2
- 468: eb3ba47d subs x29, x3, w27, sxth #1
- 46c: 3a4d400e ccmn w0, w13, #0xe, mi // mi = first
- 470: 7a5132c6 ccmp w22, w17, #0x6, cc // cc = lo, ul, last
- 474: ba5e622e ccmn x17, x30, #0xe, vs
- 478: fa53814c ccmp x10, x19, #0xc, hi // hi = pmore
- 47c: 3a52d8c2 ccmn w6, #0x12, #0x2, le
- 480: 7a4d8924 ccmp w9, #0xd, #0x4, hi // hi = pmore
- 484: ba4b3aab ccmn x21, #0xb, #0xb, cc // cc = lo, ul, last
- 488: fa4d7882 ccmp x4, #0xd, #0x2, vc
- 48c: 1a96804c csel w12, w2, w22, hi // hi = pmore
- 490: 1a912618 csinc w24, w16, w17, cs // cs = hs, nlast
- 494: 5a90b0e6 csinv w6, w7, w16, lt // lt = tstop
- 498: 5a96976b csneg w11, w27, w22, ls // ls = plast
- 49c: 9a9db06a csel x10, x3, x29, lt // lt = tstop
- 4a0: 9a9b374c csinc x12, x26, x27, cc // cc = lo, ul, last
- 4a4: da95c14f csinv x15, x10, x21, gt
- 4a8: da89c6fe csneg x30, x23, x9, gt
- 4ac: 5ac0015e rbit w30, w10
- 4b0: 5ac005fd rev16 w29, w15
- 4b4: 5ac00bdd rev w29, w30
- 4b8: 5ac012b9 clz w25, w21
- 4bc: 5ac01404 cls w4, w0
- 4c0: dac002b1 rbit x17, x21
- 4c4: dac0061d rev16 x29, x16
- 4c8: dac00a95 rev32 x21, x20
- 4cc: dac00e66 rev x6, x19
- 4d0: dac0107e clz x30, x3
- 4d4: dac01675 cls x21, x19
- 4d8: 1ac00b0b udiv w11, w24, w0
- 4dc: 1ace0f3b sdiv w27, w25, w14
- 4e0: 1ad121c3 lsl w3, w14, w17
- 4e4: 1ad825e7 lsr w7, w15, w24
- 4e8: 1ad92a3c asr w28, w17, w25
- 4ec: 1adc2f42 ror w2, w26, w28
- 4f0: 9ada0b25 udiv x5, x25, x26
- 4f4: 9ad10e1b sdiv x27, x16, x17
- 4f8: 9acc22a6 lsl x6, x21, x12
- 4fc: 9acc2480 lsr x0, x4, x12
- 500: 9adc2a3b asr x27, x17, x28
- 504: 9ad12c5c ror x28, x2, x17
- 508: 9bce7dea umulh x10, x15, x14
- 50c: 9b597c6e smulh x14, x3, x25
- 510: 1b0e166f madd w15, w19, w14, w5
- 514: 1b1ae490 msub w16, w4, w26, w25
- 518: 9b023044 madd x4, x2, x2, x12
- 51c: 9b089e3d msub x29, x17, x8, x7
- 520: 9b391083 smaddl x3, w4, w25, x4
- 524: 9b24c73a smsubl x26, w25, w4, x17
- 528: 9bb15f40 umaddl x0, w26, w17, x23
- 52c: 9bbcc6af umsubl x15, w21, w28, x17
- 530: 1e23095b fmul s27, s10, s3
- 534: 1e3918e0 fdiv s0, s7, s25
- 538: 1e2f28c9 fadd s9, s6, s15
- 53c: 1e2a39fd fsub s29, s15, s10
- 540: 1e270a22 fmul s2, s17, s7
- 544: 1e77096b fmul d11, d11, d23
- 548: 1e771ba7 fdiv d7, d29, d23
- 54c: 1e6b2b6e fadd d14, d27, d11
- 550: 1e78388b fsub d11, d4, d24
- 554: 1e6e09ec fmul d12, d15, d14
- 558: 1f1c3574 fmadd s20, s11, s28, s13
- 55c: 1f17f98b fmsub s11, s12, s23, s30
- 560: 1f2935da fnmadd s26, s14, s9, s13
- 564: 1f2574ea fnmadd s10, s7, s5, s29
- 568: 1f4b306f fmadd d15, d3, d11, d12
- 56c: 1f5ec7cf fmsub d15, d30, d30, d17
- 570: 1f6f3e93 fnmadd d19, d20, d15, d15
- 574: 1f6226a9 fnmadd d9, d21, d2, d9
- 578: 1e2040fb fmov s27, s7
- 57c: 1e20c3dd fabs s29, s30
- 580: 1e214031 fneg s17, s1
- 584: 1e21c0c2 fsqrt s2, s6
- 588: 1e22c06a fcvt d10, s3
- 58c: 1e604178 fmov d24, d11
- 590: 1e60c027 fabs d7, d1
- 594: 1e61400b fneg d11, d0
- 598: 1e61c223 fsqrt d3, d17
- 59c: 1e6240dc fcvt s28, d6
- 5a0: 1e3800d6 fcvtzs w22, s6
- 5a4: 9e380360 fcvtzs x0, s27
- 5a8: 1e78005a fcvtzs w26, d2
- 5ac: 9e7800e5 fcvtzs x5, d7
- 5b0: 1e22017c scvtf s28, w11
- 5b4: 9e2201b9 scvtf s25, x13
- 5b8: 1e6202eb scvtf d11, w23
- 5bc: 9e620113 scvtf d19, x8
- 5c0: 1e2602b1 fmov w17, s21
- 5c4: 9e660299 fmov x25, d20
- 5c8: 1e270233 fmov s19, w17
- 5cc: 9e6703a2 fmov d2, x29
- 5d0: 1e2822c0 fcmp s22, s8
- 5d4: 1e7322a0 fcmp d21, d19
- 5d8: 1e202288 fcmp s20, #0.0
- 5dc: 1e602168 fcmp d11, #0.0
- 5e0: 293c19f4 stp w20, w6, [x15, #-32]
- 5e4: 2966387b ldp w27, w14, [x3, #-208]
- 5e8: 69762971 ldpsw x17, x10, [x11, #-80]
- 5ec: a9041dc7 stp x7, x7, [x14, #64]
- 5f0: a9475c0c ldp x12, x23, [x0, #112]
- 5f4: 29b61ccd stp w13, w7, [x6, #-80]!
- 5f8: 29ee405e ldp w30, w16, [x2, #-144]!
- 5fc: 69ee0744 ldpsw x4, x1, [x26, #-144]!
- 600: a9843977 stp x23, x14, [x11, #64]!
- 604: a9f46ebd ldp x29, x27, [x21, #-192]!
- 608: 28ba16b6 stp w22, w5, [x21], #-48
- 60c: 28fc44db ldp w27, w17, [x6], #-32
- 610: 68f61831 ldpsw x17, x6, [x1], #-80
- 614: a8b352ad stp x13, x20, [x21], #-208
- 618: a8c56d5e ldp x30, x27, [x10], #80
- 61c: 28024565 stnp w5, w17, [x11, #16]
- 620: 2874134e ldnp w14, w4, [x26, #-96]
- 624: a8027597 stnp x23, x29, [x12, #32]
- 628: a87b1aa0 ldnp x0, x6, [x21, #-80]
- 62c: 0c40734f ld1 {v15.8b}, [x26]
- 630: 4cdfa177 ld1 {v23.16b, v24.16b}, [x11], #32
- 634: 0cc76ee8 ld1 {v8.1d-v10.1d}, [x23], x7
- 638: 4cdf2733 ld1 {v19.8h-v22.8h}, [x25], #64
- 63c: 0d40c23d ld1r {v29.8b}, [x17]
- 640: 4ddfcaf8 ld1r {v24.4s}, [x23], #4
- 644: 0dd9ccaa ld1r {v10.1d}, [x5], x25
- 648: 4c408d51 ld2 {v17.2d, v18.2d}, [x10]
- 64c: 0cdf85ec ld2 {v12.4h, v13.4h}, [x15], #16
- 650: 4d60c239 ld2r {v25.16b, v26.16b}, [x17]
- 654: 0dffcbc1 ld2r {v1.2s, v2.2s}, [x30], #8
- 658: 4de9ce30 ld2r {v16.2d, v17.2d}, [x17], x9
- 65c: 4cc24999 ld3 {v25.4s-v27.4s}, [x12], x2
- 660: 0c404a7a ld3 {v26.2s-v28.2s}, [x19]
- 664: 4d40e6af ld3r {v15.8h-v17.8h}, [x21]
- 668: 4ddfe9b9 ld3r {v25.4s-v27.4s}, [x13], #12
- 66c: 0dddef8e ld3r {v14.1d-v16.1d}, [x28], x29
- 670: 4cdf07b1 ld4 {v17.8h-v20.8h}, [x29], #64
- 674: 0cc000fb ld4 {v27.8b-v30.8b}, [x7], x0
- 678: 0d60e238 ld4r {v24.8b-v27.8b}, [x17]
- 67c: 0dffe740 ld4r {v0.4h-v3.4h}, [x26], #8
- 680: 0de2eb2c ld4r {v12.2s-v15.2s}, [x25], x2
- 684: ce648376 sha512h q22, q27, v4.2d
- 688: ce6184c7 sha512h2 q7, q6, v1.2d
- 68c: cec081fa sha512su0 v26.2d, v15.2d
- 690: ce6d89a2 sha512su1 v2.2d, v13.2d, v13.2d
- 694: ba5fd3e3 ccmn xzr, xzr, #0x3, le
- 698: 3a5f03e5 ccmn wzr, wzr, #0x5, eq // eq = none
- 69c: fa411be4 ccmp xzr, #0x1, #0x4, ne // ne = any
- 6a0: 7a42cbe2 ccmp wzr, #0x2, #0x2, gt
- 6a4: 93df03ff ror xzr, xzr, #0
- 6a8: c820ffff stlxp w0, xzr, xzr, [sp]
- 6ac: 8822fc7f stlxp w2, wzr, wzr, [x3]
- 6b0: c8247cbf stxp w4, xzr, xzr, [x5]
- 6b4: 88267fff stxp w6, wzr, wzr, [sp]
- 6b8: 4e010fe0 dup v0.16b, wzr
- 6bc: 4e081fe1 mov v1.d[0], xzr
- 6c0: 4e0c1fe1 mov v1.s[1], wzr
- 6c4: 4e0a1fe1 mov v1.h[2], wzr
- 6c8: 4e071fe1 mov v1.b[3], wzr
- 6cc: 4cc0ac3f ld1 {v31.2d, v0.2d}, [x1], x0
- 6d0: 05a08020 mov z0.s, p0/m, s1
- 6d4: 04b0e3e0 incw x0
- 6d8: 0470e7e1 dech x1
- 6dc: 042f9c20 lsl z0.b, z1.b, #7
- 6e0: 043f9c35 lsl z21.h, z1.h, #15
- 6e4: 047f9c20 lsl z0.s, z1.s, #31
- 6e8: 04ff9c20 lsl z0.d, z1.d, #63
- 6ec: 04299420 lsr z0.b, z1.b, #7
- 6f0: 04319160 asr z0.h, z11.h, #15
- 6f4: 0461943e lsr z30.s, z1.s, #31
- 6f8: 04a19020 asr z0.d, z1.d, #63
- 6fc: 042053ff addvl sp, x0, #31
- 700: 047f5401 addpl x1, sp, #-32
- 704: 25208028 cntp x8, p0, p1.b
- 708: 2538cfe0 mov z0.b, #127
- 70c: 2578d001 mov z1.h, #-128
- 710: 25b8efe2 mov z2.s, #32512
- 714: 25f8f007 mov z7.d, #-32768
- 718: a400a3e0 ld1b {z0.b}, p0/z, [sp]
- 71c: a4a8a7ea ld1h {z10.h}, p1/z, [sp, #-8, mul vl]
- 720: a547a814 ld1w {z20.s}, p2/z, [x0, #7, mul vl]
- 724: a4084ffe ld1b {z30.b}, p3/z, [sp, x8]
- 728: a55c53e0 ld1w {z0.s}, p4/z, [sp, x28, lsl #2]
- 72c: a5e1540b ld1d {z11.d}, p5/z, [x0, x1, lsl #3]
- 730: e400fbf6 st1b {z22.b}, p6, [sp]
- 734: e408ffff st1b {z31.b}, p7, [sp, #-8, mul vl]
- 738: e547e400 st1w {z0.s}, p1, [x0, #7, mul vl]
- 73c: e4014be0 st1b {z0.b}, p2, [sp, x1]
- 740: e4a84fe0 st1h {z0.h}, p3, [sp, x8, lsl #1]
- 744: e5f15000 st1d {z0.d}, p4, [x0, x17, lsl #3]
- 748: 858043e0 ldr z0, [sp]
- 74c: 85a043ff ldr z31, [sp, #-256, mul vl]
- 750: e59f5d08 str z8, [x8, #255, mul vl]
- 754: 1e601000 fmov d0, #2.000000000000000000e+00
- 758: 1e603000 fmov d0, #2.125000000000000000e+00
- 75c: 1e621000 fmov d0, #4.000000000000000000e+00
- 760: 1e623000 fmov d0, #4.250000000000000000e+00
- 764: 1e641000 fmov d0, #8.000000000000000000e+00
- 768: 1e643000 fmov d0, #8.500000000000000000e+00
- 76c: 1e661000 fmov d0, #1.600000000000000000e+01
- 770: 1e663000 fmov d0, #1.700000000000000000e+01
- 774: 1e681000 fmov d0, #1.250000000000000000e-01
- 778: 1e683000 fmov d0, #1.328125000000000000e-01
- 77c: 1e6a1000 fmov d0, #2.500000000000000000e-01
- 780: 1e6a3000 fmov d0, #2.656250000000000000e-01
- 784: 1e6c1000 fmov d0, #5.000000000000000000e-01
- 788: 1e6c3000 fmov d0, #5.312500000000000000e-01
- 78c: 1e6e1000 fmov d0, #1.000000000000000000e+00
- 790: 1e6e3000 fmov d0, #1.062500000000000000e+00
- 794: 1e701000 fmov d0, #-2.000000000000000000e+00
- 798: 1e703000 fmov d0, #-2.125000000000000000e+00
- 79c: 1e721000 fmov d0, #-4.000000000000000000e+00
- 7a0: 1e723000 fmov d0, #-4.250000000000000000e+00
- 7a4: 1e741000 fmov d0, #-8.000000000000000000e+00
- 7a8: 1e743000 fmov d0, #-8.500000000000000000e+00
- 7ac: 1e761000 fmov d0, #-1.600000000000000000e+01
- 7b0: 1e763000 fmov d0, #-1.700000000000000000e+01
- 7b4: 1e781000 fmov d0, #-1.250000000000000000e-01
- 7b8: 1e783000 fmov d0, #-1.328125000000000000e-01
- 7bc: 1e7a1000 fmov d0, #-2.500000000000000000e-01
- 7c0: 1e7a3000 fmov d0, #-2.656250000000000000e-01
- 7c4: 1e7c1000 fmov d0, #-5.000000000000000000e-01
- 7c8: 1e7c3000 fmov d0, #-5.312500000000000000e-01
- 7cc: 1e7e1000 fmov d0, #-1.000000000000000000e+00
- 7d0: 1e7e3000 fmov d0, #-1.062500000000000000e+00
- 7d4: f8388098 swp x24, x24, [x4]
- 7d8: f8340010 ldadd x20, x16, [x0]
- 7dc: f8241175 ldclr x4, x21, [x11]
- 7e0: f83e22d0 ldeor x30, x16, [x22]
- 7e4: f82432ef ldset x4, x15, [x23]
- 7e8: f83a5186 ldsmin x26, x6, [x12]
- 7ec: f82f41ee ldsmax x15, x14, [x15]
- 7f0: f82973b9 ldumin x9, x25, [x29]
- 7f4: f82b6194 ldumax x11, x20, [x12]
- 7f8: f8b08216 swpa x16, x22, [x16]
- 7fc: f8b50358 ldadda x21, x24, [x26]
- 800: f8a61206 ldclra x6, x6, [x16]
- 804: f8b02219 ldeora x16, x25, [x16]
- 808: f8bc3218 ldseta x28, x24, [x16]
- 80c: f8ba514f ldsmina x26, x15, [x10]
- 810: f8ad428e ldsmaxa x13, x14, [x20]
- 814: f8a173d7 ldumina x1, x23, [x30]
- 818: f8ae60c2 ldumaxa x14, x2, [x6]
- 81c: f8e38328 swpal x3, x8, [x25]
- 820: f8e003db ldaddal x0, x27, [x30]
- 824: f8e513c5 ldclral x5, x5, [x30]
- 828: f8eb2019 ldeoral x11, x25, [x0]
- 82c: f8ff3260 ldsetal xzr, x0, [x19]
- 830: f8fd513a ldsminal x29, x26, [x9]
- 834: f8fa41ec ldsmaxal x26, x12, [x15]
- 838: f8eb71eb lduminal x11, x11, [x15]
- 83c: f8f96316 ldumaxal x25, x22, [x24]
- 840: f8608171 swpl x0, x17, [x11]
- 844: f86600dd ldaddl x6, x29, [x6]
- 848: f86512a5 ldclrl x5, x5, [x21]
- 84c: f87321f0 ldeorl x19, x16, [x15]
- 850: f87e339b ldsetl x30, x27, [x28]
- 854: f861503c ldsminl x1, x28, [x1]
- 858: f874421d ldsmaxl x20, x29, [x16]
- 85c: f86d73aa lduminl x13, x10, [x29]
- 860: f87d62d3 ldumaxl x29, x19, [x22]
- 864: b82a83e4 swp w10, w4, [sp]
- 868: b83503e8 ldadd w21, w8, [sp]
- 86c: b833138a ldclr w19, w10, [x28]
- 870: b82220b9 ldeor w2, w25, [x5]
- 874: b82332c8 ldset w3, w8, [x22]
- 878: b83350ad ldsmin w19, w13, [x5]
- 87c: b83d42b8 ldsmax w29, w24, [x21]
- 880: b83a7078 ldumin w26, w24, [x3]
- 884: b83862fa ldumax w24, w26, [x23]
- 888: b8af8075 swpa w15, w21, [x3]
- 88c: b8b80328 ldadda w24, w8, [x25]
- 890: b8b41230 ldclra w20, w16, [x17]
- 894: b8a22001 ldeora w2, w1, [x0]
- 898: b8b83064 ldseta w24, w4, [x3]
- 89c: b8ac539f ldsmina w12, wzr, [x28]
- 8a0: b8aa405a ldsmaxa w10, w26, [x2]
- 8a4: b8ac73f0 ldumina w12, w16, [sp]
- 8a8: b8a163ad ldumaxa w1, w13, [x29]
- 8ac: b8e08193 swpal w0, w19, [x12]
- 8b0: b8f101b6 ldaddal w17, w22, [x13]
- 8b4: b8fc13fe ldclral w28, w30, [sp]
- 8b8: b8e1239a ldeoral w1, w26, [x28]
- 8bc: b8e4309e ldsetal w4, w30, [x4]
- 8c0: b8e6535e ldsminal w6, w30, [x26]
- 8c4: b8f04109 ldsmaxal w16, w9, [x8]
- 8c8: b8ec7280 lduminal w12, w0, [x20]
- 8cc: b8e16058 ldumaxal w1, w24, [x2]
- 8d0: b8608309 swpl w0, w9, [x24]
- 8d4: b87a03d0 ldaddl w26, w16, [x30]
- 8d8: b86312ea ldclrl w3, w10, [x23]
- 8dc: b86a21e4 ldeorl w10, w4, [x15]
- 8e0: b862310b ldsetl w2, w11, [x8]
- 8e4: b86a522f ldsminl w10, w15, [x17]
- 8e8: b862418a ldsmaxl w2, w10, [x12]
- 8ec: b86c71af lduminl w12, w15, [x13]
- 8f0: b8626287 ldumaxl w2, w7, [x20]
- 8f4: 042401f9 add z25.b, z15.b, z4.b
- 8f8: 04b10564 sub z4.s, z11.s, z17.s
- 8fc: 65ca0230 fadd z16.d, z17.d, z10.d
- 900: 65d90996 fmul z22.d, z12.d, z25.d
- 904: 65ca05dc fsub z28.d, z14.d, z10.d
- 908: 0456afc1 abs z1.h, p3/m, z30.h
- 90c: 0400044f add z15.b, p1/m, z15.b, z2.b
- 910: 0490920d asr z13.s, p4/m, z13.s, z16.s
- 914: 04daa163 cnt z3.d, p0/m, z11.d
- 918: 04d389c5 lsl z5.d, p2/m, z5.d, z14.d
- 91c: 0411829d lsr z29.b, p0/m, z29.b, z20.b
- 920: 04901774 mul z20.s, p5/m, z20.s, z27.s
- 924: 0417b89a neg z26.b, p6/m, z4.b
- 928: 041eb3d6 not z22.b, p4/m, z30.b
- 92c: 04480b6b smax z11.h, p2/m, z11.h, z27.h
- 930: 048a17dc smin z28.s, p5/m, z28.s, z30.s
- 934: 048105be sub z30.s, p1/m, z30.s, z13.s
- 938: 04dcb35e fabs z30.d, p4/m, z26.d
- 93c: 65808d6f fadd z15.s, p3/m, z15.s, z11.s
- 940: 65cd9e06 fdiv z6.d, p7/m, z6.d, z16.d
- 944: 65869cfb fmax z27.s, p7/m, z27.s, z7.s
- 948: 65c78893 fmin z19.d, p2/m, z19.d, z4.d
- 94c: 658292d1 fmul z17.s, p4/m, z17.s, z22.s
- 950: 04ddaebc fneg z28.d, p3/m, z21.d
- 954: 6582b451 frintm z17.s, p5/m, z2.s
- 958: 6580ade6 frintn z6.s, p3/m, z15.s
- 95c: 65c1b42c frintp z12.d, p5/m, z1.d
- 960: 658da631 fsqrt z17.s, p1/m, z17.s
- 964: 658195af fsub z15.s, p5/m, z15.s, z13.s
- 968: 65eb1f74 fmla z20.d, p7/m, z27.d, z11.d
- 96c: 65f723c3 fmls z3.d, p0/m, z30.d, z23.d
- 970: 65ba4b71 fnmla z17.s, p2/m, z27.s, z26.s
- 974: 65fe76c6 fnmls z6.d, p5/m, z22.d, z30.d
- 978: 04515f42 mla z2.h, p7/m, z26.h, z17.h
- 97c: 04117056 mls z22.b, p4/m, z2.b, z17.b
- 980: 04363338 and z24.d, z25.d, z22.d
- 984: 04a33191 eor z17.d, z12.d, z3.d
- 988: 0470339d orr z29.d, z28.d, z16.d
- 98c: 049a2b86 andv s6, p2, z28.s
- 990: 045824e7 orv h7, p1, z7.h
- 994: 04193509 eorv b9, p5, z8.b
- 998: 040837db smaxv b27, p5, z30.b
- 99c: 044a221a sminv h26, p0, z16.h
- 9a0: 65c73903 fminv d3, p6, z8.d
- 9a4: 65c63b55 fmaxv d21, p6, z26.d
- 9a8: 65982096 fadda s22, p0, s22, z4.s
- 9ac: 04412071 uaddv d17, p0, z3.h
- */
+*/
static const unsigned int insns[] =
{
@@ -1486,30 +1007,30 @@ Disassembly of section .text:
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
0x120cb166, 0x321764bc, 0x52174681, 0x720c0227,
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
- 0x14000000, 0x17ffffd7, 0x14000242, 0x94000000,
- 0x97ffffd4, 0x9400023f, 0x3400000a, 0x34fffa2a,
- 0x3400478a, 0x35000008, 0x35fff9c8, 0x35004728,
- 0xb400000b, 0xb4fff96b, 0xb40046cb, 0xb500001d,
- 0xb5fff91d, 0xb500467d, 0x10000013, 0x10fff8b3,
- 0x10004613, 0x90000013, 0x36300016, 0x3637f836,
- 0x36304596, 0x3758000c, 0x375ff7cc, 0x3758452c,
+ 0x14000000, 0x17ffffd7, 0x140002cd, 0x94000000,
+ 0x97ffffd4, 0x940002ca, 0x3400000a, 0x34fffa2a,
+ 0x340058ea, 0x35000008, 0x35fff9c8, 0x35005888,
+ 0xb400000b, 0xb4fff96b, 0xb400582b, 0xb500001d,
+ 0xb5fff91d, 0xb50057dd, 0x10000013, 0x10fff8b3,
+ 0x10005773, 0x90000013, 0x36300016, 0x3637f836,
+ 0x363056f6, 0x3758000c, 0x375ff7cc, 0x3758568c,
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
- 0x54004300, 0x54000001, 0x54fff541, 0x540042a1,
- 0x54000002, 0x54fff4e2, 0x54004242, 0x54000002,
- 0x54fff482, 0x540041e2, 0x54000003, 0x54fff423,
- 0x54004183, 0x54000003, 0x54fff3c3, 0x54004123,
- 0x54000004, 0x54fff364, 0x540040c4, 0x54000005,
- 0x54fff305, 0x54004065, 0x54000006, 0x54fff2a6,
- 0x54004006, 0x54000007, 0x54fff247, 0x54003fa7,
- 0x54000008, 0x54fff1e8, 0x54003f48, 0x54000009,
- 0x54fff189, 0x54003ee9, 0x5400000a, 0x54fff12a,
- 0x54003e8a, 0x5400000b, 0x54fff0cb, 0x54003e2b,
- 0x5400000c, 0x54fff06c, 0x54003dcc, 0x5400000d,
- 0x54fff00d, 0x54003d6d, 0x5400000e, 0x54ffefae,
- 0x54003d0e, 0x5400000f, 0x54ffef4f, 0x54003caf,
+ 0x54005460, 0x54000001, 0x54fff541, 0x54005401,
+ 0x54000002, 0x54fff4e2, 0x540053a2, 0x54000002,
+ 0x54fff482, 0x54005342, 0x54000003, 0x54fff423,
+ 0x540052e3, 0x54000003, 0x54fff3c3, 0x54005283,
+ 0x54000004, 0x54fff364, 0x54005224, 0x54000005,
+ 0x54fff305, 0x540051c5, 0x54000006, 0x54fff2a6,
+ 0x54005166, 0x54000007, 0x54fff247, 0x54005107,
+ 0x54000008, 0x54fff1e8, 0x540050a8, 0x54000009,
+ 0x54fff189, 0x54005049, 0x5400000a, 0x54fff12a,
+ 0x54004fea, 0x5400000b, 0x54fff0cb, 0x54004f8b,
+ 0x5400000c, 0x54fff06c, 0x54004f2c, 0x5400000d,
+ 0x54fff00d, 0x54004ecd, 0x5400000e, 0x54ffefae,
+ 0x54004e6e, 0x5400000f, 0x54ffef4f, 0x54004e0f,
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
0xd44cad80, 0xd503201f, 0xd69f03e0, 0xd6bf03e0,
0xd5033fdf, 0xd5033e9f, 0xd50332bf, 0xd61f0200,
@@ -1541,7 +1062,7 @@ Disassembly of section .text:
0x791f226d, 0xf95aa2f3, 0xb9587bb7, 0x395f7176,
0x795d9143, 0x399e7e08, 0x799a2697, 0x79df3422,
0xb99c2624, 0xfd5c2374, 0xbd5fa1d9, 0xfd1d595a,
- 0xbd1b1869, 0x58002cfb, 0x1800000b, 0xf8945060,
+ 0xbd1b1869, 0x58003e5b, 0x1800000b, 0xf8945060,
0xd8000000, 0xf8ae6ba0, 0xf99a0080, 0x1a070035,
0x3a0700a8, 0x5a0e0367, 0x7a11009b, 0x9a000380,
0xba1e030c, 0xda0f0320, 0xfa030301, 0x0b340b11,
@@ -1580,58 +1101,92 @@ Disassembly of section .text:
0x4d60c239, 0x0dffcbc1, 0x4de9ce30, 0x4cc24999,
0x0c404a7a, 0x4d40e6af, 0x4ddfe9b9, 0x0dddef8e,
0x4cdf07b1, 0x0cc000fb, 0x0d60e238, 0x0dffe740,
- 0x0de2eb2c, 0xce648376, 0xce6184c7, 0xcec081fa,
- 0xce6d89a2, 0xba5fd3e3, 0x3a5f03e5, 0xfa411be4,
- 0x7a42cbe2, 0x93df03ff, 0xc820ffff, 0x8822fc7f,
- 0xc8247cbf, 0x88267fff, 0x4e010fe0, 0x4e081fe1,
- 0x4e0c1fe1, 0x4e0a1fe1, 0x4e071fe1, 0x4cc0ac3f,
- 0x05a08020, 0x04b0e3e0, 0x0470e7e1, 0x042f9c20,
- 0x043f9c35, 0x047f9c20, 0x04ff9c20, 0x04299420,
- 0x04319160, 0x0461943e, 0x04a19020, 0x042053ff,
- 0x047f5401, 0x25208028, 0x2538cfe0, 0x2578d001,
- 0x25b8efe2, 0x25f8f007, 0xa400a3e0, 0xa4a8a7ea,
- 0xa547a814, 0xa4084ffe, 0xa55c53e0, 0xa5e1540b,
- 0xe400fbf6, 0xe408ffff, 0xe547e400, 0xe4014be0,
- 0xe4a84fe0, 0xe5f15000, 0x858043e0, 0x85a043ff,
- 0xe59f5d08, 0x1e601000, 0x1e603000, 0x1e621000,
- 0x1e623000, 0x1e641000, 0x1e643000, 0x1e661000,
- 0x1e663000, 0x1e681000, 0x1e683000, 0x1e6a1000,
- 0x1e6a3000, 0x1e6c1000, 0x1e6c3000, 0x1e6e1000,
- 0x1e6e3000, 0x1e701000, 0x1e703000, 0x1e721000,
- 0x1e723000, 0x1e741000, 0x1e743000, 0x1e761000,
- 0x1e763000, 0x1e781000, 0x1e783000, 0x1e7a1000,
- 0x1e7a3000, 0x1e7c1000, 0x1e7c3000, 0x1e7e1000,
- 0x1e7e3000, 0xf8388098, 0xf8340010, 0xf8241175,
- 0xf83e22d0, 0xf82432ef, 0xf83a5186, 0xf82f41ee,
- 0xf82973b9, 0xf82b6194, 0xf8b08216, 0xf8b50358,
- 0xf8a61206, 0xf8b02219, 0xf8bc3218, 0xf8ba514f,
- 0xf8ad428e, 0xf8a173d7, 0xf8ae60c2, 0xf8e38328,
- 0xf8e003db, 0xf8e513c5, 0xf8eb2019, 0xf8ff3260,
- 0xf8fd513a, 0xf8fa41ec, 0xf8eb71eb, 0xf8f96316,
- 0xf8608171, 0xf86600dd, 0xf86512a5, 0xf87321f0,
- 0xf87e339b, 0xf861503c, 0xf874421d, 0xf86d73aa,
- 0xf87d62d3, 0xb82a83e4, 0xb83503e8, 0xb833138a,
- 0xb82220b9, 0xb82332c8, 0xb83350ad, 0xb83d42b8,
- 0xb83a7078, 0xb83862fa, 0xb8af8075, 0xb8b80328,
- 0xb8b41230, 0xb8a22001, 0xb8b83064, 0xb8ac539f,
- 0xb8aa405a, 0xb8ac73f0, 0xb8a163ad, 0xb8e08193,
- 0xb8f101b6, 0xb8fc13fe, 0xb8e1239a, 0xb8e4309e,
- 0xb8e6535e, 0xb8f04109, 0xb8ec7280, 0xb8e16058,
- 0xb8608309, 0xb87a03d0, 0xb86312ea, 0xb86a21e4,
- 0xb862310b, 0xb86a522f, 0xb862418a, 0xb86c71af,
- 0xb8626287, 0x042401f9, 0x04b10564, 0x65ca0230,
- 0x65d90996, 0x65ca05dc, 0x0456afc1, 0x0400044f,
- 0x0490920d, 0x04daa163, 0x04d389c5, 0x0411829d,
- 0x04901774, 0x0417b89a, 0x041eb3d6, 0x04480b6b,
- 0x048a17dc, 0x048105be, 0x04dcb35e, 0x65808d6f,
- 0x65cd9e06, 0x65869cfb, 0x65c78893, 0x658292d1,
- 0x04ddaebc, 0x6582b451, 0x6580ade6, 0x65c1b42c,
- 0x658da631, 0x658195af, 0x65eb1f74, 0x65f723c3,
- 0x65ba4b71, 0x65fe76c6, 0x04515f42, 0x04117056,
- 0x04363338, 0x04a33191, 0x0470339d, 0x049a2b86,
- 0x045824e7, 0x04193509, 0x040837db, 0x044a221a,
- 0x65c73903, 0x65c63b55, 0x65982096, 0x04412071,
-
+ 0x0de2eb2c, 0x0e31baf6, 0x4e31bb9b, 0x0e71b8a4,
+ 0x4e71b907, 0x4eb1b8e6, 0x0e30a841, 0x4e30ab7a,
+ 0x0e70aa0f, 0x4e70a862, 0x4eb0a9cd, 0x6e30f9cd,
+ 0x0e31ab38, 0x4e31ab17, 0x0e71a8a4, 0x4e71aa93,
+ 0x4eb1aa0f, 0x6eb0f820, 0x0e20b8a4, 0x4e20bab4,
+ 0x0e60b98b, 0x4e60bbdd, 0x0ea0ba0f, 0x4ea0bad5,
+ 0x4ee0b8a4, 0x0ea0f9ee, 0x4ea0faf6, 0x4ee0fb59,
+ 0x2ea0f8e6, 0x6ea0f9ac, 0x6ee0f9ee, 0x2ea1f9cd,
+ 0x6ea1f9ee, 0x6ee1f949, 0x2e205b59, 0x6e205bbc,
+ 0x0e2c1d6a, 0x4e351e93, 0x0ead1d8b, 0x4eb31e51,
+ 0x2e371ed5, 0x6e311e0f, 0x0e3686b4, 0x4e398717,
+ 0x0e7c877a, 0x4e6784c5, 0x0ea884e6, 0x4eb1860f,
+ 0x4ef1860f, 0x0e3bd759, 0x4e32d630, 0x4e7dd79b,
+ 0x2e3a8738, 0x6e31860f, 0x2e7b8759, 0x6e7085ee,
+ 0x2eac856a, 0x6eaf85cd, 0x6ef085ee, 0x0eb6d6b4,
+ 0x4ea3d441, 0x4ef8d6f6, 0x0e209ffe, 0x4e309dee,
+ 0x0e649c62, 0x4e689ce6, 0x0ea59c83, 0x4ea99d07,
+ 0x2e3adf38, 0x6e22dc20, 0x6e7ddf9b, 0x0e7f97dd,
+ 0x4e6794c5, 0x0ea794c5, 0x4ebf97dd, 0x0e2dcd8b,
+ 0x4e3bcf59, 0x4e62cc20, 0x2e6097fe, 0x6e629420,
+ 0x2eb39651, 0x6ebe97bc, 0x0ebbcf59, 0x4eabcd49,
+ 0x4efbcf59, 0x2e2efdac, 0x6e31fe0f, 0x6e6dfd8b,
+ 0x0e2c656a, 0x4e336651, 0x0e7a6738, 0x4e7766d5,
+ 0x0eb96717, 0x4ea26420, 0x0e32f630, 0x4e2cf56a,
+ 0x4e68f4e6, 0x0e3e6fbc, 0x4e286ce6, 0x0e676cc5,
+ 0x4e676cc5, 0x0eb66eb4, 0x4eb36e51, 0x0eb1f60f,
+ 0x4eb3f651, 0x4efff7dd, 0x2e3c8f7a, 0x6e3e8fbc,
+ 0x2e638c41, 0x6e7d8f9b, 0x2ea28c20, 0x6eb68eb4,
+ 0x6efe8fbc, 0x0e31e60f, 0x4e2ee5ac, 0x4e6ce56a,
+ 0x0e3e37bc, 0x4e3e37bc, 0x0e753693, 0x4e7836f6,
+ 0x0eac356a, 0x4ea634a4, 0x4ee037fe, 0x2eb6e6b4,
+ 0x6eaae528, 0x6ee0e7fe, 0x0e333e51, 0x4e2c3d6a,
+ 0x0e7d3f9b, 0x4e643c62, 0x0eba3f38, 0x4ea63ca4,
+ 0x4ee53c83, 0x2e2ae528, 0x6e38e6f6, 0x6e73e651,
+ 0xba5fd3e3, 0x3a5f03e5, 0xfa411be4, 0x7a42cbe2,
+ 0x93df03ff, 0xc820ffff, 0x8822fc7f, 0xc8247cbf,
+ 0x88267fff, 0x4e010fe0, 0x4e081fe1, 0x4e0c1fe1,
+ 0x4e0a1fe1, 0x4e071fe1, 0x4cc0ac3f, 0x05a08020,
+ 0x04b0e3e0, 0x0470e7e1, 0x042f9c20, 0x043f9c35,
+ 0x047f9c20, 0x04ff9c20, 0x04299420, 0x04319160,
+ 0x0461943e, 0x04a19020, 0x042053ff, 0x047f5401,
+ 0x25208028, 0x2538cfe0, 0x2578d001, 0x25b8efe2,
+ 0x25f8f007, 0xa400a3e0, 0xa4a8a7ea, 0xa547a814,
+ 0xa4084ffe, 0xa55c53e0, 0xa5e1540b, 0xe400fbf6,
+ 0xe408ffff, 0xe547e400, 0xe4014be0, 0xe4a84fe0,
+ 0xe5f15000, 0x858043e0, 0x85a043ff, 0xe59f5d08,
+ 0x1e601000, 0x1e603000, 0x1e621000, 0x1e623000,
+ 0x1e641000, 0x1e643000, 0x1e661000, 0x1e663000,
+ 0x1e681000, 0x1e683000, 0x1e6a1000, 0x1e6a3000,
+ 0x1e6c1000, 0x1e6c3000, 0x1e6e1000, 0x1e6e3000,
+ 0x1e701000, 0x1e703000, 0x1e721000, 0x1e723000,
+ 0x1e741000, 0x1e743000, 0x1e761000, 0x1e763000,
+ 0x1e781000, 0x1e783000, 0x1e7a1000, 0x1e7a3000,
+ 0x1e7c1000, 0x1e7c3000, 0x1e7e1000, 0x1e7e3000,
+ 0xf82d83a5, 0xf8380355, 0xf8381303, 0xf83a21f7,
+ 0xf8353303, 0xf8285299, 0xf8304051, 0xf8217300,
+ 0xf8246183, 0xf8bf815c, 0xf8ba0182, 0xf8b0103f,
+ 0xf8ad201d, 0xf8b3322c, 0xf8b6538d, 0xf8be403f,
+ 0xf8ba709c, 0xf8be60c4, 0xf8fe81fa, 0xf8e90188,
+ 0xf8e01034, 0xf8f82002, 0xf8e93358, 0xf8f0507e,
+ 0xf8ea4157, 0xf8e47050, 0xf8eb6148, 0xf86f8051,
+ 0xf86a018c, 0xf86f104d, 0xf8672354, 0xf8703044,
+ 0xf86451ec, 0xf87541f0, 0xf86b72f5, 0xf86c62fa,
+ 0xb83c816e, 0xb8380181, 0xb83f120a, 0xb8272062,
+ 0xb82d3233, 0xb8305023, 0xb82b40be, 0xb82873af,
+ 0xb83e6280, 0xb8a782f4, 0xb8bc0375, 0xb8b91025,
+ 0xb8b723f0, 0xb8a5312c, 0xb8bc53af, 0xb8b6427f,
+ 0xb8bf71c5, 0xb8b061ff, 0xb8fb8214, 0xb8ec012b,
+ 0xb8e6123e, 0xb8fb23dc, 0xb8e7328a, 0xb8ea5304,
+ 0xb8f142d1, 0xb8e371fd, 0xb8f66273, 0xb87681e2,
+ 0xb866020c, 0xb86b12ed, 0xb861227e, 0xb8653051,
+ 0xb87051b6, 0xb86a43b5, 0xb87b736c, 0xb86363e1,
+ 0xce312677, 0xce0e1b5b, 0xce7e8ed4, 0xce9ed858,
+ 0xce768151, 0xce718451, 0xcec08300, 0xce628ad9,
+ 0x04e30191, 0x04f0079d, 0x65dc0126, 0x65870887,
+ 0x658806c9, 0x0416b7db, 0x0440021a, 0x04d09903,
+ 0x04dabb55, 0x04138096, 0x04518071, 0x041008c1,
+ 0x0497bce9, 0x045eb4b6, 0x040813c8, 0x04ca0171,
+ 0x0481035c, 0x04dcadbc, 0x658098b0, 0x658d89ed,
+ 0x6586957a, 0x65879096, 0x65829233, 0x04ddac4e,
+ 0x6582b6e3, 0x6580a626, 0x6581b21b, 0x658dbc62,
+ 0x65819266, 0x65f8150c, 0x65b72151, 0x65b05db3,
+ 0x65f165c0, 0x04944ac8, 0x048f607b, 0x042430f4,
+ 0x04a83007, 0x046432d3, 0x04da3569, 0x04583e05,
+ 0x04592c36, 0x04c83608, 0x048a248f, 0x658727a8,
+ 0x65c633bc, 0x65982c49, 0x040120fc,
};
// END Generated code -- do not edit
diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
index 711d9db07e5..7ff9c018bef 100644
--- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp
@@ -217,7 +217,7 @@ class Instruction_aarch64 {
static void patch(address a, int msb, int lsb, uint64_t val) {
int nbits = msb - lsb + 1;
- guarantee(val < (1U << nbits), "Field too big for insn");
+ guarantee(val < (1ULL << nbits), "Field too big for insn");
assert_cond(msb >= lsb);
unsigned mask = (1U << nbits) - 1;
val <<= lsb;
@@ -445,8 +445,8 @@ class Address {
}
Register base() const {
- guarantee((_mode == base_plus_offset | _mode == base_plus_offset_reg
- | _mode == post | _mode == post_reg),
+ guarantee((_mode == base_plus_offset || _mode == base_plus_offset_reg
+ || _mode == post || _mode == post_reg),
"wrong mode");
return _base;
}
@@ -1371,6 +1371,21 @@ class Assembler : public AbstractAssembler {
#undef INSN
+#define INSN(NAME, size, opc) \
+ void NAME(FloatRegister Rt, Register Rn) { \
+ starti; \
+ f(size, 31, 30), f(0b111100, 29, 24), f(opc, 23, 22), f(0, 21); \
+ f(0, 20, 12), f(0b01, 11, 10); \
+ rf(Rn, 5), rf((Register)Rt, 0); \
+ }
+
+ INSN(ldrs, 0b10, 0b01);
+ INSN(ldrd, 0b11, 0b01);
+ INSN(ldrq, 0b00, 0b11);
+
+#undef INSN
+
+
#define INSN(NAME, opc, V) \
void NAME(address dest, prfop op = PLDL1KEEP) { \
int64_t offset = (dest - pc()) >> 2; \
@@ -1508,6 +1523,21 @@ class Assembler : public AbstractAssembler {
#undef INSN
+/* SIMD extensions
+ *
+ * We just use FloatRegister in the following. They are exactly the same
+ * as SIMD registers.
+ */
+public:
+
+ enum SIMD_Arrangement {
+ T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D, T1Q
+ };
+
+ enum SIMD_RegVariant {
+ B, H, S, D, Q
+ };
+
enum shift_kind { LSL, LSR, ASR, ROR };
void op_shifted_reg(unsigned decode,
@@ -1887,6 +1917,30 @@ void mvnw(Register Rd, Register Rm,
i_fmovs(Vd, Vn);
}
+private:
+ void _fcvt_narrow_extend(FloatRegister Vd, SIMD_Arrangement Ta,
+ FloatRegister Vn, SIMD_Arrangement Tb, bool do_extend) {
+ assert((do_extend && (Tb >> 1) + 1 == (Ta >> 1))
+ || (!do_extend && (Ta >> 1) + 1 == (Tb >> 1)), "Incompatible arrangement");
+ starti;
+ int op30 = (do_extend ? Tb : Ta) & 1;
+ int op22 = ((do_extend ? Ta : Tb) >> 1) & 1;
+ f(0, 31), f(op30, 30), f(0b0011100, 29, 23), f(op22, 22);
+ f(0b100001011, 21, 13), f(do_extend ? 1 : 0, 12), f(0b10, 11, 10);
+ rf(Vn, 5), rf(Vd, 0);
+ }
+
+public:
+ void fcvtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) {
+    assert(Tb == T4H || Tb == T8H || Tb == T2S || Tb == T4S, "invalid arrangement");
+ _fcvt_narrow_extend(Vd, Ta, Vn, Tb, true);
+ }
+
+ void fcvtn(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) {
+    assert(Ta == T4H || Ta == T8H || Ta == T2S || Ta == T4S, "invalid arrangement");
+ _fcvt_narrow_extend(Vd, Ta, Vn, Tb, false);
+ }
+
#undef INSN
// Floating-point data-processing (2 source)
@@ -2023,6 +2077,43 @@ void mvnw(Register Rd, Register Rm,
#undef INSN
+ enum sign_kind { SIGNED, UNSIGNED };
+
+private:
+ void _xcvtf_scalar_integer(sign_kind sign, unsigned sz,
+ FloatRegister Rd, FloatRegister Rn) {
+ starti;
+ f(0b01, 31, 30), f(sign == SIGNED ? 0 : 1, 29);
+ f(0b111100, 27, 23), f((sz >> 1) & 1, 22), f(0b100001110110, 21, 10);
+ rf(Rn, 5), rf(Rd, 0);
+ }
+
+public:
+#define INSN(NAME, sign, sz) \
+ void NAME(FloatRegister Rd, FloatRegister Rn) { \
+ _xcvtf_scalar_integer(sign, sz, Rd, Rn); \
+ }
+
+ INSN(scvtfs, SIGNED, 0);
+ INSN(scvtfd, SIGNED, 1);
+
+#undef INSN
+
+private:
+ void _xcvtf_vector_integer(sign_kind sign, SIMD_Arrangement T,
+ FloatRegister Rd, FloatRegister Rn) {
+ assert(T == T2S || T == T4S || T == T2D, "invalid arrangement");
+ starti;
+ f(0, 31), f(T & 1, 30), f(sign == SIGNED ? 0 : 1, 29);
+ f(0b011100, 28, 23), f((T >> 1) & 1, 22), f(0b100001110110, 21, 10);
+ rf(Rn, 5), rf(Rd, 0);
+ }
+
+public:
+ void scvtfv(SIMD_Arrangement T, FloatRegister Rd, FloatRegister Rn) {
+ _xcvtf_vector_integer(SIGNED, T, Rd, Rn);
+ }
+
// Floating-point compare
void float_compare(unsigned op31, unsigned type,
unsigned op, unsigned op2,
@@ -2152,21 +2243,6 @@ void mvnw(Register Rd, Register Rm,
INSN(frintzd, 0b01, 0b011);
#undef INSN
-/* SIMD extensions
- *
- * We just use FloatRegister in the following. They are exactly the same
- * as SIMD registers.
- */
- public:
-
- enum SIMD_Arrangement {
- T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D, T1Q
- };
-
- enum SIMD_RegVariant {
- B, H, S, D, Q
- };
-
private:
static short SIMD_Size_in_bytes[];
@@ -2324,6 +2400,11 @@ void mvnw(Register Rd, Register Rm,
INSN(smullv, 0, 0b110000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(umullv, 1, 0b110000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(umlalv, 1, 0b100000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
+ INSN(maxv, 0, 0b011001, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
+ INSN(minv, 0, 0b011011, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
+ INSN(cmeq, 1, 0b100011, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
+ INSN(cmgt, 0, 0b001101, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
+ INSN(cmge, 0, 0b001111, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
#undef INSN
@@ -2343,6 +2424,8 @@ void mvnw(Register Rd, Register Rm,
INSN(negr, 1, 0b100000101110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
INSN(notr, 1, 0b100000010110, 0); // accepted arrangements: T8B, T16B
INSN(addv, 0, 0b110001101110, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
+ INSN(smaxv, 0, 0b110000101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
+ INSN(sminv, 0, 0b110001101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S
INSN(cls, 0, 0b100000010010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(clz, 1, 0b100000010010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
INSN(cnt, 0, 0b100000010110, 0); // accepted arrangements: T8B, T16B
@@ -2407,6 +2490,9 @@ void mvnw(Register Rd, Register Rm,
INSN(fmls, 0, 1, 0b110011);
INSN(fmax, 0, 0, 0b111101);
INSN(fmin, 0, 1, 0b111101);
+ INSN(fcmeq, 0, 0, 0b111001);
+ INSN(fcmgt, 1, 1, 0b111001);
+ INSN(fcmge, 1, 0, 0b111001);
#undef INSN
@@ -2464,6 +2550,40 @@ void mvnw(Register Rd, Register Rm,
#undef INSN
+#define INSN(NAME, opc) \
+ void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, FloatRegister Va) { \
+ starti; \
+ assert(T == T16B, "arrangement must be T16B"); \
+ f(0b11001110, 31, 24), f(opc, 23, 21), rf(Vm, 16), f(0b0, 15, 15), rf(Va, 10), rf(Vn, 5), rf(Vd, 0); \
+ }
+
+ INSN(eor3, 0b000);
+ INSN(bcax, 0b001);
+
+#undef INSN
+
+#define INSN(NAME, opc) \
+ void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, unsigned imm) { \
+ starti; \
+ assert(T == T2D, "arrangement must be T2D"); \
+ f(0b11001110, 31, 24), f(opc, 23, 21), rf(Vm, 16), f(imm, 15, 10), rf(Vn, 5), rf(Vd, 0); \
+ }
+
+ INSN(xar, 0b100);
+
+#undef INSN
+
+#define INSN(NAME, opc) \
+ void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
+ starti; \
+ assert(T == T2D, "arrangement must be T2D"); \
+ f(0b11001110, 31, 24), f(opc, 23, 21), rf(Vm, 16), f(0b100011, 15, 10), rf(Vn, 5), rf(Vd, 0); \
+ }
+
+ INSN(rax1, 0b011);
+
+#undef INSN
+
#define INSN(NAME, opc) \
void NAME(FloatRegister Vd, FloatRegister Vn) { \
starti; \
@@ -2506,10 +2626,20 @@ void mvnw(Register Rd, Register Rm,
rf(Vn, 5), rf(Vd, 0);
}
- // (double) {a, b} -> (a + b)
- void faddpd(FloatRegister Vd, FloatRegister Vn) {
+ // (long) {a, b} -> (a + b)
+ void addpd(FloatRegister Vd, FloatRegister Vn) {
starti;
- f(0b0111111001110000110110, 31, 10);
+ f(0b0101111011110001101110, 31, 10);
+ rf(Vn, 5), rf(Vd, 0);
+ }
+
+ // (Floating-point) {a, b} -> (a + b)
+ void faddp(FloatRegister Vd, FloatRegister Vn, SIMD_RegVariant type) {
+ assert(type == D || type == S, "Wrong type for faddp");
+ starti;
+ f(0b011111100, 31, 23);
+ f(type == D ? 1 : 0, 22);
+ f(0b110000110110, 21, 10);
rf(Vn, 5), rf(Vd, 0);
}
@@ -2558,6 +2688,8 @@ void mvnw(Register Rd, Register Rm,
INSN(shl, 0, 0b010101, /* isSHR = */ false);
INSN(sshr, 0, 0b000001, /* isSHR = */ true);
INSN(ushr, 1, 0b000001, /* isSHR = */ true);
+ INSN(usra, 1, 0b000101, /* isSHR = */ true);
+  INSN(ssra, 0, 0b000101, /* isSHR = */ true);
#undef INSN
@@ -2576,29 +2708,48 @@ void mvnw(Register Rd, Register Rm,
#undef INSN
private:
- void _ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
+ void _xshll(sign_kind sign, FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
starti;
/* The encodings for the immh:immb fields (bits 22:16) are
- * 0001 xxx 8H, 8B/16b shift = xxx
+ * 0001 xxx 8H, 8B/16B shift = xxx
* 001x xxx 4S, 4H/8H shift = xxxx
* 01xx xxx 2D, 2S/4S shift = xxxxx
* 1xxx xxx RESERVED
*/
assert((Tb >> 1) + 1 == (Ta >> 1), "Incompatible arrangement");
assert((1 << ((Tb>>1)+3)) > shift, "Invalid shift value");
- f(0, 31), f(Tb & 1, 30), f(0b1011110, 29, 23), f((1 << ((Tb>>1)+3))|shift, 22, 16);
+ f(0, 31), f(Tb & 1, 30), f(sign == SIGNED ? 0 : 1, 29), f(0b011110, 28, 23);
+ f((1 << ((Tb>>1)+3))|shift, 22, 16);
f(0b101001, 15, 10), rf(Vn, 5), rf(Vd, 0);
}
public:
void ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
assert(Tb == T8B || Tb == T4H || Tb == T2S, "invalid arrangement");
- _ushll(Vd, Ta, Vn, Tb, shift);
+ _xshll(UNSIGNED, Vd, Ta, Vn, Tb, shift);
}
void ushll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
assert(Tb == T16B || Tb == T8H || Tb == T4S, "invalid arrangement");
- _ushll(Vd, Ta, Vn, Tb, shift);
+ _xshll(UNSIGNED, Vd, Ta, Vn, Tb, shift);
+ }
+
+ void uxtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) {
+ ushll(Vd, Ta, Vn, Tb, 0);
+ }
+
+ void sshll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
+ assert(Tb == T8B || Tb == T4H || Tb == T2S, "invalid arrangement");
+ _xshll(SIGNED, Vd, Ta, Vn, Tb, shift);
+ }
+
+ void sshll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
+ assert(Tb == T16B || Tb == T8H || Tb == T4S, "invalid arrangement");
+ _xshll(SIGNED, Vd, Ta, Vn, Tb, shift);
+ }
+
+ void sxtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) {
+ sshll(Vd, Ta, Vn, Tb, 0);
}
// Move from general purpose register
@@ -2649,6 +2800,15 @@ void mvnw(Register Rd, Register Rm,
f(0b100001010010, 21, 10), rf(Vn, 5), rf(Vd, 0);
}
+ void xtn(FloatRegister Vd, SIMD_Arrangement Tb, FloatRegister Vn, SIMD_Arrangement Ta) {
+ starti;
+ int size_b = (int)Tb >> 1;
+ int size_a = (int)Ta >> 1;
+ assert(size_b < 3 && size_b == size_a - 1, "Invalid size specifier");
+ f(0, 31), f(Tb & 1, 30), f(0b001110, 29, 24), f(size_b, 23, 22);
+ f(0b100001001010, 21, 10), rf(Vn, 5), rf(Vd, 0);
+ }
+
void dup(FloatRegister Vd, SIMD_Arrangement T, Register Xs)
{
starti;
@@ -3062,13 +3222,6 @@ void mvnw(Register Rd, Register Rm,
Assembler(CodeBuffer* code) : AbstractAssembler(code) {
}
- virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
- Register tmp,
- int offset) {
- ShouldNotCallThis();
- return RegisterOrConstant();
- }
-
// Stack overflow checking
virtual void bang_stack_with_offset(int offset);
diff --git a/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp
index 99469bb04c0..119bc979e0a 100644
--- a/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp
@@ -38,6 +38,19 @@
#define __ ce->masm()->
+void C1SafepointPollStub::emit_code(LIR_Assembler* ce) {
+ __ bind(_entry);
+ InternalAddress safepoint_pc(ce->masm()->pc() - ce->masm()->offset() + safepoint_offset());
+ __ adr(rscratch1, safepoint_pc);
+ __ str(rscratch1, Address(rthread, JavaThread::saved_exception_pc_offset()));
+
+ assert(SharedRuntime::polling_page_return_handler_blob() != NULL,
+ "polling page return stub not created yet");
+ address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point();
+
+ __ far_jump(RuntimeAddress(stub));
+}
+
void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
Metadata *m = _method->as_constant_ptr()->as_metadata();
diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
index b5ab058d44c..8dac1d9ebe8 100644
--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp
@@ -504,7 +504,7 @@ void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) {
}
}
-void LIR_Assembler::return_op(LIR_Opr result) {
+void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) {
assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == r0, "word returns are in r0,");
// Pop the stack before the safepoint code
@@ -514,7 +514,9 @@ void LIR_Assembler::return_op(LIR_Opr result) {
__ reserved_stack_check();
}
- __ fetch_and_read_polling_page(rscratch1, relocInfo::poll_return_type);
+ code_stub->set_safepoint_offset(__ offset());
+ __ relocate(relocInfo::poll_return_type);
+ __ safepoint_poll(*code_stub->entry(), true /* at_return */, false /* acquire */, true /* in_nmethod */);
__ ret(lr);
}
diff --git a/src/hotspot/cpu/aarch64/c1_globals_aarch64.hpp b/src/hotspot/cpu/aarch64/c1_globals_aarch64.hpp
index 4e4262d5d6d..d2520014ed1 100644
--- a/src/hotspot/cpu/aarch64/c1_globals_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/c1_globals_aarch64.hpp
@@ -34,8 +34,6 @@
#ifndef TIERED
define_pd_global(bool, BackgroundCompilation, true );
-define_pd_global(bool, UseTLAB, true );
-define_pd_global(bool, ResizeTLAB, true );
define_pd_global(bool, InlineIntrinsics, true );
define_pd_global(bool, PreferInterpreterNativeStubs, false);
define_pd_global(bool, ProfileTraps, false);
diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
index 24b32187b7c..032e9e80756 100644
--- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp
@@ -538,6 +538,70 @@ void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1,
BIND(DONE);
}
+void C2_MacroAssembler::stringL_indexof_char(Register str1, Register cnt1,
+ Register ch, Register result,
+ Register tmp1, Register tmp2, Register tmp3)
+{
+ Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE;
+ Register cnt1_neg = cnt1;
+ Register ch1 = rscratch1;
+ Register result_tmp = rscratch2;
+
+ cbz(cnt1, NOMATCH);
+
+ cmp(cnt1, (u1)8);
+ br(LT, DO1_SHORT);
+
+ orr(ch, ch, ch, LSL, 8);
+ orr(ch, ch, ch, LSL, 16);
+ orr(ch, ch, ch, LSL, 32);
+
+ sub(cnt1, cnt1, 8);
+ mov(result_tmp, cnt1);
+ lea(str1, Address(str1, cnt1));
+ sub(cnt1_neg, zr, cnt1);
+
+ mov(tmp3, 0x0101010101010101);
+
+ BIND(CH1_LOOP);
+ ldr(ch1, Address(str1, cnt1_neg));
+ eor(ch1, ch, ch1);
+ sub(tmp1, ch1, tmp3);
+ orr(tmp2, ch1, 0x7f7f7f7f7f7f7f7f);
+ bics(tmp1, tmp1, tmp2);
+ br(NE, HAS_ZERO);
+ adds(cnt1_neg, cnt1_neg, 8);
+ br(LT, CH1_LOOP);
+
+ cmp(cnt1_neg, (u1)8);
+ mov(cnt1_neg, 0);
+ br(LT, CH1_LOOP);
+ b(NOMATCH);
+
+ BIND(HAS_ZERO);
+ rev(tmp1, tmp1);
+ clz(tmp1, tmp1);
+ add(cnt1_neg, cnt1_neg, tmp1, LSR, 3);
+ b(MATCH);
+
+ BIND(DO1_SHORT);
+ mov(result_tmp, cnt1);
+ lea(str1, Address(str1, cnt1));
+ sub(cnt1_neg, zr, cnt1);
+ BIND(DO1_LOOP);
+ ldrb(ch1, Address(str1, cnt1_neg));
+ cmp(ch, ch1);
+ br(EQ, MATCH);
+ adds(cnt1_neg, cnt1_neg, 1);
+ br(LT, DO1_LOOP);
+ BIND(NOMATCH);
+ mov(result, -1);
+ b(DONE);
+ BIND(MATCH);
+ add(result, result_tmp, cnt1_neg);
+ BIND(DONE);
+}
+
// Compare strings.
void C2_MacroAssembler::string_compare(Register str1, Register str2,
Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2,
diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
index f359e35974a..b2f6226bf9e 100644
--- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp
@@ -45,4 +45,8 @@
Register ch, Register result,
Register tmp1, Register tmp2, Register tmp3);
+ void stringL_indexof_char(Register str1, Register cnt1,
+ Register ch, Register result,
+ Register tmp1, Register tmp2, Register tmp3);
+
#endif // CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP
diff --git a/src/hotspot/cpu/aarch64/c2_globals_aarch64.hpp b/src/hotspot/cpu/aarch64/c2_globals_aarch64.hpp
index 973cbe740bd..5a019eba6ae 100644
--- a/src/hotspot/cpu/aarch64/c2_globals_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/c2_globals_aarch64.hpp
@@ -33,8 +33,6 @@
// (see c2_globals.hpp). Alpha-sorted.
define_pd_global(bool, BackgroundCompilation, true);
-define_pd_global(bool, UseTLAB, true);
-define_pd_global(bool, ResizeTLAB, true);
define_pd_global(bool, CICompileOSR, true);
define_pd_global(bool, InlineIntrinsics, true);
define_pd_global(bool, PreferInterpreterNativeStubs, false);
diff --git a/src/hotspot/cpu/aarch64/c2_safepointPollStubTable_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_safepointPollStubTable_aarch64.cpp
new file mode 100644
index 00000000000..fb36406fbde
--- /dev/null
+++ b/src/hotspot/cpu/aarch64/c2_safepointPollStubTable_aarch64.cpp
@@ -0,0 +1,46 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "opto/compile.hpp"
+#include "opto/node.hpp"
+#include "opto/output.hpp"
+#include "runtime/sharedRuntime.hpp"
+
+#define __ masm.
+void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointPollStub* entry) const {
+ assert(SharedRuntime::polling_page_return_handler_blob() != NULL,
+ "polling page return stub not created yet");
+ address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point();
+
+ RuntimeAddress callback_addr(stub);
+
+ __ bind(entry->_stub_label);
+ InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset);
+ __ adr(rscratch1, safepoint_pc);
+ __ str(rscratch1, Address(rthread, JavaThread::saved_exception_pc_offset()));
+ __ far_jump(callback_addr);
+}
+#undef __
diff --git a/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp b/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp
index 75cc249cf08..2e89960778e 100644
--- a/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp
@@ -36,6 +36,9 @@
#define __ _masm.
address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) {
+ precond(cbuf.stubs()->start() != badAddress);
+ precond(cbuf.stubs()->end() != badAddress);
+
// Stub is fixed up when the corresponding call is converted from
// calling compiled code to calling interpreted code.
// mov rmethod, 0
diff --git a/src/hotspot/cpu/aarch64/frame_aarch64.cpp b/src/hotspot/cpu/aarch64/frame_aarch64.cpp
index 46261c70dbe..15c5e16f380 100644
--- a/src/hotspot/cpu/aarch64/frame_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/frame_aarch64.cpp
@@ -37,6 +37,7 @@
#include "runtime/monitorChunk.hpp"
#include "runtime/os.inline.hpp"
#include "runtime/signature.hpp"
+#include "runtime/stackWatermarkSet.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/stubRoutines.hpp"
#include "vmreg_aarch64.inline.hpp"
@@ -476,8 +477,8 @@ frame frame::sender_for_compiled_frame(RegisterMap* map) const {
}
//------------------------------------------------------------------------------
-// frame::sender
-frame frame::sender(RegisterMap* map) const {
+// frame::sender_raw
+frame frame::sender_raw(RegisterMap* map) const {
// Default is we done have to follow them. The sender_for_xxx will
// update it accordingly
map->set_include_argument_oops(false);
@@ -499,6 +500,16 @@ frame frame::sender(RegisterMap* map) const {
return frame(sender_sp(), link(), sender_pc());
}
+frame frame::sender(RegisterMap* map) const {
+ frame result = sender_raw(map);
+
+ if (map->process_frames()) {
+ StackWatermarkSet::on_iteration(map->thread(), result);
+ }
+
+ return result;
+}
+
bool frame::is_interpreted_frame_valid(JavaThread* thread) const {
assert(is_interpreted_frame(), "Not an interpreted frame");
// These are reasonable sanity checks
@@ -651,11 +662,12 @@ intptr_t* frame::real_fp() const {
#undef DESCRIBE_FP_OFFSET
-#define DESCRIBE_FP_OFFSET(name) \
- { \
- uintptr_t *p = (uintptr_t *)fp; \
- printf("0x%016lx 0x%016lx %s\n", (uintptr_t)(p + frame::name##_offset), \
- p[frame::name##_offset], #name); \
+#define DESCRIBE_FP_OFFSET(name) \
+ { \
+ uintptr_t *p = (uintptr_t *)fp; \
+ printf(INTPTR_FORMAT " " INTPTR_FORMAT " %s\n", \
+ (uintptr_t)(p + frame::name##_offset), \
+ p[frame::name##_offset], #name); \
}
static THREAD_LOCAL uintptr_t nextfp;
diff --git a/src/hotspot/cpu/aarch64/frame_aarch64.hpp b/src/hotspot/cpu/aarch64/frame_aarch64.hpp
index 6c639a05961..e2490d28611 100644
--- a/src/hotspot/cpu/aarch64/frame_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/frame_aarch64.hpp
@@ -161,4 +161,7 @@
static jint interpreter_frame_expression_stack_direction() { return -1; }
+ // returns the sending frame, without applying any barriers
+ frame sender_raw(RegisterMap* map) const;
+
#endif // CPU_AARCH64_FRAME_AARCH64_HPP
diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/c1/shenandoahBarrierSetC1_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shenandoah/c1/shenandoahBarrierSetC1_aarch64.cpp
index b3530509b03..db9c7577e60 100644
--- a/src/hotspot/cpu/aarch64/gc/shenandoah/c1/shenandoahBarrierSetC1_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/gc/shenandoah/c1/shenandoahBarrierSetC1_aarch64.cpp
@@ -109,7 +109,7 @@ LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRIt
__ xchg(access.resolved_addr(), value_opr, result, tmp);
if (access.is_oop()) {
- result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), false);
+ result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), ShenandoahBarrierSet::AccessKind::NORMAL);
LIR_Opr tmp = gen->new_register(type);
__ move(result, tmp);
result = tmp;
diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp
index c2d53df4f67..840464b251f 100644
--- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp
@@ -43,8 +43,6 @@
#define __ masm->
-address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL;
-
void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
Register src, Register dst, Register count, RegSet saved_regs) {
if (is_oop) {
@@ -227,18 +225,18 @@ void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssemb
}
}
-void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr) {
+void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, ShenandoahBarrierSet::AccessKind kind) {
assert(ShenandoahLoadRefBarrier, "Should be enabled");
assert(dst != rscratch2, "need rscratch2");
assert_different_registers(load_addr.base(), load_addr.index(), rscratch1, rscratch2);
- Label done;
+ Label heap_stable, not_cset;
__ enter();
Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
__ ldrb(rscratch2, gc_state);
// Check for heap stability
- __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, done);
+ __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, heap_stable);
// use r1 for load address
Register result_dst = dst;
@@ -253,51 +251,48 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembl
__ lea(r1, load_addr);
__ mov(r0, dst);
- __ far_call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb())));
-
- __ mov(result_dst, r0);
- __ pop(to_save, sp);
-
- __ bind(done);
- __ leave();
-}
-
-void ShenandoahBarrierSetAssembler::load_reference_barrier_native(MacroAssembler* masm, Register dst, Address load_addr) {
- if (!ShenandoahLoadRefBarrier) {
- return;
+ // Test for in-cset
+ if (kind == ShenandoahBarrierSet::AccessKind::NORMAL) {
+ __ mov(rscratch2, ShenandoahHeap::in_cset_fast_test_addr());
+ __ lsr(rscratch1, r0, ShenandoahHeapRegion::region_size_bytes_shift_jint());
+ __ ldrb(rscratch2, Address(rscratch2, rscratch1));
+ __ tbz(rscratch2, 0, not_cset);
}
- assert(dst != rscratch2, "need rscratch2");
-
- Label is_null;
- Label done;
-
- __ block_comment("load_reference_barrier_native { ");
-
- __ cbz(dst, is_null);
-
- __ enter();
-
- Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
- __ ldrb(rscratch2, gc_state);
-
- // Check for heap in evacuation phase
- __ tbz(rscratch2, ShenandoahHeap::EVACUATION_BITPOS, done);
-
- __ mov(rscratch2, dst);
__ push_call_clobbered_registers();
- __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native));
- __ lea(r1, load_addr);
- __ mov(r0, rscratch2);
+ switch (kind) {
+ case ShenandoahBarrierSet::AccessKind::NORMAL:
+ if (UseCompressedOops) {
+ __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow));
+ } else {
+ __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier));
+ }
+ break;
+ case ShenandoahBarrierSet::AccessKind::WEAK:
+ if (UseCompressedOops) {
+ __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow));
+ } else {
+ __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak));
+ }
+ break;
+ case ShenandoahBarrierSet::AccessKind::NATIVE:
+ __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak));
+ break;
+ default:
+ ShouldNotReachHere();
+ }
__ blr(lr);
- __ mov(rscratch2, r0);
+ __ mov(rscratch1, r0);
__ pop_call_clobbered_registers();
- __ mov(dst, rscratch2);
+ __ mov(r0, rscratch1);
- __ bind(done);
+ __ bind(not_cset);
+
+ __ mov(result_dst, r0);
+ __ pop(to_save, sp);
+
+ __ bind(heap_stable);
__ leave();
- __ bind(is_null);
- __ block_comment("} load_reference_barrier_native");
}
void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Register dst, Register tmp) {
@@ -308,15 +303,6 @@ void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Regis
}
}
-void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr) {
- if (ShenandoahLoadRefBarrier) {
- Label is_null;
- __ cbz(dst, is_null);
- load_reference_barrier_not_null(masm, dst, load_addr);
- __ bind(is_null);
- }
-}
-
//
// Arguments:
//
@@ -352,11 +338,8 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet d
BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
- if (ShenandoahBarrierSet::use_load_reference_barrier_native(decorators, type)) {
- load_reference_barrier_native(masm, dst, src);
- } else {
- load_reference_barrier(masm, dst, src);
- }
+ ShenandoahBarrierSet::AccessKind kind = ShenandoahBarrierSet::access_kind(decorators, type);
+ load_reference_barrier(masm, dst, src, kind);
if (dst != result_dst) {
__ mov(result_dst, dst);
@@ -477,7 +460,8 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
bool is_narrow = UseCompressedOops;
Assembler::operand_size size = is_narrow ? Assembler::word : Assembler::xword;
- assert_different_registers(addr, expected, new_val, tmp1, tmp2);
+ assert_different_registers(addr, expected, tmp1, tmp2);
+ assert_different_registers(addr, new_val, tmp1, tmp2);
Label step4, done;
@@ -669,10 +653,18 @@ void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assemble
__ bind(slow_path);
ce->store_parameter(res, 0);
ce->store_parameter(addr, 1);
- if (stub->is_native()) {
- __ far_call(RuntimeAddress(bs->load_reference_barrier_native_rt_code_blob()->code_begin()));
- } else {
- __ far_call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin()));
+ switch (stub->kind()) {
+ case ShenandoahBarrierSet::AccessKind::NORMAL:
+ __ far_call(RuntimeAddress(bs->load_reference_barrier_normal_rt_code_blob()->code_begin()));
+ break;
+ case ShenandoahBarrierSet::AccessKind::WEAK:
+ __ far_call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin()));
+ break;
+ case ShenandoahBarrierSet::AccessKind::NATIVE:
+ __ far_call(RuntimeAddress(bs->load_reference_barrier_native_rt_code_blob()->code_begin()));
+ break;
+ default:
+ ShouldNotReachHere();
}
__ b(*stub->continuation());
@@ -728,19 +720,33 @@ void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAss
__ epilogue();
}
-void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, bool is_native) {
+void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, ShenandoahBarrierSet::AccessKind kind) {
__ prologue("shenandoah_load_reference_barrier", false);
// arg0 : object to be resolved
__ push_call_clobbered_registers();
__ load_parameter(0, r0);
__ load_parameter(1, r1);
- if (is_native) {
- __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native));
- } else if (UseCompressedOops) {
- __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow));
- } else {
- __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier));
+ switch (kind) {
+ case ShenandoahBarrierSet::AccessKind::NORMAL:
+ if (UseCompressedOops) {
+ __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow));
+ } else {
+ __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier));
+ }
+ break;
+ case ShenandoahBarrierSet::AccessKind::WEAK:
+ if (UseCompressedOops) {
+ __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow));
+ } else {
+ __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak));
+ }
+ break;
+ case ShenandoahBarrierSet::AccessKind::NATIVE:
+ __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak));
+ break;
+ default:
+ ShouldNotReachHere();
}
__ blr(lr);
__ mov(rscratch1, r0);
@@ -753,67 +759,3 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s
#undef __
#endif // COMPILER1
-
-address ShenandoahBarrierSetAssembler::shenandoah_lrb() {
- assert(_shenandoah_lrb != NULL, "need load reference barrier stub");
- return _shenandoah_lrb;
-}
-
-#define __ cgen->assembler()->
-
-// Shenandoah load reference barrier.
-//
-// Input:
-// r0: OOP to evacuate. Not null.
-// r1: load address
-//
-// Output:
-// r0: Pointer to evacuated OOP.
-//
-// Trash rscratch1, rscratch2. Preserve everything else.
-address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) {
-
- __ align(6);
- StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
- address start = __ pc();
-
- Label slow_path;
- __ mov(rscratch2, ShenandoahHeap::in_cset_fast_test_addr());
- __ lsr(rscratch1, r0, ShenandoahHeapRegion::region_size_bytes_shift_jint());
- __ ldrb(rscratch2, Address(rscratch2, rscratch1));
- __ tbnz(rscratch2, 0, slow_path);
- __ ret(lr);
-
- __ bind(slow_path);
- __ enter(); // required for proper stackwalking of RuntimeStub frame
-
- __ push_call_clobbered_registers();
-
- if (UseCompressedOops) {
- __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow));
- } else {
- __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier));
- }
- __ blr(lr);
- __ mov(rscratch1, r0);
- __ pop_call_clobbered_registers();
- __ mov(r0, rscratch1);
-
- __ leave(); // required for proper stackwalking of RuntimeStub frame
- __ ret(lr);
-
- return start;
-}
-
-#undef __
-
-void ShenandoahBarrierSetAssembler::barrier_stubs_init() {
- if (ShenandoahLoadRefBarrier) {
- int stub_code_size = 2048;
- ResourceMark rm;
- BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size);
- CodeBuffer buf(bb);
- StubCodeGenerator cgen(&buf);
- _shenandoah_lrb = generate_shenandoah_lrb(&cgen);
- }
-}
diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp
index 88aa9a2b95f..60303725fd8 100644
--- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp
@@ -27,6 +27,7 @@
#include "asm/macroAssembler.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
+#include "gc/shenandoah/shenandoahBarrierSet.hpp"
#ifdef COMPILER1
class LIR_Assembler;
class ShenandoahPreBarrierStub;
@@ -38,8 +39,6 @@ class StubCodeGenerator;
class ShenandoahBarrierSetAssembler: public BarrierSetAssembler {
private:
- static address _shenandoah_lrb;
-
void satb_write_barrier_pre(MacroAssembler* masm,
Register obj,
Register pre_val,
@@ -57,14 +56,9 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler {
void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg);
void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg);
- void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr);
- void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr);
- void load_reference_barrier_native(MacroAssembler* masm, Register dst, Address load_addr);
-
- address generate_shenandoah_lrb(StubCodeGenerator* cgen);
+ void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, ShenandoahBarrierSet::AccessKind kind);
public:
- static address shenandoah_lrb();
void storeval_barrier(MacroAssembler* masm, Register dst, Register tmp);
@@ -72,7 +66,7 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler {
void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub);
void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub);
void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
- void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, bool is_native);
+ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, ShenandoahBarrierSet::AccessKind kind);
#endif
virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
@@ -85,8 +79,6 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler {
Register obj, Register tmp, Label& slowpath);
void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val,
bool acquire, bool release, bool is_cae, Register result);
-
- virtual void barrier_stubs_init();
};
#endif // CPU_AARCH64_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_AARCH64_HPP
diff --git a/src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.hpp
index 35e261fa7ae..3187808b65a 100644
--- a/src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.hpp
@@ -24,10 +24,9 @@
#ifndef CPU_AARCH64_GC_Z_ZGLOBALS_AARCH64_HPP
#define CPU_AARCH64_GC_Z_ZGLOBALS_AARCH64_HPP
-const size_t ZPlatformGranuleSizeShift = 21; // 2MB
-const size_t ZPlatformHeapViews = 3;
-const size_t ZPlatformNMethodDisarmedOffset = 4;
-const size_t ZPlatformCacheLineSize = 64;
+const size_t ZPlatformGranuleSizeShift = 21; // 2MB
+const size_t ZPlatformHeapViews = 3;
+const size_t ZPlatformCacheLineSize = 64;
size_t ZPlatformAddressOffsetBits();
size_t ZPlatformAddressMetadataShift();
diff --git a/src/hotspot/cpu/aarch64/globals_aarch64.hpp b/src/hotspot/cpu/aarch64/globals_aarch64.hpp
index 294b6b13495..9ad1360fa91 100644
--- a/src/hotspot/cpu/aarch64/globals_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/globals_aarch64.hpp
@@ -93,6 +93,8 @@ define_pd_global(intx, InlineSmallCode, 1000);
"Use SIMD instructions in generated array equals code") \
product(bool, UseSimpleArrayEquals, false, \
"Use simpliest and shortest implementation for array equals") \
+ product(bool, UseSIMDForBigIntegerShiftIntrinsics, true, \
+ "Use SIMD instructions for left/right shift of BigInteger") \
product(bool, AvoidUnalignedAccesses, false, \
"Avoid generating unaligned memory accesses") \
product(bool, UseLSE, false, \
diff --git a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp
index 1d635429336..09632154630 100644
--- a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp
@@ -473,7 +473,7 @@ void InterpreterMacroAssembler::dispatch_base(TosState state,
if (needs_thread_local_poll) {
NOT_PRODUCT(block_comment("Thread-local Safepoint poll"));
- ldr(rscratch2, Address(rthread, Thread::polling_page_offset()));
+ ldr(rscratch2, Address(rthread, Thread::polling_word_offset()));
tbnz(rscratch2, exact_log2(SafepointMechanism::poll_bit()), safepoint);
}
@@ -521,6 +521,7 @@ void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) {
// remove activation
//
+// Apply stack watermark barrier.
// Unlock the receiver if this is a synchronized method.
// Unlock any Java monitors from syncronized blocks.
// Remove the activation from the stack.
@@ -541,6 +542,21 @@ void InterpreterMacroAssembler::remove_activation(
// result check if synchronized method
Label unlocked, unlock, no_unlock;
+  // The below poll is for the stack watermark barrier. It allows frames to be fixed up
+  // lazily; a return into a part of the stack that has not yet been processed (and is
+  // therefore not safe to use) will call InterpreterRuntime::at_unwind.
+ Label slow_path;
+ Label fast_path;
+ safepoint_poll(slow_path, true /* at_return */, false /* acquire */, false /* in_nmethod */);
+ br(Assembler::AL, fast_path);
+ bind(slow_path);
+ push(state);
+ set_last_Java_frame(esp, rfp, (address)pc(), rscratch1);
+ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), rthread);
+ reset_last_Java_frame(true);
+ pop(state);
+ bind(fast_path);
+
// get the value of _do_not_unlock_if_synchronized into r3
const Address do_not_unlock_if_synchronized(rthread,
in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()));
diff --git a/src/hotspot/cpu/aarch64/jvmciCodeInstaller_aarch64.cpp b/src/hotspot/cpu/aarch64/jvmciCodeInstaller_aarch64.cpp
index 3156b4b8e83..f41d79e1021 100644
--- a/src/hotspot/cpu/aarch64/jvmciCodeInstaller_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/jvmciCodeInstaller_aarch64.cpp
@@ -21,8 +21,9 @@
* questions.
*/
- #include "jvmci/jvmci.hpp"
- #include "jvmci/jvmciCodeInstaller.hpp"
+#include "precompiled.hpp"
+#include "jvmci/jvmci.hpp"
+#include "jvmci/jvmciCodeInstaller.hpp"
#include "jvmci/jvmciRuntime.hpp"
#include "jvmci/jvmciCompilerToVM.hpp"
#include "jvmci/jvmciJavaClasses.hpp"
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
index 81fd87614e5..005ad3f5930 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
@@ -288,27 +288,21 @@ address MacroAssembler::target_addr_for_insn(address insn_addr, unsigned insn) {
return address(((uint64_t)insn_addr + (offset << 2)));
}
-void MacroAssembler::safepoint_poll(Label& slow_path) {
- ldr(rscratch1, Address(rthread, Thread::polling_page_offset()));
- tbnz(rscratch1, exact_log2(SafepointMechanism::poll_bit()), slow_path);
-}
-
-// Just like safepoint_poll, but use an acquiring load for thread-
-// local polling.
-//
-// We need an acquire here to ensure that any subsequent load of the
-// global SafepointSynchronize::_state flag is ordered after this load
-// of the local Thread::_polling page. We don't want this poll to
-// return false (i.e. not safepointing) and a later poll of the global
-// SafepointSynchronize::_state spuriously to return true.
-//
-// This is to avoid a race when we're in a native->Java transition
-// racing the code which wakes up from a safepoint.
-//
-void MacroAssembler::safepoint_poll_acquire(Label& slow_path) {
- lea(rscratch1, Address(rthread, Thread::polling_page_offset()));
- ldar(rscratch1, rscratch1);
- tbnz(rscratch1, exact_log2(SafepointMechanism::poll_bit()), slow_path);
+void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) {
+ if (acquire) {
+ lea(rscratch1, Address(rthread, Thread::polling_word_offset()));
+ ldar(rscratch1, rscratch1);
+ } else {
+ ldr(rscratch1, Address(rthread, Thread::polling_word_offset()));
+ }
+ if (at_return) {
+ // Note that when in_nmethod is set, the stack pointer is incremented before the poll. Therefore,
+ // we may safely use the sp instead to perform the stack watermark check.
+ cmp(in_nmethod ? sp : rfp, rscratch1);
+ br(Assembler::HI, slow_path);
+ } else {
+ tbnz(rscratch1, exact_log2(SafepointMechanism::poll_bit()), slow_path);
+ }
}
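For reference, the two shapes this poll can take, written out in plain C++ (illustration only, not part of this patch; the poll-bit value is an assumption here):

    #include <cstdint>

    // Sketch of the condition the emitted code checks against the per-thread
    // polling word, which doubles as the stack watermark for returns.
    inline bool take_slow_path(uintptr_t polling_word, uintptr_t frame_ptr,
                               bool at_return, uintptr_t poll_bit = 1) {
      if (at_return) {
        // cmp(sp-or-rfp, polling_word); br(HI, slow_path): the returning frame
        // lies above the watermark, i.e. in a region the GC has not processed yet.
        return frame_ptr > polling_word;
      }
      // tbnz on the poll bit: an ordinary armed safepoint poll.
      return (polling_word & poll_bit) != 0;
    }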
void MacroAssembler::reset_last_Java_frame(bool clear_fp) {
@@ -711,7 +705,7 @@ void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, in
// Maybe emit a call via a trampoline. If the code cache is small
// trampolines won't be emitted.
-address MacroAssembler::trampoline_call(Address entry, CodeBuffer *cbuf) {
+address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) {
assert(JavaThread::current()->is_Compiler_thread(), "just checking");
assert(entry.rspec().type() == relocInfo::runtime_call_type
|| entry.rspec().type() == relocInfo::opt_virtual_call_type
@@ -732,6 +726,7 @@ address MacroAssembler::trampoline_call(Address entry, CodeBuffer *cbuf) {
if (!in_scratch_emit_size) {
address stub = emit_trampoline_stub(offset(), entry.target());
if (stub == NULL) {
+ postcond(pc() == badAddress);
return NULL; // CodeCache is full
}
}
@@ -745,6 +740,7 @@ address MacroAssembler::trampoline_call(Address entry, CodeBuffer *cbuf) {
bl(pc());
}
// just need to return a non-null address
+ postcond(pc() != badAddress);
return pc();
}
@@ -938,23 +934,6 @@ void MacroAssembler::check_and_handle_earlyret(Register java_thread) { }
void MacroAssembler::check_and_handle_popframe(Register java_thread) { }
-
-RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
- Register tmp,
- int offset) {
- intptr_t value = *delayed_value_addr;
- if (value != 0)
- return RegisterOrConstant(value + offset);
-
- // load indirectly to solve generation ordering problem
- ldr(tmp, ExternalAddress((address) delayed_value_addr));
-
- if (offset != 0)
- add(tmp, tmp, offset);
-
- return RegisterOrConstant(tmp);
-}
-
// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <intf_klass, itable_index>.
// The receiver klass is in recv_klass.
@@ -1834,7 +1813,7 @@ bool MacroAssembler::try_merge_ldst(Register rt, const Address &adr, size_t size
return true;
} else {
assert(size_in_bytes == 8 || size_in_bytes == 4, "only 8 bytes or 4 bytes load/store is supported.");
- const unsigned mask = size_in_bytes - 1;
+ const uint64_t mask = size_in_bytes - 1;
if (adr.getMode() == Address::base_plus_offset &&
(adr.offset() & mask) == 0) { // only supports base_plus_offset.
code()->set_last_insn(pc());
@@ -2898,7 +2877,7 @@ void MacroAssembler::merge_ldst(Register rt,
// Overwrite previous generated binary.
code_section()->set_end(prev);
- const int sz = prev_ldst->size_in_bytes();
+ const size_t sz = prev_ldst->size_in_bytes();
assert(sz == 8 || sz == 4, "only supports 64/32bit merging.");
if (!is_store) {
BLOCK_COMMENT("merged ldr pair");
@@ -4405,13 +4384,6 @@ void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype)
ldr(dest, Address(rthread, Thread::polling_page_offset()));
}
-// Move the address of the polling page into r, then read the polling
-// page.
-address MacroAssembler::fetch_and_read_polling_page(Register r, relocInfo::relocType rtype) {
- get_polling_page(r, rtype);
- return read_polling_page(r, rtype);
-}
-
// Read the polling page. The address of the polling page must
// already be in r.
address MacroAssembler::read_polling_page(Register r, relocInfo::relocType rtype) {
@@ -4503,7 +4475,7 @@ void MacroAssembler::remove_frame(int framesize) {
// This method checks if provided byte array contains byte with highest bit set.
-void MacroAssembler::has_negatives(Register ary1, Register len, Register result) {
+address MacroAssembler::has_negatives(Register ary1, Register len, Register result) {
// Simple and most common case of aligned small array which is not at the
// end of memory page is placed here. All other cases are in stub.
Label LOOP, END, STUB, STUB_LONG, SET_RESULT, DONE;
@@ -4540,27 +4512,38 @@ void MacroAssembler::has_negatives(Register ary1, Register len, Register result)
b(SET_RESULT);
BIND(STUB);
- RuntimeAddress has_neg = RuntimeAddress(StubRoutines::aarch64::has_negatives());
+ RuntimeAddress has_neg = RuntimeAddress(StubRoutines::aarch64::has_negatives());
assert(has_neg.target() != NULL, "has_negatives stub has not been generated");
- trampoline_call(has_neg);
+ address tpc1 = trampoline_call(has_neg);
+ if (tpc1 == NULL) {
+ DEBUG_ONLY(reset_labels(STUB_LONG, SET_RESULT, DONE));
+ postcond(pc() == badAddress);
+ return NULL;
+ }
b(DONE);
BIND(STUB_LONG);
- RuntimeAddress has_neg_long = RuntimeAddress(
- StubRoutines::aarch64::has_negatives_long());
+ RuntimeAddress has_neg_long = RuntimeAddress(StubRoutines::aarch64::has_negatives_long());
assert(has_neg_long.target() != NULL, "has_negatives stub has not been generated");
- trampoline_call(has_neg_long);
+ address tpc2 = trampoline_call(has_neg_long);
+ if (tpc2 == NULL) {
+ DEBUG_ONLY(reset_labels(SET_RESULT, DONE));
+ postcond(pc() == badAddress);
+ return NULL;
+ }
b(DONE);
BIND(SET_RESULT);
cset(result, NE); // set true or false
BIND(DONE);
+ postcond(pc() != badAddress);
+ return pc();
}
-void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
- Register tmp4, Register tmp5, Register result,
- Register cnt1, int elem_size) {
+address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
+ Register tmp4, Register tmp5, Register result,
+ Register cnt1, int elem_size) {
Label DONE, SAME;
Register tmp1 = rscratch1;
Register tmp2 = rscratch2;
@@ -4664,7 +4647,7 @@ void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
}
}
} else {
- Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB, EARLY_OUT,
+ Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB,
CSET_EQ, LAST_CHECK;
mov(result, false);
cbz(a1, DONE);
@@ -4723,10 +4706,14 @@ void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
cbnz(tmp5, DONE);
RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals());
assert(stub.target() != NULL, "array_equals_long stub has not been generated");
- trampoline_call(stub);
+ address tpc = trampoline_call(stub);
+ if (tpc == NULL) {
+ DEBUG_ONLY(reset_labels(SHORT, LAST_CHECK, CSET_EQ, SAME, DONE));
+ postcond(pc() == badAddress);
+ return NULL;
+ }
b(DONE);
- bind(EARLY_OUT);
// (a1 != null && a2 == null) || (a1 != null && a2 != null && a1 == a2)
// so, if a2 == null => return false(0), else return true, so we can return a2
mov(result, a2);
@@ -4753,6 +4740,8 @@ void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3,
bind(DONE);
BLOCK_COMMENT("} array_equals");
+ postcond(pc() != badAddress);
+ return pc();
}
// Compare Strings
@@ -4860,7 +4849,7 @@ const int MacroAssembler::zero_words_block_size = 8;
// cnt: Count in HeapWords.
//
// ptr, cnt, rscratch1, and rscratch2 are clobbered.
-void MacroAssembler::zero_words(Register ptr, Register cnt)
+address MacroAssembler::zero_words(Register ptr, Register cnt)
{
assert(is_power_of_2(zero_words_block_size), "adjust this");
assert(ptr == r10 && cnt == r11, "mismatch in register usage");
@@ -4870,10 +4859,15 @@ void MacroAssembler::zero_words(Register ptr, Register cnt)
Label around;
br(LO, around);
{
- RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::aarch64::zero_blocks());
+ RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::aarch64::zero_blocks());
assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated");
if (StubRoutines::aarch64::complete()) {
- trampoline_call(zero_blocks);
+ address tpc = trampoline_call(zero_blocks);
+ if (tpc == NULL) {
+ DEBUG_ONLY(reset_labels(around));
+ postcond(pc() == badAddress);
+ return NULL;
+ }
} else {
bl(zero_blocks);
}
@@ -4894,6 +4888,8 @@ void MacroAssembler::zero_words(Register ptr, Register cnt)
bind(l);
}
BLOCK_COMMENT("} zero_words");
+ postcond(pc() != badAddress);
+ return pc();
}
// base: Address of a buffer to be zeroed, 8 bytes aligned.
@@ -4906,14 +4902,15 @@ void MacroAssembler::zero_words(Register base, uint64_t cnt)
if (i) str(zr, Address(base));
if (cnt <= SmallArraySize / BytesPerLong) {
- for (; i < (int)cnt; i += 2)
+ for (; i < (int)cnt; i += 2) {
stp(zr, zr, Address(base, i * wordSize));
+ }
} else {
const int unroll = 4; // Number of stp(zr, zr) instructions we'll unroll
int remainder = cnt % (2 * unroll);
- for (; i < remainder; i += 2)
+ for (; i < remainder; i += 2) {
stp(zr, zr, Address(base, i * wordSize));
-
+ }
Label loop;
Register cnt_reg = rscratch1;
Register loop_base = rscratch2;
@@ -4923,8 +4920,9 @@ void MacroAssembler::zero_words(Register base, uint64_t cnt)
add(loop_base, base, (remainder - 2) * wordSize);
bind(loop);
sub(cnt_reg, cnt_reg, 2 * unroll);
- for (i = 1; i < unroll; i++)
+ for (i = 1; i < unroll; i++) {
stp(zr, zr, Address(loop_base, 2 * i * wordSize));
+ }
stp(zr, zr, Address(pre(loop_base, 2 * unroll * wordSize)));
cbnz(cnt_reg, loop);
}
@@ -5140,9 +5138,9 @@ void MacroAssembler::encode_iso_array(Register src, Register dst,
// Inflate byte[] array to char[].
-void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
- FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
- Register tmp4) {
+address MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
+ FloatRegister vtmp1, FloatRegister vtmp2,
+ FloatRegister vtmp3, Register tmp4) {
Label big, done, after_init, to_stub;
assert_different_registers(src, dst, len, tmp4, rscratch1);
@@ -5179,9 +5177,14 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len
if (SoftwarePrefetchHintDistance >= 0) {
bind(to_stub);
- RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_byte_array_inflate());
+ RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_byte_array_inflate());
assert(stub.target() != NULL, "large_byte_array_inflate stub has not been generated");
- trampoline_call(stub);
+ address tpc = trampoline_call(stub);
+ if (tpc == NULL) {
+ DEBUG_ONLY(reset_labels(big, done));
+ postcond(pc() == badAddress);
+ return NULL;
+ }
b(after_init);
}
@@ -5235,6 +5238,8 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len
strq(vtmp3, Address(dst, -16));
bind(done);
+ postcond(pc() != badAddress);
+ return pc();
}
// Compress char[] array to byte[].
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
index 998f1afc1c7..1d597fb429c 100644
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
@@ -102,8 +102,7 @@ class MacroAssembler: public Assembler {
virtual void check_and_handle_popframe(Register java_thread);
virtual void check_and_handle_earlyret(Register java_thread);
- void safepoint_poll(Label& slow_path);
- void safepoint_poll_acquire(Label& slow_path);
+ void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod);
// Biased locking support
// lock_reg and obj_reg must be loaded up with the appropriate values.
@@ -1014,10 +1013,6 @@ class MacroAssembler: public Assembler {
// Check for reserved stack access in method being exited (for JIT)
void reserved_stack_check();
- virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
- Register tmp,
- int offset);
-
// Arithmetics
void addptr(const Address &dst, int32_t src);
@@ -1063,10 +1058,24 @@ class MacroAssembler: public Assembler {
private:
void compare_eq(Register rn, Register rm, enum operand_size size);
+#ifdef ASSERT
+ // Template short-hand support to clean-up after a failed call to trampoline
+ // call generation (see trampoline_call() below), when a set of Labels must
+ // be reset (before returning).
+  template<typename Label, typename... More>
+ void reset_labels(Label &lbl, More&... more) {
+ lbl.reset(); reset_labels(more...);
+ }
+  template<typename Label>
+ void reset_labels(Label &lbl) {
+ lbl.reset();
+ }
+#endif
+
public:
// Calls
- address trampoline_call(Address entry, CodeBuffer *cbuf = NULL);
+ address trampoline_call(Address entry, CodeBuffer* cbuf = NULL);
static bool far_branches() {
return ReservedCodeCacheSize > branch_range || UseAOT;
@@ -1231,7 +1240,6 @@ class MacroAssembler: public Assembler {
address read_polling_page(Register r, relocInfo::relocType rtype);
void get_polling_page(Register dest, relocInfo::relocType rtype);
- address fetch_and_read_polling_page(Register r, relocInfo::relocType rtype);
// CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic.
void update_byte_crc32(Register crc, Register val, Register table);
@@ -1239,24 +1247,24 @@ class MacroAssembler: public Assembler {
Register table0, Register table1, Register table2, Register table3,
bool upper = false);
- void has_negatives(Register ary1, Register len, Register result);
+ address has_negatives(Register ary1, Register len, Register result);
- void arrays_equals(Register a1, Register a2, Register result, Register cnt1,
- Register tmp1, Register tmp2, Register tmp3, int elem_size);
+ address arrays_equals(Register a1, Register a2, Register result, Register cnt1,
+ Register tmp1, Register tmp2, Register tmp3, int elem_size);
void string_equals(Register a1, Register a2, Register result, Register cnt1,
int elem_size);
void fill_words(Register base, Register cnt, Register value);
void zero_words(Register base, uint64_t cnt);
- void zero_words(Register ptr, Register cnt);
+ address zero_words(Register ptr, Register cnt);
void zero_dcache_blocks(Register base, Register cnt);
static const int zero_words_block_size;
- void byte_array_inflate(Register src, Register dst, Register len,
- FloatRegister vtmp1, FloatRegister vtmp2,
- FloatRegister vtmp3, Register tmp4);
+ address byte_array_inflate(Register src, Register dst, Register len,
+ FloatRegister vtmp1, FloatRegister vtmp2,
+ FloatRegister vtmp3, Register tmp4);
void char_array_compress(Register src, Register dst, Register len,
FloatRegister tmp1Reg, FloatRegister tmp2Reg,
diff --git a/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp b/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp
index d40c533a82c..dcf87913a88 100644
--- a/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp
@@ -655,7 +655,7 @@ class NativeLdSt : public NativeInstruction {
return 0;
}
}
- size_t size_in_bytes() { return 1 << size(); }
+ size_t size_in_bytes() { return 1ULL << size(); }
bool is_not_pre_post_index() { return (is_ldst_ur() || is_ldst_unsigned_offset()); }
bool is_load() {
assert(Instruction_aarch64::extract(uint_at(0), 23, 22) == 0b01 ||
diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
index d556d957e6b..92a07a84d2a 100644
--- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp
@@ -38,6 +38,7 @@
#include "nativeInst_aarch64.hpp"
#include "oops/compiledICHolder.hpp"
#include "oops/klass.inline.hpp"
+#include "prims/methodHandles.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/vframeArray.hpp"
@@ -1080,20 +1081,6 @@ static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMR
}
}
-
-// Check GCLocker::needs_gc and enter the runtime if it's true. This
-// keeps a new JNI critical region from starting until a GC has been
-// forced. Save down any oops in registers and describe them in an
-// OopMap.
-static void check_needs_gc_for_critical_native(MacroAssembler* masm,
- int stack_slots,
- int total_c_args,
- int total_in_args,
- int arg_save_area,
- OopMapSet* oop_maps,
- VMRegPair* in_regs,
- BasicType* in_sig_bt) { Unimplemented(); }
-
// Unpack an array argument into a pointer to the body and the length
// if the array is non-null, otherwise pass 0 for both.
static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) { Unimplemented(); }
@@ -1259,25 +1246,12 @@ static void gen_special_dispatch(MacroAssembler* masm,
// Critical native functions are a shorthand for the use of
// GetPrimtiveArrayCritical and disallow the use of any other JNI
// functions. The wrapper is expected to unpack the arguments before
-// passing them to the callee and perform checks before and after the
-// native call to ensure that they GCLocker
-// lock_critical/unlock_critical semantics are followed. Some other
-// parts of JNI setup are skipped like the tear down of the JNI handle
+// passing them to the callee. Critical native functions leave the state _in_Java,
+// since they block out GC.
+// Some other parts of JNI setup are skipped like the tear down of the JNI handle
// block and the check for pending exceptions it's impossible for them
// to be thrown.
//
-// They are roughly structured like this:
-// if (GCLocker::needs_gc())
-// SharedRuntime::block_for_jni_critical();
-// tranistion to thread_in_native
-// unpack arrray arguments and call native entry point
-// check for safepoint in progress
-// check if any thread suspend flags are set
-// call into JVM and possible unlock the JNI critical
-// if a GC was suppressed while in the critical native.
-// transition back to thread_in_Java
-// return to caller
-//
nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
const methodHandle& method,
int compile_id,
@@ -1524,7 +1498,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
// Generate stack overflow check
if (UseStackBanging) {
- __ bang_stack_with_offset(StackOverflow::stack_shadow_zone_size());
+    __ bang_stack_with_offset(checked_cast<int>(StackOverflow::stack_shadow_zone_size()));
} else {
Unimplemented();
}
@@ -1545,11 +1519,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
const Register oop_handle_reg = r20;
- if (is_critical_native) {
- check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args,
- oop_handle_offset, oop_maps, in_regs, in_sig_bt);
- }
-
//
// We immediately shuffle the arguments so that any vm call we have to
// make from here on out (sync slow path, jvmti, etc.) we will have
@@ -1822,12 +1791,12 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
// get JNIEnv* which is first argument to native
if (!is_critical_native) {
__ lea(c_rarg0, Address(rthread, in_bytes(JavaThread::jni_environment_offset())));
- }
- // Now set thread in native
- __ mov(rscratch1, _thread_in_native);
- __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
- __ stlrw(rscratch1, rscratch2);
+ // Now set thread in native
+ __ mov(rscratch1, _thread_in_native);
+ __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
+ __ stlrw(rscratch1, rscratch2);
+ }
rt_call(masm, native_func);
@@ -1855,6 +1824,21 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
default : ShouldNotReachHere();
}
+ Label safepoint_in_progress, safepoint_in_progress_done;
+ Label after_transition;
+
+ // If this is a critical native, check for a safepoint or suspend request after the call.
+ // If a safepoint is needed, transition to native, then to native_trans to handle
+ // safepoints like the native methods that are not critical natives.
+ if (is_critical_native) {
+ Label needs_safepoint;
+ __ safepoint_poll(needs_safepoint, false /* at_return */, true /* acquire */, false /* in_nmethod */);
+ __ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset()));
+ __ cbnzw(rscratch1, needs_safepoint);
+ __ b(after_transition);
+ __ bind(needs_safepoint);
+ }
+
// Switch thread to "native transition" state before reading the synchronization state.
// This additional state is necessary because reading and testing the synchronization
// state is not atomic w.r.t. GC, as this scenario demonstrates:
@@ -1875,16 +1859,23 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
}
// check for safepoint operation in progress and/or pending suspend requests
- Label safepoint_in_progress, safepoint_in_progress_done;
{
- __ safepoint_poll_acquire(safepoint_in_progress);
+ // We need an acquire here to ensure that any subsequent load of the
+ // global SafepointSynchronize::_state flag is ordered after this load
+ // of the thread-local polling word. We don't want this poll to
+ // return false (i.e. not safepointing) and a later poll of the global
+ // SafepointSynchronize::_state spuriously to return true.
+ //
+ // This is to avoid a race when we're in a native->Java transition
+ // racing the code which wakes up from a safepoint.
+
+ __ safepoint_poll(safepoint_in_progress, true /* at_return */, true /* acquire */, false /* in_nmethod */);
__ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset()));
__ cbnzw(rscratch1, safepoint_in_progress);
__ bind(safepoint_in_progress_done);
}
// change thread state
- Label after_transition;
__ mov(rscratch1, _thread_in_Java);
__ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
__ stlrw(rscratch1, rscratch2);
@@ -2089,22 +2080,12 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
#ifndef PRODUCT
assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
#endif
- if (!is_critical_native) {
- __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
- } else {
- __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)));
- }
+ __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
__ blr(rscratch1);
__ maybe_isb();
// Restore any method result value
restore_native_result(masm, ret_type, stack_slots);
- if (is_critical_native) {
- // The call above performed the transition to thread_in_Java so
- // skip the transition logic above.
- __ b(after_transition);
- }
-
__ b(safepoint_in_progress_done);
__ block_comment("} safepoint");
}
@@ -2153,12 +2134,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
oop_maps);
- if (is_critical_native) {
- nm->set_lazy_critical_native(true);
- }
-
return nm;
-
}
// this function returns the adjust size (in number of words) to a c2i adapter
@@ -2469,7 +2445,7 @@ void SharedRuntime::generate_deopt_blob() {
__ sub(sp, sp, r19);
// Push interpreter frames in a loop
- __ mov(rscratch1, (address)0xDEADDEAD); // Make a recognizable pattern
+ __ mov(rscratch1, (uint64_t)0xDEADDEAD); // Make a recognizable pattern
__ mov(rscratch2, rscratch1);
Label loop;
__ bind(loop);
diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
index 412578eea5c..09ea5387165 100644
--- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp
@@ -611,6 +611,16 @@ class StubGenerator: public StubCodeGenerator {
void array_overlap_test(Label& L_no_overlap, Address::sxtw sf) { __ b(L_no_overlap); }
+ // Generate indices for iota vector.
+ address generate_iota_indices(const char *stub_name) {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", stub_name);
+ address start = __ pc();
+ __ emit_data64(0x0706050403020100, relocInfo::none);
+ __ emit_data64(0x0F0E0D0C0B0A0908, relocInfo::none);
+ return start;
+ }
+
// The inner part of zero_words(). This is the bulk operation,
// zeroing words in blocks, possibly using DC ZVA to do it. The
// caller is responsible for zeroing the last few words.
@@ -1295,14 +1305,14 @@ class StubGenerator: public StubCodeGenerator {
// Scan over array at a for count oops, verifying each one.
// Preserves a and count, clobbers rscratch1 and rscratch2.
- void verify_oop_array (size_t size, Register a, Register count, Register temp) {
+ void verify_oop_array (int size, Register a, Register count, Register temp) {
Label loop, end;
__ mov(rscratch1, a);
__ mov(rscratch2, zr);
__ bind(loop);
__ cmp(rscratch2, count);
__ br(Assembler::HS, end);
- if (size == (size_t)wordSize) {
+ if (size == wordSize) {
__ ldr(temp, Address(a, rscratch2, Address::lsl(exact_log2(size))));
__ verify_oop(temp);
} else {
@@ -1333,7 +1343,7 @@ class StubGenerator: public StubCodeGenerator {
// disjoint_int_copy_entry is set to the no-overlap entry point
// used by generate_conjoint_int_oop_copy().
//
- address generate_disjoint_copy(size_t size, bool aligned, bool is_oop, address *entry,
+ address generate_disjoint_copy(int size, bool aligned, bool is_oop, address *entry,
const char *name, bool dest_uninitialized = false) {
Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
RegSet saved_reg = RegSet::of(s, d, count);
@@ -1399,7 +1409,7 @@ class StubGenerator: public StubCodeGenerator {
// the hardware handle it. The two dwords within qwords that span
// cache line boundaries will still be loaded and stored atomicly.
//
- address generate_conjoint_copy(size_t size, bool aligned, bool is_oop, address nooverlap_target,
+ address generate_conjoint_copy(int size, bool aligned, bool is_oop, address nooverlap_target,
address *entry, const char *name,
bool dest_uninitialized = false) {
Register s = c_rarg0, d = c_rarg1, count = c_rarg2;
@@ -1650,7 +1660,7 @@ class StubGenerator: public StubCodeGenerator {
address generate_disjoint_oop_copy(bool aligned, address *entry,
const char *name, bool dest_uninitialized) {
const bool is_oop = true;
- const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong);
+ const int size = UseCompressedOops ? sizeof (jint) : sizeof (jlong);
return generate_disjoint_copy(size, aligned, is_oop, entry, name, dest_uninitialized);
}
@@ -1668,7 +1678,7 @@ class StubGenerator: public StubCodeGenerator {
address nooverlap_target, address *entry,
const char *name, bool dest_uninitialized) {
const bool is_oop = true;
- const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong);
+ const int size = UseCompressedOops ? sizeof (jint) : sizeof (jlong);
return generate_conjoint_copy(size, aligned, is_oop, nooverlap_target, entry,
name, dest_uninitialized);
}
@@ -3299,6 +3309,225 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+ // Arguments:
+ //
+ // Inputs:
+ // c_rarg0 - byte[] source+offset
+ // c_rarg1 - byte[] SHA.state
+ // c_rarg2 - int digest_length
+ // c_rarg3 - int offset
+ // c_rarg4 - int limit
+ //
+ address generate_sha3_implCompress(bool multi_block, const char *name) {
+ static const uint64_t round_consts[24] = {
+ 0x0000000000000001L, 0x0000000000008082L, 0x800000000000808AL,
+ 0x8000000080008000L, 0x000000000000808BL, 0x0000000080000001L,
+ 0x8000000080008081L, 0x8000000000008009L, 0x000000000000008AL,
+ 0x0000000000000088L, 0x0000000080008009L, 0x000000008000000AL,
+ 0x000000008000808BL, 0x800000000000008BL, 0x8000000000008089L,
+ 0x8000000000008003L, 0x8000000000008002L, 0x8000000000000080L,
+ 0x000000000000800AL, 0x800000008000000AL, 0x8000000080008081L,
+ 0x8000000000008080L, 0x0000000080000001L, 0x8000000080008008L
+ };
+
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", name);
+ address start = __ pc();
+
+ Register buf = c_rarg0;
+ Register state = c_rarg1;
+ Register digest_length = c_rarg2;
+ Register ofs = c_rarg3;
+ Register limit = c_rarg4;
+
+ Label sha3_loop, rounds24_loop;
+ Label sha3_512, sha3_384_or_224, sha3_256;
+
+ __ stpd(v8, v9, __ pre(sp, -64));
+ __ stpd(v10, v11, Address(sp, 16));
+ __ stpd(v12, v13, Address(sp, 32));
+ __ stpd(v14, v15, Address(sp, 48));
+
+ // load state
+ __ add(rscratch1, state, 32);
+ __ ld1(v0, v1, v2, v3, __ T1D, state);
+ __ ld1(v4, v5, v6, v7, __ T1D, __ post(rscratch1, 32));
+ __ ld1(v8, v9, v10, v11, __ T1D, __ post(rscratch1, 32));
+ __ ld1(v12, v13, v14, v15, __ T1D, __ post(rscratch1, 32));
+ __ ld1(v16, v17, v18, v19, __ T1D, __ post(rscratch1, 32));
+ __ ld1(v20, v21, v22, v23, __ T1D, __ post(rscratch1, 32));
+ __ ld1(v24, __ T1D, rscratch1);
+
+ __ BIND(sha3_loop);
+
+ // 24 keccak rounds
+ __ movw(rscratch2, 24);
+
+ // load round_constants base
+ __ lea(rscratch1, ExternalAddress((address) round_consts));
+
+ // load input
+ __ ld1(v25, v26, v27, v28, __ T8B, __ post(buf, 32));
+ __ ld1(v29, v30, v31, __ T8B, __ post(buf, 24));
+ __ eor(v0, __ T8B, v0, v25);
+ __ eor(v1, __ T8B, v1, v26);
+ __ eor(v2, __ T8B, v2, v27);
+ __ eor(v3, __ T8B, v3, v28);
+ __ eor(v4, __ T8B, v4, v29);
+ __ eor(v5, __ T8B, v5, v30);
+ __ eor(v6, __ T8B, v6, v31);
+
+ // digest_length == 64, SHA3-512
+ __ tbnz(digest_length, 6, sha3_512);
+
+ __ ld1(v25, v26, v27, v28, __ T8B, __ post(buf, 32));
+ __ ld1(v29, v30, __ T8B, __ post(buf, 16));
+ __ eor(v7, __ T8B, v7, v25);
+ __ eor(v8, __ T8B, v8, v26);
+ __ eor(v9, __ T8B, v9, v27);
+ __ eor(v10, __ T8B, v10, v28);
+ __ eor(v11, __ T8B, v11, v29);
+ __ eor(v12, __ T8B, v12, v30);
+
+ // digest_length == 28, SHA3-224; digest_length == 48, SHA3-384
+ __ tbnz(digest_length, 4, sha3_384_or_224);
+
+ // SHA3-256
+ __ ld1(v25, v26, v27, v28, __ T8B, __ post(buf, 32));
+ __ eor(v13, __ T8B, v13, v25);
+ __ eor(v14, __ T8B, v14, v26);
+ __ eor(v15, __ T8B, v15, v27);
+ __ eor(v16, __ T8B, v16, v28);
+ __ b(rounds24_loop);
+
+ __ BIND(sha3_384_or_224);
+ __ tbz(digest_length, 2, rounds24_loop); // bit 2 cleared? SHA-384
+
+ // SHA3-224
+ __ ld1(v25, v26, v27, v28, __ T8B, __ post(buf, 32));
+ __ ld1(v29, __ T8B, __ post(buf, 8));
+ __ eor(v13, __ T8B, v13, v25);
+ __ eor(v14, __ T8B, v14, v26);
+ __ eor(v15, __ T8B, v15, v27);
+ __ eor(v16, __ T8B, v16, v28);
+ __ eor(v17, __ T8B, v17, v29);
+ __ b(rounds24_loop);
+
+ __ BIND(sha3_512);
+ __ ld1(v25, v26, __ T8B, __ post(buf, 16));
+ __ eor(v7, __ T8B, v7, v25);
+ __ eor(v8, __ T8B, v8, v26);
+
+ __ BIND(rounds24_loop);
+ __ subw(rscratch2, rscratch2, 1);
+
+ __ eor3(v29, __ T16B, v4, v9, v14);
+ __ eor3(v26, __ T16B, v1, v6, v11);
+ __ eor3(v28, __ T16B, v3, v8, v13);
+ __ eor3(v25, __ T16B, v0, v5, v10);
+ __ eor3(v27, __ T16B, v2, v7, v12);
+ __ eor3(v29, __ T16B, v29, v19, v24);
+ __ eor3(v26, __ T16B, v26, v16, v21);
+ __ eor3(v28, __ T16B, v28, v18, v23);
+ __ eor3(v25, __ T16B, v25, v15, v20);
+ __ eor3(v27, __ T16B, v27, v17, v22);
+
+ __ rax1(v30, __ T2D, v29, v26);
+ __ rax1(v26, __ T2D, v26, v28);
+ __ rax1(v28, __ T2D, v28, v25);
+ __ rax1(v25, __ T2D, v25, v27);
+ __ rax1(v27, __ T2D, v27, v29);
+
+ __ eor(v0, __ T16B, v0, v30);
+ __ xar(v29, __ T2D, v1, v25, (64 - 1));
+ __ xar(v1, __ T2D, v6, v25, (64 - 44));
+ __ xar(v6, __ T2D, v9, v28, (64 - 20));
+ __ xar(v9, __ T2D, v22, v26, (64 - 61));
+ __ xar(v22, __ T2D, v14, v28, (64 - 39));
+ __ xar(v14, __ T2D, v20, v30, (64 - 18));
+ __ xar(v31, __ T2D, v2, v26, (64 - 62));
+ __ xar(v2, __ T2D, v12, v26, (64 - 43));
+ __ xar(v12, __ T2D, v13, v27, (64 - 25));
+ __ xar(v13, __ T2D, v19, v28, (64 - 8));
+ __ xar(v19, __ T2D, v23, v27, (64 - 56));
+ __ xar(v23, __ T2D, v15, v30, (64 - 41));
+ __ xar(v15, __ T2D, v4, v28, (64 - 27));
+ __ xar(v28, __ T2D, v24, v28, (64 - 14));
+ __ xar(v24, __ T2D, v21, v25, (64 - 2));
+ __ xar(v8, __ T2D, v8, v27, (64 - 55));
+ __ xar(v4, __ T2D, v16, v25, (64 - 45));
+ __ xar(v16, __ T2D, v5, v30, (64 - 36));
+ __ xar(v5, __ T2D, v3, v27, (64 - 28));
+ __ xar(v27, __ T2D, v18, v27, (64 - 21));
+ __ xar(v3, __ T2D, v17, v26, (64 - 15));
+ __ xar(v25, __ T2D, v11, v25, (64 - 10));
+ __ xar(v26, __ T2D, v7, v26, (64 - 6));
+ __ xar(v30, __ T2D, v10, v30, (64 - 3));
+
+ __ bcax(v20, __ T16B, v31, v22, v8);
+ __ bcax(v21, __ T16B, v8, v23, v22);
+ __ bcax(v22, __ T16B, v22, v24, v23);
+ __ bcax(v23, __ T16B, v23, v31, v24);
+ __ bcax(v24, __ T16B, v24, v8, v31);
+
+ __ ld1r(v31, __ T2D, __ post(rscratch1, 8));
+
+ __ bcax(v17, __ T16B, v25, v19, v3);
+ __ bcax(v18, __ T16B, v3, v15, v19);
+ __ bcax(v19, __ T16B, v19, v16, v15);
+ __ bcax(v15, __ T16B, v15, v25, v16);
+ __ bcax(v16, __ T16B, v16, v3, v25);
+
+ __ bcax(v10, __ T16B, v29, v12, v26);
+ __ bcax(v11, __ T16B, v26, v13, v12);
+ __ bcax(v12, __ T16B, v12, v14, v13);
+ __ bcax(v13, __ T16B, v13, v29, v14);
+ __ bcax(v14, __ T16B, v14, v26, v29);
+
+ __ bcax(v7, __ T16B, v30, v9, v4);
+ __ bcax(v8, __ T16B, v4, v5, v9);
+ __ bcax(v9, __ T16B, v9, v6, v5);
+ __ bcax(v5, __ T16B, v5, v30, v6);
+ __ bcax(v6, __ T16B, v6, v4, v30);
+
+ __ bcax(v3, __ T16B, v27, v0, v28);
+ __ bcax(v4, __ T16B, v28, v1, v0);
+ __ bcax(v0, __ T16B, v0, v2, v1);
+ __ bcax(v1, __ T16B, v1, v27, v2);
+ __ bcax(v2, __ T16B, v2, v28, v27);
+
+ __ eor(v0, __ T16B, v0, v31);
+
+ __ cbnzw(rscratch2, rounds24_loop);
+
+ if (multi_block) {
+ // block_size = 200 - 2 * digest_length, ofs += block_size
+ __ add(ofs, ofs, 200);
+ __ sub(ofs, ofs, digest_length, Assembler::LSL, 1);
+
+ __ cmp(ofs, limit);
+ __ br(Assembler::LE, sha3_loop);
+ __ mov(c_rarg0, ofs); // return ofs
+ }
+
+ __ st1(v0, v1, v2, v3, __ T1D, __ post(state, 32));
+ __ st1(v4, v5, v6, v7, __ T1D, __ post(state, 32));
+ __ st1(v8, v9, v10, v11, __ T1D, __ post(state, 32));
+ __ st1(v12, v13, v14, v15, __ T1D, __ post(state, 32));
+ __ st1(v16, v17, v18, v19, __ T1D, __ post(state, 32));
+ __ st1(v20, v21, v22, v23, __ T1D, __ post(state, 32));
+ __ st1(v24, __ T1D, state);
+
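+ // restore the callee-saved SIMD registers v8..v15 and release the save area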
+ __ ldpd(v14, v15, Address(sp, 48));
+ __ ldpd(v12, v13, Address(sp, 32));
+ __ ldpd(v10, v11, Address(sp, 16));
+ __ ldpd(v8, v9, __ post(sp, 64));
+
+ __ ret(lr);
+
+ return start;
+ }
+
// Safefetch stubs.
void generate_safefetch(const char* name, int size, address* entry,
address* fault_pc, address* continuation_pc) {
@@ -3739,6 +3968,238 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+ // Arguments:
+ //
+ // Input:
+ // c_rarg0 - newArr address
+ // c_rarg1 - oldArr address
+ // c_rarg2 - newIdx
+ // c_rarg3 - shiftCount
+ // c_rarg4 - numIter
+ //
+ address generate_bigIntegerRightShift() {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker");
+ address start = __ pc();
+
+ Label ShiftSIMDLoop, ShiftTwoLoop, ShiftThree, ShiftTwo, ShiftOne, Exit;
+
+ Register newArr = c_rarg0;
+ Register oldArr = c_rarg1;
+ Register newIdx = c_rarg2;
+ Register shiftCount = c_rarg3;
+ Register numIter = c_rarg4;
+ Register idx = numIter;
+
+ Register newArrCur = rscratch1;
+ Register shiftRevCount = rscratch2;
+ Register oldArrCur = r13;
+ Register oldArrNext = r14;
+
+ FloatRegister oldElem0 = v0;
+ FloatRegister oldElem1 = v1;
+ FloatRegister newElem = v2;
+ FloatRegister shiftVCount = v3;
+ FloatRegister shiftVRevCount = v4;
+
+ __ cbz(idx, Exit);
+
+ __ add(newArr, newArr, newIdx, Assembler::LSL, 2);
+
+ // left shift count
+ __ movw(shiftRevCount, 32);
+ __ subw(shiftRevCount, shiftRevCount, shiftCount);
+
+ // numIter is too small for a 4-word SIMD loop; fall back to scalar handling
+ __ cmp(numIter, (u1)4);
+ __ br(Assembler::LT, ShiftThree);
+
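+ // ushl shifts right when the per-lane shift count is negative, so the vector
+ // shift count is negated below to obtain a logical right shift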
+ __ dup(shiftVCount, __ T4S, shiftCount);
+ __ dup(shiftVRevCount, __ T4S, shiftRevCount);
+ __ negr(shiftVCount, __ T4S, shiftVCount);
+
+ __ BIND(ShiftSIMDLoop);
+
+ // Calculate the load addresses
+ __ sub(idx, idx, 4);
+ __ add(oldArrNext, oldArr, idx, Assembler::LSL, 2);
+ __ add(newArrCur, newArr, idx, Assembler::LSL, 2);
+ __ add(oldArrCur, oldArrNext, 4);
+
+ // Load 4 words and process
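+ // each output word combines the next-higher input word shifted right by
+ // shiftCount with the word at the same index shifted left by (32 - shiftCount)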
+ __ ld1(oldElem0, __ T4S, Address(oldArrCur));
+ __ ld1(oldElem1, __ T4S, Address(oldArrNext));
+ __ ushl(oldElem0, __ T4S, oldElem0, shiftVCount);
+ __ ushl(oldElem1, __ T4S, oldElem1, shiftVRevCount);
+ __ orr(newElem, __ T16B, oldElem0, oldElem1);
+ __ st1(newElem, __ T4S, Address(newArrCur));
+
+ __ cmp(idx, (u1)4);
+ __ br(Assembler::LT, ShiftTwoLoop);
+ __ b(ShiftSIMDLoop);
+
+ __ BIND(ShiftTwoLoop);
+ __ cbz(idx, Exit);
+ __ cmp(idx, (u1)1);
+ __ br(Assembler::EQ, ShiftOne);
+
+ // Calculate the load addresses
+ __ sub(idx, idx, 2);
+ __ add(oldArrNext, oldArr, idx, Assembler::LSL, 2);
+ __ add(newArrCur, newArr, idx, Assembler::LSL, 2);
+ __ add(oldArrCur, oldArrNext, 4);
+
+ // Load 2 words and process
+ __ ld1(oldElem0, __ T2S, Address(oldArrCur));
+ __ ld1(oldElem1, __ T2S, Address(oldArrNext));
+ __ ushl(oldElem0, __ T2S, oldElem0, shiftVCount);
+ __ ushl(oldElem1, __ T2S, oldElem1, shiftVRevCount);
+ __ orr(newElem, __ T8B, oldElem0, oldElem1);
+ __ st1(newElem, __ T2S, Address(newArrCur));
+ __ b(ShiftTwoLoop);
+
+ __ BIND(ShiftThree);
+ __ tbz(idx, 1, ShiftOne);
+ __ tbz(idx, 0, ShiftTwo);
+ __ ldrw(r10, Address(oldArr, 12));
+ __ ldrw(r11, Address(oldArr, 8));
+ __ lsrvw(r10, r10, shiftCount);
+ __ lslvw(r11, r11, shiftRevCount);
+ __ orrw(r12, r10, r11);
+ __ strw(r12, Address(newArr, 8));
+
+ __ BIND(ShiftTwo);
+ __ ldrw(r10, Address(oldArr, 8));
+ __ ldrw(r11, Address(oldArr, 4));
+ __ lsrvw(r10, r10, shiftCount);
+ __ lslvw(r11, r11, shiftRevCount);
+ __ orrw(r12, r10, r11);
+ __ strw(r12, Address(newArr, 4));
+
+ __ BIND(ShiftOne);
+ __ ldrw(r10, Address(oldArr, 4));
+ __ ldrw(r11, Address(oldArr));
+ __ lsrvw(r10, r10, shiftCount);
+ __ lslvw(r11, r11, shiftRevCount);
+ __ orrw(r12, r10, r11);
+ __ strw(r12, Address(newArr));
+
+ __ BIND(Exit);
+ __ ret(lr);
+
+ return start;
+ }
+
+ // Arguments:
+ //
+ // Input:
+ // c_rarg0 - newArr address
+ // c_rarg1 - oldArr address
+ // c_rarg2 - newIdx
+ // c_rarg3 - shiftCount
+ // c_rarg4 - numIter
+ //
+ address generate_bigIntegerLeftShift() {
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "bigIntegerLeftShiftWorker");
+ address start = __ pc();
+
+ Label ShiftSIMDLoop, ShiftTwoLoop, ShiftThree, ShiftTwo, ShiftOne, Exit;
+
+ Register newArr = c_rarg0;
+ Register oldArr = c_rarg1;
+ Register newIdx = c_rarg2;
+ Register shiftCount = c_rarg3;
+ Register numIter = c_rarg4;
+
+ Register shiftRevCount = rscratch1;
+ Register oldArrNext = rscratch2;
+
+ FloatRegister oldElem0 = v0;
+ FloatRegister oldElem1 = v1;
+ FloatRegister newElem = v2;
+ FloatRegister shiftVCount = v3;
+ FloatRegister shiftVRevCount = v4;
+
+ __ cbz(numIter, Exit);
+
+ __ add(oldArrNext, oldArr, 4);
+ __ add(newArr, newArr, newIdx, Assembler::LSL, 2);
+
+ // right shift count
+ __ movw(shiftRevCount, 32);
+ __ subw(shiftRevCount, shiftRevCount, shiftCount);
+
+ // numIter is too small for a 4-word SIMD loop; fall back to scalar handling
+ __ cmp(numIter, (u1)4);
+ __ br(Assembler::LT, ShiftThree);
+
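+ // the reverse (32 - shiftCount) count is negated so its ushl performs a
+ // logical right shift per lane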
+ __ dup(shiftVCount, __ T4S, shiftCount);
+ __ dup(shiftVRevCount, __ T4S, shiftRevCount);
+ __ negr(shiftVRevCount, __ T4S, shiftVRevCount);
+
+ __ BIND(ShiftSIMDLoop);
+
+ // load 4 words and process
+ __ ld1(oldElem0, __ T4S, __ post(oldArr, 16));
+ __ ld1(oldElem1, __ T4S, __ post(oldArrNext, 16));
+ __ ushl(oldElem0, __ T4S, oldElem0, shiftVCount);
+ __ ushl(oldElem1, __ T4S, oldElem1, shiftVRevCount);
+ __ orr(newElem, __ T16B, oldElem0, oldElem1);
+ __ st1(newElem, __ T4S, __ post(newArr, 16));
+ __ sub(numIter, numIter, 4);
+
+ __ cmp(numIter, (u1)4);
+ __ br(Assembler::LT, ShiftTwoLoop);
+ __ b(ShiftSIMDLoop);
+
+ __ BIND(ShiftTwoLoop);
+ __ cbz(numIter, Exit);
+ __ cmp(numIter, (u1)1);
+ __ br(Assembler::EQ, ShiftOne);
+
+ // load 2 words and process
+ __ ld1(oldElem0, __ T2S, __ post(oldArr, 8));
+ __ ld1(oldElem1, __ T2S, __ post(oldArrNext, 8));
+ __ ushl(oldElem0, __ T2S, oldElem0, shiftVCount);
+ __ ushl(oldElem1, __ T2S, oldElem1, shiftVRevCount);
+ __ orr(newElem, __ T8B, oldElem0, oldElem1);
+ __ st1(newElem, __ T2S, __ post(newArr, 8));
+ __ sub(numIter, numIter, 2);
+ __ b(ShiftTwoLoop);
+
+ __ BIND(ShiftThree);
+ __ ldrw(r10, __ post(oldArr, 4));
+ __ ldrw(r11, __ post(oldArrNext, 4));
+ __ lslvw(r10, r10, shiftCount);
+ __ lsrvw(r11, r11, shiftRevCount);
+ __ orrw(r12, r10, r11);
+ __ strw(r12, __ post(newArr, 4));
+ __ tbz(numIter, 1, Exit);
+ __ tbz(numIter, 0, ShiftOne);
+
+ __ BIND(ShiftTwo);
+ __ ldrw(r10, __ post(oldArr, 4));
+ __ ldrw(r11, __ post(oldArrNext, 4));
+ __ lslvw(r10, r10, shiftCount);
+ __ lsrvw(r11, r11, shiftRevCount);
+ __ orrw(r12, r10, r11);
+ __ strw(r12, __ post(newArr, 4));
+
+ __ BIND(ShiftOne);
+ __ ldrw(r10, Address(oldArr));
+ __ ldrw(r11, Address(oldArrNext));
+ __ lslvw(r10, r10, shiftCount);
+ __ lsrvw(r11, r11, shiftRevCount);
+ __ orrw(r12, r10, r11);
+ __ strw(r12, Address(newArr));
+
+ __ BIND(Exit);
+ __ ret(lr);
+
+ return start;
+ }
+
void ghash_multiply(FloatRegister result_lo, FloatRegister result_hi,
FloatRegister a, FloatRegister b, FloatRegister a1_xor_a0,
FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3, FloatRegister tmp4) {
@@ -4942,6 +5403,150 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
+ void generate_base64_encode_simdround(Register src, Register dst,
+ FloatRegister codec, u8 size) {
+
+ FloatRegister in0 = v4, in1 = v5, in2 = v6;
+ FloatRegister out0 = v16, out1 = v17, out2 = v18, out3 = v19;
+ FloatRegister ind0 = v20, ind1 = v21, ind2 = v22, ind3 = v23;
+
+ Assembler::SIMD_Arrangement arrangement = size == 16 ? __ T16B : __ T8B;
+
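+ // ld3 de-interleaves the input: in0/in1/in2 hold byte 0/1/2 of every 3-byte group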
+ __ ld3(in0, in1, in2, arrangement, __ post(src, 3 * size));
+
+ __ ushr(ind0, arrangement, in0, 2);
+
+ __ ushr(ind1, arrangement, in1, 2);
+ __ shl(in0, arrangement, in0, 6);
+ __ orr(ind1, arrangement, ind1, in0);
+ __ ushr(ind1, arrangement, ind1, 2);
+
+ __ ushr(ind2, arrangement, in2, 4);
+ __ shl(in1, arrangement, in1, 4);
+ __ orr(ind2, arrangement, in1, ind2);
+ __ ushr(ind2, arrangement, ind2, 2);
+
+ __ shl(ind3, arrangement, in2, 2);
+ __ ushr(ind3, arrangement, ind3, 2);
+
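+ // ind0..ind3 now hold the four 6-bit indices of each group; tbl maps them
+ // through the 64-byte lookup table held in four consecutive SIMD registers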
+ __ tbl(out0, arrangement, codec, 4, ind0);
+ __ tbl(out1, arrangement, codec, 4, ind1);
+ __ tbl(out2, arrangement, codec, 4, ind2);
+ __ tbl(out3, arrangement, codec, 4, ind3);
+
+ __ st4(out0, out1, out2, out3, arrangement, __ post(dst, 4 * size));
+ }
+
+ /**
+ * Arguments:
+ *
+ * Input:
+ * c_rarg0 - src_start
+ * c_rarg1 - src_offset
+ * c_rarg2 - src_length
+ * c_rarg3 - dest_start
+ * c_rarg4 - dest_offset
+ * c_rarg5 - isURL
+ *
+ */
+ address generate_base64_encodeBlock() {
+
+ static const char toBase64[64] = {
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+ 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+ 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/'
+ };
+
+ static const char toBase64URL[64] = {
+ 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
+ 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
+ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
+ 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
+ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_'
+ };
+
+ __ align(CodeEntryAlignment);
+ StubCodeMark mark(this, "StubRoutines", "encodeBlock");
+ address start = __ pc();
+
+ Register src = c_rarg0; // source array
+ Register soff = c_rarg1; // source start offset
+ Register send = c_rarg2; // source end offset
+ Register dst = c_rarg3; // dest array
+ Register doff = c_rarg4; // position for writing to dest array
+ Register isURL = c_rarg5; // Base64 or URL character set
+
+ // c_rarg6 and c_rarg7 are free to use as temps
+ Register codec = c_rarg6;
+ Register length = c_rarg7;
+
+ Label ProcessData, Process48B, Process24B, Process3B, SIMDExit, Exit;
+
+ __ add(src, src, soff);
+ __ add(dst, dst, doff);
+ __ sub(length, send, soff);
+
+ // load the codec base address
+ __ lea(codec, ExternalAddress((address) toBase64));
+ __ cbz(isURL, ProcessData);
+ __ lea(codec, ExternalAddress((address) toBase64URL));
+
+ __ BIND(ProcessData);
+
+ // too short to set up a SIMD loop; fall back to 3-byte scalar processing
+ __ cmp(length, (u1)24);
+ __ br(Assembler::LT, Process3B);
+
+ __ ld1(v0, v1, v2, v3, __ T16B, Address(codec));
+
+ __ BIND(Process48B);
+ __ cmp(length, (u1)48);
+ __ br(Assembler::LT, Process24B);
+ generate_base64_encode_simdround(src, dst, v0, 16);
+ __ sub(length, length, 48);
+ __ b(Process48B);
+
+ __ BIND(Process24B);
+ __ cmp(length, (u1)24);
+ __ br(Assembler::LT, SIMDExit);
+ generate_base64_encode_simdround(src, dst, v0, 8);
+ __ sub(length, length, 24);
+
+ __ BIND(SIMDExit);
+ __ cbz(length, Exit);
+
+ __ BIND(Process3B);
+ // 3 src bytes, 24 bits
+ __ ldrb(r10, __ post(src, 1));
+ __ ldrb(r11, __ post(src, 1));
+ __ ldrb(r12, __ post(src, 1));
+ __ orrw(r11, r11, r10, Assembler::LSL, 8);
+ __ orrw(r12, r12, r11, Assembler::LSL, 8);
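+ // r12 now holds the 24-bit group (byte0 << 16) | (byte1 << 8) | byte2;
+ // ubfmw extracts its four 6-bit fields as table indices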
+ // codec index
+ __ ubfmw(r15, r12, 18, 23);
+ __ ubfmw(r14, r12, 12, 17);
+ __ ubfmw(r13, r12, 6, 11);
+ __ andw(r12, r12, 63);
+ // look up the encoded characters in the codec table
+ __ ldrb(r15, Address(codec, r15, Address::uxtw(0)));
+ __ ldrb(r14, Address(codec, r14, Address::uxtw(0)));
+ __ ldrb(r13, Address(codec, r13, Address::uxtw(0)));
+ __ ldrb(r12, Address(codec, r12, Address::uxtw(0)));
+ __ strb(r15, __ post(dst, 1));
+ __ strb(r14, __ post(dst, 1));
+ __ strb(r13, __ post(dst, 1));
+ __ strb(r12, __ post(dst, 1));
+ __ sub(length, length, 3);
+ __ cbnz(length, Process3B);
+
+ __ BIND(Exit);
+ __ ret(lr);
+
+ return start;
+ }
+
// Continuation point for throwing of implicit exceptions that are
// not handled in the current activation. Fabricates an exception
// oop and initiates normal exception dispatching in this
@@ -5958,6 +6563,8 @@ class StubGenerator: public StubCodeGenerator {
SharedRuntime::
throw_NullPointerException_at_call));
+ StubRoutines::aarch64::_vector_iota_indices = generate_iota_indices("iota_indices");
+
// arraycopy stubs used by compilers
generate_arraycopy_stubs();
@@ -5993,6 +6600,11 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_mulAdd = generate_mulAdd();
}
+ if (UseSIMDForBigIntegerShiftIntrinsics) {
+ StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift();
+ StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift();
+ }
+
if (UseMontgomeryMultiplyIntrinsic) {
StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply");
MontgomeryMultiplyGenerator g(_masm, /*squaring*/false);
@@ -6013,6 +6625,10 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
}
+ if (UseBASE64Intrinsics) {
+ StubRoutines::_base64_encodeBlock = generate_base64_encodeBlock();
+ }
+
// data cache line writeback
StubRoutines::_data_cache_writeback = generate_data_cache_writeback();
StubRoutines::_data_cache_writeback_sync = generate_data_cache_writeback_sync();
@@ -6036,6 +6652,10 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress");
StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB");
}
+ if (UseSHA3Intrinsics) {
+ StubRoutines::_sha3_implCompress = generate_sha3_implCompress(false, "sha3_implCompress");
+ StubRoutines::_sha3_implCompressMB = generate_sha3_implCompress(true, "sha3_implCompressMB");
+ }
// generate Adler32 intrinsics code
if (UseAdler32Intrinsics) {
diff --git a/src/hotspot/cpu/aarch64/stubRoutines_aarch64.cpp b/src/hotspot/cpu/aarch64/stubRoutines_aarch64.cpp
index b2d0d5dbff8..f471209a4c0 100644
--- a/src/hotspot/cpu/aarch64/stubRoutines_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/stubRoutines_aarch64.cpp
@@ -40,6 +40,7 @@ address StubRoutines::aarch64::_f2i_fixup = NULL;
address StubRoutines::aarch64::_f2l_fixup = NULL;
address StubRoutines::aarch64::_d2i_fixup = NULL;
address StubRoutines::aarch64::_d2l_fixup = NULL;
+address StubRoutines::aarch64::_vector_iota_indices = NULL;
address StubRoutines::aarch64::_float_sign_mask = NULL;
address StubRoutines::aarch64::_float_sign_flip = NULL;
address StubRoutines::aarch64::_double_sign_mask = NULL;
diff --git a/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp b/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp
index 4ace7b5c808..6960a19b3f5 100644
--- a/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp
@@ -51,6 +51,7 @@ class aarch64 {
static address _d2i_fixup;
static address _d2l_fixup;
+ static address _vector_iota_indices;
static address _float_sign_mask;
static address _float_sign_flip;
static address _double_sign_mask;
@@ -106,6 +107,10 @@ class aarch64 {
return _d2l_fixup;
}
+ static address vector_iota_indices() {
+ return _vector_iota_indices;
+ }
+
static address float_sign_mask()
{
return _float_sign_mask;
diff --git a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp
index 21566592a9f..874d8ce2766 100644
--- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp
@@ -980,7 +980,7 @@ address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
Label slow_path;
// If we need a safepoint check, generate full interpreter entry.
- __ safepoint_poll(slow_path);
+ __ safepoint_poll(slow_path, false /* at_return */, false /* acquire */, false /* in_nmethod */);
// We don't generate local frame and don't align stack because
// we call stub code and there is no safepoint on this path.
@@ -1029,7 +1029,7 @@ address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractI
Label slow_path;
// If we need a safepoint check, generate full interpreter entry.
- __ safepoint_poll(slow_path);
+ __ safepoint_poll(slow_path, false /* at_return */, false /* acquire */, false /* in_nmethod */);
// We don't generate local frame and don't align stack because
// we call stub code and there is no safepoint on this path.
@@ -1120,7 +1120,7 @@ void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) {
// an interpreter frame with greater than a page of locals, so each page
// needs to be checked. Only true for non-native.
if (UseStackBanging) {
- const int n_shadow_pages = StackOverflow::stack_shadow_zone_size() / os::vm_page_size();
+ const int n_shadow_pages = (int)(StackOverflow::stack_shadow_zone_size() / os::vm_page_size());
const int start_page = native_call ? n_shadow_pages : 1;
const int page_size = os::vm_page_size();
for (int pages = start_page; pages <= n_shadow_pages ; pages++) {
@@ -1388,7 +1388,16 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
// check for safepoint operation in progress and/or pending suspend requests
{
Label L, Continue;
- __ safepoint_poll_acquire(L);
+
+ // We need an acquire here to ensure that any subsequent load of the
+ // global SafepointSynchronize::_state flag is ordered after this load
+ // of the thread-local polling word. We don't want this poll to
+ // return false (i.e. not safepointing) and a later poll of the global
+ // SafepointSynchronize::_state spuriously to return true.
+ //
+ // This is to avoid a race when we're in a native->Java transition
+ // racing the code which wakes up from a safepoint.
+ __ safepoint_poll(L, true /* at_return */, true /* acquire */, false /* in_nmethod */);
__ ldrw(rscratch2, Address(rthread, JavaThread::suspend_flags_offset()));
__ cbz(rscratch2, Continue);
__ bind(L);
diff --git a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp
index 842f07ae9a0..811783fcb7d 100644
--- a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp
@@ -1906,7 +1906,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide)
__ dispatch_only(vtos, /*generate_poll*/true);
if (UseLoopCounter) {
- if (ProfileInterpreter) {
+ if (ProfileInterpreter && !TieredCompilation) {
// Out-of-line code to allocate method data oop.
__ bind(profile_method);
__ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method));
diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
index 343a2bbd50f..2a6553d9c21 100644
--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
+++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp
@@ -181,10 +181,6 @@ void VM_Version::initialize() {
}
if (_cpu == CPU_ARM && (_model == 0xd07 || _model2 == 0xd07)) _features |= CPU_STXR_PREFETCH;
- // If an olde style /proc/cpuinfo (cores == 1) then if _model is an A57 (0xd07)
- // we assume the worst and assume we could be on a big little system and have
- // undisclosed A53 cores which we could be swapped to at any stage
- if (_cpu == CPU_ARM && os::processor_count() == 1 && _model == 0xd07) _features |= CPU_A53MAC;
char buf[512];
sprintf(buf, "0x%02x:0x%x:0x%03x:%d", _cpu, _variant, _model, _revision);
@@ -194,6 +190,7 @@ void VM_Version::initialize() {
if (_features & CPU_AES) strcat(buf, ", aes");
if (_features & CPU_SHA1) strcat(buf, ", sha1");
if (_features & CPU_SHA2) strcat(buf, ", sha256");
+ if (_features & CPU_SHA3) strcat(buf, ", sha3");
if (_features & CPU_SHA512) strcat(buf, ", sha512");
if (_features & CPU_LSE) strcat(buf, ", lse");
if (_features & CPU_SVE) strcat(buf, ", sve");
@@ -275,7 +272,7 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseMD5Intrinsics, false);
}
- if (_features & (CPU_SHA1 | CPU_SHA2)) {
+ if (_features & (CPU_SHA1 | CPU_SHA2 | CPU_SHA3 | CPU_SHA512)) {
if (FLAG_IS_DEFAULT(UseSHA)) {
FLAG_SET_DEFAULT(UseSHA, true);
}
@@ -302,6 +299,16 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
}
+ if (UseSHA && (_features & CPU_SHA3)) {
+ // Do not auto-enable UseSHA3Intrinsics until it has been fully tested on hardware
+ // if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
+ // FLAG_SET_DEFAULT(UseSHA3Intrinsics, true);
+ // }
+ } else if (UseSHA3Intrinsics) {
+ warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
+ FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
+ }
+
if (UseSHA && (_features & CPU_SHA512)) {
// Do not auto-enable UseSHA512Intrinsics until it has been fully tested on hardware
// if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
@@ -312,7 +319,7 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
}
- if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
+ if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA3Intrinsics || UseSHA512Intrinsics)) {
FLAG_SET_DEFAULT(UseSHA, false);
}
@@ -325,6 +332,10 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
}
+ if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
+ UseBASE64Intrinsics = true;
+ }
+
if (is_zva_enabled()) {
if (FLAG_IS_DEFAULT(UseBlockZeroing)) {
FLAG_SET_DEFAULT(UseBlockZeroing, true);
@@ -390,7 +401,7 @@ void VM_Version::initialize() {
warning("SVE does not support vector length less than 16 bytes. Disabling SVE.");
UseSVE = 0;
} else if ((MaxVectorSize % 16) == 0 && is_power_of_2(MaxVectorSize)) {
- int new_vl = set_and_get_current_sve_vector_lenght(MaxVectorSize);
+ int new_vl = set_and_get_current_sve_vector_length(MaxVectorSize);
_initial_sve_vector_length = new_vl;
// Update MaxVectorSize to the largest supported value.
if (new_vl < 0) {
diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp
index 292550529b4..45838f87072 100644
--- a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp
+++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp
@@ -51,7 +51,7 @@ class VM_Version : public Abstract_VM_Version {
// Sets the SVE length and returns a new actual value or negative on error.
// If the len is larger than the system largest supported SVE vector length,
// the function sets the largest supported value.
- static int set_and_get_current_sve_vector_lenght(int len);
+ static int set_and_get_current_sve_vector_length(int len);
static int get_current_sve_vector_length();
public:
@@ -103,6 +103,7 @@ class VM_Version : public Abstract_VM_Version {
CPU_CRC32 = (1<<7),
CPU_LSE = (1<<8),
CPU_DCPOP = (1<<16),
+ CPU_SHA3 = (1<<17),
CPU_SHA512 = (1<<21),
CPU_SVE = (1<<22),
// flags above must follow Linux HWCAP
@@ -128,6 +129,7 @@ class VM_Version : public Abstract_VM_Version {
static int get_initial_sve_vector_length() { return _initial_sve_vector_length; };
static bool supports_fast_class_init_checks() { return true; }
+ constexpr static bool supports_stack_watermark_barrier() { return true; }
};
#endif // CPU_AARCH64_VM_VERSION_AARCH64_HPP
diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad
index 4c237673181..b7c6ec48896 100644
--- a/src/hotspot/cpu/arm/arm.ad
+++ b/src/hotspot/cpu/arm/arm.ad
@@ -993,6 +993,10 @@ const bool Matcher::has_predicated_vectors(void) {
return false;
}
+bool Matcher::supports_vector_variable_shifts(void) {
+ return VM_Version::has_simd();
+}
+
const int Matcher::float_pressure(int default_pressure_threshold) {
return default_pressure_threshold;
}
@@ -1155,10 +1159,6 @@ const bool Matcher::rematerialize_float_constants = false;
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = false;
-// No-op on ARM.
-void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
-}
-
// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;
@@ -1663,7 +1663,6 @@ frame %{
// These two registers define part of the calling convention
// between compiled code and the interpreter.
inline_cache_reg(R_Ricklass); // Inline Cache Register or Method* for I2C
- interpreter_method_reg(R_Rmethod); // Method Register when calling interpreter
// Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
cisc_spilling_operand_name(indOffset);
@@ -2523,14 +2522,6 @@ operand inline_cache_regP(iRegP reg) %{
interface(REG_INTER);
%}
-operand interpreter_method_regP(iRegP reg) %{
- constraint(ALLOC_IN_RC(Rmethod_regP));
- match(reg);
- format %{ %}
- interface(REG_INTER);
-%}
-
-
//----------Complex Operands---------------------------------------------------
// Indirect Memory Reference
operand indirect(sp_ptr_RegP reg) %{
diff --git a/src/hotspot/cpu/arm/arm_32.ad b/src/hotspot/cpu/arm/arm_32.ad
index 177c1a7cae0..09fce8c4c4f 100644
--- a/src/hotspot/cpu/arm/arm_32.ad
+++ b/src/hotspot/cpu/arm/arm_32.ad
@@ -182,11 +182,11 @@ alloc_class chunk0(
alloc_class chunk1(
R_S16, R_S17, R_S18, R_S19, R_S20, R_S21, R_S22, R_S23,
R_S24, R_S25, R_S26, R_S27, R_S28, R_S29, R_S30, R_S31,
- R_S0, R_S1, R_S2, R_S3, R_S4, R_S5, R_S6, R_S7,
+ R_S0, R_S1, R_S2, R_S3, R_S4, R_S5, R_S6, R_S7,
R_S8, R_S9, R_S10, R_S11, R_S12, R_S13, R_S14, R_S15,
- R_D16, R_D16x,R_D17, R_D17x,R_D18, R_D18x,R_D19, R_D19x,
- R_D20, R_D20x,R_D21, R_D21x,R_D22, R_D22x,R_D23, R_D23x,
- R_D24, R_D24x,R_D25, R_D25x,R_D26, R_D26x,R_D27, R_D27x,
+ R_D16, R_D16x,R_D17, R_D17x,R_D18, R_D18x,R_D19, R_D19x,
+ R_D20, R_D20x,R_D21, R_D21x,R_D22, R_D22x,R_D23, R_D23x,
+ R_D24, R_D24x,R_D25, R_D25x,R_D26, R_D26x,R_D27, R_D27x,
R_D28, R_D28x,R_D29, R_D29x,R_D30, R_D30x,R_D31, R_D31x
);
@@ -196,8 +196,7 @@ alloc_class chunk2(APSR, FPSCR);
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg ( as defined in frame section )
-// 2) reg_class interpreter_method_reg ( as defined in frame section )
-// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
+// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// ----------------------------
@@ -223,7 +222,6 @@ reg_class ptr_reg(R_R0, R_R1, R_R2, R_R3, R_R4, R_R5, R_R6, R_R7, R_R8, R_R9, R_
reg_class sp_ptr_reg(R_R0, R_R1, R_R2, R_R3, R_R4, R_R5, R_R6, R_R7, R_R8, R_R9, R_R11, R_R12, R_R14, R_R10 /* TLS*/, R_R13 /* SP*/);
#define R_Ricklass R_R8
-#define R_Rmethod R_R9
#define R_Rthread R_R10
#define R_Rexception_obj R_R4
@@ -237,7 +235,6 @@ reg_class R9_regP(R_R9);
reg_class R12_regP(R_R12);
reg_class Rexception_regP(R_Rexception_obj);
reg_class Ricklass_regP(R_Ricklass);
-reg_class Rmethod_regP(R_Rmethod);
reg_class Rthread_regP(R_Rthread);
reg_class IP_regP(R_R12);
reg_class SP_regP(R_R13);
@@ -442,7 +439,7 @@ int MachCallStaticJavaNode::ret_addr_offset() {
int MachCallDynamicJavaNode::ret_addr_offset() {
bool far = !cache_reachable();
// mov_oop is always 2 words
- return (2 + (far ? 3 : 1)) * NativeInstruction::instruction_size;
+ return (2 + (far ? 3 : 1)) * NativeInstruction::instruction_size;
}
int MachCallRuntimeNode::ret_addr_offset() {
diff --git a/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp b/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp
index 915eb73730c..6b390c1cda3 100644
--- a/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp
+++ b/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp
@@ -38,6 +38,10 @@
#define __ ce->masm()->
+void C1SafepointPollStub::emit_code(LIR_Assembler* ce) {
+ ShouldNotReachHere();
+}
+
void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
ce->store_parameter(_bci, 0);
diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp
index 7b0794afc9f..f9b5fc69a89 100644
--- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp
+++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp
@@ -283,7 +283,7 @@ int LIR_Assembler::emit_deopt_handler() {
}
-void LIR_Assembler::return_op(LIR_Opr result) {
+void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) {
// Pop the frame before safepoint polling
__ remove_frame(initial_frame_size_in_bytes());
__ read_polling_page(Rtemp, relocInfo::poll_return_type);
diff --git a/src/hotspot/cpu/arm/c1_globals_arm.hpp b/src/hotspot/cpu/arm/c1_globals_arm.hpp
index 8141870536b..7077a87092c 100644
--- a/src/hotspot/cpu/arm/c1_globals_arm.hpp
+++ b/src/hotspot/cpu/arm/c1_globals_arm.hpp
@@ -35,8 +35,6 @@
#ifndef COMPILER2 // avoid duplicated definitions, favoring C2 version
define_pd_global(bool, BackgroundCompilation, true );
-define_pd_global(bool, UseTLAB, true );
-define_pd_global(bool, ResizeTLAB, true );
define_pd_global(bool, InlineIntrinsics, false); // TODO: ARM
define_pd_global(bool, PreferInterpreterNativeStubs, false);
define_pd_global(bool, ProfileTraps, false);
diff --git a/src/hotspot/cpu/arm/c2_globals_arm.hpp b/src/hotspot/cpu/arm/c2_globals_arm.hpp
index 3708e38da2e..525af8b1edc 100644
--- a/src/hotspot/cpu/arm/c2_globals_arm.hpp
+++ b/src/hotspot/cpu/arm/c2_globals_arm.hpp
@@ -54,8 +54,6 @@ define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K));
// (For _228_jack 16/16 is 2% better than 4/4, 16/4, 32/32, 32/16, or 16/32.)
//define_pd_global(intx, OptoLoopAlignment, 16); // = 4*wordSize
define_pd_global(intx, RegisterCostAreaRatio, 16000);
-define_pd_global(bool, UseTLAB, true);
-define_pd_global(bool, ResizeTLAB, true);
define_pd_global(intx, LoopUnrollLimit, 60); // Design center runs on 1.3.1
define_pd_global(intx, LoopPercentProfileLimit, 10);
define_pd_global(intx, MinJumpTableSize, 16);
diff --git a/src/hotspot/cpu/arm/interp_masm_arm.cpp b/src/hotspot/cpu/arm/interp_masm_arm.cpp
index 116d2d40b2e..01ff3a5d39c 100644
--- a/src/hotspot/cpu/arm/interp_masm_arm.cpp
+++ b/src/hotspot/cpu/arm/interp_masm_arm.cpp
@@ -580,7 +580,7 @@ void InterpreterMacroAssembler::dispatch_base(TosState state,
if (needs_thread_local_poll) {
NOT_PRODUCT(block_comment("Thread-local Safepoint poll"));
- ldr(Rtemp, Address(Rthread, Thread::polling_page_offset()));
+ ldr(Rtemp, Address(Rthread, Thread::polling_word_offset()));
tbnz(Rtemp, exact_log2(SafepointMechanism::poll_bit()), safepoint);
}
@@ -983,7 +983,7 @@ void InterpreterMacroAssembler::lock_object(Register Rlock) {
// Unlocks an object. Used in monitorexit bytecode and remove_activation.
//
-// Argument: R1: Points to BasicObjectLock structure for lock
+// Argument: R0: Points to BasicObjectLock structure for lock
// Throw an IllegalMonitorException if object is not locked by current thread
// Blows volatile registers R0-R3, Rtemp, LR. Calls VM.
void InterpreterMacroAssembler::unlock_object(Register Rlock) {
@@ -996,8 +996,7 @@ void InterpreterMacroAssembler::unlock_object(Register Rlock) {
const Register Robj = R2;
const Register Rmark = R3;
- const Register Rresult = R0;
- assert_different_registers(Robj, Rmark, Rlock, R0, Rtemp);
+ assert_different_registers(Robj, Rmark, Rlock, Rtemp);
const int obj_offset = BasicObjectLock::obj_offset_in_bytes();
const int lock_offset = BasicObjectLock::lock_offset_in_bytes ();
diff --git a/src/hotspot/cpu/arm/macroAssembler_arm.cpp b/src/hotspot/cpu/arm/macroAssembler_arm.cpp
index 14ac1163da0..067ec704376 100644
--- a/src/hotspot/cpu/arm/macroAssembler_arm.cpp
+++ b/src/hotspot/cpu/arm/macroAssembler_arm.cpp
@@ -85,20 +85,6 @@ void AddressLiteral::set_rspec(relocInfo::relocType rtype) {
}
}
-// Initially added to the Assembler interface as a pure virtual:
-// RegisterConstant delayed_value(..)
-// for:
-// 6812678 macro assembler needs delayed binding of a few constants (for 6655638)
-// this was subsequently modified to its present name and return type
-RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
- Register tmp,
- int offset) {
- ShouldNotReachHere();
- return RegisterOrConstant(-1);
-}
-
-
-
// virtual method calling
void MacroAssembler::lookup_virtual_method(Register recv_klass,
@@ -1914,7 +1900,7 @@ void MacroAssembler::resolve(DecoratorSet decorators, Register obj) {
}
void MacroAssembler::safepoint_poll(Register tmp1, Label& slow_path) {
- ldr_u32(tmp1, Address(Rthread, Thread::polling_page_offset()));
+ ldr_u32(tmp1, Address(Rthread, Thread::polling_word_offset()));
tst(tmp1, exact_log2(SafepointMechanism::poll_bit()));
b(slow_path, eq);
}
diff --git a/src/hotspot/cpu/arm/macroAssembler_arm.hpp b/src/hotspot/cpu/arm/macroAssembler_arm.hpp
index de40c5741a7..a07ca65d99e 100644
--- a/src/hotspot/cpu/arm/macroAssembler_arm.hpp
+++ b/src/hotspot/cpu/arm/macroAssembler_arm.hpp
@@ -222,14 +222,6 @@ class MacroAssembler: public Assembler {
// returning false to preserve all relocation information.
inline bool ignore_non_patchable_relocations() { return true; }
- // Initially added to the Assembler interface as a pure virtual:
- // RegisterConstant delayed_value(..)
- // for:
- // 6812678 macro assembler needs delayed binding of a few constants (for 6655638)
- // this was subsequently modified to its present name and return type
- virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, Register tmp, int offset);
-
-
void align(int modulus);
// Support for VM calls
diff --git a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp
index 7dd1f21a244..a4216785e4e 100644
--- a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp
+++ b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp
@@ -33,6 +33,7 @@
#include "memory/resourceArea.hpp"
#include "oops/compiledICHolder.hpp"
#include "oops/klass.inline.hpp"
+#include "prims/methodHandles.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/vframeArray.hpp"
diff --git a/src/hotspot/cpu/arm/templateTable_arm.cpp b/src/hotspot/cpu/arm/templateTable_arm.cpp
index a27bd25557c..d0bcfccbb8d 100644
--- a/src/hotspot/cpu/arm/templateTable_arm.cpp
+++ b/src/hotspot/cpu/arm/templateTable_arm.cpp
@@ -2101,7 +2101,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
const Address mask(Rcounters, in_bytes(MethodCounters::backedge_mask_offset()));
__ increment_mask_and_jump(Address(Rcounters, be_offset), increment, mask,
Rcnt, R4_tmp, eq, &backedge_counter_overflow);
- } else {
+ } else { // not TieredCompilation
// Increment backedge counter in MethodCounters*
__ get_method_counters(Rmethod, Rcounters, dispatch, true /*saveRegs*/,
Rdisp, R3_bytecode,
@@ -2166,7 +2166,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
__ dispatch_only(vtos, true);
if (UseLoopCounter) {
- if (ProfileInterpreter) {
+ if (ProfileInterpreter && !TieredCompilation) {
// Out-of-line code to allocate method data oop.
__ bind(profile_method);
diff --git a/src/hotspot/cpu/arm/vm_version_arm_32.cpp b/src/hotspot/cpu/arm/vm_version_arm_32.cpp
index 5331a20f2fe..e6fd8b98668 100644
--- a/src/hotspot/cpu/arm/vm_version_arm_32.cpp
+++ b/src/hotspot/cpu/arm/vm_version_arm_32.cpp
@@ -236,6 +236,11 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
}
+ if (UseSHA3Intrinsics) {
+ warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
+ FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
+ }
+
if (UseCRC32Intrinsics) {
if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
warning("CRC32 intrinsics are not available on this CPU");
diff --git a/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp b/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp
index b13e18efc12..6902c47d71b 100644
--- a/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp
+++ b/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp
@@ -38,6 +38,9 @@
#define __ ce->masm()->
+void C1SafepointPollStub::emit_code(LIR_Assembler* ce) {
+ ShouldNotReachHere();
+}
RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array)
: _index(index), _array(array), _throw_index_out_of_bounds_exception(false) {
diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
index 54e79f9d4bd..72adb74f4cc 100644
--- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
@@ -1324,7 +1324,7 @@ void LIR_Assembler::reg2mem(LIR_Opr from_reg, LIR_Opr dest, BasicType type,
}
-void LIR_Assembler::return_op(LIR_Opr result) {
+void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) {
const Register return_pc = R31; // Must survive C-call to enable_stack_reserved_zone().
const Register polling_page = R12;
diff --git a/src/hotspot/cpu/ppc/c1_globals_ppc.hpp b/src/hotspot/cpu/ppc/c1_globals_ppc.hpp
index 60b0005e034..f90c1e8b1d2 100644
--- a/src/hotspot/cpu/ppc/c1_globals_ppc.hpp
+++ b/src/hotspot/cpu/ppc/c1_globals_ppc.hpp
@@ -43,9 +43,7 @@ define_pd_global(bool, TieredCompilation, false);
define_pd_global(intx, CompileThreshold, 1000);
define_pd_global(intx, OnStackReplacePercentage, 1400);
-define_pd_global(bool, UseTLAB, true);
define_pd_global(bool, ProfileInterpreter, false);
-define_pd_global(bool, ResizeTLAB, true);
define_pd_global(uintx, ReservedCodeCacheSize, 32*M);
define_pd_global(uintx, NonProfiledCodeHeapSize, 13*M );
define_pd_global(uintx, ProfiledCodeHeapSize, 14*M );
diff --git a/src/hotspot/cpu/ppc/c2_globals_ppc.hpp b/src/hotspot/cpu/ppc/c2_globals_ppc.hpp
index 7a0c311e719..c576ddc95c4 100644
--- a/src/hotspot/cpu/ppc/c2_globals_ppc.hpp
+++ b/src/hotspot/cpu/ppc/c2_globals_ppc.hpp
@@ -51,8 +51,6 @@ define_pd_global(intx, INTPRESSURE, 26);
define_pd_global(intx, InteriorEntryAlignment, 16);
define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K));
define_pd_global(intx, RegisterCostAreaRatio, 16000);
-define_pd_global(bool, UseTLAB, true);
-define_pd_global(bool, ResizeTLAB, true);
define_pd_global(intx, LoopUnrollLimit, 60);
define_pd_global(intx, LoopPercentProfileLimit, 10);
diff --git a/src/hotspot/cpu/ppc/interp_masm_ppc.hpp b/src/hotspot/cpu/ppc/interp_masm_ppc.hpp
index d58740d5a74..67b18dc0e31 100644
--- a/src/hotspot/cpu/ppc/interp_masm_ppc.hpp
+++ b/src/hotspot/cpu/ppc/interp_masm_ppc.hpp
@@ -211,7 +211,7 @@ class InterpreterMacroAssembler: public MacroAssembler {
// Object locking
void lock_object (Register lock_reg, Register obj_reg);
- void unlock_object(Register lock_reg, bool check_for_exceptions = true);
+ void unlock_object(Register lock_reg);
// Interpreter profiling operations
void set_method_data_pointer_for_bcp();
diff --git a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
index 555cfd41418..292accb7852 100644
--- a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
+++ b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp
@@ -223,7 +223,7 @@ void InterpreterMacroAssembler::dispatch_Lbyte_code(TosState state, Register byt
address *sfpt_tbl = Interpreter::safept_table(state);
if (table != sfpt_tbl) {
Label dispatch;
- ld(R0, in_bytes(Thread::polling_page_offset()), R16_thread);
+ ld(R0, in_bytes(Thread::polling_word_offset()), R16_thread);
// Armed page has poll_bit set, if poll bit is cleared just continue.
andi_(R0, R0, SafepointMechanism::poll_bit());
beq(CCR0, dispatch);
@@ -878,8 +878,7 @@ void InterpreterMacroAssembler::remove_activation(TosState state,
//
void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
if (UseHeavyMonitors) {
- call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
- monitor, /*check_for_exceptions=*/true);
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), monitor);
} else {
// template code:
//
@@ -980,8 +979,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
// None of the above fast optimizations worked so we have to get into the
// slow case of monitor enter.
bind(slow_case);
- call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
- monitor, /*check_for_exceptions=*/true);
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), monitor);
// }
align(32, 12);
bind(done);
@@ -995,7 +993,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
// which must be initialized with the object to lock.
//
// Throw IllegalMonitorException if object is not locked by current thread.
-void InterpreterMacroAssembler::unlock_object(Register monitor, bool check_for_exceptions) {
+void InterpreterMacroAssembler::unlock_object(Register monitor) {
if (UseHeavyMonitors) {
call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), monitor);
} else {
@@ -2401,8 +2399,7 @@ void InterpreterMacroAssembler::notify_method_entry() {
lwz(R0, in_bytes(JavaThread::interp_only_mode_offset()), R16_thread);
cmpwi(CCR0, R0, 0);
beq(CCR0, jvmti_post_done);
- call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_entry),
- /*check_exceptions=*/true);
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_entry));
bind(jvmti_post_done);
}
@@ -2437,8 +2434,7 @@ void InterpreterMacroAssembler::notify_method_exit(bool is_native_method, TosSta
cmpwi(CCR0, R0, 0);
beq(CCR0, jvmti_post_done);
if (!is_native_method) { push(state); } // Expose tos to GC.
- call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit),
- /*check_exceptions=*/check_exceptions);
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit), check_exceptions);
if (!is_native_method) { pop(state); }
align(32, 12);
diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
index 3d3c39cf5d5..ca1c0c24987 100644
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp
@@ -382,25 +382,6 @@ AddressLiteral MacroAssembler::constant_oop_address(jobject obj) {
return AddressLiteral(address(obj), oop_Relocation::spec(oop_index));
}
-RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
- Register tmp, int offset) {
- intptr_t value = *delayed_value_addr;
- if (value != 0) {
- return RegisterOrConstant(value + offset);
- }
-
- // Load indirectly to solve generation ordering problem.
- // static address, no relocation
- int simm16_offset = load_const_optimized(tmp, delayed_value_addr, noreg, true);
- ld(tmp, simm16_offset, tmp); // must be aligned ((xa & 3) == 0)
-
- if (offset != 0) {
- addi(tmp, tmp, offset);
- }
-
- return RegisterOrConstant(tmp);
-}
-
#ifndef PRODUCT
void MacroAssembler::pd_print_patched_instruction(address branch) {
Unimplemented(); // TODO: PPC port
@@ -3044,7 +3025,7 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe
}
void MacroAssembler::safepoint_poll(Label& slow_path, Register temp_reg) {
- ld(temp_reg, in_bytes(Thread::polling_page_offset()), R16_thread);
+ ld(temp_reg, in_bytes(Thread::polling_word_offset()), R16_thread);
// Armed page has poll_bit set.
andi_(temp_reg, temp_reg, SafepointMechanism::poll_bit());
bne(CCR0, slow_path);
diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
index a8e43cabdc4..1859483c470 100644
--- a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
+++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp
@@ -152,12 +152,6 @@ class MacroAssembler: public Assembler {
// Same as load_address.
inline void set_oop (AddressLiteral obj_addr, Register d);
- // Read runtime constant: Issue load if constant not yet established,
- // else use real constant.
- virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
- Register tmp,
- int offset);
-
//
// branch, jump
//
diff --git a/src/hotspot/cpu/ppc/nativeInst_ppc.cpp b/src/hotspot/cpu/ppc/nativeInst_ppc.cpp
index fbe956322a6..1134ed0366b 100644
--- a/src/hotspot/cpu/ppc/nativeInst_ppc.cpp
+++ b/src/hotspot/cpu/ppc/nativeInst_ppc.cpp
@@ -197,7 +197,11 @@ intptr_t NativeMovConstReg::data() const {
CodeBlob* cb = CodeCache::find_blob_unsafe(addr);
if (MacroAssembler::is_set_narrow_oop(addr, cb->content_begin())) {
narrowOop no = MacroAssembler::get_narrow_oop(addr, cb->content_begin());
- return cast_from_oop(CompressedOops::decode(no));
+ // We can reach here during GC with 'no' pointing to new object location
+ // while 'heap()->is_in' still reports false (e.g. with SerialGC).
+ // Therefore we use raw decoding.
+ if (CompressedOops::is_null(no)) return 0;
+ return cast_from_oop(CompressedOops::decode_raw(no));
} else {
assert(MacroAssembler::is_load_const_from_method_toc_at(addr), "must be load_const_from_pool");
diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad
index d9c7c350e8e..b8f4f26995f 100644
--- a/src/hotspot/cpu/ppc/ppc.ad
+++ b/src/hotspot/cpu/ppc/ppc.ad
@@ -535,9 +535,7 @@ alloc_class chunk4 (
// information in this architecture description.
// 1) reg_class inline_cache_reg ( as defined in frame section )
-// 2) reg_class compiler_method_reg ( as defined in frame section )
-// 2) reg_class interpreter_method_reg ( as defined in frame section )
-// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
+// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// ----------------------------
@@ -2064,103 +2062,88 @@ static int frame_slots_bias(int reg_enc, PhaseRegAlloc* ra_) {
}
const bool Matcher::match_rule_supported(int opcode) {
- if (!has_match_rule(opcode))
- return false;
+ if (!has_match_rule(opcode)) {
+ return false; // no match rule present
+ }
- bool ret_value = true;
switch (opcode) {
- case Op_SqrtD:
- return VM_Version::has_fsqrt();
- case Op_CountLeadingZerosI:
- case Op_CountLeadingZerosL:
- if (!UseCountLeadingZerosInstructionsPPC64)
- return false;
- break;
- case Op_CountTrailingZerosI:
- case Op_CountTrailingZerosL:
- if (!UseCountLeadingZerosInstructionsPPC64 &&
- !UseCountTrailingZerosInstructionsPPC64)
- return false;
- break;
-
- case Op_PopCountI:
- case Op_PopCountL:
- return (UsePopCountInstruction && VM_Version::has_popcntw());
-
- case Op_StrComp:
- return SpecialStringCompareTo;
- case Op_StrEquals:
- return SpecialStringEquals;
- case Op_StrIndexOf:
- case Op_StrIndexOfChar:
- return SpecialStringIndexOf;
- case Op_AddVB:
- case Op_AddVS:
- case Op_AddVI:
- case Op_AddVF:
- case Op_AddVD:
- case Op_SubVB:
- case Op_SubVS:
- case Op_SubVI:
- case Op_SubVF:
- case Op_SubVD:
- case Op_MulVS:
- case Op_MulVF:
- case Op_MulVD:
- case Op_DivVF:
- case Op_DivVD:
- case Op_AbsVF:
- case Op_AbsVD:
- case Op_NegVF:
- case Op_NegVD:
- case Op_SqrtVF:
- case Op_SqrtVD:
- case Op_AddVL:
- case Op_SubVL:
- case Op_MulVI:
- case Op_RoundDoubleModeV:
- return SuperwordUseVSX;
- case Op_PopCountVI:
- return (SuperwordUseVSX && UsePopCountInstruction);
- case Op_FmaVF:
- case Op_FmaVD:
- return (SuperwordUseVSX && UseFMA);
- case Op_Digit:
- return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isDigit);
- case Op_LowerCase:
- return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isLowerCase);
- case Op_UpperCase:
- return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isUpperCase);
- case Op_Whitespace:
- return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isWhitespace);
-
- case Op_CacheWB:
- case Op_CacheWBPreSync:
- case Op_CacheWBPostSync:
- if (!VM_Version::supports_data_cache_line_flush()) {
- ret_value = false;
- }
- break;
+ case Op_SqrtD:
+ return VM_Version::has_fsqrt();
+ case Op_CountLeadingZerosI:
+ case Op_CountLeadingZerosL:
+ return UseCountLeadingZerosInstructionsPPC64;
+ case Op_CountTrailingZerosI:
+ case Op_CountTrailingZerosL:
+ return (UseCountLeadingZerosInstructionsPPC64 || UseCountTrailingZerosInstructionsPPC64);
+ case Op_PopCountI:
+ case Op_PopCountL:
+ return (UsePopCountInstruction && VM_Version::has_popcntw());
+
+ case Op_AddVB:
+ case Op_AddVS:
+ case Op_AddVI:
+ case Op_AddVF:
+ case Op_AddVD:
+ case Op_SubVB:
+ case Op_SubVS:
+ case Op_SubVI:
+ case Op_SubVF:
+ case Op_SubVD:
+ case Op_MulVS:
+ case Op_MulVF:
+ case Op_MulVD:
+ case Op_DivVF:
+ case Op_DivVD:
+ case Op_AbsVF:
+ case Op_AbsVD:
+ case Op_NegVF:
+ case Op_NegVD:
+ case Op_SqrtVF:
+ case Op_SqrtVD:
+ case Op_AddVL:
+ case Op_SubVL:
+ case Op_MulVI:
+ case Op_RoundDoubleModeV:
+ return SuperwordUseVSX;
+ case Op_PopCountVI:
+ return (SuperwordUseVSX && UsePopCountInstruction);
+ case Op_FmaVF:
+ case Op_FmaVD:
+ return (SuperwordUseVSX && UseFMA);
+
+ case Op_Digit:
+ return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isDigit);
+ case Op_LowerCase:
+ return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isLowerCase);
+ case Op_UpperCase:
+ return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isUpperCase);
+ case Op_Whitespace:
+ return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isWhitespace);
+
+ case Op_CacheWB:
+ case Op_CacheWBPreSync:
+ case Op_CacheWBPostSync:
+ return VM_Version::supports_data_cache_line_flush();
}
- return ret_value; // Per default match rules are supported.
+ return true; // Per default match rules are supported.
}
const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
-
- // TODO
- // identify extra cases that we might want to provide match rules for
- // e.g. Op_ vector nodes and other intrinsics while guarding with vlen
- bool ret_value = match_rule_supported(opcode);
- // Add rules here.
-
- return ret_value; // Per default match rules are supported.
+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) {
+ return false;
+ }
+ return true; // Per default match rules are supported.
}
const bool Matcher::has_predicated_vectors(void) {
return false;
}
+bool Matcher::supports_vector_variable_shifts(void) {
+ return false; // not supported
+}
+
const int Matcher::float_pressure(int default_pressure_threshold) {
return default_pressure_threshold;
}
@@ -2340,10 +2323,6 @@ const bool Matcher::rematerialize_float_constants = false;
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;
-void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
- Unimplemented();
-}
-
// Advertise here if the CPU requires explicit rounding operations to implement strictfp mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;
@@ -3855,9 +3834,6 @@ frame %{
// Inline Cache Register or method for I2C.
inline_cache_reg(R19); // R19_method
- // Method Register when calling interpreter.
- interpreter_method_reg(R19); // R19_method
-
// Optional: name the operand used by cisc-spilling to access
// [stack_pointer + offset].
cisc_spilling_operand_name(indOffset);
@@ -3912,7 +3888,7 @@ frame %{
// The `sig' array is to be updated. sig[j] represents the location
// of the j-th argument, either a register or a stack slot.
- // Comment taken from i486.ad:
+ // Comment taken from x86_32.ad:
// Body of function which returns an integer array locating
// arguments either in registers or in stack slots. Passed an array
// of ideal registers called "sig" and a "length" count. Stack-slot
@@ -3924,7 +3900,7 @@ frame %{
SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
%}
- // Comment taken from i486.ad:
+ // Comment taken from x86_32.ad:
// Body of function which returns an integer array locating
// arguments either in registers or in stack slots. Passed an array
// of ideal registers called "sig" and a "length" count. Stack-slot
@@ -4765,20 +4741,6 @@ operand inline_cache_regP(iRegPdst reg) %{
interface(REG_INTER);
%}
-operand compiler_method_regP(iRegPdst reg) %{
- constraint(ALLOC_IN_RC(rscratch1_bits64_reg)); // compiler_method_reg
- match(reg);
- format %{ %}
- interface(REG_INTER);
-%}
-
-operand interpreter_method_regP(iRegPdst reg) %{
- constraint(ALLOC_IN_RC(r19_bits64_reg)); // interpreter_method_reg
- match(reg);
- format %{ %}
- interface(REG_INTER);
-%}
-
// Operands to remove register moves in unscaled mode.
// Match read/write registers with an EncodeP node if neither shift nor add are required.
operand iRegP2N(iRegPsrc reg) %{
@@ -6588,6 +6550,23 @@ instruct storeV16(indirect mem, vecX src) %{
ins_pipe(pipe_class_default);
%}
+// Reinterpret: only one vector size used: either L or X
+instruct reinterpretL(iRegLdst dst) %{
+ match(Set dst (VectorReinterpret dst));
+ ins_cost(0);
+ format %{ "reinterpret $dst" %}
+ ins_encode( /*empty*/ );
+ ins_pipe(pipe_class_empty);
+%}
+
+instruct reinterpretX(vecX dst) %{
+ match(Set dst (VectorReinterpret dst));
+ ins_cost(0);
+ format %{ "reinterpret $dst" %}
+ ins_encode( /*empty*/ );
+ ins_pipe(pipe_class_empty);
+%}
+
// Store Compressed Oop
instruct storeN(memory dst, iRegN_P2N src) %{
match(Set dst (StoreN dst src));
@@ -12618,9 +12597,10 @@ instruct indexOfChar_U(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
match(Set result (StrIndexOfChar (Binary haystack haycnt) ch));
effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U);
ins_cost(180);
- format %{ "String IndexOfChar $haystack[0..$haycnt], $ch"
+ format %{ "StringUTF16 IndexOfChar $haystack[0..$haycnt], $ch"
" -> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
ins_encode %{
__ string_indexof_char($result$$Register,
@@ -12631,6 +12611,25 @@ instruct indexOfChar_U(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
ins_pipe(pipe_class_compare);
%}
+instruct indexOfChar_L(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt,
+ iRegIsrc ch, iRegIdst tmp1, iRegIdst tmp2,
+ flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{
+ match(Set result (StrIndexOfChar (Binary haystack haycnt) ch));
+ effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr);
+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L);
+ ins_cost(180);
+
+ format %{ "StringLatin1 IndexOfChar $haystack[0..$haycnt], $ch"
+ " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %}
+ ins_encode %{
+ __ string_indexof_char($result$$Register,
+ $haystack$$Register, $haycnt$$Register,
+ $ch$$Register, 0 /* this is not used if the character is already in a register */,
+ $tmp1$$Register, $tmp2$$Register, true /*is_byte*/);
+ %}
+ ins_pipe(pipe_class_compare);
+%}
+
instruct indexOf_imm_U(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt,
iRegPsrc needle, uimmI15 needlecntImm,
iRegIdst tmp1, iRegIdst tmp2, iRegIdst tmp3, iRegIdst tmp4, iRegIdst tmp5,
diff --git a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
index add61ad738c..e8498ba0ed3 100644
--- a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
+++ b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp
@@ -35,6 +35,7 @@
#include "memory/resourceArea.hpp"
#include "oops/compiledICHolder.hpp"
#include "oops/klass.inline.hpp"
+#include "prims/methodHandles.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/vframeArray.hpp"
@@ -1529,156 +1530,6 @@ void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_ty
}
}
-static void save_or_restore_arguments(MacroAssembler* masm,
- const int stack_slots,
- const int total_in_args,
- const int arg_save_area,
- OopMap* map,
- VMRegPair* in_regs,
- BasicType* in_sig_bt) {
- // If map is non-NULL then the code should store the values,
- // otherwise it should load them.
- int slot = arg_save_area;
- // Save down double word first.
- for (int i = 0; i < total_in_args; i++) {
- if (in_regs[i].first()->is_FloatRegister() && in_sig_bt[i] == T_DOUBLE) {
- int offset = slot * VMRegImpl::stack_slot_size;
- slot += VMRegImpl::slots_per_word;
- assert(slot <= stack_slots, "overflow (after DOUBLE stack slot)");
- if (map != NULL) {
- __ stfd(in_regs[i].first()->as_FloatRegister(), offset, R1_SP);
- } else {
- __ lfd(in_regs[i].first()->as_FloatRegister(), offset, R1_SP);
- }
- } else if (in_regs[i].first()->is_Register() &&
- (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) {
- int offset = slot * VMRegImpl::stack_slot_size;
- if (map != NULL) {
- __ std(in_regs[i].first()->as_Register(), offset, R1_SP);
- if (in_sig_bt[i] == T_ARRAY) {
- map->set_oop(VMRegImpl::stack2reg(slot));
- }
- } else {
- __ ld(in_regs[i].first()->as_Register(), offset, R1_SP);
- }
- slot += VMRegImpl::slots_per_word;
- assert(slot <= stack_slots, "overflow (after LONG/ARRAY stack slot)");
- }
- }
- // Save or restore single word registers.
- for (int i = 0; i < total_in_args; i++) {
- if (in_regs[i].first()->is_Register()) {
- int offset = slot * VMRegImpl::stack_slot_size;
- // Value lives in an input register. Save it on stack.
- switch (in_sig_bt[i]) {
- case T_BOOLEAN:
- case T_CHAR:
- case T_BYTE:
- case T_SHORT:
- case T_INT:
- if (map != NULL) {
- __ stw(in_regs[i].first()->as_Register(), offset, R1_SP);
- } else {
- __ lwa(in_regs[i].first()->as_Register(), offset, R1_SP);
- }
- slot++;
- assert(slot <= stack_slots, "overflow (after INT or smaller stack slot)");
- break;
- case T_ARRAY:
- case T_LONG:
- // handled above
- break;
- case T_OBJECT:
- default: ShouldNotReachHere();
- }
- } else if (in_regs[i].first()->is_FloatRegister()) {
- if (in_sig_bt[i] == T_FLOAT) {
- int offset = slot * VMRegImpl::stack_slot_size;
- slot++;
- assert(slot <= stack_slots, "overflow (after FLOAT stack slot)");
- if (map != NULL) {
- __ stfs(in_regs[i].first()->as_FloatRegister(), offset, R1_SP);
- } else {
- __ lfs(in_regs[i].first()->as_FloatRegister(), offset, R1_SP);
- }
- }
- } else if (in_regs[i].first()->is_stack()) {
- if (in_sig_bt[i] == T_ARRAY && map != NULL) {
- int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
- map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
- }
- }
- }
-}
-
-// Check GCLocker::needs_gc and enter the runtime if it's true. This
-// keeps a new JNI critical region from starting until a GC has been
-// forced. Save down any oops in registers and describe them in an
-// OopMap.
-static void check_needs_gc_for_critical_native(MacroAssembler* masm,
- const int stack_slots,
- const int total_in_args,
- const int arg_save_area,
- OopMapSet* oop_maps,
- VMRegPair* in_regs,
- BasicType* in_sig_bt,
- Register tmp_reg ) {
- __ block_comment("check GCLocker::needs_gc");
- Label cont;
- __ lbz(tmp_reg, (RegisterOrConstant)(intptr_t)GCLocker::needs_gc_address());
- __ cmplwi(CCR0, tmp_reg, 0);
- __ beq(CCR0, cont);
-
- // Save down any values that are live in registers and call into the
- // runtime to halt for a GC.
- OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
- save_or_restore_arguments(masm, stack_slots, total_in_args,
- arg_save_area, map, in_regs, in_sig_bt);
-
- __ mr(R3_ARG1, R16_thread);
- __ set_last_Java_frame(R1_SP, noreg);
-
- __ block_comment("block_for_jni_critical");
- address entry_point = CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical);
-#if defined(ABI_ELFv2)
- __ call_c(entry_point, relocInfo::runtime_call_type);
-#else
- __ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, entry_point), relocInfo::runtime_call_type);
-#endif
- address start = __ pc() - __ offset(),
- calls_return_pc = __ last_calls_return_pc();
- oop_maps->add_gc_map(calls_return_pc - start, map);
-
- __ reset_last_Java_frame();
-
- // Reload all the register arguments.
- save_or_restore_arguments(masm, stack_slots, total_in_args,
- arg_save_area, NULL, in_regs, in_sig_bt);
-
- __ BIND(cont);
-
-#ifdef ASSERT
- if (StressCriticalJNINatives) {
- // Stress register saving.
- OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
- save_or_restore_arguments(masm, stack_slots, total_in_args,
- arg_save_area, map, in_regs, in_sig_bt);
- // Destroy argument registers.
- for (int i = 0; i < total_in_args; i++) {
- if (in_regs[i].first()->is_Register()) {
- const Register reg = in_regs[i].first()->as_Register();
- __ neg(reg, reg);
- } else if (in_regs[i].first()->is_FloatRegister()) {
- __ fneg(in_regs[i].first()->as_FloatRegister(), in_regs[i].first()->as_FloatRegister());
- }
- }
-
- save_or_restore_arguments(masm, stack_slots, total_in_args,
- arg_save_area, NULL, in_regs, in_sig_bt);
- }
-#endif
-}
-
static void move_ptr(MacroAssembler* masm, VMRegPair src, VMRegPair dst, Register r_caller_sp, Register r_temp) {
if (src.first()->is_stack()) {
if (dst.first()->is_stack()) {
@@ -1820,25 +1671,12 @@ static void gen_special_dispatch(MacroAssembler* masm,
// Critical native functions are a shorthand for the use of
// GetPrimtiveArrayCritical and disallow the use of any other JNI
// functions. The wrapper is expected to unpack the arguments before
-// passing them to the callee and perform checks before and after the
-// native call to ensure that they GCLocker
-// lock_critical/unlock_critical semantics are followed. Some other
-// parts of JNI setup are skipped like the tear down of the JNI handle
+// passing them to the callee. Critical native functions leave the state _in_Java,
+// since they cannot stop for GC.
+// Some other parts of JNI setup are skipped like the tear down of the JNI handle
// block and the check for pending exceptions it's impossible for them
// to be thrown.
//
-// They are roughly structured like this:
-// if (GCLocker::needs_gc())
-// SharedRuntime::block_for_jni_critical();
-// tranistion to thread_in_native
-// unpack arrray arguments and call native entry point
-// check for safepoint in progress
-// check if any thread suspend flags are set
-// call into JVM and possible unlock the JNI critical
-// if a GC was suppressed while in the critical native.
-// transition back to thread_in_Java
-// return to caller
-//
nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
const methodHandle& method,
int compile_id,
@@ -2145,11 +1983,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
OopMapSet *oop_maps = new OopMapSet();
OopMap *oop_map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
- if (is_critical_native) {
- check_needs_gc_for_critical_native(masm, stack_slots, total_in_args, oop_handle_slot_offset,
- oop_maps, in_regs, in_sig_bt, r_temp_1);
- }
-
// Move arguments from register/stack to register/stack.
// --------------------------------------------------------------------------
//
@@ -2350,18 +2183,19 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
__ bind(locked);
}
-
- // Publish thread state
- // --------------------------------------------------------------------------
-
// Use that pc we placed in r_return_pc a while back as the current frame anchor.
__ set_last_Java_frame(R1_SP, r_return_pc);
- // Transition from _thread_in_Java to _thread_in_native.
- __ li(R0, _thread_in_native);
- __ release();
- // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
- __ stw(R0, thread_(thread_state));
+ if (!is_critical_native) {
+ // Publish thread state
+ // --------------------------------------------------------------------------
+
+ // Transition from _thread_in_Java to _thread_in_native.
+ __ li(R0, _thread_in_native);
+ __ release();
+ // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
+ __ stw(R0, thread_(thread_state));
+ }
// The JNI call
@@ -2421,6 +2255,22 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
break;
}
+ Label after_transition;
+
+ // If this is a critical native, check for a safepoint or suspend request after the call.
+ // If a safepoint is needed, transition to native, then to native_trans to handle
+ // safepoints like the native methods that are not critical natives.
+ if (is_critical_native) {
+ Label needs_safepoint;
+ Register sync_state = r_temp_5;
+ __ safepoint_poll(needs_safepoint, sync_state);
+
+ Register suspend_flags = r_temp_6;
+ __ lwz(suspend_flags, thread_(suspend_flags));
+ __ cmpwi(CCR1, suspend_flags, 0);
+ __ beq(CCR1, after_transition);
+ __ bind(needs_safepoint);
+ }
// Publish thread state
// --------------------------------------------------------------------------
@@ -2448,7 +2298,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
// Block, if necessary, before resuming in _thread_in_Java state.
// In order for GC to work, don't clear the last_Java_sp until after blocking.
- Label after_transition;
{
Label no_block, sync;
@@ -2476,31 +2325,27 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
__ bind(sync);
__ isync();
- address entry_point = is_critical_native
- ? CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)
- : CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
+ address entry_point =
+ CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
save_native_result(masm, ret_type, workspace_slot_offset);
__ call_VM_leaf(entry_point, R16_thread);
restore_native_result(masm, ret_type, workspace_slot_offset);
- if (is_critical_native) {
- __ b(after_transition); // No thread state transition here.
- }
__ bind(no_block);
- }
- // Publish thread state.
- // --------------------------------------------------------------------------
+ // Publish thread state.
+ // --------------------------------------------------------------------------
- // Thread state is thread_in_native_trans. Any safepoint blocking has
- // already happened so we can now change state to _thread_in_Java.
+ // Thread state is thread_in_native_trans. Any safepoint blocking has
+ // already happened so we can now change state to _thread_in_Java.
- // Transition from _thread_in_native_trans to _thread_in_Java.
- __ li(R0, _thread_in_Java);
- __ lwsync(); // Acquire safepoint and suspend state, release thread state.
- // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
- __ stw(R0, thread_(thread_state));
- __ bind(after_transition);
+ // Transition from _thread_in_native_trans to _thread_in_Java.
+ __ li(R0, _thread_in_Java);
+ __ lwsync(); // Acquire safepoint and suspend state, release thread state.
+ // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
+ __ stw(R0, thread_(thread_state));
+ __ bind(after_transition);
+ }
// Reguard any pages if necessary.
// --------------------------------------------------------------------------
@@ -2657,10 +2502,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
in_ByteSize(lock_offset),
oop_maps);
- if (is_critical_native) {
- nm->set_lazy_critical_native(true);
- }
-
return nm;
}
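Taken together, the sharedRuntime_ppc.cpp hunks drop the old check_needs_gc_for_critical_native / block_for_jni_critical machinery: a critical native now stays _thread_in_Java across the call and only walks the ordinary native_trans path if the safepoint poll or suspend_flags show pending work afterwards. A rough model of that control flow as ordinary C++ (FakeThread, its field names, and call_native are stand-ins, not HotSpot types):

```cpp
// Sketch of the new wrapper's decision flow for critical natives, under the
// assumptions stated above; the real code emits PPC assembly, not C++.
#include <iostream>

enum ThreadState { in_Java, in_native, in_native_trans };

struct FakeThread {
  bool safepoint_armed = false;   // stands in for the safepoint poll
  int  suspend_flags   = 0;       // stands in for JavaThread::suspend_flags
  ThreadState state    = in_Java;
};

void call_native() { /* the critical native body runs here */ }

void critical_native_wrapper(FakeThread& t) {
  // No transition before the call: critical natives cannot stop for GC.
  call_native();
  if (!t.safepoint_armed && t.suspend_flags == 0) {
    return;                       // fast path: stay _thread_in_Java
  }
  // Slow path: behave like a regular native method from here on.
  t.state = in_native;
  t.state = in_native_trans;
  // ... check_special_condition_for_native_trans() would run here ...
  t.state = in_Java;
}

int main() {
  FakeThread t;
  t.safepoint_armed = true;       // force the slow path for the demo
  critical_native_wrapper(t);
  std::cout << (t.state == in_Java ? "back in Java\n" : "unexpected\n");
}
```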
diff --git a/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp b/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp
index 994f0a93827..525e4f05255 100644
--- a/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp
+++ b/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp
@@ -1549,9 +1549,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
// Handle exceptions
if (synchronized) {
- // Don't check for exceptions since we're still in the i2n frame. Do that
- // manually afterwards.
- __ unlock_object(R26_monitor, false); // Can also unlock methods.
+ __ unlock_object(R26_monitor); // Can also unlock methods.
}
// Reset active handles after returning from native.
@@ -1592,9 +1590,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
BIND(exception_return_sync_check);
if (synchronized) {
- // Don't check for exceptions since we're still in the i2n frame. Do that
- // manually afterwards.
- __ unlock_object(R26_monitor, false); // Can also unlock methods.
+ __ unlock_object(R26_monitor); // Can also unlock methods.
}
BIND(exception_return_sync_check_already_unlocked);
@@ -2105,7 +2101,7 @@ void TemplateInterpreterGenerator::generate_throw_exception() {
// Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL.
__ ld(R4_ARG2, 0, R18_locals);
__ call_VM(R4_ARG2, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), R4_ARG2, R19_method, R14_bcp);
- __ restore_interpreter_state(R11_scratch1, /*bcp_and_mdx_only*/ true);
+
__ cmpdi(CCR0, R4_ARG2, 0);
__ beq(CCR0, L_done);
__ std(R4_ARG2, wordSize, R15_esp);
diff --git a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp
index e9ccfc7c481..cc341d83072 100644
--- a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp
+++ b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp
@@ -2173,7 +2173,7 @@ void TemplateTable::_return(TosState state) {
if (_desc->bytecode() != Bytecodes::_return_register_finalizer) {
Label no_safepoint;
- __ ld(R11_scratch1, in_bytes(Thread::polling_page_offset()), R16_thread);
+ __ ld(R11_scratch1, in_bytes(Thread::polling_word_offset()), R16_thread);
__ andi_(R11_scratch1, R11_scratch1, SafepointMechanism::poll_bit());
__ beq(CCR0, no_safepoint);
__ push(state);
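The switch from Thread::polling_page_offset() to Thread::polling_word_offset() here (and in the s390 files below) reflects that the safepoint check reads a per-thread word and tests SafepointMechanism::poll_bit(), rather than touching a guard page. A tiny sketch of that test; FakeJavaThread and the bit value are assumptions for illustration:

```cpp
// The generated code above does the same thing with ld + andi_ + beq.
#include <cassert>
#include <cstdint>

constexpr uintptr_t poll_bit = 1;      // assumed value of the armed bit

struct FakeJavaThread {
  uintptr_t polling_word = 0;          // 0 = disarmed, poll_bit = armed
};

bool should_stop_at_safepoint(const FakeJavaThread& t) {
  return (t.polling_word & poll_bit) != 0;
}

int main() {
  FakeJavaThread t;
  assert(!should_stop_at_safepoint(t));
  t.polling_word |= poll_bit;          // a safepoint/handshake arms the word
  assert(should_stop_at_safepoint(t));
}
```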
diff --git a/src/hotspot/cpu/ppc/vm_version_ppc.cpp b/src/hotspot/cpu/ppc/vm_version_ppc.cpp
index fd62cb5813a..f64999d108a 100644
--- a/src/hotspot/cpu/ppc/vm_version_ppc.cpp
+++ b/src/hotspot/cpu/ppc/vm_version_ppc.cpp
@@ -331,6 +331,11 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
}
+ if (UseSHA3Intrinsics) {
+ warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
+ FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
+ }
+
if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
FLAG_SET_DEFAULT(UseSHA, false);
}
diff --git a/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp b/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp
index 367d2a43af5..329c163f313 100644
--- a/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp
+++ b/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp
@@ -41,6 +41,10 @@
#undef CHECK_BAILOUT
#define CHECK_BAILOUT() { if (ce->compilation()->bailed_out()) return; }
+void C1SafepointPollStub::emit_code(LIR_Assembler* ce) {
+ ShouldNotReachHere();
+}
+
RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array)
: _index(index), _array(array), _throw_index_out_of_bounds_exception(false) {
assert(info != NULL, "must have info");
diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
index 24c8178f1dc..4c7dc79e5e7 100644
--- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp
@@ -1207,7 +1207,7 @@ void LIR_Assembler::reg2mem(LIR_Opr from, LIR_Opr dest_opr, BasicType type,
}
-void LIR_Assembler::return_op(LIR_Opr result) {
+void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) {
assert(result->is_illegal() ||
(result->is_single_cpu() && result->as_register() == Z_R2) ||
(result->is_double_cpu() && result->as_register_lo() == Z_R2) ||
diff --git a/src/hotspot/cpu/s390/c1_globals_s390.hpp b/src/hotspot/cpu/s390/c1_globals_s390.hpp
index 99e26e5e3f8..7fcb1ee0617 100644
--- a/src/hotspot/cpu/s390/c1_globals_s390.hpp
+++ b/src/hotspot/cpu/s390/c1_globals_s390.hpp
@@ -43,9 +43,7 @@ define_pd_global(bool, TieredCompilation, false);
define_pd_global(intx, CompileThreshold, 1000);
define_pd_global(intx, OnStackReplacePercentage, 1400);
-define_pd_global(bool, UseTLAB, true);
define_pd_global(bool, ProfileInterpreter, false);
-define_pd_global(bool, ResizeTLAB, true);
define_pd_global(uintx, ReservedCodeCacheSize, 32*M);
define_pd_global(uintx, NonProfiledCodeHeapSize, 13*M);
define_pd_global(uintx, ProfiledCodeHeapSize, 14*M);
diff --git a/src/hotspot/cpu/s390/c2_globals_s390.hpp b/src/hotspot/cpu/s390/c2_globals_s390.hpp
index 2f44fa73a2e..64d5585d616 100644
--- a/src/hotspot/cpu/s390/c2_globals_s390.hpp
+++ b/src/hotspot/cpu/s390/c2_globals_s390.hpp
@@ -51,8 +51,6 @@ define_pd_global(intx, INTPRESSURE, 10); // Medium size registe
define_pd_global(intx, InteriorEntryAlignment, 2);
define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K));
define_pd_global(intx, RegisterCostAreaRatio, 12000);
-define_pd_global(bool, UseTLAB, true);
-define_pd_global(bool, ResizeTLAB, true);
define_pd_global(intx, LoopUnrollLimit, 60);
define_pd_global(intx, LoopPercentProfileLimit, 10);
define_pd_global(intx, MinJumpTableSize, 18);
diff --git a/src/hotspot/cpu/s390/interp_masm_s390.cpp b/src/hotspot/cpu/s390/interp_masm_s390.cpp
index d612d528c51..4f44359b04d 100644
--- a/src/hotspot/cpu/s390/interp_masm_s390.cpp
+++ b/src/hotspot/cpu/s390/interp_masm_s390.cpp
@@ -121,7 +121,7 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, address* table, bo
address *sfpt_tbl = Interpreter::safept_table(state);
if (table != sfpt_tbl) {
Label dispatch;
- const Address poll_byte_addr(Z_thread, in_bytes(Thread::polling_page_offset()) + 7 /* Big Endian */);
+ const Address poll_byte_addr(Z_thread, in_bytes(Thread::polling_word_offset()) + 7 /* Big Endian */);
// Armed page has poll_bit set, if poll bit is cleared just continue.
z_tm(poll_byte_addr, SafepointMechanism::poll_bit());
z_braz(dispatch);
@@ -969,8 +969,7 @@ void InterpreterMacroAssembler::remove_activation(TosState state,
void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
if (UseHeavyMonitors) {
- call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
- monitor, /*check_for_exceptions=*/false);
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), monitor);
return;
}
@@ -1061,9 +1060,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
// None of the above fast optimizations worked so we have to get into the
// slow case of monitor enter.
bind(slow_case);
-
- call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
- monitor, /*check_for_exceptions=*/false);
+ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), monitor);
// }
diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.cpp b/src/hotspot/cpu/s390/macroAssembler_s390.cpp
index c71a15daa7c..d7c95ee96ee 100644
--- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp
+++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp
@@ -1810,34 +1810,6 @@ void MacroAssembler::c2bool(Register r, Register t) {
z_srl(r, 31); // Yields 0 if r was 0, 1 otherwise.
}
-RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
- Register tmp,
- int offset) {
- intptr_t value = *delayed_value_addr;
- if (value != 0) {
- return RegisterOrConstant(value + offset);
- }
-
- BLOCK_COMMENT("delayed_value {");
- // Load indirectly to solve generation ordering problem.
- load_absolute_address(tmp, (address) delayed_value_addr); // tmp = a;
- z_lg(tmp, 0, tmp); // tmp = *tmp;
-
-#ifdef ASSERT
- NearLabel L;
- compare64_and_branch(tmp, (intptr_t)0L, Assembler::bcondNotEqual, L);
- z_illtrap();
- bind(L);
-#endif
-
- if (offset != 0) {
- z_agfi(tmp, offset); // tmp = tmp + offset;
- }
-
- BLOCK_COMMENT("} delayed_value");
- return RegisterOrConstant(tmp);
-}
-
// Patch instruction `inst' at offset `inst_pos' to refer to `dest_pos'
// and return the resulting instruction.
// Dest_pos and inst_pos are 32 bit only. These parms can only designate
@@ -2680,7 +2652,7 @@ uint MacroAssembler::get_poll_register(address instr_loc) {
}
void MacroAssembler::safepoint_poll(Label& slow_path, Register temp_reg) {
- const Address poll_byte_addr(Z_thread, in_bytes(Thread::polling_page_offset()) + 7 /* Big Endian */);
+ const Address poll_byte_addr(Z_thread, in_bytes(Thread::polling_word_offset()) + 7 /* Big Endian */);
// Armed page has poll_bit set.
z_tm(poll_byte_addr, SafepointMechanism::poll_bit());
z_brnaz(slow_path);
diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.hpp b/src/hotspot/cpu/s390/macroAssembler_s390.hpp
index 41294b0fe87..113a1a3db2a 100644
--- a/src/hotspot/cpu/s390/macroAssembler_s390.hpp
+++ b/src/hotspot/cpu/s390/macroAssembler_s390.hpp
@@ -350,9 +350,6 @@ class MacroAssembler: public Assembler {
// Uses constant_metadata_address.
inline bool set_metadata_constant(Metadata* md, Register d);
- virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
- Register tmp,
- int offset);
//
// branch, jump
//
diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad
index bb98182d781..de1565194ed 100644
--- a/src/hotspot/cpu/s390/s390.ad
+++ b/src/hotspot/cpu/s390/s390.ad
@@ -278,9 +278,7 @@ alloc_class chunk2(
// information in this architecture description.
// 1) reg_class inline_cache_reg (as defined in frame section)
-// 2) reg_class compiler_method_reg (as defined in frame section)
-// 2) reg_class interpreter_method_reg (as defined in frame section)
-// 3) reg_class stack_slots(/* one chunk of stack-based "registers" */)
+// 2) reg_class stack_slots(/* one chunk of stack-based "registers" */)
// Integer Register Classes
reg_class z_int_reg(
@@ -1513,66 +1511,38 @@ static Register reg_to_register_object(int register_encoding) {
}
const bool Matcher::match_rule_supported(int opcode) {
- if (!has_match_rule(opcode)) return false;
+ if (!has_match_rule(opcode)) {
+ return false; // no match rule present
+ }
switch (opcode) {
- case Op_CountLeadingZerosI:
- case Op_CountLeadingZerosL:
- case Op_CountTrailingZerosI:
- case Op_CountTrailingZerosL:
- // Implementation requires FLOGR instruction, which is available since z9.
- return true;
-
case Op_ReverseBytesI:
case Op_ReverseBytesL:
return UseByteReverseInstruction;
-
- // PopCount supported by H/W from z/Architecture G5 (z196) on.
case Op_PopCountI:
case Op_PopCountL:
- return UsePopCountInstruction && VM_Version::has_PopCount();
-
- case Op_StrComp:
- return SpecialStringCompareTo;
- case Op_StrEquals:
- return SpecialStringEquals;
- case Op_StrIndexOf:
- case Op_StrIndexOfChar:
- return SpecialStringIndexOf;
-
- case Op_GetAndAddI:
- case Op_GetAndAddL:
- return true;
- // return VM_Version::has_AtomicMemWithImmALUOps();
- case Op_GetAndSetI:
- case Op_GetAndSetL:
- case Op_GetAndSetP:
- case Op_GetAndSetN:
- return true; // General CAS implementation, always available.
-
- default:
- return true; // Per default match rules are supported.
- // BUT: make sure match rule is not disabled by a false predicate!
+ // PopCount supported by H/W from z/Architecture G5 (z196) on.
+ return (UsePopCountInstruction && VM_Version::has_PopCount());
}
- return true; // Per default match rules are supported.
- // BUT: make sure match rule is not disabled by a false predicate!
+ return true; // Per default match rules are supported.
}
const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
- // TODO
- // Identify extra cases that we might want to provide match rules for
- // e.g. Op_ vector nodes and other intrinsics while guarding with vlen.
- bool ret_value = match_rule_supported(opcode);
- // Add rules here.
-
- return ret_value; // Per default match rules are supported.
+ if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) {
+ return false;
+ }
+ return true; // Per default match rules are supported.
}
const bool Matcher::has_predicated_vectors(void) {
return false;
}
+bool Matcher::supports_vector_variable_shifts(void) {
+ return false; // not supported
+}
+
const int Matcher::float_pressure(int default_pressure_threshold) {
return default_pressure_threshold;
}
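The rewritten s390 Matcher::match_rule_supported() keeps the "supported by default" policy but lists only the opcodes that actually depend on a CPU feature, and match_rule_supported_vector() now also rejects unsupported vector sizes. A compact sketch of that pattern; the flag variables and the size check are placeholders, not the real VM_Version/flag queries:

```cpp
#include <cstdio>

enum Opcode { Op_PopCountI, Op_ReverseBytesI, Op_Other };

// Placeholders for UseByteReverseInstruction, UsePopCountInstruction, etc.
bool use_byte_reverse = true;
bool use_popcount     = true;
bool has_popcount     = true;

bool match_rule_supported(Opcode op) {
  switch (op) {
  case Op_ReverseBytesI: return use_byte_reverse;
  case Op_PopCountI:     return use_popcount && has_popcount;
  default:               return true;   // match rules supported by default
  }
}

bool vector_size_supported(int vlen) { return vlen == 8 || vlen == 16; } // assumed

bool match_rule_supported_vector(Opcode op, int vlen) {
  return match_rule_supported(op) && vector_size_supported(vlen);
}

int main() {
  std::printf("%d %d\n", (int)match_rule_supported(Op_PopCountI),
              (int)match_rule_supported_vector(Op_Other, 3));   // prints "1 0"
}
```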
@@ -2462,12 +2432,6 @@ frame %{
// Tos is loaded in run_compiled_code to Z_ARG5=Z_R6.
// interpreter_arg_ptr_reg(Z_R6);
- // Temporary in compiled entry-points
- // compiler_method_reg(Z_R1);//Z_R1_scratch
-
- // Method Register when calling interpreter
- interpreter_method_reg(Z_R9);//Z_method
-
// Optional: name the operand used by cisc-spilling to access
// [stack_pointer + offset].
cisc_spilling_operand_name(indOffset12);
@@ -3531,20 +3495,6 @@ operand inline_cache_regP(iRegP reg) %{
interface(REG_INTER);
%}
-operand compiler_method_regP(iRegP reg) %{
- constraint(ALLOC_IN_RC(z_r1_RegP)); // compiler_method_reg
- match(reg);
- format %{ %}
- interface(REG_INTER);
-%}
-
-operand interpreter_method_regP(iRegP reg) %{
- constraint(ALLOC_IN_RC(z_r9_regP)); // interpreter_method_reg
- match(reg);
- format %{ %}
- interface(REG_INTER);
-%}
-
// Operands to remove register moves in unscaled mode.
// Match read/write registers with an EncodeP node if neither shift nor add are required.
operand iRegP2N(iRegP reg) %{
@@ -10172,8 +10122,9 @@ instruct string_compareUL(iRegP str1, iRegP str2, rarg2RegI cnt1, rarg5RegI cnt2
instruct indexOfChar_U(iRegP haystack, iRegI haycnt, iRegI ch, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
match(Set result (StrIndexOfChar (Binary haystack haycnt) ch));
effect(TEMP_DEF result, TEMP evenReg, TEMP oddReg, KILL cr); // R0, R1 are killed, too.
+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U);
ins_cost(200);
- format %{ "String IndexOfChar [0..$haycnt]($haystack), $ch -> $result" %}
+ format %{ "StringUTF16 IndexOfChar [0..$haycnt]($haystack), $ch -> $result" %}
ins_encode %{
__ string_indexof_char($result$$Register,
$haystack$$Register, $haycnt$$Register,
@@ -10183,6 +10134,21 @@ instruct indexOfChar_U(iRegP haystack, iRegI haycnt, iRegI ch, iRegI result, rod
ins_pipe(pipe_class_dummy);
%}
+instruct indexOfChar_L(iRegP haystack, iRegI haycnt, iRegI ch, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
+ match(Set result (StrIndexOfChar (Binary haystack haycnt) ch));
+ effect(TEMP_DEF result, TEMP evenReg, TEMP oddReg, KILL cr); // R0, R1 are killed, too.
+ predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L);
+ ins_cost(200);
+ format %{ "StringLatin1 IndexOfChar [0..$haycnt]($haystack), $ch -> $result" %}
+ ins_encode %{
+ __ string_indexof_char($result$$Register,
+ $haystack$$Register, $haycnt$$Register,
+ $ch$$Register, 0 /* unused, ch is in register */,
+ $oddReg$$Register, $evenReg$$Register, true /*is_byte*/);
+ %}
+ ins_pipe(pipe_class_dummy);
+%}
+
instruct indexOf_imm1_U(iRegP haystack, iRegI haycnt, immP needle, immI_1 needlecnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{
match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt)));
effect(TEMP_DEF result, TEMP evenReg, TEMP oddReg, KILL cr); // R0, R1 are killed, too.
@@ -10809,7 +10775,7 @@ instruct Repl2F_imm0(iRegL dst, immFp0 src) %{
ins_pipe(pipe_class_dummy);
%}
-// Store
+// Load/Store vector
// Store Aligned Packed Byte register to memory (8 Bytes).
instruct storeA8B(memory mem, iRegL src) %{
@@ -10823,8 +10789,6 @@ instruct storeA8B(memory mem, iRegL src) %{
ins_pipe(pipe_class_dummy);
%}
-// Load
-
instruct loadV8(iRegL dst, memory mem) %{
match(Set dst (LoadVector mem));
predicate(n->as_LoadVector()->memory_size() == 8);
@@ -10836,6 +10800,15 @@ instruct loadV8(iRegL dst, memory mem) %{
ins_pipe(pipe_class_dummy);
%}
+// Reinterpret: only one vector size used
+instruct reinterpret(iRegL dst) %{
+ match(Set dst (VectorReinterpret dst));
+ ins_cost(0);
+ format %{ "reinterpret $dst" %}
+ ins_encode( /*empty*/ );
+ ins_pipe(pipe_class_dummy);
+%}
+
//----------POPULATION COUNT RULES--------------------------------------------
// Byte reverse
diff --git a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp
index 48ac8ae443c..a0c46b182ff 100644
--- a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp
+++ b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp
@@ -35,6 +35,7 @@
#include "nativeInst_s390.hpp"
#include "oops/compiledICHolder.hpp"
#include "oops/klass.inline.hpp"
+#include "prims/methodHandles.hpp"
#include "registerSaver_s390.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
@@ -1284,163 +1285,6 @@ static void move32_64(MacroAssembler *masm,
}
}
-static void save_or_restore_arguments(MacroAssembler *masm,
- const int stack_slots,
- const int total_in_args,
- const int arg_save_area,
- OopMap *map,
- VMRegPair *in_regs,
- BasicType *in_sig_bt) {
-
- // If map is non-NULL then the code should store the values,
- // otherwise it should load them.
- int slot = arg_save_area;
- // Handle double words first.
- for (int i = 0; i < total_in_args; i++) {
- if (in_regs[i].first()->is_FloatRegister() && in_sig_bt[i] == T_DOUBLE) {
- int offset = slot * VMRegImpl::stack_slot_size;
- slot += VMRegImpl::slots_per_word;
- assert(slot <= stack_slots, "overflow (after DOUBLE stack slot)");
- const FloatRegister freg = in_regs[i].first()->as_FloatRegister();
- Address stackaddr(Z_SP, offset);
- if (map != NULL) {
- __ freg2mem_opt(freg, stackaddr);
- } else {
- __ mem2freg_opt(freg, stackaddr);
- }
- } else if (in_regs[i].first()->is_Register() &&
- (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) {
- int offset = slot * VMRegImpl::stack_slot_size;
- const Register reg = in_regs[i].first()->as_Register();
- if (map != NULL) {
- __ z_stg(reg, offset, Z_SP);
- if (in_sig_bt[i] == T_ARRAY) {
- map->set_oop(VMRegImpl::stack2reg(slot));
- }
- } else {
- __ z_lg(reg, offset, Z_SP);
- }
- slot += VMRegImpl::slots_per_word;
- assert(slot <= stack_slots, "overflow (after LONG/ARRAY stack slot)");
- }
- }
-
- // Save or restore single word registers.
- for (int i = 0; i < total_in_args; i++) {
- if (in_regs[i].first()->is_Register()) {
- int offset = slot * VMRegImpl::stack_slot_size;
- // Value lives in an input register. Save it on stack.
- switch (in_sig_bt[i]) {
- case T_BOOLEAN:
- case T_CHAR:
- case T_BYTE:
- case T_SHORT:
- case T_INT: {
- const Register reg = in_regs[i].first()->as_Register();
- Address stackaddr(Z_SP, offset);
- if (map != NULL) {
- __ z_st(reg, stackaddr);
- } else {
- __ z_lgf(reg, stackaddr);
- }
- slot++;
- assert(slot <= stack_slots, "overflow (after INT or smaller stack slot)");
- break;
- }
- case T_ARRAY:
- case T_LONG:
- // handled above
- break;
- case T_OBJECT:
- default: ShouldNotReachHere();
- }
- } else if (in_regs[i].first()->is_FloatRegister()) {
- if (in_sig_bt[i] == T_FLOAT) {
- int offset = slot * VMRegImpl::stack_slot_size;
- slot++;
- assert(slot <= stack_slots, "overflow (after FLOAT stack slot)");
- const FloatRegister freg = in_regs[i].first()->as_FloatRegister();
- Address stackaddr(Z_SP, offset);
- if (map != NULL) {
- __ freg2mem_opt(freg, stackaddr, false);
- } else {
- __ mem2freg_opt(freg, stackaddr, false);
- }
- }
- } else if (in_regs[i].first()->is_stack() &&
- in_sig_bt[i] == T_ARRAY && map != NULL) {
- int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
- map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
- }
- }
-}
-
-// Check GCLocker::needs_gc and enter the runtime if it's true. This
-// keeps a new JNI critical region from starting until a GC has been
-// forced. Save down any oops in registers and describe them in an OopMap.
-static void check_needs_gc_for_critical_native(MacroAssembler *masm,
- const int stack_slots,
- const int total_in_args,
- const int arg_save_area,
- OopMapSet *oop_maps,
- VMRegPair *in_regs,
- BasicType *in_sig_bt) {
- __ block_comment("check GCLocker::needs_gc");
- Label cont;
-
- // Check GCLocker::_needs_gc flag.
- __ load_const_optimized(Z_R1_scratch, (long) GCLocker::needs_gc_address());
- __ z_cli(0, Z_R1_scratch, 0);
- __ z_bre(cont);
-
- // Save down any values that are live in registers and call into the
- // runtime to halt for a GC.
- OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
-
- save_or_restore_arguments(masm, stack_slots, total_in_args,
- arg_save_area, map, in_regs, in_sig_bt);
- address the_pc = __ pc();
- __ set_last_Java_frame(Z_SP, noreg);
-
- __ block_comment("block_for_jni_critical");
- __ z_lgr(Z_ARG1, Z_thread);
-
- address entry_point = CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical);
- __ call_c(entry_point);
- oop_maps->add_gc_map(__ offset(), map);
-
- __ reset_last_Java_frame();
-
- // Reload all the register arguments.
- save_or_restore_arguments(masm, stack_slots, total_in_args,
- arg_save_area, NULL, in_regs, in_sig_bt);
-
- __ bind(cont);
-
- if (StressCriticalJNINatives) {
- // Stress register saving
- OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
- save_or_restore_arguments(masm, stack_slots, total_in_args,
- arg_save_area, map, in_regs, in_sig_bt);
-
- // Destroy argument registers.
- for (int i = 0; i < total_in_args; i++) {
- if (in_regs[i].first()->is_Register()) {
- // Don't set CC.
- __ clear_reg(in_regs[i].first()->as_Register(), true, false);
- } else {
- if (in_regs[i].first()->is_FloatRegister()) {
- FloatRegister fr = in_regs[i].first()->as_FloatRegister();
- __ z_lcdbr(fr, fr);
- }
- }
- }
-
- save_or_restore_arguments(masm, stack_slots, total_in_args,
- arg_save_area, NULL, in_regs, in_sig_bt);
- }
-}
-
static void move_ptr(MacroAssembler *masm,
VMRegPair src,
VMRegPair dst,
@@ -1857,12 +1701,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
OopMapSet *oop_maps = new OopMapSet();
OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
- if (is_critical_native) {
- check_needs_gc_for_critical_native(masm, stack_slots, total_in_args,
- oop_handle_slot_offset, oop_maps, in_regs, in_sig_bt);
- }
-
-
//////////////////////////////////////////////////////////////////////
//
// The Grand Shuffle
@@ -2091,9 +1929,10 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
// Use that pc we placed in Z_R10 a while back as the current frame anchor.
__ set_last_Java_frame(Z_SP, Z_R10);
- // Transition from _thread_in_Java to _thread_in_native.
- __ set_thread_state(_thread_in_native);
-
+ if (!is_critical_native) {
+ // Transition from _thread_in_Java to _thread_in_native.
+ __ set_thread_state(_thread_in_native);
+ }
//////////////////////////////////////////////////////////////////////
// This is the JNI call.
@@ -2139,6 +1978,19 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
break;
}
+ Label after_transition;
+
+ // If this is a critical native, check for a safepoint or suspend request after the call.
+ // If a safepoint is needed, transition to native, then to native_trans to handle
+ // safepoints like the native methods that are not critical natives.
+ if (is_critical_native) {
+ Label needs_safepoint;
+ // Does this need to call save_native_result and add fences?
+ __ safepoint_poll(needs_safepoint, Z_R1);
+ __ load_and_test_int(Z_R0, Address(Z_thread, JavaThread::suspend_flags_offset()));
+ __ z_bre(after_transition);
+ __ bind(needs_safepoint);
+ }
// Switch thread to "native transition" state before reading the synchronization state.
// This additional state is necessary because reading and testing the synchronization
@@ -2158,7 +2010,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
// Block, if necessary, before resuming in _thread_in_Java state.
// In order for GC to work, don't clear the last_Java_sp until after blocking.
//--------------------------------------------------------------------
- Label after_transition;
{
Label no_block, sync;
@@ -2180,15 +2031,10 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
__ bind(sync);
__ z_acquire();
- address entry_point = is_critical_native ? CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition)
- : CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
+ address entry_point = CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
__ call_VM_leaf(entry_point, Z_thread);
- if (is_critical_native) {
- restore_native_result(masm, ret_type, workspace_slot_offset);
- __ z_bru(after_transition); // No thread state transition here.
- }
__ bind(no_block);
restore_native_result(masm, ret_type, workspace_slot_offset);
}
@@ -2201,7 +2047,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
__ set_thread_state(_thread_in_Java);
__ bind(after_transition);
-
//--------------------------------------------------------------------
// Reguard any pages if necessary.
// Protect native result from being destroyed.
@@ -2384,10 +2229,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
in_ByteSize(lock_offset),
oop_maps);
- if (is_critical_native) {
- nm->set_lazy_critical_native(true);
- }
-
return nm;
}
diff --git a/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp b/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp
index 5d8b11332d8..e1862f11c49 100644
--- a/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp
+++ b/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp
@@ -856,7 +856,7 @@ void TemplateInterpreterGenerator::generate_stack_overflow_check(Register frame_
// Compute the beginning of the protected zone minus the requested frame size.
__ z_sgr(tmp1, tmp2);
- __ add2reg(tmp1, JavaThread::stack_guard_zone_size());
+ __ add2reg(tmp1, StackOverflow::stack_guard_zone_size());
// Add in the size of the frame (which is the same as subtracting it from the
// SP, which would take another register.
diff --git a/src/hotspot/cpu/s390/templateTable_s390.cpp b/src/hotspot/cpu/s390/templateTable_s390.cpp
index 9c372db9e78..7a4cf869c30 100644
--- a/src/hotspot/cpu/s390/templateTable_s390.cpp
+++ b/src/hotspot/cpu/s390/templateTable_s390.cpp
@@ -2007,7 +2007,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
// Out-of-line code runtime calls.
if (UseLoopCounter) {
- if (ProfileInterpreter) {
+ if (ProfileInterpreter && !TieredCompilation) {
// Out-of-line code to allocate method data oop.
__ bind(profile_method);
@@ -2377,7 +2377,7 @@ void TemplateTable::_return(TosState state) {
if (_desc->bytecode() != Bytecodes::_return_register_finalizer) {
Label no_safepoint;
- const Address poll_byte_addr(Z_thread, in_bytes(Thread::polling_page_offset()) + 7 /* Big Endian */);
+ const Address poll_byte_addr(Z_thread, in_bytes(Thread::polling_word_offset()) + 7 /* Big Endian */);
__ z_tm(poll_byte_addr, SafepointMechanism::poll_bit());
__ z_braz(no_safepoint);
__ push(state);
diff --git a/src/hotspot/cpu/s390/vm_version_s390.cpp b/src/hotspot/cpu/s390/vm_version_s390.cpp
index 3460a767fac..0a769c9401f 100644
--- a/src/hotspot/cpu/s390/vm_version_s390.cpp
+++ b/src/hotspot/cpu/s390/vm_version_s390.cpp
@@ -221,6 +221,11 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
}
+ if (UseSHA3Intrinsics) {
+ warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
+ FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
+ }
+
if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
FLAG_SET_DEFAULT(UseSHA, false);
}
@@ -831,7 +836,7 @@ void VM_Version::determine_features() {
code_end-code, cbuf_size, cbuf_size-(code_end-code));
// Use existing decode function. This enables the [MachCode] format which is needed to DecodeErrorFile.
- Disassembler::decode(&cbuf, code, code_end, tty);
+ Disassembler::decode(code, code_end, tty);
}
// Prepare for detection code execution and clear work buffer.
diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp
index ef04d33c7f4..3933bac000f 100644
--- a/src/hotspot/cpu/x86/assembler_x86.cpp
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp
@@ -984,6 +984,8 @@ address Assembler::locate_operand(address inst, WhichOperand which) {
case 0x61: // pcmpestri r, r/a, #8
case 0x70: // pshufd r, r/a, #8
case 0x73: // psrldq r, #8
+ case 0x1f: // evpcmpd/evpcmpq
+ case 0x3f: // evpcmpb/evpcmpw
tail_size = 1; // the imm8
break;
default:
@@ -1209,6 +1211,11 @@ void Assembler::addb(Address dst, int imm8) {
emit_int8(imm8);
}
+void Assembler::addw(Register dst, Register src) {
+ (void)prefix_and_encode(dst->encoding(), src->encoding());
+ emit_arith(0x03, 0xC0, dst, src);
+}
+
void Assembler::addw(Address dst, int imm16) {
InstructionMark im(this);
emit_int8(0x66);
@@ -1415,6 +1422,11 @@ void Assembler::vaesenclast(XMMRegister dst, XMMRegister nds, XMMRegister src, i
emit_int16((unsigned char)0xDD, (0xC0 | encode));
}
+void Assembler::andw(Register dst, Register src) {
+ (void)prefix_and_encode(dst->encoding(), src->encoding());
+ emit_arith(0x23, 0xC0, dst, src);
+}
+
void Assembler::andl(Address dst, int32_t imm32) {
InstructionMark im(this);
prefix(dst);
@@ -1783,6 +1795,13 @@ void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
emit_int16((unsigned char)0xE6, (0xC0 | encode));
}
+void Assembler::vcvtdq2pd(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xE6, (0xC0 | encode));
+}
+
void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -1790,6 +1809,13 @@ void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
emit_int16(0x5B, (0xC0 | encode));
}
+void Assembler::vcvtdq2ps(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x5B, (0xC0 | encode));
+}
+
void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
@@ -1912,18 +1938,18 @@ void Assembler::pabsd(XMMRegister dst, XMMRegister src) {
}
void Assembler::vpabsb(XMMRegister dst, XMMRegister src, int vector_len) {
- assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
- vector_len == AVX_256bit? VM_Version::supports_avx2() :
- vector_len == AVX_512bit? VM_Version::supports_avx512bw() : 0, "");
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
+ vector_len == AVX_256bit ? VM_Version::supports_avx2() :
+ vector_len == AVX_512bit ? VM_Version::supports_avx512bw() : false, "not supported");
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16(0x1C, (0xC0 | encode));
}
void Assembler::vpabsw(XMMRegister dst, XMMRegister src, int vector_len) {
- assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
- vector_len == AVX_256bit? VM_Version::supports_avx2() :
- vector_len == AVX_512bit? VM_Version::supports_avx512bw() : 0, "");
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
+ vector_len == AVX_256bit ? VM_Version::supports_avx2() :
+ vector_len == AVX_512bit ? VM_Version::supports_avx512bw() : false, "");
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16(0x1D, (0xC0 | encode));
@@ -1946,6 +1972,85 @@ void Assembler::evpabsq(XMMRegister dst, XMMRegister src, int vector_len) {
emit_int16(0x1F, (0xC0 | encode));
}
+void Assembler::vcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x5A, (0xC0 | encode));
+}
+
+void Assembler::vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ attributes.set_rex_vex_w_reverted();
+ emit_int16(0x5A, (0xC0 | encode));
+}
+
+void Assembler::evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x5B, (0xC0 | encode));
+}
+
+void Assembler::evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xE6, (0xC0 | encode));
+}
+
+void Assembler::evpmovwb(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2 && VM_Version::supports_avx512bw(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x30, (0xC0 | encode));
+}
+
+void Assembler::evpmovdw(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2, "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x33, (0xC0 | encode));
+}
+
+void Assembler::evpmovdb(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2, "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x31, (0xC0 | encode));
+}
+
+void Assembler::evpmovqd(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2, "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x35, (0xC0 | encode));
+}
+
+void Assembler::evpmovqb(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2, "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x32, (0xC0 | encode));
+}
+
+void Assembler::evpmovqw(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2, "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x34, (0xC0 | encode));
+}
+
void Assembler::decl(Address dst) {
// Don't use it directly. Use MacroAssembler::decrement() instead.
InstructionMark im(this);
@@ -2543,28 +2648,34 @@ void Assembler::vmovdqu(Address dst, XMMRegister src) {
}
// Move Unaligned EVEX enabled Vector (programmable : 8,16,32,64)
-void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, int vector_len) {
+void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
emit_int16(0x6F, (0xC0 | encode));
}
-void Assembler::evmovdqub(XMMRegister dst, Address src, int vector_len) {
+void Assembler::evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
emit_operand(dst, src);
}
-void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) {
+void Assembler::evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(src != xnoreg, "sanity");
InstructionMark im(this);
@@ -2572,132 +2683,234 @@ void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) {
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
emit_int8(0x7F);
emit_operand(src, dst);
}
-void Assembler::evmovdqub(XMMRegister dst, KRegister mask, Address src, int vector_len) {
+void Assembler::evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
assert(VM_Version::supports_avx512vlbw(), "");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
emit_operand(dst, src);
}
-void Assembler::evmovdquw(XMMRegister dst, Address src, int vector_len) {
+void Assembler::evmovdqu(XMMRegister dst, KRegister mask, Address src, int vector_len, int type) {
+ assert(VM_Version::supports_avx512vlbw(), "");
+ assert(type == T_BYTE || type == T_SHORT || type == T_CHAR || type == T_INT || type == T_LONG, "");
+ InstructionMark im(this);
+ bool wide = type == T_SHORT || type == T_CHAR || type == T_LONG;
+ int prefix = (type == T_BYTE || type == T_SHORT || type == T_CHAR) ? VEX_SIMD_F2 : VEX_SIMD_F3;
+ InstructionAttr attributes(vector_len, /* vex_w */ wide, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.set_is_evex_instruction();
+ vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
+ emit_int8(0x6F);
+ emit_operand(dst, src);
+}
+
+void Assembler::evmovdqu(Address dst, KRegister mask, XMMRegister src, int vector_len, int type) {
+ assert(VM_Version::supports_avx512vlbw(), "");
+ assert(src != xnoreg, "sanity");
+ assert(type == T_BYTE || type == T_SHORT || type == T_CHAR || type == T_INT || type == T_LONG, "");
+ InstructionMark im(this);
+ bool wide = type == T_SHORT || type == T_CHAR || type == T_LONG;
+ int prefix = (type == T_BYTE || type == T_SHORT || type == T_CHAR) ? VEX_SIMD_F2 : VEX_SIMD_F3;
+ InstructionAttr attributes(vector_len, /* vex_w */ wide, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+ attributes.reset_is_clear_context();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.set_is_evex_instruction();
+ vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
+ emit_int8(0x7F);
+ emit_operand(src, dst);
+}
+
+void Assembler::evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
emit_operand(dst, src);
}
-void Assembler::evmovdquw(XMMRegister dst, KRegister mask, Address src, int vector_len) {
+void Assembler::evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
assert(VM_Version::supports_avx512vlbw(), "");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
emit_operand(dst, src);
}
-void Assembler::evmovdquw(Address dst, XMMRegister src, int vector_len) {
+void Assembler::evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(src != xnoreg, "sanity");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_is_evex_instruction();
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
emit_int8(0x7F);
emit_operand(src, dst);
}
-void Assembler::evmovdquw(Address dst, KRegister mask, XMMRegister src, int vector_len) {
+void Assembler::evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_avx512vlbw(), "");
assert(src != xnoreg, "sanity");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
- attributes.reset_is_clear_context();
attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x7F);
emit_operand(src, dst);
}
void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
+ // Unmasked instruction
+ evmovdqul(dst, k0, src, /*merge*/ false, vector_len);
+}
+
+void Assembler::evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int16(0x6F, (0xC0 | encode));
}
void Assembler::evmovdqul(XMMRegister dst, Address src, int vector_len) {
+ // Unmasked instruction
+ evmovdqul(dst, k0, src, /*merge*/ false, vector_len);
+}
+
+void Assembler::evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true , /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false , /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+ attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
emit_operand(dst, src);
}
void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) {
+ // Unmasked instruction
+ evmovdqul(dst, k0, src, /*merge*/ true, vector_len);
+}
+
+void Assembler::evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(src != xnoreg, "sanity");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
- attributes.reset_is_clear_context();
+ attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x7F);
emit_operand(src, dst);
}
void Assembler::evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
+ // Unmasked instruction
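+ // A move between the same register is a no-op and is elided.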
+ if (dst->encoding() == src->encoding()) return;
+ evmovdquq(dst, k0, src, /*merge*/ false, vector_len);
+}
+
+void Assembler::evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
- InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int16(0x6F, (0xC0 | encode));
}
void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) {
+ // Unmasked instruction
+ evmovdquq(dst, k0, src, /*merge*/ false, vector_len);
+}
+
+void Assembler::evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+ attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
emit_operand(dst, src);
}
void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) {
+ // Unmasked instruction
+ evmovdquq(dst, k0, src, /*merge*/ true, vector_len);
+}
+
+void Assembler::evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(src != xnoreg, "sanity");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
- attributes.reset_is_clear_context();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
attributes.set_is_evex_instruction();
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
emit_int8(0x7F);
@@ -2775,6 +2988,29 @@ void Assembler::movq(Address dst, XMMRegister src) {
emit_operand(src, dst);
}
+void Assembler::movq(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_rex_vex_w_reverted();
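+ // 0F D6 is the store form of movq: the source XMM register is encoded in ModRM.reg and the
+ // destination in ModRM.rm, hence the swapped operand order below.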
+ int encode = simd_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xD6, (0xC0 | encode));
+}
+
+void Assembler::movq(Register dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ // swap src/dst to get correct prefix
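+ // (0F 7E encodes the XMM source in ModRM.reg and the general-purpose destination in ModRM.rm)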
+ int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x7E, (0xC0 | encode));
+}
+
+void Assembler::movq(XMMRegister dst, Register src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x6E, (0xC0 | encode));
+}
+
void Assembler::movsbl(Register dst, Address src) { // movsxb
InstructionMark im(this);
prefix(src, dst);
@@ -3274,6 +3510,11 @@ void Assembler::notl(Register dst) {
emit_int16((unsigned char)0xF7, (0xD0 | encode));
}
+void Assembler::orw(Register dst, Register src) {
+ (void)prefix_and_encode(dst->encoding(), src->encoding());
+ emit_arith(0x0B, 0xC0, dst, src);
+}
+
void Assembler::orl(Address dst, int32_t imm32) {
InstructionMark im(this);
prefix(dst);
@@ -3312,6 +3553,34 @@ void Assembler::orb(Address dst, int imm8) {
emit_int8(imm8);
}
+void Assembler::packsswb(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x63, (0xC0 | encode));
+}
+
+void Assembler::vpacksswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 0, "some form of AVX must be enabled");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x63, (0xC0 | encode));
+}
+
+void Assembler::packssdw(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse2(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x6B, (0xC0 | encode));
+}
+
+void Assembler::vpackssdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 0, "some form of AVX must be enabled");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x6B, (0xC0 | encode));
+}
+
void Assembler::packuswb(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
@@ -3337,21 +3606,74 @@ void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int
emit_int16(0x67, (0xC0 | encode));
}
+void Assembler::packusdw(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x2B, (0xC0 | encode));
+}
+
+void Assembler::vpackusdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 0, "some form of AVX must be enabled");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x2B, (0xC0 | encode));
+}
+
void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
assert(VM_Version::supports_avx2(), "");
+ assert(vector_len != AVX_128bit, "");
+ // VEX.256.66.0F3A.W1 00 /r ib
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int24(0x00, (0xC0 | encode), imm8);
}
void Assembler::vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
- assert(UseAVX > 2, "requires AVX512F");
+ assert(vector_len == AVX_256bit ? VM_Version::supports_avx512vl() :
+ vector_len == AVX_512bit ? VM_Version::supports_evex() : false, "not supported");
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16(0x36, (0xC0 | encode));
}
+void Assembler::vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_avx512_vbmi(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16((unsigned char)0x8D, (0xC0 | encode));
+}
+
+void Assembler::vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx512vlbw() :
+ vector_len == AVX_256bit ? VM_Version::supports_avx512vlbw() :
+ vector_len == AVX_512bit ? VM_Version::supports_avx512bw() : false, "not supported");
+ InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16((unsigned char)0x8D, (0xC0 | encode));
+}
+
+void Assembler::vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(vector_len <= AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex(), "");
+ // VEX.NDS.256.66.0F38.W0 36 /r
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x36, (0xC0 | encode));
+}
+
+void Assembler::vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+ assert(vector_len <= AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex(), "");
+ // VEX.NDS.256.66.0F38.W0 36 /r
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8(0x36);
+ emit_operand(dst, src);
+}
+
void Assembler::vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
assert(VM_Version::supports_avx2(), "");
InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
@@ -3366,6 +3688,28 @@ void Assembler::vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, in
emit_int24(0x06, (0xC0 | encode), imm8);
}
+void Assembler::vpermilps(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
+ assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int24(0x04, (0xC0 | encode), imm8);
+}
+
+void Assembler::vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
+ assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ attributes.set_rex_vex_w_reverted();
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int24(0x05, (0xC0 | encode), imm8);
+}
+
+void Assembler::vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
+ assert(vector_len <= AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int24(0x01, (0xC0 | encode), imm8);
+}
+
void Assembler::evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -3374,7 +3718,6 @@ void Assembler::evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int
emit_int16(0x76, (0xC0 | encode));
}
-
void Assembler::pause() {
emit_int16((unsigned char)0xF3, (unsigned char)0x90);
}
@@ -3408,9 +3751,18 @@ void Assembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
emit_int16(0x74, (0xC0 | encode));
}
+void Assembler::vpcmpCCbwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
+ assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(cond_encoding, (0xC0 | encode));
+}
+
// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
void Assembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
- assert(VM_Version::supports_avx(), "");
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
+ assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int16(0x74, (0xC0 | encode));
@@ -3497,7 +3849,7 @@ void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vect
void Assembler::evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
assert(VM_Version::supports_avx512vlbw(), "");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_reg_mask */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
attributes.reset_is_clear_context();
attributes.set_embedded_opmask_register_specifier(mask);
@@ -3517,7 +3869,8 @@ void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
void Assembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
- assert(VM_Version::supports_avx(), "");
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
+ assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int16(0x75, (0xC0 | encode));
@@ -3554,29 +3907,32 @@ void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) {
// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
void Assembler::vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
- assert(VM_Version::supports_avx(), "");
- InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
+ assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int16(0x76, (0xC0 | encode));
}
// In this context, kdst is written the mask used to process the equal components
-void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
+void Assembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), "");
- InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_is_evex_instruction();
attributes.reset_is_clear_context();
+ attributes.set_embedded_opmask_register_specifier(mask);
int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int16(0x76, (0xC0 | encode));
}
-void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
+void Assembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
- attributes.reset_is_clear_context();
attributes.set_is_evex_instruction();
+ attributes.reset_is_clear_context();
+ attributes.set_embedded_opmask_register_specifier(mask);
int dst_enc = kdst->encoding();
vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x76);
@@ -3591,6 +3947,13 @@ void Assembler::pcmpeqq(XMMRegister dst, XMMRegister src) {
emit_int16(0x29, (0xC0 | encode));
}
+void Assembler::vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(cond_encoding, (0xC0 | encode));
+}
+
// In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst
void Assembler::vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx(), "");
@@ -3623,11 +3986,36 @@ void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vect
emit_operand(as_Register(dst_enc), src);
}
-void Assembler::pmovmskb(Register dst, XMMRegister src) {
- assert(VM_Version::supports_sse2(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
- int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
- emit_int16((unsigned char)0xD7, (0xC0 | encode));
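+// evpmovd2m/evpmovq2m copy the sign bit of each element into the corresponding bit of the
+// destination opmask register.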
+void Assembler::evpmovd2m(KRegister kdst, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
+ assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(kdst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x39, (0xC0 | encode));
+}
+
+void Assembler::evpmovq2m(KRegister kdst, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "");
+ assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(kdst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x39, (0xC0 | encode));
+}
+
+void Assembler::pcmpgtq(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x37, (0xC0 | encode));
+}
+
+void Assembler::pmovmskb(Register dst, XMMRegister src) {
+ assert(VM_Version::supports_sse2(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xD7, (0xC0 | encode));
}
void Assembler::vpmovmskb(Register dst, XMMRegister src) {
@@ -3639,14 +4027,14 @@ void Assembler::vpmovmskb(Register dst, XMMRegister src) {
void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int24(0x16, (0xC0 | encode), imm8);
}
void Assembler::pextrd(Address dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x16);
@@ -3656,14 +4044,14 @@ void Assembler::pextrd(Address dst, XMMRegister src, int imm8) {
void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int24(0x16, (0xC0 | encode), imm8);
}
void Assembler::pextrq(Address dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x16);
@@ -3673,14 +4061,14 @@ void Assembler::pextrq(Address dst, XMMRegister src, int imm8) {
void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse2(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int24((unsigned char)0xC5, (0xC0 | encode), imm8);
}
void Assembler::pextrw(Address dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x15);
@@ -3688,9 +4076,16 @@ void Assembler::pextrw(Address dst, XMMRegister src, int imm8) {
emit_int8(imm8);
}
+void Assembler::pextrb(Register dst, XMMRegister src, int imm8) {
+ assert(VM_Version::supports_sse4_1(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int24(0x14, (0xC0 | encode), imm8);
+}
+
void Assembler::pextrb(Address dst, XMMRegister src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x14);
@@ -3700,14 +4095,14 @@ void Assembler::pextrb(Address dst, XMMRegister src, int imm8) {
void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int24(0x22, (0xC0 | encode), imm8);
}
void Assembler::pinsrd(XMMRegister dst, Address src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x22);
@@ -3715,16 +4110,23 @@ void Assembler::pinsrd(XMMRegister dst, Address src, int imm8) {
emit_int8(imm8);
}
+void Assembler::vpinsrd(XMMRegister dst, XMMRegister nds, Register src, int imm8) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int24(0x22, (0xC0 | encode), imm8);
+}
+
void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int24(0x22, (0xC0 | encode), imm8);
}
void Assembler::pinsrq(XMMRegister dst, Address src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x22);
@@ -3732,16 +4134,23 @@ void Assembler::pinsrq(XMMRegister dst, Address src, int imm8) {
emit_int8(imm8);
}
+void Assembler::vpinsrq(XMMRegister dst, XMMRegister nds, Register src, int imm8) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int24(0x22, (0xC0 | encode), imm8);
+}
+
void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
assert(VM_Version::supports_sse2(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int24((unsigned char)0xC4, (0xC0 | encode), imm8);
}
void Assembler::pinsrw(XMMRegister dst, Address src, int imm8) {
assert(VM_Version::supports_sse2(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xC4);
@@ -3749,9 +4158,16 @@ void Assembler::pinsrw(XMMRegister dst, Address src, int imm8) {
emit_int8(imm8);
}
+void Assembler::vpinsrw(XMMRegister dst, XMMRegister nds, Register src, int imm8) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int24((unsigned char)0xC4, (0xC0 | encode), imm8);
+}
+
void Assembler::pinsrb(XMMRegister dst, Address src, int imm8) {
assert(VM_Version::supports_sse4_1(), "");
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
emit_int8(0x20);
@@ -3759,6 +4175,34 @@ void Assembler::pinsrb(XMMRegister dst, Address src, int imm8) {
emit_int8(imm8);
}
+void Assembler::pinsrb(XMMRegister dst, Register src, int imm8) {
+ assert(VM_Version::supports_sse4_1(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int24(0x20, (0xC0 | encode), imm8);
+}
+
+void Assembler::vpinsrb(XMMRegister dst, XMMRegister nds, Register src, int imm8) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int24(0x20, (0xC0 | encode), imm8);
+}
+
+void Assembler::insertps(XMMRegister dst, XMMRegister src, int imm8) {
+ assert(VM_Version::supports_sse4_1(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int24(0x21, (0xC0 | encode), imm8);
+}
+
+void Assembler::vinsertps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int24(0x21, (0xC0 | encode), imm8);
+}
+
void Assembler::pmovzxbw(XMMRegister dst, Address src) {
assert(VM_Version::supports_sse4_1(), "");
InstructionMark im(this);
@@ -3783,6 +4227,41 @@ void Assembler::pmovsxbw(XMMRegister dst, XMMRegister src) {
emit_int16(0x20, (0xC0 | encode));
}
+void Assembler::pmovzxdq(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x35, (0xC0 | encode));
+}
+
+void Assembler::pmovsxbd(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x21, (0xC0 | encode));
+}
+
+void Assembler::pmovzxbd(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x31, (0xC0 | encode));
+}
+
+void Assembler::pmovsxbq(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x22, (0xC0 | encode));
+}
+
+void Assembler::pmovsxwd(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x23, (0xC0 | encode));
+}
+
void Assembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
assert(VM_Version::supports_avx(), "");
InstructionMark im(this);
@@ -3816,7 +4295,7 @@ void Assembler::evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vec
assert(VM_Version::supports_avx512vlbw(), "");
assert(dst != xnoreg, "sanity");
InstructionMark im(this);
- InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
@@ -3824,6 +4303,86 @@ void Assembler::evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vec
emit_int8(0x30);
emit_operand(dst, src);
}
+
+void Assembler::evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ // Encoding: EVEX.NDS.XXX.66.0F.W0 DB /r
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xDB, (0xC0 | encode));
+}
+
+void Assembler::vpmovzxdq(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(vector_len > AVX_128bit ? VM_Version::supports_avx2() : VM_Version::supports_avx(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x35, (0xC0 | encode));
+}
+
+void Assembler::vpmovzxbd(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(vector_len > AVX_128bit ? VM_Version::supports_avx2() : VM_Version::supports_avx(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x31, (0xC0 | encode));
+}
+
+void Assembler::vpmovzxbq(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(vector_len > AVX_128bit ? VM_Version::supports_avx2() : VM_Version::supports_avx(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x32, (0xC0 | encode));
+}
+
+void Assembler::vpmovsxbd(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
+ vector_len == AVX_256bit ? VM_Version::supports_avx2() :
+ VM_Version::supports_evex(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x21, (0xC0 | encode));
+}
+
+void Assembler::vpmovsxbq(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
+ vector_len == AVX_256bit ? VM_Version::supports_avx2() :
+ VM_Version::supports_evex(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x22, (0xC0 | encode));
+}
+
+void Assembler::vpmovsxwd(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
+ vector_len == AVX_256bit ? VM_Version::supports_avx2() :
+ VM_Version::supports_evex(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x23, (0xC0 | encode));
+}
+
+void Assembler::vpmovsxwq(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
+ vector_len == AVX_256bit ? VM_Version::supports_avx2() :
+ VM_Version::supports_evex(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x24, (0xC0 | encode));
+}
+
+void Assembler::vpmovsxdq(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
+ vector_len == AVX_256bit ? VM_Version::supports_avx2() :
+ VM_Version::supports_evex(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x25, (0xC0 | encode));
+}
+
void Assembler::evpmovwb(Address dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx512vlbw(), "");
assert(src != xnoreg, "sanity");
@@ -4050,6 +4609,14 @@ void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
emit_int8(mode & 0xFF);
}
+void Assembler::pshufhw(XMMRegister dst, XMMRegister src, int mode) {
+ assert(isByte(mode), "invalid value");
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes);
+ emit_int24(0x70, (0xC0 | encode), mode & 0xFF);
+}
+
void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
assert(isByte(mode), "invalid value");
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
@@ -4080,6 +4647,35 @@ void Assembler::evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, i
emit_int24(0x43, (0xC0 | encode), imm8 & 0xFF);
}
+void Assembler::pshufpd(XMMRegister dst, XMMRegister src, int imm8) {
+ assert(isByte(imm8), "invalid value");
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int24((unsigned char)0xC6, (0xC0 | encode), imm8 & 0xFF);
+}
+
+void Assembler::vpshufpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
+ InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_rex_vex_w_reverted();
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int24((unsigned char)0xC6, (0xC0 | encode), imm8 & 0xFF);
+}
+
+void Assembler::pshufps(XMMRegister dst, XMMRegister src, int imm8) {
+ assert(isByte(imm8), "invalid value");
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+ emit_int24((unsigned char)0xC6, (0xC0 | encode), imm8 & 0xFF);
+}
+
+void Assembler::vpshufps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+ emit_int24((unsigned char)0xC6, (0xC0 | encode), imm8 & 0xFF);
+}
+
void Assembler::psrldq(XMMRegister dst, int shift) {
// Shift left 128 bit value in dst XMMRegister by shift number of bytes.
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
@@ -4151,6 +4747,13 @@ void Assembler::vptest(XMMRegister dst, XMMRegister src) {
emit_int16(0x17, (0xC0 | encode));
}
+void Assembler::vptest(XMMRegister dst, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x17, (0xC0 | encode));
+}
+
void Assembler::punpcklbw(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
@@ -4881,6 +5484,11 @@ void Assembler::xorb(Register dst, Address src) {
emit_operand(dst, src);
}
+void Assembler::xorw(Register dst, Register src) {
+ (void)prefix_and_encode(dst->encoding(), src->encoding());
+ emit_arith(0x33, 0xC0, dst, src);
+}
+
// AVX 3-operands scalar float-point arithmetic instructions
void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
@@ -5794,6 +6402,13 @@ void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
emit_int16(0x40, (0xC0 | encode));
}
+void Assembler::pmuludq(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse2(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xF4, (0xC0 | encode));
+}
+
void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -5816,6 +6431,13 @@ void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int v
emit_int16(0x40, (0xC0 | encode));
}
+void Assembler::vpmuludq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xF4, (0xC0 | encode));
+}
+
void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionMark im(this);
@@ -5847,66 +6469,227 @@ void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vecto
emit_operand(dst, src);
}
-// Shift packed integers left by specified number of bits.
-void Assembler::psllw(XMMRegister dst, int shift) {
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+// Min, max
+void Assembler::pminsb(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
- // XMM6 is for /6 encoding: 66 0F 71 /6 ib
- int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
- emit_int24(0x71, (0xC0 | encode), shift & 0xFF);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x38, (0xC0 | encode));
}
-void Assembler::pslld(XMMRegister dst, int shift) {
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
- // XMM6 is for /6 encoding: 66 0F 72 /6 ib
- int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
- emit_int24(0x72, (0xC0 | encode), shift & 0xFF);
+void Assembler::vpminsb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
+ (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x38, (0xC0 | encode));
}
-void Assembler::psllq(XMMRegister dst, int shift) {
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
- // XMM6 is for /6 encoding: 66 0F 73 /6 ib
- int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
- emit_int24(0x73, (0xC0 | encode), shift & 0xFF);
+void Assembler::pminsw(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse2(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xEA, (0xC0 | encode));
}
-void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
- int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
- emit_int16((unsigned char)0xF1, (0xC0 | encode));
+void Assembler::vpminsw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
+ (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xEA, (0xC0 | encode));
}
-void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+void Assembler::pminsd(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
- int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
- emit_int16((unsigned char)0xF2, (0xC0 | encode));
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x39, (0xC0 | encode));
}
-void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
- InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
- attributes.set_rex_vex_w_reverted();
- int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
- emit_int16((unsigned char)0xF3, (0xC0 | encode));
+void Assembler::vpminsd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
+ (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex()), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x39, (0xC0 | encode));
}
-void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
- assert(UseAVX > 0, "requires some form of AVX");
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
- // XMM6 is for /6 encoding: 66 0F 71 /6 ib
- int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
- emit_int24(0x71, (0xC0 | encode), shift & 0xFF);
+void Assembler::vpminsq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2, "requires AVX512F");
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x39, (0xC0 | encode));
}
-void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
- assert(UseAVX > 0, "requires some form of AVX");
- NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+void Assembler::minps(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x5D, (0xC0 | encode));
+}
+void Assembler::vminps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(vector_len >= AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
- // XMM6 is for /6 encoding: 66 0F 72 /6 ib
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x5D, (0xC0 | encode));
+}
+
+void Assembler::minpd(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x5D, (0xC0 | encode));
+}
+void Assembler::vminpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(vector_len >= AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x5D, (0xC0 | encode));
+}
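+// Note: pminsd/vpminsd/vpminsq (0F 38 39) pick the per-lane signed-integer minimum,
+// while minps/minpd and vminps/vminpd (0F 5D) are the packed FP minimums; for the FP
+// forms the second source is returned when either input is NaN or both are zero,
+// following the SSE/AVX min/max convention. Illustrative call, registers arbitrary:
+//   vpminsd(xmm0, xmm1, xmm2, Assembler::AVX_256bit); // xmm0[i] = min(xmm1[i], xmm2[i])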
+
+void Assembler::pmaxsb(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x3C, (0xC0 | encode));
+}
+
+void Assembler::vpmaxsb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
+ (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x3C, (0xC0 | encode));
+}
+
+void Assembler::pmaxsw(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse2(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xEE, (0xC0 | encode));
+}
+
+void Assembler::vpmaxsw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
+ (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xEE, (0xC0 | encode));
+}
+
+void Assembler::pmaxsd(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x3D, (0xC0 | encode));
+}
+
+void Assembler::vpmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() :
+ (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex()), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x3D, (0xC0 | encode));
+}
+
+void Assembler::vpmaxsq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2, "requires AVX512F");
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x3D, (0xC0 | encode));
+}
+
+void Assembler::maxps(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x5F, (0xC0 | encode));
+}
+
+void Assembler::vmaxps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(vector_len >= AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x5F, (0xC0 | encode));
+}
+
+void Assembler::maxpd(XMMRegister dst, XMMRegister src) {
+ NOT_LP64(assert(VM_Version::supports_sse(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x5F, (0xC0 | encode));
+}
+
+void Assembler::vmaxpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(vector_len >= AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x5F, (0xC0 | encode));
+}
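+// Note: pmaxsb/pmaxsw/pmaxsd and the vpmaxs* forms are the signed-integer maximums;
+// maxps/maxpd and vmaxps/vmaxpd (0F 5F) are the packed FP maximums and mirror the
+// min emitters above, including the NaN/zero handling that favours the second source.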
+
+// Shift packed integers left by specified number of bits.
+void Assembler::psllw(XMMRegister dst, int shift) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ // XMM6 is for /6 encoding: 66 0F 71 /6 ib
+ int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int24(0x71, (0xC0 | encode), shift & 0xFF);
+}
+
+void Assembler::pslld(XMMRegister dst, int shift) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ // XMM6 is for /6 encoding: 66 0F 72 /6 ib
+ int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int24(0x72, (0xC0 | encode), shift & 0xFF);
+}
+
+void Assembler::psllq(XMMRegister dst, int shift) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ // XMM6 is for /6 encoding: 66 0F 73 /6 ib
+ int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int24(0x73, (0xC0 | encode), shift & 0xFF);
+}
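+// Note: for the immediate-count shifts the operation is selected by the ModRM reg
+// field (/6 = shift left), which is why xmm6 is passed as the first register to the
+// prefix helper; the register actually being shifted travels in the r/m field.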
+
+void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xF1, (0xC0 | encode));
+}
+
+void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xF2, (0xC0 | encode));
+}
+
+void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_rex_vex_w_reverted();
+ int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xF3, (0xC0 | encode));
+}
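+// Note: the XMMRegister-count forms (opcodes F1/F2/F3) shift every lane by the count
+// held in the low 64 bits of 'shift'; counts at or above the element width clear the
+// lane. Per-lane variable shifts are the vpsllv*/vpsrlv*/vpsrav* emitters further down.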
+
+void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
+ // XMM6 is for /6 encoding: 66 0F 71 /6 ib
+ int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int24(0x71, (0xC0 | encode), shift & 0xFF);
+}
+
+void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
+ assert(UseAVX > 0, "requires some form of AVX");
+ NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ // XMM6 is for /6 encoding: 66 0F 72 /6 ib
int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int24(0x72, (0xC0 | encode), shift & 0xFF);
}
@@ -6168,13 +6951,67 @@ void Assembler::vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve
emit_int16((unsigned char)0xDB, (0xC0 | encode));
}
+// Variable shift of packed integers, logical left.
+void Assembler::vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+ assert(UseAVX > 1, "requires AVX2");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x47, (0xC0 | encode));
+}
+
+void Assembler::vpsllvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+ assert(UseAVX > 1, "requires AVX2");
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x47, (0xC0 | encode));
+}
+
+// Variable shift of packed integers, logical right.
+void Assembler::vpsrlvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+ assert(UseAVX > 1, "requires AVX2");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x45, (0xC0 | encode));
+}
+
+void Assembler::vpsrlvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+ assert(UseAVX > 1, "requires AVX2");
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x45, (0xC0 | encode));
+}
+
+// Variable shift of packed integers, arithmetic right.
+void Assembler::vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+ assert(UseAVX > 1, "requires AVX2");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x46, (0xC0 | encode));
+}
+
+void Assembler::evpsravw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_avx512bw(), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x11, (0xC0 | encode));
+}
+
+void Assembler::evpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+ assert(UseAVX > 2, "requires AVX512");
+ assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires AVX512VL");
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x46, (0xC0 | encode));
+}
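+// Note: vpsllvd/vpsllvq, vpsrlvd/vpsrlvq and vpsravd (AVX2, 0F 38 47/45/46) shift each
+// lane by the count in the matching lane of 'shift'; evpsravw and evpsravq are the
+// AVX-512 (BW and F+VL) counterparts. Illustrative call, registers arbitrary:
+//   vpsravd(xmm0, xmm1, xmm2, Assembler::AVX_256bit); // xmm0[i] = xmm1[i] >> xmm2[i] (arithmetic)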
+
void Assembler::vpshldvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
assert(VM_Version::supports_avx512_vbmi2(), "requires vbmi2");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
- emit_int8(0x71);
- emit_int8((0xC0 | encode));
+ emit_int16(0x71, (0xC0 | encode));
}
void Assembler::vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
@@ -6200,7 +7037,6 @@ void Assembler::vpandn(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve
emit_int16((unsigned char)0xDF, (0xC0 | encode));
}
-
void Assembler::por(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -6233,6 +7069,35 @@ void Assembler::vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vec
}
+void Assembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ // Encoding: EVEX.NDS.XXX.66.0F.W0 EB /r
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xEB, (0xC0 | encode));
+}
+
+void Assembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ // Encoding: EVEX.NDS.XXX.66.0F.W0 EB /r
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
+ vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int8((unsigned char)0xEB);
+ emit_operand(dst, src);
+}
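+// Note: the evpord overloads are the masked (EVEX) form of por/vpor: lanes whose
+// opmask bit is clear keep their previous dst value when merge is true, and are
+// zeroed otherwise. The same merge/zero convention applies to the masked emitters below.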
+
void Assembler::pxor(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -6257,13 +7122,33 @@ void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_
emit_operand(dst, src);
}
+void Assembler::vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(UseAVX > 2, "requires some form of EVEX");
+ InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+ attributes.set_rex_vex_w_reverted();
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xEF, (0xC0 | encode));
+}
+
+void Assembler::evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ // Encoding: EVEX.NDS.XXX.66.0F.W0 EF /r
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16((unsigned char)0xEF, (0xC0 | encode));
+}
+
void Assembler::evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), "requires EVEX support");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_is_evex_instruction();
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
- emit_int8((unsigned char)0xEF);
- emit_int8((0xC0 | encode));
+ emit_int16((unsigned char)0xEF, (0xC0 | encode));
}
void Assembler::evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
@@ -6960,12 +7845,67 @@ void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int16(0x7C, (0xC0 | encode));
}
+
+void Assembler::vpgatherdd(XMMRegister dst, Address src, XMMRegister mask, int vector_len) {
+ assert(VM_Version::supports_avx2(), "");
+ assert(vector_len == Assembler::AVX_128bit || vector_len == Assembler::AVX_256bit, "");
+ assert(dst != xnoreg, "sanity");
+ assert(src.isxmmindex(),"expected to be xmm index");
+ assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
+ vex_prefix(src, mask->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8((unsigned char)0x90);
+ emit_operand(dst, src);
+}
+
+void Assembler::vpgatherdq(XMMRegister dst, Address src, XMMRegister mask, int vector_len) {
+ assert(VM_Version::supports_avx2(), "");
+ assert(vector_len == Assembler::AVX_128bit || vector_len == Assembler::AVX_256bit, "");
+ assert(dst != xnoreg, "sanity");
+ assert(src.isxmmindex(),"expected to be xmm index");
+ assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
+ vex_prefix(src, mask->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8((unsigned char)0x90);
+ emit_operand(dst, src);
+}
+
+void Assembler::vgatherdpd(XMMRegister dst, Address src, XMMRegister mask, int vector_len) {
+ assert(VM_Version::supports_avx2(), "");
+ assert(vector_len == Assembler::AVX_128bit || vector_len == Assembler::AVX_256bit, "");
+ assert(dst != xnoreg, "sanity");
+ assert(src.isxmmindex(),"expected to be xmm index");
+ assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
+ vex_prefix(src, mask->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8((unsigned char)0x92);
+ emit_operand(dst, src);
+}
+
+void Assembler::vgatherdps(XMMRegister dst, Address src, XMMRegister mask, int vector_len) {
+ assert(VM_Version::supports_avx2(), "");
+ assert(vector_len == Assembler::AVX_128bit || vector_len == Assembler::AVX_256bit, "");
+ assert(dst != xnoreg, "sanity");
+ assert(src.isxmmindex(),"expected to be xmm index");
+ assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
+ vex_prefix(src, mask->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8((unsigned char)0x92);
+ emit_operand(dst, src);
+}
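+// Note: in the AVX2 gathers the index vector is carried inside 'src' (src.xmmindex())
+// and 'mask' is an XMM register whose per-lane sign bits select the elements to load;
+// the hardware clears mask lanes as they complete, and dst must not alias the index
+// register (asserted above) since that combination is undefined (#UD).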
void Assembler::evpgatherdd(XMMRegister dst, KRegister mask, Address src, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(dst != xnoreg, "sanity");
+ assert(src.isxmmindex(),"expected to be xmm index");
+ assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
+ assert(mask != k0, "instruction will #UD if mask is in k0");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
- attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
attributes.reset_is_clear_context();
attributes.set_embedded_opmask_register_specifier(mask);
attributes.set_is_evex_instruction();
@@ -6974,6 +7914,116 @@ void Assembler::evpgatherdd(XMMRegister dst, KRegister mask, Address src, int ve
emit_int8((unsigned char)0x90);
emit_operand(dst, src);
}
+
+void Assembler::evpgatherdq(XMMRegister dst, KRegister mask, Address src, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ assert(dst != xnoreg, "sanity");
+ assert(src.isxmmindex(),"expected to be xmm index");
+ assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
+ assert(mask != k0, "instruction will #UD if mask is in k0");
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+ attributes.reset_is_clear_context();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.set_is_evex_instruction();
+ // swap src<->dst for encoding
+ vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8((unsigned char)0x90);
+ emit_operand(dst, src);
+}
+
+void Assembler::evgatherdpd(XMMRegister dst, KRegister mask, Address src, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ assert(dst != xnoreg, "sanity");
+ assert(src.isxmmindex(),"expected to be xmm index");
+ assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
+ assert(mask != k0, "instruction will #UD if mask is in k0");
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+ attributes.reset_is_clear_context();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.set_is_evex_instruction();
+ // swap src<->dst for encoding
+ vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8((unsigned char)0x92);
+ emit_operand(dst, src);
+}
+
+void Assembler::evgatherdps(XMMRegister dst, KRegister mask, Address src, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ assert(dst != xnoreg, "sanity");
+ assert(src.isxmmindex(),"expected to be xmm index");
+ assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same");
+ assert(mask != k0, "instruction will #UD if mask is in k0");
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+ attributes.reset_is_clear_context();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.set_is_evex_instruction();
+ // swap src<->dst for encoding
+ vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8((unsigned char)0x92);
+ emit_operand(dst, src);
+}
+
+void Assembler::evpscatterdd(Address dst, KRegister mask, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ assert(mask != k0, "instruction will #UD if mask is in k0");
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+ attributes.reset_is_clear_context();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.set_is_evex_instruction();
+ vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8((unsigned char)0xA0);
+ emit_operand(src, dst);
+}
+
+void Assembler::evpscatterdq(Address dst, KRegister mask, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ assert(mask != k0, "instruction will #UD if mask is in k0");
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+ attributes.reset_is_clear_context();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.set_is_evex_instruction();
+ vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8((unsigned char)0xA0);
+ emit_operand(src, dst);
+}
+
+void Assembler::evscatterdps(Address dst, KRegister mask, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ assert(mask != k0, "instruction will #UD if mask is in k0");
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+ attributes.reset_is_clear_context();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.set_is_evex_instruction();
+ vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8((unsigned char)0xA2);
+ emit_operand(src, dst);
+}
+
+void Assembler::evscatterdpd(Address dst, KRegister mask, XMMRegister src, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ assert(mask != k0, "instruction will #UD if mask is in k0");
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
+ attributes.reset_is_clear_context();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.set_is_evex_instruction();
+ vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int8((unsigned char)0xA2);
+ emit_operand(src, dst);
+}
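+// Note: the EVEX gather/scatter emitters take the lane selector in an opmask (k)
+// register rather than an XMM mask; k0 is rejected because it means "unmasked" in the
+// encoding, and completed lanes clear their bit in the mask as the access retires.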
// Carry-Less Multiplication Quadword
void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
assert(VM_Version::supports_clmul(), "");
@@ -7571,7 +8621,8 @@ void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, boo
// fourth EVEX.L'L for vector length : 0 is 128, 1 is 256, 2 is 512, currently we do not support 1024
byte4 |= ((_attributes->get_vector_len())& 0x3) << 5;
// last is EVEX.z for zero/merge actions
- if (_attributes->is_no_reg_mask() == false) {
+ if (_attributes->is_no_reg_mask() == false &&
+ _attributes->get_embedded_opmask_register_specifier() != 0) {
byte4 |= (_attributes->is_clear_context() ? EVEX_Z : 0);
}
@@ -7739,7 +8790,7 @@ void Assembler::vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
emit_int16(0x5D, (0xC0 | encode));
}
-void Assembler::cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
+void Assembler::vcmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
assert(VM_Version::supports_avx(), "");
assert(vector_len <= AVX_256bit, "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -7756,8 +8807,8 @@ void Assembler::blendvpb(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMM
emit_int24(0x4C, (0xC0 | encode), (0xF0 & src2_enc << 4));
}
-void Assembler::blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
- assert(VM_Version::supports_avx(), "");
+void Assembler::vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
+ assert(UseAVX > 0 && (vector_len == AVX_128bit || vector_len == AVX_256bit), "");
assert(vector_len <= AVX_256bit, "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@@ -7765,28 +8816,330 @@ void Assembler::blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMM
emit_int24(0x4B, (0xC0 | encode), (0xF0 & src2_enc << 4));
}
-void Assembler::cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
- assert(VM_Version::supports_avx(), "");
+void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
+ assert(VM_Version::supports_avx2(), "");
assert(vector_len <= AVX_256bit, "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
- int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
- emit_int24((unsigned char)0xC2, (0xC0 | encode), (0xF & cop));
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int24(0x02, (0xC0 | encode), (unsigned char)imm8);
}
-void Assembler::blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
+void Assembler::vcmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int comparison, int vector_len) {
assert(VM_Version::supports_avx(), "");
assert(vector_len <= AVX_256bit, "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+ emit_int24((unsigned char)0xC2, (0xC0 | encode), (unsigned char)comparison);
+}
+
+void Assembler::evcmpps(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ ComparisonPredicateFP comparison, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ // Encoding: EVEX.NDS.XXX.0F.W0 C2 /r ib
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.reset_is_clear_context();
+ int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+ emit_int24((unsigned char)0xC2, (0xC0 | encode), comparison);
+}
+
+void Assembler::evcmppd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ ComparisonPredicateFP comparison, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ // Encoding: EVEX.NDS.XXX.66.0F.W1 C2 /r ib
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.reset_is_clear_context();
+ int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int24((unsigned char)0xC2, (0xC0 | encode), comparison);
+}
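+// Note: vcmpps and the evcmpps/evcmppd forms take a predicate immediate (see
+// ComparisonPredicateFP in assembler_x86.hpp); the EVEX variants deposit the per-lane
+// results in an opmask register instead of a vector. Illustrative call, registers arbitrary:
+//   evcmpps(k2, k0, xmm1, xmm2, Assembler::EQ_OQ, Assembler::AVX_512bit);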
+
+void Assembler::blendvps(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
+ assert(UseAVX <= 0, "sse encoding is inconsistent with avx encoding");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x14, (0xC0 | encode));
+}
+
+void Assembler::blendvpd(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
+ assert(UseAVX <= 0, "sse encoding is inconsistent with avx encoding");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x15, (0xC0 | encode));
+}
+
+void Assembler::pblendvb(XMMRegister dst, XMMRegister src) {
+ assert(VM_Version::supports_sse4_1(), "");
+ assert(UseAVX <= 0, "sse encoding is inconsistent with avx encoding");
+ InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x10, (0xC0 | encode));
+}
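+// Note: the three SSE4.1 blends above use xmm0 as an implicit mask operand, which is
+// why they are limited to UseAVX <= 0; once AVX is on, the explicit-mask
+// vblendvps/vblendvpd/vpblendvb forms are emitted instead.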
+
+void Assembler::vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
+ assert(UseAVX > 0 && (vector_len == AVX_128bit || vector_len == AVX_256bit), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
int src2_enc = src2->encoding();
emit_int24(0x4A, (0xC0 | encode), (0xF0 & src2_enc << 4));
}
-void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
- assert(VM_Version::supports_avx2(), "");
- InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
+void Assembler::vblendps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
- emit_int24(0x02, (0xC0 | encode), (unsigned char)imm8);
+ emit_int24(0x0C, (0xC0 | encode), imm8);
+}
+
+void Assembler::vpcmpgtb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
+ assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x64, (0xC0 | encode));
+}
+
+void Assembler::vpcmpgtw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
+ assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x65, (0xC0 | encode));
+}
+
+void Assembler::vpcmpgtd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
+ assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+ emit_int16(0x66, (0xC0 | encode));
+}
+
+void Assembler::vpcmpgtq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+ assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), "");
+ assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x37, (0xC0 | encode));
+}
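+// Note: vpcmpgtb/w/d/q write all-ones or all-zeros lanes into a vector destination and
+// only exist as VEX encodings up to 256 bits; the EVEX comparisons below target an
+// opmask register instead, hence the vector_len asserts here.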
+
+void Assembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ int comparison, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
+ // Encoding: EVEX.NDS.XXX.66.0F3A.W0 1F /r ib
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.reset_is_clear_context();
+ int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int24(0x1F, (0xC0 | encode), comparison);
+}
+
+void Assembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
+ int comparison, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
+ // Encoding: EVEX.NDS.XXX.66.0F3A.W0 1F /r ib
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.reset_is_clear_context();
+ int dst_enc = kdst->encoding();
+ vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int8((unsigned char)0x1F);
+ emit_operand(as_Register(dst_enc), src);
+ emit_int8((unsigned char)comparison);
+}
+
+void Assembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ int comparison, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
+ // Encoding: EVEX.NDS.XXX.66.0F3A.W1 1F /r ib
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.reset_is_clear_context();
+ int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int24(0x1F, (0xC0 | encode), comparison);
+}
+
+void Assembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
+ int comparison, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
+ // Encoding: EVEX.NDS.XXX.66.0F3A.W1 1F /r ib
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.reset_is_clear_context();
+ int dst_enc = kdst->encoding();
+ vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int8((unsigned char)0x1F);
+ emit_operand(as_Register(dst_enc), src);
+ emit_int8((unsigned char)comparison);
+}
+
+void Assembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ int comparison, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ assert(VM_Version::supports_avx512bw(), "");
+ assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
+ // Encoding: EVEX.NDS.XXX.66.0F3A.W0 3F /r ib
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.reset_is_clear_context();
+ int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int24(0x3F, (0xC0 | encode), comparison);
+}
+
+void Assembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
+ int comparison, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ assert(VM_Version::supports_avx512bw(), "");
+ assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
+ // Encoding: EVEX.NDS.XXX.66.0F3A.W0 3F /r ib
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.reset_is_clear_context();
+ int dst_enc = kdst->encoding();
+ vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int8((unsigned char)0x3F);
+ emit_operand(as_Register(dst_enc), src);
+ emit_int8((unsigned char)comparison);
+}
+
+void Assembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ int comparison, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ assert(VM_Version::supports_avx512bw(), "");
+ assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
+ // Encoding: EVEX.NDS.XXX.66.0F3A.W1 3F /r ib
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.reset_is_clear_context();
+ int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int24(0x3F, (0xC0 | encode), comparison);
+}
+
+void Assembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
+ int comparison, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ assert(VM_Version::supports_avx512bw(), "");
+ assert(comparison >= Assembler::eq && comparison <= Assembler::_true, "");
+ // Encoding: EVEX.NDS.XXX.66.0F3A.W1 3F /r ib
+ InstructionMark im(this);
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ attributes.reset_is_clear_context();
+ int dst_enc = kdst->encoding();
+ vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ emit_int8((unsigned char)0x3F);
+ emit_operand(as_Register(dst_enc), src);
+ emit_int8((unsigned char)comparison);
+}
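+// Note: evpcmpb/w/d/q compare with a predicate from the integer ComparisonPredicate
+// enum and set one bit per lane in kdst, optionally gated by 'mask'. Illustrative call,
+// registers arbitrary:
+//   evpcmpd(k1, k0, xmm1, xmm2, Assembler::le, Assembler::AVX_512bit);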
+
+void Assembler::vpblendvb(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len) {
+ assert(VM_Version::supports_avx(), "");
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+ int mask_enc = mask->encoding();
+ emit_int24(0x4C, (0xC0 | encode), 0xF0 & mask_enc << 4);
+}
+
+void Assembler::evblendmpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ // Encoding: EVEX.NDS.XXX.66.0F38.W1 65 /r
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x65, (0xC0 | encode));
+}
+
+void Assembler::evblendmps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ // Encoding: EVEX.NDS.XXX.66.0F38.W0 65 /r
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x65, (0xC0 | encode));
+}
+
+void Assembler::evpblendmb (XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ assert(VM_Version::supports_avx512bw(), "");
+ // Encoding: EVEX.NDS.512.66.0F38.W0 66 /r
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x66, (0xC0 | encode));
+}
+
+void Assembler::evpblendmw (XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ assert(VM_Version::supports_avx512bw(), "");
+ // Encoding: EVEX.NDS.512.66.0F38.W1 66 /r
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x66, (0xC0 | encode));
+}
+
+void Assembler::evpblendmd (XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ // Encoding: EVEX.NDS.512.66.0F38.W0 64 /r
+ InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x64, (0xC0 | encode));
+}
+
+void Assembler::evpblendmq (XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
+ assert(VM_Version::supports_evex(), "");
+ // Encoding: EVEX.NDS.512.66.0F38.W1 64 /r
+ InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+ attributes.set_is_evex_instruction();
+ attributes.set_embedded_opmask_register_specifier(mask);
+ if (merge) {
+ attributes.reset_is_clear_context();
+ }
+ int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+ emit_int16(0x64, (0xC0 | encode));
}
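+// Note: the evblendm*/evpblendm* emitters are the AVX-512 masked selects: a set opmask
+// bit takes the lane from src, a clear bit keeps the lane from nds (merge) or zeroes it,
+// so these subsume the VEX blend-with-vector-mask forms on 512-bit vectors.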
void Assembler::shlxl(Register dst, Register src1, Register src2) {
@@ -7803,6 +9156,13 @@ void Assembler::shlxq(Register dst, Register src1, Register src2) {
emit_int16((unsigned char)0xF7, (0xC0 | encode));
}
+void Assembler::shrxq(Register dst, Register src1, Register src2) {
+ assert(VM_Version::supports_bmi2(), "");
+ InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
+ int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes);
+ emit_int16((unsigned char)0xF7, (0xC0 | encode));
+}
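+// Note: shrxq is the BMI2 flagless logical right shift (VEX.F2.0F38 F7); as with shlxq,
+// the count comes from src2 and the arithmetic flags are left untouched.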
+
#ifndef _LP64
void Assembler::incl(Register dst) {
@@ -8443,7 +9803,7 @@ void Assembler::cmpq(Register dst, int32_t imm32) {
void Assembler::cmpq(Address dst, Register src) {
InstructionMark im(this);
- emit_int16(get_prefixq(dst, src), 0x3B);
+ emit_int16(get_prefixq(dst, src), 0x39);
emit_operand(src, dst);
}
diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp
index 283285dc347..1d6eb41bd05 100644
--- a/src/hotspot/cpu/x86/assembler_x86.hpp
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp
@@ -588,6 +588,7 @@ class Assembler : public AbstractAssembler {
#endif
};
+ // Comparison predicates for integral types & FP types when using SSE
enum ComparisonPredicate {
eq = 0,
lt = 1,
@@ -599,6 +600,51 @@ class Assembler : public AbstractAssembler {
_true = 7
};
+ // Comparison predicates for FP types when using AVX
+ // O means ordered, U means unordered. With an ordered predicate, any comparison involving a NaN is false; with an unordered one, it is true.
+ // S means signaling, Q means non-signaling. A signaling predicate raises #IA when either operand is a NaN.
+ enum ComparisonPredicateFP {
+ EQ_OQ = 0,
+ LT_OS = 1,
+ LE_OS = 2,
+ UNORD_Q = 3,
+ NEQ_UQ = 4,
+ NLT_US = 5,
+ NLE_US = 6,
+ ORD_Q = 7,
+ EQ_UQ = 8,
+ NGE_US = 9,
+ NGT_US = 0xA,
+ FALSE_OQ = 0XB,
+ NEQ_OQ = 0xC,
+ GE_OS = 0xD,
+ GT_OS = 0xE,
+ TRUE_UQ = 0xF,
+ EQ_OS = 0x10,
+ LT_OQ = 0x11,
+ LE_OQ = 0x12,
+ UNORD_S = 0x13,
+ NEQ_US = 0x14,
+ NLT_UQ = 0x15,
+ NLE_UQ = 0x16,
+ ORD_S = 0x17,
+ EQ_US = 0x18,
+ NGE_UQ = 0x19,
+ NGT_UQ = 0x1A,
+ FALSE_OS = 0x1B,
+ NEQ_OS = 0x1C,
+ GE_OQ = 0x1D,
+ GT_OQ = 0x1E,
+ TRUE_US = 0x1F
+ };
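+ // Note: only the first eight predicates (EQ_OQ .. ORD_Q) fit the legacy SSE cmpps/cmppd
+ // immediate; the remaining values need the VEX/EVEX-encoded compares (vcmpps, evcmpps, ...).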
+
+ enum Width {
+ B = 0,
+ W = 1,
+ D = 2,
+ Q = 3
+ };
+
//---< calculate length of instruction >---
// As instruction size can't be found out easily on x86/x64,
// we just use '4' for len and maxlen.
@@ -794,7 +840,6 @@ class Assembler : public AbstractAssembler {
void decl(Register dst);
void decl(Address dst);
- void decq(Register dst);
void decq(Address dst);
void incl(Register dst);
@@ -879,6 +924,7 @@ class Assembler : public AbstractAssembler {
void popa_uncached();
#endif
void vzeroupper_uncached();
+ void decq(Register dst);
void pusha();
void popa();
@@ -918,6 +964,7 @@ class Assembler : public AbstractAssembler {
void adcq(Register dst, Register src);
void addb(Address dst, int imm8);
+ void addw(Register dst, Register src);
void addw(Address dst, int imm16);
void addl(Address dst, int32_t imm32);
@@ -968,6 +1015,8 @@ class Assembler : public AbstractAssembler {
void vaesdec(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vaesdeclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void andw(Register dst, Register src);
+
void andl(Address dst, int32_t imm32);
void andl(Register dst, int32_t imm32);
void andl(Register dst, Address src);
@@ -1093,9 +1142,11 @@ class Assembler : public AbstractAssembler {
// Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value
void cvtdq2pd(XMMRegister dst, XMMRegister src);
+ void vcvtdq2pd(XMMRegister dst, XMMRegister src, int vector_len);
// Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value
void cvtdq2ps(XMMRegister dst, XMMRegister src);
+ void vcvtdq2ps(XMMRegister dst, XMMRegister src, int vector_len);
// Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
void cvtss2sd(XMMRegister dst, XMMRegister src);
@@ -1111,8 +1162,25 @@ class Assembler : public AbstractAssembler {
void cvttss2sil(Register dst, XMMRegister src);
void cvttss2siq(Register dst, XMMRegister src);
+ // Convert vector double to int
void cvttpd2dq(XMMRegister dst, XMMRegister src);
+ // Convert between vector float and vector double
+ void vcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len);
+ void vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len);
+
+ // Convert vector long to vector FP
+ void evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len);
+ void evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len);
+
+ // Evex casts with truncation
+ void evpmovwb(XMMRegister dst, XMMRegister src, int vector_len);
+ void evpmovdw(XMMRegister dst, XMMRegister src, int vector_len);
+ void evpmovdb(XMMRegister dst, XMMRegister src, int vector_len);
+ void evpmovqd(XMMRegister dst, XMMRegister src, int vector_len);
+ void evpmovqb(XMMRegister dst, XMMRegister src, int vector_len);
+ void evpmovqw(XMMRegister dst, XMMRegister src, int vector_len);
+
//Abs of packed Integer values
void pabsb(XMMRegister dst, XMMRegister src);
void pabsw(XMMRegister dst, XMMRegister src);
@@ -1472,20 +1540,30 @@ class Assembler : public AbstractAssembler {
void vmovdqu(XMMRegister dst, XMMRegister src);
// Move Unaligned 512bit Vector
- void evmovdqub(Address dst, XMMRegister src, int vector_len);
- void evmovdqub(XMMRegister dst, Address src, int vector_len);
- void evmovdqub(XMMRegister dst, XMMRegister src, int vector_len);
- void evmovdqub(XMMRegister dst, KRegister mask, Address src, int vector_len);
- void evmovdquw(Address dst, XMMRegister src, int vector_len);
- void evmovdquw(Address dst, KRegister mask, XMMRegister src, int vector_len);
- void evmovdquw(XMMRegister dst, Address src, int vector_len);
- void evmovdquw(XMMRegister dst, KRegister mask, Address src, int vector_len);
+ void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len);
+ void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len);
+ void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len);
+ void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
+ void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len);
+ void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
+ void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len);
+ void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
void evmovdqul(Address dst, XMMRegister src, int vector_len);
void evmovdqul(XMMRegister dst, Address src, int vector_len);
void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len);
+ void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
+ void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
+ void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
void evmovdquq(Address dst, XMMRegister src, int vector_len);
void evmovdquq(XMMRegister dst, Address src, int vector_len);
void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len);
+ void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
+ void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
+ void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
+
+ // Generic move instructions.
+ void evmovdqu(Address dst, KRegister mask, XMMRegister src, int vector_len, int type);
+ void evmovdqu(XMMRegister dst, KRegister mask, Address src, int vector_len, int type);
// Move lower 64bit to high 64bit in 128bit register
void movlhps(XMMRegister dst, XMMRegister src);
@@ -1517,6 +1595,9 @@ class Assembler : public AbstractAssembler {
// Move Quadword
void movq(Address dst, XMMRegister src);
void movq(XMMRegister dst, Address src);
+ void movq(XMMRegister dst, XMMRegister src);
+ void movq(Register dst, XMMRegister src);
+ void movq(XMMRegister dst, Register src);
void movsbl(Register dst, Address src);
void movsbl(Register dst, Register src);
@@ -1597,6 +1678,8 @@ class Assembler : public AbstractAssembler {
void btrq(Address dst, int imm8);
#endif
+ void orw(Register dst, Register src);
+
void orl(Address dst, int32_t imm32);
void orl(Register dst, int32_t imm32);
void orl(Register dst, Address src);
@@ -1610,17 +1693,32 @@ class Assembler : public AbstractAssembler {
void orq(Register dst, Address src);
void orq(Register dst, Register src);
+ // Pack with signed saturation
+ void packsswb(XMMRegister dst, XMMRegister src);
+ void vpacksswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void packssdw(XMMRegister dst, XMMRegister src);
+ void vpackssdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+
// Pack with unsigned saturation
void packuswb(XMMRegister dst, XMMRegister src);
void packuswb(XMMRegister dst, Address src);
+ void packusdw(XMMRegister dst, XMMRegister src);
void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void vpackusdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
- // Pemutation of 64bit words
+ // Permutations
void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
void vpermq(XMMRegister dst, XMMRegister src, int imm8);
void vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+ void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
+ void vpermilps(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
+ void vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
+ void vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void pause();
@@ -1633,11 +1731,14 @@ class Assembler : public AbstractAssembler {
void pcmpestri(XMMRegister xmm1, Address src, int imm8);
void pcmpeqb(XMMRegister dst, XMMRegister src);
+ void vpcmpCCbwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);
+
void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
+ void vpcmpgtb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
void evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
@@ -1650,16 +1751,22 @@ class Assembler : public AbstractAssembler {
void evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len);
+ void vpcmpgtw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+
void pcmpeqd(XMMRegister dst, XMMRegister src);
void vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
- void evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
- void evpcmpeqd(KRegister kdst, XMMRegister nds, Address src, int vector_len);
+ void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len);
+ void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
void pcmpeqq(XMMRegister dst, XMMRegister src);
+ void vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);
void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len);
+ void pcmpgtq(XMMRegister dst, XMMRegister src);
+ void vpcmpgtq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+
void pmovmskb(Register dst, XMMRegister src);
void vpmovmskb(Register dst, XMMRegister src);
@@ -1668,6 +1775,7 @@ class Assembler : public AbstractAssembler {
void pextrq(Register dst, XMMRegister src, int imm8);
void pextrd(Address dst, XMMRegister src, int imm8);
void pextrq(Address dst, XMMRegister src, int imm8);
+ void pextrb(Register dst, XMMRegister src, int imm8);
void pextrb(Address dst, XMMRegister src, int imm8);
// SSE 2 extract
void pextrw(Register dst, XMMRegister src, int imm8);
@@ -1676,21 +1784,46 @@ class Assembler : public AbstractAssembler {
// SSE 4.1 insert
void pinsrd(XMMRegister dst, Register src, int imm8);
void pinsrq(XMMRegister dst, Register src, int imm8);
+ void pinsrb(XMMRegister dst, Register src, int imm8);
void pinsrd(XMMRegister dst, Address src, int imm8);
void pinsrq(XMMRegister dst, Address src, int imm8);
void pinsrb(XMMRegister dst, Address src, int imm8);
+ void insertps(XMMRegister dst, XMMRegister src, int imm8);
// SSE 2 insert
void pinsrw(XMMRegister dst, Register src, int imm8);
void pinsrw(XMMRegister dst, Address src, int imm8);
- // SSE4.1 packed move
+ // AVX insert
+ void vpinsrd(XMMRegister dst, XMMRegister nds, Register src, int imm8);
+ void vpinsrb(XMMRegister dst, XMMRegister nds, Register src, int imm8);
+ void vpinsrq(XMMRegister dst, XMMRegister nds, Register src, int imm8);
+ void vpinsrw(XMMRegister dst, XMMRegister nds, Register src, int imm8);
+ void vinsertps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
+
+ // Zero extend moves
void pmovzxbw(XMMRegister dst, XMMRegister src);
void pmovzxbw(XMMRegister dst, Address src);
-
+ void pmovzxbd(XMMRegister dst, XMMRegister src);
void vpmovzxbw( XMMRegister dst, Address src, int vector_len);
+ void pmovzxdq(XMMRegister dst, XMMRegister src);
void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovzxdq(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovzxbd(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovzxbq(XMMRegister dst, XMMRegister src, int vector_len);
void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len);
+ // Sign extend moves
+ void pmovsxbd(XMMRegister dst, XMMRegister src);
+ void pmovsxbq(XMMRegister dst, XMMRegister src);
+ void pmovsxbw(XMMRegister dst, XMMRegister src);
+ void pmovsxwd(XMMRegister dst, XMMRegister src);
+ void vpmovsxbd(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovsxbq(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovsxwd(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovsxwq(XMMRegister dst, XMMRegister src, int vector_len);
+ void vpmovsxdq(XMMRegister dst, XMMRegister src, int vector_len);
+
void evpmovwb(Address dst, XMMRegister src, int vector_len);
void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len);
@@ -1698,10 +1831,6 @@ class Assembler : public AbstractAssembler {
void evpmovdb(Address dst, XMMRegister src, int vector_len);
- // Sign extend moves
- void pmovsxbw(XMMRegister dst, XMMRegister src);
- void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len);
-
// Multiply add
void pmaddwd(XMMRegister dst, XMMRegister src);
void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
@@ -1745,10 +1874,17 @@ class Assembler : public AbstractAssembler {
void pshufd(XMMRegister dst, Address src, int mode);
void vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len);
- // Shuffle Packed Low Words
+ // Shuffle Packed High/Low Words
+ void pshufhw(XMMRegister dst, XMMRegister src, int mode);
void pshuflw(XMMRegister dst, XMMRegister src, int mode);
void pshuflw(XMMRegister dst, Address src, int mode);
+ // Shuffle floats and doubles
+ void pshufps(XMMRegister dst, XMMRegister src, int imm8);
+ void pshufpd(XMMRegister dst, XMMRegister src, int imm8);
+ void vpshufps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
+ void vpshufpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
+
// Shuffle packed values at 128 bit granularity
void evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
@@ -1764,6 +1900,9 @@ class Assembler : public AbstractAssembler {
void vptest(XMMRegister dst, XMMRegister src);
void vptest(XMMRegister dst, Address src);
+ // Vector compare
+ void vptest(XMMRegister dst, XMMRegister src, int vector_len);
+
// Interleave Low Bytes
void punpcklbw(XMMRegister dst, XMMRegister src);
void punpcklbw(XMMRegister dst, Address src);
@@ -1837,6 +1976,7 @@ class Assembler : public AbstractAssembler {
void evalignq(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
void pblendw(XMMRegister dst, XMMRegister src, int imm8);
+ void vblendps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len);
void sha1rnds4(XMMRegister dst, XMMRegister src, int imm8);
void sha1nexte(XMMRegister dst, XMMRegister src);
@@ -1955,6 +2095,7 @@ class Assembler : public AbstractAssembler {
void xorl(Register dst, Register src);
void xorb(Register dst, Address src);
+ void xorw(Register dst, Register src);
void xorq(Register dst, Address src);
void xorq(Register dst, Register src);
@@ -1989,8 +2130,12 @@ class Assembler : public AbstractAssembler {
void shlxl(Register dst, Register src1, Register src2);
void shlxq(Register dst, Register src1, Register src2);
+ void shrxq(Register dst, Register src1, Register src2);
+
//====================VECTOR ARITHMETIC=====================================
+ void evpmovd2m(KRegister kdst, XMMRegister src, int vector_len);
+ void evpmovq2m(KRegister kdst, XMMRegister src, int vector_len);
// Add Packed Floating-Point Values
void addpd(XMMRegister dst, XMMRegister src);
@@ -2100,13 +2245,41 @@ class Assembler : public AbstractAssembler {
// Multiply packed integers (only shorts and ints)
void pmullw(XMMRegister dst, XMMRegister src);
void pmulld(XMMRegister dst, XMMRegister src);
+ void pmuludq(XMMRegister dst, XMMRegister src);
void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void vpmuludq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+ // Minimum of packed integers
+ void pminsb(XMMRegister dst, XMMRegister src);
+ void vpminsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void pminsw(XMMRegister dst, XMMRegister src);
+ void vpminsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void pminsd(XMMRegister dst, XMMRegister src);
+ void vpminsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void vpminsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void minps(XMMRegister dst, XMMRegister src);
+ void vminps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void minpd(XMMRegister dst, XMMRegister src);
+ void vminpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+
+ // Maximum of packed integers
+ void pmaxsb(XMMRegister dst, XMMRegister src);
+ void vpmaxsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void pmaxsw(XMMRegister dst, XMMRegister src);
+ void vpmaxsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void pmaxsd(XMMRegister dst, XMMRegister src);
+ void vpmaxsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void vpmaxsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void maxps(XMMRegister dst, XMMRegister src);
+ void vmaxps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+ void maxpd(XMMRegister dst, XMMRegister src);
+ void vmaxpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
+
// Shift left packed integers
void psllw(XMMRegister dst, int shift);
void pslld(XMMRegister dst, int shift);
@@ -2148,9 +2321,22 @@ class Assembler : public AbstractAssembler {
void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+ void evpsravw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
void evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+ // Variable shift left packed integers
+ void vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+ void vpsllvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+
+ // Variable shift right packed integers
+ void vpsrlvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+ void vpsrlvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+
+ // Variable shift right arithmetic packed integers
+ void vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+ void evpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+
void vpshldvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
void vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
@@ -2158,6 +2344,7 @@ class Assembler : public AbstractAssembler {
void pand(XMMRegister dst, XMMRegister src);
void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+ void evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
// Andn packed integers
@@ -2170,10 +2357,15 @@ class Assembler : public AbstractAssembler {
void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
+ void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
+
// Xor packed integers
void pxor(XMMRegister dst, XMMRegister src);
void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+ void vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
@@ -2251,7 +2443,21 @@ class Assembler : public AbstractAssembler {
void evpbroadcastd(XMMRegister dst, Register src, int vector_len);
void evpbroadcastq(XMMRegister dst, Register src, int vector_len);
- void evpgatherdd(XMMRegister dst, KRegister k1, Address src, int vector_len);
+ // Gather AVX2 and AVX3
+ void vpgatherdd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
+ void vpgatherdq(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
+ void vgatherdpd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
+ void vgatherdps(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
+ void evpgatherdd(XMMRegister dst, KRegister mask, Address src, int vector_len);
+ void evpgatherdq(XMMRegister dst, KRegister mask, Address src, int vector_len);
+ void evgatherdpd(XMMRegister dst, KRegister mask, Address src, int vector_len);
+ void evgatherdps(XMMRegister dst, KRegister mask, Address src, int vector_len);
+
+ // Scatter AVX3 only
+ void evpscatterdd(Address dst, KRegister mask, XMMRegister src, int vector_len);
+ void evpscatterdq(Address dst, KRegister mask, XMMRegister src, int vector_len);
+ void evscatterdps(Address dst, KRegister mask, XMMRegister src, int vector_len);
+ void evscatterdpd(Address dst, KRegister mask, XMMRegister src, int vector_len);
// Carry-Less Multiplication Quadword
void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
@@ -2264,14 +2470,56 @@ class Assembler : public AbstractAssembler {
// runtime code and native libraries.
void vzeroupper();
- // AVX support for vectorized conditional move (float/double). The following two instructions used only coupled.
+ // Vector double compares
+ void vcmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
+ void evcmppd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ ComparisonPredicateFP comparison, int vector_len);
+
+ // Vector float compares
+ void vcmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int comparison, int vector_len);
+ void evcmpps(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ ComparisonPredicateFP comparison, int vector_len);
+
+ // Vector integer compares
+ void vpcmpgtd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ int comparison, int vector_len);
+ void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
+ int comparison, int vector_len);
+
+ // Vector long compares
+ void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ int comparison, int vector_len);
+ void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
+ int comparison, int vector_len);
+
+ // Vector byte compares
+ void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ int comparison, int vector_len);
+ void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
+ int comparison, int vector_len);
+
+ // Vector short compares
+ void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ int comparison, int vector_len);
+ void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
+ int comparison, int vector_len);
+
+ // Vector blends
+ void blendvps(XMMRegister dst, XMMRegister src);
+ void blendvpd(XMMRegister dst, XMMRegister src);
+ void pblendvb(XMMRegister dst, XMMRegister src);
void blendvpb(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
- void cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
- void blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
- void cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
- void blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
+ void vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
+ void vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
+ void vpblendvb(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
-
+ void evblendmpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
+ void evblendmps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
+ void evpblendmb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
+ void evpblendmw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
+ void evpblendmd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
+ void evpblendmq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
protected:
// Next instructions require address alignment 16 bytes SSE mode.
// They should be called only from corresponding MacroAssembler instructions.
@@ -2367,7 +2615,8 @@ class InstructionAttr {
// Internal encoding data used in compressed immediate offset programming
void set_evex_encoding(int value) { _evex_encoding = value; }
- // Set the Evex.Z field to be used to clear all non directed XMM/YMM/ZMM components
+ // When the Evex.Z field is set (true), it is used to clear all non-directed XMM/YMM/ZMM components.
+ // This method unsets it so that merge semantics are used instead.
void reset_is_clear_context(void) { _is_clear_context = false; }
// Map back to current asembler so that we can manage object level assocation
diff --git a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp
index 526fe5af2fc..6853953f0eb 100644
--- a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp
+++ b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp
@@ -79,6 +79,32 @@ void ConversionStub::emit_code(LIR_Assembler* ce) {
}
#endif // !_LP64
+void C1SafepointPollStub::emit_code(LIR_Assembler* ce) {
+ __ bind(_entry);
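+ // pc() - offset() is the start of this code buffer; adding the recorded safepoint offset
+ // yields the address of the return poll, which is saved in the thread for the safepoint handler.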
+ InternalAddress safepoint_pc(ce->masm()->pc() - ce->masm()->offset() + safepoint_offset());
+#ifdef _LP64
+ __ lea(rscratch1, safepoint_pc);
+ __ movptr(Address(r15_thread, JavaThread::saved_exception_pc_offset()), rscratch1);
+#else
+ const Register tmp1 = rcx;
+ const Register tmp2 = rdx;
+ __ push(tmp1);
+ __ push(tmp2);
+
+ __ lea(tmp1, safepoint_pc);
+ __ get_thread(tmp2);
+ __ movptr(Address(tmp2, JavaThread::saved_exception_pc_offset()), tmp1);
+
+ __ pop(tmp2);
+ __ pop(tmp1);
+#endif /* _LP64 */
+ assert(SharedRuntime::polling_page_return_handler_blob() != NULL,
+ "polling page return stub not created yet");
+
+ address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point();
+ __ jump(RuntimeAddress(stub));
+}
+
void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
__ bind(_entry);
Metadata *m = _method->as_constant_ptr()->as_metadata();
diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
index 8a0200a18dc..bba946ec4ad 100644
--- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp
@@ -25,6 +25,7 @@
#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
+#include "c1/c1_CodeStubs.hpp"
#include "c1/c1_Compilation.hpp"
#include "c1/c1_LIRAssembler.hpp"
#include "c1/c1_MacroAssembler.hpp"
@@ -517,8 +518,7 @@ int LIR_Assembler::emit_deopt_handler() {
return offset;
}
-
-void LIR_Assembler::return_op(LIR_Opr result) {
+void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) {
assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == rax, "word returns are in rax,");
if (!result->is_illegal() && result->is_float_kind() && !result->is_xmm_register()) {
assert(result->fpu() == 0, "result must already be on TOS");
@@ -531,22 +531,18 @@ void LIR_Assembler::return_op(LIR_Opr result) {
__ reserved_stack_check();
}
- bool result_is_oop = result->is_valid() ? result->is_oop() : false;
-
// Note: we do not need to round double result; float result has the right precision
// the poll sets the condition code, but no data registers
#ifdef _LP64
- const Register poll_addr = rscratch1;
- __ movptr(poll_addr, Address(r15_thread, Thread::polling_page_offset()));
+ const Register thread = r15_thread;
#else
- const Register poll_addr = rbx;
- assert(FrameMap::is_caller_save_register(poll_addr), "will overwrite");
- __ get_thread(poll_addr);
- __ movptr(poll_addr, Address(poll_addr, Thread::polling_page_offset()));
+ const Register thread = rbx;
+ __ get_thread(thread);
#endif
+ code_stub->set_safepoint_offset(__ offset());
__ relocate(relocInfo::poll_return_type);
- __ testl(rax, Address(poll_addr, 0));
+ __ safepoint_poll(*code_stub->entry(), thread, true /* at_return */, true /* in_nmethod */);
__ ret(0);
}
diff --git a/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp b/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp
index 53935539a36..60347c41163 100644
--- a/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp
+++ b/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp
@@ -69,7 +69,7 @@ int StubAssembler::call_RT(Register oop_result1, Register metadata_result, addre
push(thread);
#endif // _LP64
- int call_offset;
+ int call_offset = -1;
if (!align_stack) {
set_last_Java_frame(thread, noreg, rbp, NULL);
} else {
@@ -133,6 +133,8 @@ int StubAssembler::call_RT(Register oop_result1, Register metadata_result, addre
if (metadata_result->is_valid()) {
get_vm_result_2(metadata_result, thread);
}
+
+ assert(call_offset >= 0, "Should be set");
return call_offset;
}
diff --git a/src/hotspot/cpu/x86/c1_globals_x86.hpp b/src/hotspot/cpu/x86/c1_globals_x86.hpp
index fbf538c2cec..afd2a65cb89 100644
--- a/src/hotspot/cpu/x86/c1_globals_x86.hpp
+++ b/src/hotspot/cpu/x86/c1_globals_x86.hpp
@@ -33,8 +33,6 @@
#ifndef TIERED
define_pd_global(bool, BackgroundCompilation, true );
-define_pd_global(bool, UseTLAB, true );
-define_pd_global(bool, ResizeTLAB, true );
define_pd_global(bool, InlineIntrinsics, true );
define_pd_global(bool, PreferInterpreterNativeStubs, false);
define_pd_global(bool, ProfileTraps, false);
diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
index 8940b0c3c44..3aef6446f78 100644
--- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
@@ -33,6 +33,21 @@
#include "runtime/objectMonitor.hpp"
#include "runtime/stubRoutines.hpp"
+inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vlen_in_bytes) {
+ switch (vlen_in_bytes) {
+ case 4: // fall-through
+ case 8: // fall-through
+ case 16: return Assembler::AVX_128bit;
+ case 32: return Assembler::AVX_256bit;
+ case 64: return Assembler::AVX_512bit;
+
+ default: {
+ ShouldNotReachHere();
+ return Assembler::AVX_NoVec;
+ }
+ }
+}
+
void C2_MacroAssembler::setvectmask(Register dst, Register src) {
guarantee(PostLoopMultiversioning, "must be");
Assembler::movl(dst, 1);
@@ -861,6 +876,174 @@ void C2_MacroAssembler::vabsnegf(int opcode, XMMRegister dst, XMMRegister src, i
}
}
+void C2_MacroAssembler::pminmax(int opcode, BasicType elem_bt, XMMRegister dst, XMMRegister src, XMMRegister tmp) {
+ assert(opcode == Op_MinV || opcode == Op_MaxV, "sanity");
+ assert(tmp == xnoreg || elem_bt == T_LONG, "unused");
+
+ if (opcode == Op_MinV) {
+ if (elem_bt == T_BYTE) {
+ pminsb(dst, src);
+ } else if (elem_bt == T_SHORT) {
+ pminsw(dst, src);
+ } else if (elem_bt == T_INT) {
+ pminsd(dst, src);
+ } else {
+ assert(elem_bt == T_LONG, "required");
+ assert(tmp == xmm0, "required");
+ assert_different_registers(dst, src, tmp);
+ movdqu(xmm0, dst);
+ pcmpgtq(xmm0, src);
+ blendvpd(dst, src); // xmm0 as mask
+ }
+ } else { // opcode == Op_MaxV
+ if (elem_bt == T_BYTE) {
+ pmaxsb(dst, src);
+ } else if (elem_bt == T_SHORT) {
+ pmaxsw(dst, src);
+ } else if (elem_bt == T_INT) {
+ pmaxsd(dst, src);
+ } else {
+ assert(elem_bt == T_LONG, "required");
+ assert(tmp == xmm0, "required");
+ assert_different_registers(dst, src, tmp);
+ movdqu(xmm0, src);
+ pcmpgtq(xmm0, dst);
+ blendvpd(dst, src); // xmm0 as mask
+ }
+ }
+}
+
+void C2_MacroAssembler::vpminmax(int opcode, BasicType elem_bt,
+ XMMRegister dst, XMMRegister src1, XMMRegister src2,
+ int vlen_enc) {
+ assert(opcode == Op_MinV || opcode == Op_MaxV, "sanity");
+
+ if (opcode == Op_MinV) {
+ if (elem_bt == T_BYTE) {
+ vpminsb(dst, src1, src2, vlen_enc);
+ } else if (elem_bt == T_SHORT) {
+ vpminsw(dst, src1, src2, vlen_enc);
+ } else if (elem_bt == T_INT) {
+ vpminsd(dst, src1, src2, vlen_enc);
+ } else {
+ assert(elem_bt == T_LONG, "required");
+ if (UseAVX > 2 && (vlen_enc == Assembler::AVX_512bit || VM_Version::supports_avx512vl())) {
+ vpminsq(dst, src1, src2, vlen_enc);
+ } else {
+ assert_different_registers(dst, src1, src2);
+ vpcmpgtq(dst, src1, src2, vlen_enc);
+ vblendvpd(dst, src1, src2, dst, vlen_enc);
+ }
+ }
+ } else { // opcode == Op_MaxV
+ if (elem_bt == T_BYTE) {
+ vpmaxsb(dst, src1, src2, vlen_enc);
+ } else if (elem_bt == T_SHORT) {
+ vpmaxsw(dst, src1, src2, vlen_enc);
+ } else if (elem_bt == T_INT) {
+ vpmaxsd(dst, src1, src2, vlen_enc);
+ } else {
+ assert(elem_bt == T_LONG, "required");
+ if (UseAVX > 2 && (vlen_enc == Assembler::AVX_512bit || VM_Version::supports_avx512vl())) {
+ vpmaxsq(dst, src1, src2, vlen_enc);
+ } else {
+ assert_different_registers(dst, src1, src2);
+ vpcmpgtq(dst, src1, src2, vlen_enc);
+ vblendvpd(dst, src2, src1, dst, vlen_enc);
+ }
+ }
+ }
+}
+
+// Float/Double min max
+
+void C2_MacroAssembler::vminmax_fp(int opcode, BasicType elem_bt,
+ XMMRegister dst, XMMRegister a, XMMRegister b,
+ XMMRegister tmp, XMMRegister atmp, XMMRegister btmp,
+ int vlen_enc) {
+ assert(UseAVX > 0, "required");
+ assert(opcode == Op_MinV || opcode == Op_MinReductionV ||
+ opcode == Op_MaxV || opcode == Op_MaxReductionV, "sanity");
+ assert(elem_bt == T_FLOAT || elem_bt == T_DOUBLE, "sanity");
+ assert_different_registers(a, b, tmp, atmp, btmp);
+
+ bool is_min = (opcode == Op_MinV || opcode == Op_MinReductionV);
+ bool is_double_word = is_double_word_type(elem_bt);
+
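+ // Blend the inputs on the sign bit of 'a' (or of 'b' for max) so that -0.0 orders below +0.0,
+ // then patch the result with an unordered compare so NaN inputs propagate (Java min/max semantics).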
+ if (!is_double_word && is_min) {
+ vblendvps(atmp, a, b, a, vlen_enc);
+ vblendvps(btmp, b, a, a, vlen_enc);
+ vminps(tmp, atmp, btmp, vlen_enc);
+ vcmpps(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
+ vblendvps(dst, tmp, atmp, btmp, vlen_enc);
+ } else if (!is_double_word && !is_min) {
+ vblendvps(btmp, b, a, b, vlen_enc);
+ vblendvps(atmp, a, b, b, vlen_enc);
+ vmaxps(tmp, atmp, btmp, vlen_enc);
+ vcmpps(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
+ vblendvps(dst, tmp, atmp, btmp, vlen_enc);
+ } else if (is_double_word && is_min) {
+ vblendvpd(atmp, a, b, a, vlen_enc);
+ vblendvpd(btmp, b, a, a, vlen_enc);
+ vminpd(tmp, atmp, btmp, vlen_enc);
+ vcmppd(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
+ vblendvpd(dst, tmp, atmp, btmp, vlen_enc);
+ } else {
+ assert(is_double_word && !is_min, "sanity");
+ vblendvpd(btmp, b, a, b, vlen_enc);
+ vblendvpd(atmp, a, b, b, vlen_enc);
+ vmaxpd(tmp, atmp, btmp, vlen_enc);
+ vcmppd(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
+ vblendvpd(dst, tmp, atmp, btmp, vlen_enc);
+ }
+}
+
+void C2_MacroAssembler::evminmax_fp(int opcode, BasicType elem_bt,
+ XMMRegister dst, XMMRegister a, XMMRegister b,
+ KRegister ktmp, XMMRegister atmp, XMMRegister btmp,
+ int vlen_enc) {
+ assert(UseAVX > 2, "required");
+ assert(opcode == Op_MinV || opcode == Op_MinReductionV ||
+ opcode == Op_MaxV || opcode == Op_MaxReductionV, "sanity");
+ assert(elem_bt == T_FLOAT || elem_bt == T_DOUBLE, "sanity");
+ assert_different_registers(dst, a, b, atmp, btmp);
+
+ bool is_min = (opcode == Op_MinV || opcode == Op_MinReductionV);
+ bool is_double_word = is_double_word_type(elem_bt);
+ bool merge = true;
+
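+ // evpmovd2m/evpmovq2m copy the per-lane sign bits into a mask register; the masked blends then
+ // order the inputs so -0.0 sorts below +0.0, and the final masked move writes NaN lanes back.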
+ if (!is_double_word && is_min) {
+ evpmovd2m(ktmp, a, vlen_enc);
+ evblendmps(atmp, ktmp, a, b, merge, vlen_enc);
+ evblendmps(btmp, ktmp, b, a, merge, vlen_enc);
+ vminps(dst, atmp, btmp, vlen_enc);
+ evcmpps(ktmp, k0, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
+ evmovdqul(dst, ktmp, atmp, merge, vlen_enc);
+ } else if (!is_double_word && !is_min) {
+ evpmovd2m(ktmp, b, vlen_enc);
+ evblendmps(atmp, ktmp, a, b, merge, vlen_enc);
+ evblendmps(btmp, ktmp, b, a, merge, vlen_enc);
+ vmaxps(dst, atmp, btmp, vlen_enc);
+ evcmpps(ktmp, k0, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
+ evmovdqul(dst, ktmp, atmp, merge, vlen_enc);
+ } else if (is_double_word && is_min) {
+ evpmovq2m(ktmp, a, vlen_enc);
+ evblendmpd(atmp, ktmp, a, b, merge, vlen_enc);
+ evblendmpd(btmp, ktmp, b, a, merge, vlen_enc);
+ vminpd(dst, atmp, btmp, vlen_enc);
+ evcmppd(ktmp, k0, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
+ evmovdquq(dst, ktmp, atmp, merge, vlen_enc);
+ } else {
+ assert(is_double_word && !is_min, "sanity");
+ evpmovq2m(ktmp, b, vlen_enc);
+ evblendmpd(atmp, ktmp, a, b, merge, vlen_enc);
+ evblendmpd(btmp, ktmp, b, a, merge, vlen_enc);
+ vmaxpd(dst, atmp, btmp, vlen_enc);
+ evcmppd(ktmp, k0, atmp, atmp, Assembler::UNORD_Q, vlen_enc);
+ evmovdquq(dst, ktmp, atmp, merge, vlen_enc);
+ }
+}
+
void C2_MacroAssembler::vextendbw(bool sign, XMMRegister dst, XMMRegister src) {
if (sign) {
pmovsxbw(dst, src);
@@ -877,6 +1060,22 @@ void C2_MacroAssembler::vextendbw(bool sign, XMMRegister dst, XMMRegister src, i
}
}
+void C2_MacroAssembler::vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len) {
+ if (sign) {
+ vpmovsxbd(dst, src, vector_len);
+ } else {
+ vpmovzxbd(dst, src, vector_len);
+ }
+}
+
+void C2_MacroAssembler::vextendwd(bool sign, XMMRegister dst, XMMRegister src, int vector_len) {
+ if (sign) {
+ vpmovsxwd(dst, src, vector_len);
+ } else {
+ vpmovzxwd(dst, src, vector_len);
+ }
+}
+
void C2_MacroAssembler::vprotate_imm(int opcode, BasicType etype, XMMRegister dst, XMMRegister src,
int shift, int vector_len) {
if (opcode == Op_RotateLeftV) {
@@ -928,14 +1127,13 @@ void C2_MacroAssembler::vshiftd_imm(int opcode, XMMRegister dst, int shift) {
}
}
-void C2_MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister src) {
- if (opcode == Op_RShiftVI) {
- psrad(dst, src);
- } else if (opcode == Op_LShiftVI) {
- pslld(dst, src);
- } else {
- assert((opcode == Op_URShiftVI),"opcode should be Op_URShiftVI");
- psrld(dst, src);
+void C2_MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister shift) {
+ switch (opcode) {
+ case Op_RShiftVI: psrad(dst, shift); break;
+ case Op_LShiftVI: pslld(dst, shift); break;
+ case Op_URShiftVI: psrld(dst, shift); break;
+
+ default: assert(false, "%s", NodeClassNames[opcode]);
}
}
@@ -950,47 +1148,53 @@ void C2_MacroAssembler::vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds
}
}
-void C2_MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
- if (opcode == Op_RShiftVI) {
- vpsrad(dst, nds, src, vector_len);
- } else if (opcode == Op_LShiftVI) {
- vpslld(dst, nds, src, vector_len);
- } else {
- assert((opcode == Op_URShiftVI),"opcode should be Op_URShiftVI");
- vpsrld(dst, nds, src, vector_len);
+void C2_MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) {
+ switch (opcode) {
+ case Op_RShiftVI: vpsrad(dst, src, shift, vlen_enc); break;
+ case Op_LShiftVI: vpslld(dst, src, shift, vlen_enc); break;
+ case Op_URShiftVI: vpsrld(dst, src, shift, vlen_enc); break;
+
+ default: assert(false, "%s", NodeClassNames[opcode]);
}
}
-void C2_MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister src) {
- if ((opcode == Op_RShiftVS) || (opcode == Op_RShiftVB)) {
- psraw(dst, src);
- } else if ((opcode == Op_LShiftVS) || (opcode == Op_LShiftVB)) {
- psllw(dst, src);
- } else {
- assert(((opcode == Op_URShiftVS) || (opcode == Op_URShiftVB)),"opcode should be one of Op_URShiftVS or Op_URShiftVB");
- psrlw(dst, src);
+void C2_MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister shift) {
+ switch (opcode) {
+ case Op_RShiftVB: // fall-through
+ case Op_RShiftVS: psraw(dst, shift); break;
+
+ case Op_LShiftVB: // fall-through
+ case Op_LShiftVS: psllw(dst, shift); break;
+
+ case Op_URShiftVS: // fall-through
+ case Op_URShiftVB: psrlw(dst, shift); break;
+
+ default: assert(false, "%s", NodeClassNames[opcode]);
}
}
-void C2_MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
- if ((opcode == Op_RShiftVS) || (opcode == Op_RShiftVB)) {
- vpsraw(dst, nds, src, vector_len);
- } else if ((opcode == Op_LShiftVS) || (opcode == Op_LShiftVB)) {
- vpsllw(dst, nds, src, vector_len);
- } else {
- assert(((opcode == Op_URShiftVS) || (opcode == Op_URShiftVB)),"opcode should be one of Op_URShiftVS or Op_URShiftVB");
- vpsrlw(dst, nds, src, vector_len);
+void C2_MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) {
+ switch (opcode) {
+ case Op_RShiftVB: // fall-through
+ case Op_RShiftVS: vpsraw(dst, src, shift, vlen_enc); break;
+
+ case Op_LShiftVB: // fall-through
+ case Op_LShiftVS: vpsllw(dst, src, shift, vlen_enc); break;
+
+ case Op_URShiftVS: // fall-through
+ case Op_URShiftVB: vpsrlw(dst, src, shift, vlen_enc); break;
+
+ default: assert(false, "%s", NodeClassNames[opcode]);
}
}
-void C2_MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister src) {
- if (opcode == Op_RShiftVL) {
- psrlq(dst, src); // using srl to implement sra on pre-avs512 systems
- } else if (opcode == Op_LShiftVL) {
- psllq(dst, src);
- } else {
- assert((opcode == Op_URShiftVL),"opcode should be Op_URShiftVL");
- psrlq(dst, src);
+void C2_MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister shift) {
+ switch (opcode) {
+ case Op_RShiftVL: psrlq(dst, shift); break; // using srl to implement sra on pre-avx512 systems
+ case Op_LShiftVL: psllq(dst, shift); break;
+ case Op_URShiftVL: psrlq(dst, shift); break;
+
+ default: assert(false, "%s", NodeClassNames[opcode]);
}
}
@@ -1005,14 +1209,13 @@ void C2_MacroAssembler::vshiftq_imm(int opcode, XMMRegister dst, int shift) {
}
}
-void C2_MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
- if (opcode == Op_RShiftVL) {
- evpsraq(dst, nds, src, vector_len);
- } else if (opcode == Op_LShiftVL) {
- vpsllq(dst, nds, src, vector_len);
- } else {
- assert((opcode == Op_URShiftVL),"opcode should be Op_URShiftVL");
- vpsrlq(dst, nds, src, vector_len);
+void C2_MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) {
+ switch (opcode) {
+ case Op_RShiftVL: evpsraq(dst, src, shift, vlen_enc); break;
+ case Op_LShiftVL: vpsllq(dst, src, shift, vlen_enc); break;
+ case Op_URShiftVL: vpsrlq(dst, src, shift, vlen_enc); break;
+
+ default: assert(false, "%s", NodeClassNames[opcode]);
}
}
@@ -1027,45 +1230,351 @@ void C2_MacroAssembler::vshiftq_imm(int opcode, XMMRegister dst, XMMRegister nds
}
}
-// Reductions for vectors of ints, longs, floats, and doubles.
-void C2_MacroAssembler::reduce_operation_128(int opcode, XMMRegister dst, XMMRegister src) {
+void C2_MacroAssembler::varshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) {
+ switch (opcode) {
+ case Op_RShiftVB: // fall-through
+ case Op_RShiftVS: // fall-through
+ case Op_RShiftVI: vpsravd(dst, src, shift, vlen_enc); break;
+
+ case Op_LShiftVB: // fall-through
+ case Op_LShiftVS: // fall-through
+ case Op_LShiftVI: vpsllvd(dst, src, shift, vlen_enc); break;
+
+ case Op_URShiftVB: // fall-through
+ case Op_URShiftVS: // fall-through
+ case Op_URShiftVI: vpsrlvd(dst, src, shift, vlen_enc); break;
+
+ default: assert(false, "%s", NodeClassNames[opcode]);
+ }
+}
+
+void C2_MacroAssembler::varshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) {
+ switch (opcode) {
+ case Op_RShiftVB: // fall-through
+ case Op_RShiftVS: evpsravw(dst, src, shift, vlen_enc); break;
+
+ case Op_LShiftVB: // fall-through
+ case Op_LShiftVS: evpsllvw(dst, src, shift, vlen_enc); break;
+
+ case Op_URShiftVB: // fall-through
+ case Op_URShiftVS: evpsrlvw(dst, src, shift, vlen_enc); break;
+
+ default: assert(false, "%s", NodeClassNames[opcode]);
+ }
+}
+
+void C2_MacroAssembler::varshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc, XMMRegister tmp) {
+ assert(UseAVX >= 2, "required");
+ switch (opcode) {
+ case Op_RShiftVL: {
+ if (UseAVX > 2) {
+ assert(tmp == xnoreg, "not used");
+ if (!VM_Version::supports_avx512vl()) {
+ vlen_enc = Assembler::AVX_512bit;
+ }
+ evpsravq(dst, src, shift, vlen_enc);
+ } else {
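+ // AVX2 has no arithmetic right shift for 64-bit lanes: shift the value and a sign-bit mask
+ // logically, then xor/subtract the shifted mask to sign-extend the result.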
+ vmovdqu(tmp, ExternalAddress(StubRoutines::x86::vector_long_sign_mask()));
+ vpsrlvq(dst, src, shift, vlen_enc);
+ vpsrlvq(tmp, tmp, shift, vlen_enc);
+ vpxor(dst, dst, tmp, vlen_enc);
+ vpsubq(dst, dst, tmp, vlen_enc);
+ }
+ break;
+ }
+ case Op_LShiftVL: {
+ assert(tmp == xnoreg, "not used");
+ vpsllvq(dst, src, shift, vlen_enc);
+ break;
+ }
+ case Op_URShiftVL: {
+ assert(tmp == xnoreg, "not used");
+ vpsrlvq(dst, src, shift, vlen_enc);
+ break;
+ }
+ default: assert(false, "%s", NodeClassNames[opcode]);
+ }
+}
+
+// Variable shift src by shift using vtmp and scratch as TEMPs giving word result in dst
+void C2_MacroAssembler::varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch) {
+ assert(opcode == Op_LShiftVB ||
+ opcode == Op_RShiftVB ||
+ opcode == Op_URShiftVB, "%s", NodeClassNames[opcode]);
+ bool sign = (opcode != Op_URShiftVB);
+ assert(vector_len == 0, "required");
+ vextendbd(sign, dst, src, 1);
+ vpmovzxbd(vtmp, shift, 1);
+ varshiftd(opcode, dst, dst, vtmp, 1);
+ vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_int_to_byte_mask()), 1, scratch);
+ vextracti128_high(vtmp, dst);
+ vpackusdw(dst, dst, vtmp, 0);
+}
+
+// Variable shift src by shift using vtmp and scratch as TEMPs giving byte result in dst
+void C2_MacroAssembler::evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch) {
+ assert(opcode == Op_LShiftVB ||
+ opcode == Op_RShiftVB ||
+ opcode == Op_URShiftVB, "%s", NodeClassNames[opcode]);
+ bool sign = (opcode != Op_URShiftVB);
+ int ext_vector_len = vector_len + 1;
+ vextendbw(sign, dst, src, ext_vector_len);
+ vpmovzxbw(vtmp, shift, ext_vector_len);
+ varshiftw(opcode, dst, dst, vtmp, ext_vector_len);
+ vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_short_to_byte_mask()), ext_vector_len, scratch);
+ if (vector_len == 0) {
+ vextracti128_high(vtmp, dst);
+ vpackuswb(dst, dst, vtmp, vector_len);
+ } else {
+ vextracti64x4_high(vtmp, dst);
+ vpackuswb(dst, dst, vtmp, vector_len);
+ vpermq(dst, dst, 0xD8, vector_len);
+ }
+}
+
+void C2_MacroAssembler::insert(BasicType typ, XMMRegister dst, Register val, int idx) {
+ switch(typ) {
+ case T_BYTE:
+ pinsrb(dst, val, idx);
+ break;
+ case T_SHORT:
+ pinsrw(dst, val, idx);
+ break;
+ case T_INT:
+ pinsrd(dst, val, idx);
+ break;
+ case T_LONG:
+ pinsrq(dst, val, idx);
+ break;
+ default:
+ assert(false,"Should not reach here.");
+ break;
+ }
+}
+
+void C2_MacroAssembler::vinsert(BasicType typ, XMMRegister dst, XMMRegister src, Register val, int idx) {
+ switch(typ) {
+ case T_BYTE:
+ vpinsrb(dst, src, val, idx);
+ break;
+ case T_SHORT:
+ vpinsrw(dst, src, val, idx);
+ break;
+ case T_INT:
+ vpinsrd(dst, src, val, idx);
+ break;
+ case T_LONG:
+ vpinsrq(dst, src, val, idx);
+ break;
+ default:
+ assert(false,"Should not reach here.");
+ break;
+ }
+}
+
+void C2_MacroAssembler::vgather(BasicType typ, XMMRegister dst, Register base, XMMRegister idx, XMMRegister mask, int vector_len) {
+ switch(typ) {
+ case T_INT:
+ vpgatherdd(dst, Address(base, idx, Address::times_4), mask, vector_len);
+ break;
+ case T_FLOAT:
+ vgatherdps(dst, Address(base, idx, Address::times_4), mask, vector_len);
+ break;
+ case T_LONG:
+ vpgatherdq(dst, Address(base, idx, Address::times_8), mask, vector_len);
+ break;
+ case T_DOUBLE:
+ vgatherdpd(dst, Address(base, idx, Address::times_8), mask, vector_len);
+ break;
+ default:
+ assert(false,"Should not reach here.");
+ break;
+ }
+}
+
+void C2_MacroAssembler::evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len) {
+ switch(typ) {
+ case T_INT:
+ evpgatherdd(dst, mask, Address(base, idx, Address::times_4), vector_len);
+ break;
+ case T_FLOAT:
+ evgatherdps(dst, mask, Address(base, idx, Address::times_4), vector_len);
+ break;
+ case T_LONG:
+ evpgatherdq(dst, mask, Address(base, idx, Address::times_8), vector_len);
+ break;
+ case T_DOUBLE:
+ evgatherdpd(dst, mask, Address(base, idx, Address::times_8), vector_len);
+ break;
+ default:
+ assert(false,"Should not reach here.");
+ break;
+ }
+}
+
+void C2_MacroAssembler::evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len) {
+ switch(typ) {
+ case T_INT:
+ evpscatterdd(Address(base, idx, Address::times_4), mask, src, vector_len);
+ break;
+ case T_FLOAT:
+ evscatterdps(Address(base, idx, Address::times_4), mask, src, vector_len);
+ break;
+ case T_LONG:
+ evpscatterdq(Address(base, idx, Address::times_8), mask, src, vector_len);
+ break;
+ case T_DOUBLE:
+ evscatterdpd(Address(base, idx, Address::times_8), mask, src, vector_len);
+ break;
+ default:
+ assert(false,"Should not reach here.");
+ break;
+ }
+}
+
+void C2_MacroAssembler::load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt) {
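+ // Boolean vector elements arrive as 0/1 bytes; 0 - x turns each byte into 0x00 or 0xFF,
+ // and the sign-extending moves widen that all-zeros/all-ones pattern to the element size.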
+ if (vlen_in_bytes <= 16) {
+ pxor (dst, dst);
+ psubb(dst, src);
+ switch (elem_bt) {
+ case T_BYTE: /* nothing to do */ break;
+ case T_SHORT: pmovsxbw(dst, dst); break;
+ case T_INT: pmovsxbd(dst, dst); break;
+ case T_FLOAT: pmovsxbd(dst, dst); break;
+ case T_LONG: pmovsxbq(dst, dst); break;
+ case T_DOUBLE: pmovsxbq(dst, dst); break;
+
+ default: assert(false, "%s", type2name(elem_bt));
+ }
+ } else {
+ int vlen_enc = vector_length_encoding(vlen_in_bytes);
+
+ vpxor (dst, dst, dst, vlen_enc);
+ vpsubb(dst, dst, src, vlen_enc);
+ switch (elem_bt) {
+ case T_BYTE: /* nothing to do */ break;
+ case T_SHORT: vpmovsxbw(dst, dst, vlen_enc); break;
+ case T_INT: vpmovsxbd(dst, dst, vlen_enc); break;
+ case T_FLOAT: vpmovsxbd(dst, dst, vlen_enc); break;
+ case T_LONG: vpmovsxbq(dst, dst, vlen_enc); break;
+ case T_DOUBLE: vpmovsxbq(dst, dst, vlen_enc); break;
+
+ default: assert(false, "%s", type2name(elem_bt));
+ }
+ }
+}
+
+void C2_MacroAssembler::load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes) {
+ ExternalAddress addr(StubRoutines::x86::vector_iota_indices());
+ if (vlen_in_bytes <= 16) {
+ movdqu(dst, addr, scratch);
+ } else if (vlen_in_bytes == 32) {
+ vmovdqu(dst, addr, scratch);
+ } else {
+ assert(vlen_in_bytes == 64, "%d", vlen_in_bytes);
+ evmovdqub(dst, k0, addr, false /*merge*/, Assembler::AVX_512bit, scratch);
+ }
+}
+// Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.
+
+void C2_MacroAssembler::reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src) {
int vector_len = Assembler::AVX_128bit;
switch (opcode) {
case Op_AndReductionV: pand(dst, src); break;
case Op_OrReductionV: por (dst, src); break;
case Op_XorReductionV: pxor(dst, src); break;
-
+ case Op_MinReductionV:
+ switch (typ) {
+ case T_BYTE: pminsb(dst, src); break;
+ case T_SHORT: pminsw(dst, src); break;
+ case T_INT: pminsd(dst, src); break;
+ case T_LONG: assert(UseAVX > 2, "required");
+ vpminsq(dst, dst, src, Assembler::AVX_128bit); break;
+ default: assert(false, "wrong type");
+ }
+ break;
+ case Op_MaxReductionV:
+ switch (typ) {
+ case T_BYTE: pmaxsb(dst, src); break;
+ case T_SHORT: pmaxsw(dst, src); break;
+ case T_INT: pmaxsd(dst, src); break;
+ case T_LONG: assert(UseAVX > 2, "required");
+ vpmaxsq(dst, dst, src, Assembler::AVX_128bit); break;
+ default: assert(false, "wrong type");
+ }
+ break;
case Op_AddReductionVF: addss(dst, src); break;
case Op_AddReductionVD: addsd(dst, src); break;
- case Op_AddReductionVI: paddd(dst, src); break;
+ case Op_AddReductionVI:
+ switch (typ) {
+ case T_BYTE: paddb(dst, src); break;
+ case T_SHORT: paddw(dst, src); break;
+ case T_INT: paddd(dst, src); break;
+ default: assert(false, "wrong type");
+ }
+ break;
case Op_AddReductionVL: paddq(dst, src); break;
-
case Op_MulReductionVF: mulss(dst, src); break;
case Op_MulReductionVD: mulsd(dst, src); break;
- case Op_MulReductionVI: pmulld(dst, src); break;
- case Op_MulReductionVL: vpmullq(dst, dst, src, vector_len); break;
-
- default: assert(false, "wrong opcode");
+ case Op_MulReductionVI:
+ switch (typ) {
+ case T_SHORT: pmullw(dst, src); break;
+ case T_INT: pmulld(dst, src); break;
+ default: assert(false, "wrong type");
+ }
+ break;
+ case Op_MulReductionVL: assert(UseAVX > 2, "required");
+ vpmullq(dst, dst, src, vector_len); break;
+ default: assert(false, "wrong opcode");
}
}
-void C2_MacroAssembler::reduce_operation_256(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2) {
+void C2_MacroAssembler::reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2) {
int vector_len = Assembler::AVX_256bit;
switch (opcode) {
case Op_AndReductionV: vpand(dst, src1, src2, vector_len); break;
case Op_OrReductionV: vpor (dst, src1, src2, vector_len); break;
case Op_XorReductionV: vpxor(dst, src1, src2, vector_len); break;
-
- case Op_AddReductionVI: vpaddd(dst, src1, src2, vector_len); break;
+ case Op_MinReductionV:
+ switch (typ) {
+ case T_BYTE: vpminsb(dst, src1, src2, vector_len); break;
+ case T_SHORT: vpminsw(dst, src1, src2, vector_len); break;
+ case T_INT: vpminsd(dst, src1, src2, vector_len); break;
+ case T_LONG: assert(UseAVX > 2, "required");
+ vpminsq(dst, src1, src2, vector_len); break;
+ default: assert(false, "wrong type");
+ }
+ break;
+ case Op_MaxReductionV:
+ switch (typ) {
+ case T_BYTE: vpmaxsb(dst, src1, src2, vector_len); break;
+ case T_SHORT: vpmaxsw(dst, src1, src2, vector_len); break;
+ case T_INT: vpmaxsd(dst, src1, src2, vector_len); break;
+ case T_LONG: assert(UseAVX > 2, "required");
+ vpmaxsq(dst, src1, src2, vector_len); break;
+ default: assert(false, "wrong type");
+ }
+ break;
+ case Op_AddReductionVI:
+ switch (typ) {
+ case T_BYTE: vpaddb(dst, src1, src2, vector_len); break;
+ case T_SHORT: vpaddw(dst, src1, src2, vector_len); break;
+ case T_INT: vpaddd(dst, src1, src2, vector_len); break;
+ default: assert(false, "wrong type");
+ }
+ break;
case Op_AddReductionVL: vpaddq(dst, src1, src2, vector_len); break;
-
- case Op_MulReductionVI: vpmulld(dst, src1, src2, vector_len); break;
+ case Op_MulReductionVI:
+ switch (typ) {
+ case T_SHORT: vpmullw(dst, src1, src2, vector_len); break;
+ case T_INT: vpmulld(dst, src1, src2, vector_len); break;
+ default: assert(false, "wrong type");
+ }
+ break;
case Op_MulReductionVL: vpmullq(dst, src1, src2, vector_len); break;
-
- default: assert(false, "wrong opcode");
+ default: assert(false, "wrong opcode");
}
}
@@ -1087,9 +1596,48 @@ void C2_MacroAssembler::reduce_fp(int opcode, int vlen,
}
}
+void C2_MacroAssembler::reduceB(int opcode, int vlen,
+ Register dst, Register src1, XMMRegister src2,
+ XMMRegister vtmp1, XMMRegister vtmp2) {
+ switch (vlen) {
+ case 8: reduce8B (opcode, dst, src1, src2, vtmp1, vtmp2); break;
+ case 16: reduce16B(opcode, dst, src1, src2, vtmp1, vtmp2); break;
+ case 32: reduce32B(opcode, dst, src1, src2, vtmp1, vtmp2); break;
+ case 64: reduce64B(opcode, dst, src1, src2, vtmp1, vtmp2); break;
+
+ default: assert(false, "wrong vector length");
+ }
+}
+
+void C2_MacroAssembler::mulreduceB(int opcode, int vlen,
+ Register dst, Register src1, XMMRegister src2,
+ XMMRegister vtmp1, XMMRegister vtmp2) {
+ switch (vlen) {
+ case 8: mulreduce8B (opcode, dst, src1, src2, vtmp1, vtmp2); break;
+ case 16: mulreduce16B(opcode, dst, src1, src2, vtmp1, vtmp2); break;
+ case 32: mulreduce32B(opcode, dst, src1, src2, vtmp1, vtmp2); break;
+ case 64: mulreduce64B(opcode, dst, src1, src2, vtmp1, vtmp2); break;
+
+ default: assert(false, "wrong vector length");
+ }
+}
+
+void C2_MacroAssembler::reduceS(int opcode, int vlen,
+ Register dst, Register src1, XMMRegister src2,
+ XMMRegister vtmp1, XMMRegister vtmp2) {
+ switch (vlen) {
+ case 4: reduce4S (opcode, dst, src1, src2, vtmp1, vtmp2); break;
+ case 8: reduce8S (opcode, dst, src1, src2, vtmp1, vtmp2); break;
+ case 16: reduce16S(opcode, dst, src1, src2, vtmp1, vtmp2); break;
+ case 32: reduce32S(opcode, dst, src1, src2, vtmp1, vtmp2); break;
+
+ default: assert(false, "wrong vector length");
+ }
+}
+
void C2_MacroAssembler::reduceI(int opcode, int vlen,
- Register dst, Register src1, XMMRegister src2,
- XMMRegister vtmp1, XMMRegister vtmp2) {
+ Register dst, Register src1, XMMRegister src2,
+ XMMRegister vtmp1, XMMRegister vtmp2) {
switch (vlen) {
case 2: reduce2I (opcode, dst, src1, src2, vtmp1, vtmp2); break;
case 4: reduce4I (opcode, dst, src1, src2, vtmp1, vtmp2); break;
@@ -1102,8 +1650,8 @@ void C2_MacroAssembler::reduceI(int opcode, int vlen,
#ifdef _LP64
void C2_MacroAssembler::reduceL(int opcode, int vlen,
- Register dst, Register src1, XMMRegister src2,
- XMMRegister vtmp1, XMMRegister vtmp2) {
+ Register dst, Register src1, XMMRegister src2,
+ XMMRegister vtmp1, XMMRegister vtmp2) {
switch (vlen) {
case 2: reduce2L(opcode, dst, src1, src2, vtmp1, vtmp2); break;
case 4: reduce4L(opcode, dst, src1, src2, vtmp1, vtmp2); break;
@@ -1158,10 +1706,10 @@ void C2_MacroAssembler::reduce2I(int opcode, Register dst, Register src1, XMMReg
phaddd(vtmp1, vtmp1);
} else {
pshufd(vtmp1, src2, 0x1);
- reduce_operation_128(opcode, vtmp1, src2);
+ reduce_operation_128(T_INT, opcode, vtmp1, src2);
}
movdl(vtmp2, src1);
- reduce_operation_128(opcode, vtmp1, vtmp2);
+ reduce_operation_128(T_INT, opcode, vtmp1, vtmp2);
movdl(dst, vtmp1);
}
@@ -1174,7 +1722,7 @@ void C2_MacroAssembler::reduce4I(int opcode, Register dst, Register src1, XMMReg
reduce2I(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
} else {
pshufd(vtmp2, src2, 0xE);
- reduce_operation_128(opcode, vtmp2, src2);
+ reduce_operation_128(T_INT, opcode, vtmp2, src2);
reduce2I(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
}
}
@@ -1187,51 +1735,176 @@ void C2_MacroAssembler::reduce8I(int opcode, Register dst, Register src1, XMMReg
reduce2I(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
} else {
vextracti128_high(vtmp1, src2);
- reduce_operation_128(opcode, vtmp1, src2);
+ reduce_operation_128(T_INT, opcode, vtmp1, src2);
reduce4I(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
}
}
void C2_MacroAssembler::reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
vextracti64x4_high(vtmp2, src2);
- reduce_operation_256(opcode, vtmp2, vtmp2, src2);
+ reduce_operation_256(T_INT, opcode, vtmp2, vtmp2, src2);
reduce8I(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
}
+void C2_MacroAssembler::reduce8B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
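+ // Fold the eight bytes in halves (4-, 2-, then 1-byte shifts), widen the survivor to an int
+ // to fold in src1, and sign-extend the final byte into dst.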
+ pshufd(vtmp2, src2, 0x1);
+ reduce_operation_128(T_BYTE, opcode, vtmp2, src2);
+ movdqu(vtmp1, vtmp2);
+ psrldq(vtmp1, 2);
+ reduce_operation_128(T_BYTE, opcode, vtmp1, vtmp2);
+ movdqu(vtmp2, vtmp1);
+ psrldq(vtmp2, 1);
+ reduce_operation_128(T_BYTE, opcode, vtmp1, vtmp2);
+ movdl(vtmp2, src1);
+ pmovsxbd(vtmp1, vtmp1);
+ reduce_operation_128(T_INT, opcode, vtmp1, vtmp2);
+ pextrb(dst, vtmp1, 0x0);
+ movsbl(dst, dst);
+}
+
+void C2_MacroAssembler::reduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
+ pshufd(vtmp1, src2, 0xE);
+ reduce_operation_128(T_BYTE, opcode, vtmp1, src2);
+ reduce8B(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
+}
+
+void C2_MacroAssembler::reduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
+ vextracti128_high(vtmp2, src2);
+ reduce_operation_128(T_BYTE, opcode, vtmp2, src2);
+ reduce16B(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
+}
+
+void C2_MacroAssembler::reduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
+ vextracti64x4_high(vtmp1, src2);
+ reduce_operation_256(T_BYTE, opcode, vtmp1, vtmp1, src2);
+ reduce32B(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
+}
+
+void C2_MacroAssembler::mulreduce8B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
+ pmovsxbw(vtmp2, src2);
+ reduce8S(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
+}
+
+void C2_MacroAssembler::mulreduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
+ if (UseAVX > 1) {
+ int vector_len = Assembler::AVX_256bit;
+ vpmovsxbw(vtmp1, src2, vector_len);
+ reduce16S(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
+ } else {
+ pmovsxbw(vtmp2, src2);
+ reduce8S(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
+ pshufd(vtmp2, src2, 0x1);
+ pmovsxbw(vtmp2, src2);
+ reduce8S(opcode, dst, dst, vtmp2, vtmp1, vtmp2);
+ }
+}
+
+void C2_MacroAssembler::mulreduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
+ if (UseAVX > 2 && VM_Version::supports_avx512bw()) {
+ int vector_len = Assembler::AVX_512bit;
+ vpmovsxbw(vtmp1, src2, vector_len);
+ reduce32S(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
+ } else {
+ assert(UseAVX >= 2,"Should not reach here.");
+ mulreduce16B(opcode, dst, src1, src2, vtmp1, vtmp2);
+ vextracti128_high(vtmp2, src2);
+ mulreduce16B(opcode, dst, dst, vtmp2, vtmp1, vtmp2);
+ }
+}
+
+void C2_MacroAssembler::mulreduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
+ mulreduce32B(opcode, dst, src1, src2, vtmp1, vtmp2);
+ vextracti64x4_high(vtmp2, src2);
+ mulreduce32B(opcode, dst, dst, vtmp2, vtmp1, vtmp2);
+}
+
+void C2_MacroAssembler::reduce4S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
+ if (opcode == Op_AddReductionVI) {
+ if (vtmp1 != src2) {
+ movdqu(vtmp1, src2);
+ }
+ phaddw(vtmp1, vtmp1);
+ phaddw(vtmp1, vtmp1);
+ } else {
+ pshufd(vtmp2, src2, 0x1);
+ reduce_operation_128(T_SHORT, opcode, vtmp2, src2);
+ movdqu(vtmp1, vtmp2);
+ psrldq(vtmp1, 2);
+ reduce_operation_128(T_SHORT, opcode, vtmp1, vtmp2);
+ }
+ movdl(vtmp2, src1);
+ pmovsxwd(vtmp1, vtmp1);
+ reduce_operation_128(T_INT, opcode, vtmp1, vtmp2);
+ pextrw(dst, vtmp1, 0x0);
+ movswl(dst, dst);
+}
+
+void C2_MacroAssembler::reduce8S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
+ if (opcode == Op_AddReductionVI) {
+ if (vtmp1 != src2) {
+ movdqu(vtmp1, src2);
+ }
+ phaddw(vtmp1, src2);
+ } else {
+ pshufd(vtmp1, src2, 0xE);
+ reduce_operation_128(T_SHORT, opcode, vtmp1, src2);
+ }
+ reduce4S(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
+}
+
+void C2_MacroAssembler::reduce16S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
+ if (opcode == Op_AddReductionVI) {
+ int vector_len = Assembler::AVX_256bit;
+ vphaddw(vtmp2, src2, src2, vector_len);
+ vpermq(vtmp2, vtmp2, 0xD8, vector_len);
+ } else {
+ vextracti128_high(vtmp2, src2);
+ reduce_operation_128(T_SHORT, opcode, vtmp2, src2);
+ }
+ reduce8S(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
+}
+
+void C2_MacroAssembler::reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
+ int vector_len = Assembler::AVX_256bit;
+ vextracti64x4_high(vtmp1, src2);
+ reduce_operation_256(T_SHORT, opcode, vtmp1, vtmp1, src2);
+ reduce16S(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
+}
+
#ifdef _LP64
void C2_MacroAssembler::reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
pshufd(vtmp2, src2, 0xE);
- reduce_operation_128(opcode, vtmp2, src2);
+ reduce_operation_128(T_LONG, opcode, vtmp2, src2);
movdq(vtmp1, src1);
- reduce_operation_128(opcode, vtmp1, vtmp2);
+ reduce_operation_128(T_LONG, opcode, vtmp1, vtmp2);
movdq(dst, vtmp1);
}
void C2_MacroAssembler::reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
vextracti128_high(vtmp1, src2);
- reduce_operation_128(opcode, vtmp1, src2);
+ reduce_operation_128(T_LONG, opcode, vtmp1, src2);
reduce2L(opcode, dst, src1, vtmp1, vtmp1, vtmp2);
}
void C2_MacroAssembler::reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
vextracti64x4_high(vtmp2, src2);
- reduce_operation_256(opcode, vtmp2, vtmp2, src2);
+ reduce_operation_256(T_LONG, opcode, vtmp2, vtmp2, src2);
reduce4L(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
}
#endif // _LP64
void C2_MacroAssembler::reduce2F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp) {
- reduce_operation_128(opcode, dst, src);
+ reduce_operation_128(T_FLOAT, opcode, dst, src);
pshufd(vtmp, src, 0x1);
- reduce_operation_128(opcode, dst, vtmp);
+ reduce_operation_128(T_FLOAT, opcode, dst, vtmp);
}
void C2_MacroAssembler::reduce4F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp) {
reduce2F(opcode, dst, src, vtmp);
pshufd(vtmp, src, 0x2);
- reduce_operation_128(opcode, dst, vtmp);
+ reduce_operation_128(T_FLOAT, opcode, dst, vtmp);
pshufd(vtmp, src, 0x3);
- reduce_operation_128(opcode, dst, vtmp);
+ reduce_operation_128(T_FLOAT, opcode, dst, vtmp);
}
void C2_MacroAssembler::reduce8F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) {
@@ -1247,9 +1920,9 @@ void C2_MacroAssembler::reduce16F(int opcode, XMMRegister dst, XMMRegister src,
}
void C2_MacroAssembler::reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp) {
- reduce_operation_128(opcode, dst, src);
+ reduce_operation_128(T_DOUBLE, opcode, dst, src);
pshufd(vtmp, src, 0xE);
- reduce_operation_128(opcode, dst, vtmp);
+ reduce_operation_128(T_DOUBLE, opcode, dst, vtmp);
}
void C2_MacroAssembler::reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) {
@@ -1264,6 +1937,207 @@ void C2_MacroAssembler::reduce8D(int opcode, XMMRegister dst, XMMRegister src, X
reduce4D(opcode, dst, vtmp1, vtmp1, vtmp2);
}
+void C2_MacroAssembler::reduceFloatMinMax(int opcode, int vlen, bool is_dst_valid,
+ XMMRegister dst, XMMRegister src,
+ XMMRegister tmp, XMMRegister atmp, XMMRegister btmp,
+ XMMRegister xmm_0, XMMRegister xmm_1) {
+ int permconst[] = {1, 14};
+ XMMRegister wsrc = src;
+ XMMRegister wdst = xmm_0;
+ XMMRegister wtmp = (xmm_1 == xnoreg) ? xmm_0: xmm_1;
+
+ int vlen_enc = Assembler::AVX_128bit;
+ if (vlen == 16) {
+ vlen_enc = Assembler::AVX_256bit;
+ }
+
+ for (int i = log2(vlen) - 1; i >=0; i--) {
+ if (i == 0 && !is_dst_valid) {
+ wdst = dst;
+ }
+ if (i == 3) {
+ vextracti64x4_high(wtmp, wsrc);
+ } else if (i == 2) {
+ vextracti128_high(wtmp, wsrc);
+ } else { // i = [0,1]
+ vpermilps(wtmp, wsrc, permconst[i], vlen_enc);
+ }
+ vminmax_fp(opcode, T_FLOAT, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc);
+ wsrc = wdst;
+ vlen_enc = Assembler::AVX_128bit;
+ }
+ if (is_dst_valid) {
+ vminmax_fp(opcode, T_FLOAT, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit);
+ }
+}
+
+void C2_MacroAssembler::reduceDoubleMinMax(int opcode, int vlen, bool is_dst_valid, XMMRegister dst, XMMRegister src,
+ XMMRegister tmp, XMMRegister atmp, XMMRegister btmp,
+ XMMRegister xmm_0, XMMRegister xmm_1) {
+ XMMRegister wsrc = src;
+ XMMRegister wdst = xmm_0;
+ XMMRegister wtmp = (xmm_1 == xnoreg) ? xmm_0: xmm_1;
+ int vlen_enc = Assembler::AVX_128bit;
+ if (vlen == 8) {
+ vlen_enc = Assembler::AVX_256bit;
+ }
+ for (int i = log2(vlen) - 1; i >=0; i--) {
+ if (i == 0 && !is_dst_valid) {
+ wdst = dst;
+ }
+ if (i == 1) {
+ vextracti128_high(wtmp, wsrc);
+ } else if (i == 2) {
+ vextracti64x4_high(wtmp, wsrc);
+ } else {
+ assert(i == 0, "%d", i);
+ vpermilpd(wtmp, wsrc, 1, vlen_enc);
+ }
+ vminmax_fp(opcode, T_DOUBLE, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc);
+ wsrc = wdst;
+ vlen_enc = Assembler::AVX_128bit;
+ }
+ if (is_dst_valid) {
+ vminmax_fp(opcode, T_DOUBLE, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit);
+ }
+}
+
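
The two routines above repeatedly halve the live vector width: each step extracts the upper half (or permutes neighbours for the last steps) and combines it with the lower half through vminmax_fp, which also carries the NaN and -0.0 handling. A scalar sketch of the halving pattern, assuming a plain min for illustration (the real code dispatches on opcode and handles the IEEE corner cases inside vminmax_fp):

    #include <algorithm>
    #include <cstddef>

    // Reference for the log2(vlen) halving loop in reduceFloatMinMax /
    // reduceDoubleMinMax: each step combines element i with element i + width/2,
    // then the live width is halved until only lane 0 remains.
    float minmax_halving_reference(float* v, size_t vlen) {
      for (size_t width = vlen; width > 1; width /= 2) {
        for (size_t i = 0; i < width / 2; i++) {
          v[i] = std::min(v[i], v[i + width / 2]);   // stands in for vminmax_fp
        }
      }
      return v[0];
    }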
+void C2_MacroAssembler::extract(BasicType bt, Register dst, XMMRegister src, int idx) {
+ switch (bt) {
+ case T_BYTE: pextrb(dst, src, idx); break;
+ case T_SHORT: pextrw(dst, src, idx); break;
+ case T_INT: pextrd(dst, src, idx); break;
+ case T_LONG: pextrq(dst, src, idx); break;
+
+ default:
+ assert(false,"Should not reach here.");
+ break;
+ }
+}
+
+XMMRegister C2_MacroAssembler::get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex) {
+ int esize = type2aelembytes(typ);
+ int elem_per_lane = 16/esize;
+ int lane = elemindex / elem_per_lane;
+ int eindex = elemindex % elem_per_lane;
+
+ if (lane >= 2) {
+ assert(UseAVX > 2, "required");
+ vextractf32x4(dst, src, lane & 3);
+ return dst;
+ } else if (lane > 0) {
+ assert(UseAVX > 0, "required");
+ vextractf128(dst, src, lane);
+ return dst;
+ } else {
+ return src;
+ }
+}
+
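
get_lane only has to answer which 128-bit lane an element lives in; get_elem then works on the element's position within that lane. The index arithmetic is simple enough to show directly (a standalone sketch; the constant 16 is the lane width in bytes):

    #include <cstdio>

    // Decompose a vector element index into a 128-bit lane number and an index
    // inside that lane, mirroring the arithmetic at the top of get_lane/get_elem.
    void lane_of(int elem_index, int elem_size_bytes) {
      int elem_per_lane = 16 / elem_size_bytes;   // 16 bytes per 128-bit lane
      int lane   = elem_index / elem_per_lane;    // lane to extract (0..3)
      int eindex = elem_index % elem_per_lane;    // position inside that lane
      printf("element %d (size %d): lane %d, in-lane index %d\n",
             elem_index, elem_size_bytes, lane, eindex);
    }

For example, element 9 of an int vector (4-byte elements) lands in lane 2 with in-lane index 1, which is why lanes >= 2 need the AVX-512 vextractf32x4 path above.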
+void C2_MacroAssembler::get_elem(BasicType typ, Register dst, XMMRegister src, int elemindex) {
+ int esize = type2aelembytes(typ);
+ int elem_per_lane = 16/esize;
+ int eindex = elemindex % elem_per_lane;
+ assert(is_integral_type(typ),"required");
+
+ if (eindex == 0) {
+ if (typ == T_LONG) {
+ movq(dst, src);
+ } else {
+ movdl(dst, src);
+ if (typ == T_BYTE)
+ movsbl(dst, dst);
+ else if (typ == T_SHORT)
+ movswl(dst, dst);
+ }
+ } else {
+ extract(typ, dst, src, eindex);
+ }
+}
+
+void C2_MacroAssembler::get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, Register tmp, XMMRegister vtmp) {
+ int esize = type2aelembytes(typ);
+ int elem_per_lane = 16/esize;
+ int eindex = elemindex % elem_per_lane;
+ assert((typ == T_FLOAT || typ == T_DOUBLE),"required");
+
+ if (eindex == 0) {
+ movq(dst, src);
+ } else {
+ if (typ == T_FLOAT) {
+ if (UseAVX == 0) {
+ movdqu(dst, src);
+ pshufps(dst, dst, eindex);
+ } else {
+ vpshufps(dst, src, src, eindex, Assembler::AVX_128bit);
+ }
+ } else {
+ if (UseAVX == 0) {
+ movdqu(dst, src);
+ psrldq(dst, eindex*esize);
+ } else {
+ vpsrldq(dst, src, eindex*esize, Assembler::AVX_128bit);
+ }
+ movq(dst, dst);
+ }
+ }
+ // Zero upper bits
+ if (typ == T_FLOAT) {
+ if (UseAVX == 0) {
+ assert((vtmp != xnoreg) && (tmp != noreg), "required.");
+ movdqu(vtmp, ExternalAddress(StubRoutines::x86::vector_32_bit_mask()), tmp);
+ pand(dst, vtmp);
+ } else {
+ assert((tmp != noreg), "required.");
+ vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_32_bit_mask()), Assembler::AVX_128bit, tmp);
+ }
+ }
+}
+
+void C2_MacroAssembler::evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch) {
+ switch(typ) {
+ case T_BYTE:
+ evpcmpb(kdmask, ksmask, src1, adr, comparison, vector_len, scratch);
+ break;
+ case T_SHORT:
+ evpcmpw(kdmask, ksmask, src1, adr, comparison, vector_len, scratch);
+ break;
+ case T_INT:
+ case T_FLOAT:
+ evpcmpd(kdmask, ksmask, src1, adr, comparison, vector_len, scratch);
+ break;
+ case T_LONG:
+ case T_DOUBLE:
+ evpcmpq(kdmask, ksmask, src1, adr, comparison, vector_len, scratch);
+ break;
+ default:
+ assert(false,"Should not reach here.");
+ break;
+ }
+}
+
+void C2_MacroAssembler::evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len) {
+ switch(typ) {
+ case T_BYTE:
+ evpblendmb(dst, kmask, src1, src2, merge, vector_len);
+ break;
+ case T_SHORT:
+ evpblendmw(dst, kmask, src1, src2, merge, vector_len);
+ break;
+ case T_INT:
+ case T_FLOAT:
+ evpblendmd(dst, kmask, src1, src2, merge, vector_len);
+ break;
+ case T_LONG:
+ case T_DOUBLE:
+ evpblendmq(dst, kmask, src1, src2, merge, vector_len);
+ break;
+ default:
+ assert(false,"Should not reach here.");
+ break;
+ }
+}
+
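
evpblend dispatches to the element-width-specific EVEX blend; semantically, each destination element takes src2 where the corresponding mask bit is set and src1 otherwise. A scalar sketch of that selection (illustrative; the merge-masking subtleties of the real instructions are ignored here):

    #include <cstdint>
    #include <cstddef>

    // Reference for a masked blend: bit i of the mask selects between the two
    // sources for element i, as the evpblendm{b,w,d,q} forms do per element width.
    void masked_blend_reference(int32_t* dst, uint64_t mask,
                                const int32_t* src1, const int32_t* src2,
                                size_t nelems) {
      for (size_t i = 0; i < nelems; i++) {
        dst[i] = ((mask >> i) & 1) ? src2[i] : src1[i];
      }
    }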
//-------------------------------------------------------------------------------------------
// IndexOf for constant substrings with size >= 8 chars
@@ -1850,7 +2724,7 @@ void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, Regist
pmovmskb(tmp, vec3);
}
bsfl(ch, tmp);
- addl(result, ch);
+ addptr(result, ch);
bind(FOUND_SEQ_CHAR);
subptr(result, str1);
@@ -1859,6 +2733,99 @@ void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, Regist
bind(DONE_LABEL);
} // string_indexof_char
+void C2_MacroAssembler::stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result,
+ XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) {
+ ShortBranchVerifier sbv(this);
+ assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required");
+
+ int stride = 16;
+
+ Label FOUND_CHAR, SCAN_TO_CHAR_INIT, SCAN_TO_CHAR_LOOP,
+ SCAN_TO_16_CHAR, SCAN_TO_16_CHAR_LOOP, SCAN_TO_32_CHAR_LOOP,
+ RET_NOT_FOUND, SCAN_TO_16_CHAR_INIT,
+ FOUND_SEQ_CHAR, DONE_LABEL;
+
+ movptr(result, str1);
+ if (UseAVX >= 2) {
+ cmpl(cnt1, stride);
+ jcc(Assembler::less, SCAN_TO_CHAR_INIT);
+ cmpl(cnt1, stride*2);
+ jcc(Assembler::less, SCAN_TO_16_CHAR_INIT);
+ movdl(vec1, ch);
+ vpbroadcastb(vec1, vec1, Assembler::AVX_256bit);
+ vpxor(vec2, vec2);
+ movl(tmp, cnt1);
+ andl(tmp, 0xFFFFFFE0); //vector count (in chars)
+ andl(cnt1,0x0000001F); //tail count (in chars)
+
+ bind(SCAN_TO_32_CHAR_LOOP);
+ vmovdqu(vec3, Address(result, 0));
+ vpcmpeqb(vec3, vec3, vec1, Assembler::AVX_256bit);
+ vptest(vec2, vec3);
+ jcc(Assembler::carryClear, FOUND_CHAR);
+ addptr(result, 32);
+ subl(tmp, stride*2);
+ jcc(Assembler::notZero, SCAN_TO_32_CHAR_LOOP);
+ jmp(SCAN_TO_16_CHAR);
+
+ bind(SCAN_TO_16_CHAR_INIT);
+ movdl(vec1, ch);
+ pxor(vec2, vec2);
+ pshufb(vec1, vec2);
+ }
+
+ bind(SCAN_TO_16_CHAR);
+ cmpl(cnt1, stride);
+ jcc(Assembler::less, SCAN_TO_CHAR_INIT);//less than 16 entries left
+ if (UseAVX < 2) {
+ movdl(vec1, ch);
+ pxor(vec2, vec2);
+ pshufb(vec1, vec2);
+ }
+ movl(tmp, cnt1);
+ andl(tmp, 0xFFFFFFF0); //vector count (in bytes)
+ andl(cnt1,0x0000000F); //tail count (in bytes)
+
+ bind(SCAN_TO_16_CHAR_LOOP);
+ movdqu(vec3, Address(result, 0));
+ pcmpeqb(vec3, vec1);
+ ptest(vec2, vec3);
+ jcc(Assembler::carryClear, FOUND_CHAR);
+ addptr(result, 16);
+ subl(tmp, stride);
+ jcc(Assembler::notZero, SCAN_TO_16_CHAR_LOOP);//last 16 items...
+
+ bind(SCAN_TO_CHAR_INIT);
+ testl(cnt1, cnt1);
+ jcc(Assembler::zero, RET_NOT_FOUND);
+ bind(SCAN_TO_CHAR_LOOP);
+ load_unsigned_byte(tmp, Address(result, 0));
+ cmpl(ch, tmp);
+ jccb(Assembler::equal, FOUND_SEQ_CHAR);
+ addptr(result, 1);
+ subl(cnt1, 1);
+ jccb(Assembler::zero, RET_NOT_FOUND);
+ jmp(SCAN_TO_CHAR_LOOP);
+
+ bind(RET_NOT_FOUND);
+ movl(result, -1);
+ jmpb(DONE_LABEL);
+
+ bind(FOUND_CHAR);
+ if (UseAVX >= 2) {
+ vpmovmskb(tmp, vec3);
+ } else {
+ pmovmskb(tmp, vec3);
+ }
+ bsfl(ch, tmp);
+ addptr(result, ch);
+
+ bind(FOUND_SEQ_CHAR);
+ subptr(result, str1);
+
+ bind(DONE_LABEL);
+} // stringL_indexof_char
+
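
Stripped of the 32-byte and 16-byte vector strides, stringL_indexof_char answers the same question as a plain byte scan: the index of the first occurrence of ch in a Latin-1 string, or -1. A scalar reference (illustrative only; the vector code broadcasts ch, compares 32 or 16 bytes at a time, and uses ptest/bsf to locate the hit):

    #include <cstdint>
    #include <cstddef>

    // Scalar reference for stringL_indexof_char: index of the first occurrence
    // of 'ch' in a Latin-1 (one byte per character) string of length 'cnt', or -1.
    int indexof_char_latin1(const uint8_t* str, size_t cnt, uint8_t ch) {
      for (size_t i = 0; i < cnt; i++) {
        if (str[i] == ch) {
          return (int)i;     // FOUND_SEQ_CHAR path: result = &str[i] - str
        }
      }
      return -1;             // RET_NOT_FOUND path
    }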
// helper function for string_compare
void C2_MacroAssembler::load_next_elements(Register elem1, Register elem2, Register str1, Register str2,
Address::ScaleFactor scale, Address::ScaleFactor scale1,
diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
index f16b193a21d..79ab55a75ad 100644
--- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
@@ -28,6 +28,8 @@
// C2_MacroAssembler contains high-level macros for C2
public:
+ Assembler::AvxVectorLen vector_length_encoding(int vlen_in_bytes);
+
// special instructions for EVEX
void setvectmask(Register dst, Register src);
void restorevectmask();
@@ -71,25 +73,69 @@
void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, Register scr);
void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
+
+ void pminmax(int opcode, BasicType elem_bt, XMMRegister dst, XMMRegister src,
+ XMMRegister tmp = xnoreg);
+ void vpminmax(int opcode, BasicType elem_bt,
+ XMMRegister dst, XMMRegister src1, XMMRegister src2,
+ int vlen_enc);
+
+ void vminmax_fp(int opcode, BasicType elem_bt,
+ XMMRegister dst, XMMRegister a, XMMRegister b,
+ XMMRegister tmp, XMMRegister atmp, XMMRegister btmp,
+ int vlen_enc);
+ void evminmax_fp(int opcode, BasicType elem_bt,
+ XMMRegister dst, XMMRegister a, XMMRegister b,
+ KRegister ktmp, XMMRegister atmp, XMMRegister btmp,
+ int vlen_enc);
+
void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
- void vshiftd(int opcode, XMMRegister dst, XMMRegister src);
+ void vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
+ void vextendwd(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
+
+ void vshiftd(int opcode, XMMRegister dst, XMMRegister shift);
void vshiftd_imm(int opcode, XMMRegister dst, int shift);
- void vshiftd(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void vshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
void vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);
- void vshiftw(int opcode, XMMRegister dst, XMMRegister src);
- void vshiftw(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
- void vshiftq(int opcode, XMMRegister dst, XMMRegister src);
+ void vshiftw(int opcode, XMMRegister dst, XMMRegister shift);
+ void vshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
+ void vshiftq(int opcode, XMMRegister dst, XMMRegister shift);
void vshiftq_imm(int opcode, XMMRegister dst, int shift);
- void vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void vshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
void vshiftq_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len);
void vprotate_imm(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, int shift, int vector_len);
void vprotate_var(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
- // Reductions for vectors of ints, longs, floats, and doubles.
+ void varshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
+ void varshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc);
+ void varshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc, XMMRegister vtmp = xnoreg);
+ void varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch);
+ void evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch);
+
+ void insert(BasicType typ, XMMRegister dst, Register val, int idx);
+ void vinsert(BasicType typ, XMMRegister dst, XMMRegister src, Register val, int idx);
+ void vgather(BasicType typ, XMMRegister dst, Register base, XMMRegister idx, XMMRegister mask, int vector_len);
+ void evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len);
+ void evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len);
+
+ // extract
+ void extract(BasicType typ, Register dst, XMMRegister src, int idx);
+ XMMRegister get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex);
+ void get_elem(BasicType typ, Register dst, XMMRegister src, int elemindex);
+ void get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, Register tmp = noreg, XMMRegister vtmp = xnoreg);
- // dst = src1 + reduce(op, src2) using vtmp as temps
+ // blend
+ void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch = rscratch1);
+ void evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);
+
+ void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt);
+ void load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes);
+
+ // Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.
+
+ // dst = src1 reduce(op, src2) using vtmp as temps
void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
#ifdef _LP64
void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
@@ -99,38 +145,71 @@
void reduce_fp(int opcode, int vlen,
XMMRegister dst, XMMRegister src,
XMMRegister vtmp1, XMMRegister vtmp2 = xnoreg);
+ void reduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+ void mulreduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+ void reduceS(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+ void reduceFloatMinMax(int opcode, int vlen, bool is_dst_valid,
+ XMMRegister dst, XMMRegister src,
+ XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg);
+ void reduceDoubleMinMax(int opcode, int vlen, bool is_dst_valid,
+ XMMRegister dst, XMMRegister src,
+ XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg);
private:
void reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
void reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
+ // Int Reduction
void reduce2I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce4I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce8I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+ // Byte Reduction
+ void reduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+ void reduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+ void reduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+ void reduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+ void mulreduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+ void mulreduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+ void mulreduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+ void mulreduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+
+ // Short Reduction
+ void reduce4S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+ void reduce8S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+ void reduce16S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+ void reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
+
+ // Long Reduction
#ifdef _LP64
void reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
#endif // _LP64
+ // Float Reduction
void reduce2F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
void reduce4F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
void reduce8F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
+ // Double Reduction
void reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp);
void reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
void reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2);
- void reduce_operation_128(int opcode, XMMRegister dst, XMMRegister src);
- void reduce_operation_256(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
+ // Base reduction instruction
+ void reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src);
+ void reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2);
public:
void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);
+ void stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result,
+ XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);
+
// IndexOf strings.
// Small strings are loaded through stack if they cross page boundary.
void string_indexof(Register str1, Register str2,
diff --git a/src/hotspot/cpu/x86/c2_globals_x86.hpp b/src/hotspot/cpu/x86/c2_globals_x86.hpp
index 6513be7b53e..31e77b52568 100644
--- a/src/hotspot/cpu/x86/c2_globals_x86.hpp
+++ b/src/hotspot/cpu/x86/c2_globals_x86.hpp
@@ -31,8 +31,6 @@
// Sets the default values for platform dependent flags used by the server compiler.
// (see c2_globals.hpp). Alpha-sorted.
define_pd_global(bool, BackgroundCompilation, true);
-define_pd_global(bool, UseTLAB, true);
-define_pd_global(bool, ResizeTLAB, true);
define_pd_global(bool, CICompileOSR, true);
define_pd_global(bool, InlineIntrinsics, true);
define_pd_global(bool, PreferInterpreterNativeStubs, false);
diff --git a/src/hotspot/cpu/x86/c2_safepointPollStubTable_x86.cpp b/src/hotspot/cpu/x86/c2_safepointPollStubTable_x86.cpp
new file mode 100644
index 00000000000..c3d4850a5db
--- /dev/null
+++ b/src/hotspot/cpu/x86/c2_safepointPollStubTable_x86.cpp
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "opto/compile.hpp"
+#include "opto/node.hpp"
+#include "opto/output.hpp"
+#include "runtime/sharedRuntime.hpp"
+
+#define __ masm.
+void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointPollStub* entry) const {
+ assert(SharedRuntime::polling_page_return_handler_blob() != NULL,
+ "polling page return stub not created yet");
+ address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point();
+
+ RuntimeAddress callback_addr(stub);
+
+ __ bind(entry->_stub_label);
+ InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset);
+#ifdef _LP64
+ __ lea(rscratch1, safepoint_pc);
+ __ movptr(Address(r15_thread, JavaThread::saved_exception_pc_offset()), rscratch1);
+#else
+ const Register tmp1 = rcx;
+ const Register tmp2 = rdx;
+ __ push(tmp1);
+ __ push(tmp2);
+
+ __ lea(tmp1, safepoint_pc);
+ __ get_thread(tmp2);
+ __ movptr(Address(tmp2, JavaThread::saved_exception_pc_offset()), tmp1);
+
+ __ pop(tmp2);
+ __ pop(tmp1);
+#endif
+ __ jump(callback_addr);
+}
+#undef __
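
A conceptual model of what each emitted stub does at runtime, with hypothetical names; the real behaviour is the assembly above, which additionally preserves scratch registers on 32-bit:

    // Record where the poll happened in the thread, then divert to the shared
    // polling-page return handler.
    struct ThreadModel {
      void* saved_exception_pc;
    };

    void poll_stub_model(ThreadModel* thread, void* safepoint_pc,
                         void (*return_handler)()) {
      thread->saved_exception_pc = safepoint_pc;   // movptr(Address(thread, ...), pc)
      return_handler();                            // jump(callback_addr)
    }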
diff --git a/src/hotspot/cpu/x86/frame_x86.cpp b/src/hotspot/cpu/x86/frame_x86.cpp
index c433eabf993..1e9bf12cd2b 100644
--- a/src/hotspot/cpu/x86/frame_x86.cpp
+++ b/src/hotspot/cpu/x86/frame_x86.cpp
@@ -36,6 +36,7 @@
#include "runtime/monitorChunk.hpp"
#include "runtime/os.inline.hpp"
#include "runtime/signature.hpp"
+#include "runtime/stackWatermarkSet.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/stubRoutines.hpp"
#include "vmreg_x86.inline.hpp"
@@ -469,8 +470,8 @@ frame frame::sender_for_compiled_frame(RegisterMap* map) const {
//------------------------------------------------------------------------------
-// frame::sender
-frame frame::sender(RegisterMap* map) const {
+// frame::sender_raw
+frame frame::sender_raw(RegisterMap* map) const {
  // Default is we don't have to follow them. The sender_for_xxx will
// update it accordingly
map->set_include_argument_oops(false);
@@ -487,6 +488,16 @@ frame frame::sender(RegisterMap* map) const {
return frame(sender_sp(), link(), sender_pc());
}
+frame frame::sender(RegisterMap* map) const {
+ frame result = sender_raw(map);
+
+ if (map->process_frames()) {
+ StackWatermarkSet::on_iteration(map->thread(), result);
+ }
+
+ return result;
+}
+
bool frame::is_interpreted_frame_valid(JavaThread* thread) const {
assert(is_interpreted_frame(), "Not an interpreted frame");
// These are reasonable sanity checks
diff --git a/src/hotspot/cpu/x86/frame_x86.hpp b/src/hotspot/cpu/x86/frame_x86.hpp
index ffe5e92275d..26dbb2aa956 100644
--- a/src/hotspot/cpu/x86/frame_x86.hpp
+++ b/src/hotspot/cpu/x86/frame_x86.hpp
@@ -156,4 +156,7 @@
static jint interpreter_frame_expression_stack_direction() { return -1; }
+ // returns the sending frame, without applying any barriers
+ frame sender_raw(RegisterMap* map) const;
+
#endif // CPU_X86_FRAME_X86_HPP
diff --git a/src/hotspot/cpu/x86/gc/shenandoah/c1/shenandoahBarrierSetC1_x86.cpp b/src/hotspot/cpu/x86/gc/shenandoah/c1/shenandoahBarrierSetC1_x86.cpp
index 58dcd9ed5fb..2aac0608207 100644
--- a/src/hotspot/cpu/x86/gc/shenandoah/c1/shenandoahBarrierSetC1_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/shenandoah/c1/shenandoahBarrierSetC1_x86.cpp
@@ -111,7 +111,8 @@ LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRIt
__ xchg(access.resolved_addr(), result, result, LIR_OprFact::illegalOpr);
if (access.is_oop()) {
- result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), false);
+ ShenandoahBarrierSet::AccessKind kind = ShenandoahBarrierSet::access_kind(access.decorators(), access.type());
+ result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), kind);
LIR_Opr tmp = gen->new_register(type);
__ move(result, tmp);
result = tmp;
diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
index 5ce3cc95e93..40f16ef2731 100644
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp
@@ -32,7 +32,6 @@
#include "gc/shenandoah/shenandoahThreadLocalData.hpp"
#include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp"
#include "interpreter/interpreter.hpp"
-#include "interpreter/interp_masm.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/thread.hpp"
#include "utilities/macros.hpp"
@@ -44,8 +43,6 @@
#define __ masm->
-address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL;
-
static void save_xmm_registers(MacroAssembler* masm) {
__ subptr(rsp, 64);
__ movdbl(Address(rsp, 0), xmm0);
@@ -271,11 +268,14 @@ void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm,
__ bind(done);
}
-void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address src) {
+void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address src, ShenandoahBarrierSet::AccessKind kind) {
assert(ShenandoahLoadRefBarrier, "Should be enabled");
- Label done;
+ Label heap_stable, not_cset;
+ __ block_comment("load_reference_barrier { ");
+
+ // Check if GC is active
#ifdef _LP64
Register thread = r15_thread;
#else
@@ -289,138 +289,130 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembl
Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
__ testb(gc_state, ShenandoahHeap::HAS_FORWARDED);
- __ jccb(Assembler::zero, done);
-
- // Use rsi for src address
- const Register src_addr = rsi;
- // Setup address parameter first, if it does not clobber oop in dst
- bool need_addr_setup = (src_addr != dst);
-
- if (need_addr_setup) {
- __ push(src_addr);
- __ lea(src_addr, src);
-
- if (dst != rax) {
- // Move obj into rax and save rax
- __ push(rax);
- __ movptr(rax, dst);
- }
- } else {
- // dst == rsi
- __ push(rax);
- __ movptr(rax, dst);
-
- // we can clobber it, since it is outgoing register
- __ lea(src_addr, src);
- }
-
- save_xmm_registers(masm);
- __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb())));
- restore_xmm_registers(masm);
-
- if (need_addr_setup) {
- if (dst != rax) {
- __ movptr(dst, rax);
- __ pop(rax);
+ __ jcc(Assembler::zero, heap_stable);
+
+ Register tmp1 = noreg, tmp2 = noreg;
+ if (kind == ShenandoahBarrierSet::AccessKind::NORMAL) {
+ // Test for object in cset
+ // Allocate temporary registers
+ for (int i = 0; i < 8; i++) {
+ Register r = as_Register(i);
+ if (r != rsp && r != rbp && r != dst && r != src.base() && r != src.index()) {
+ if (tmp1 == noreg) {
+ tmp1 = r;
+ } else {
+ tmp2 = r;
+ break;
+ }
+ }
}
- __ pop(src_addr);
- } else {
- __ movptr(dst, rax);
- __ pop(rax);
+ assert(tmp1 != noreg, "tmp1 allocated");
+ assert(tmp2 != noreg, "tmp2 allocated");
+ assert_different_registers(tmp1, tmp2, src.base(), src.index());
+ assert_different_registers(tmp1, tmp2, dst);
+
+ __ push(tmp1);
+ __ push(tmp2);
+
+ // Optimized cset-test
+ __ movptr(tmp1, dst);
+ __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
+ __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
+ __ movbool(tmp1, Address(tmp1, tmp2, Address::times_1));
+ __ testbool(tmp1);
+ __ jcc(Assembler::zero, not_cset);
+ }
+
+ uint num_saved_regs = 4 + (dst != rax ? 1 : 0) LP64_ONLY(+4);
+ __ subptr(rsp, num_saved_regs * wordSize);
+ uint slot = num_saved_regs;
+ if (dst != rax) {
+ __ movptr(Address(rsp, (--slot) * wordSize), rax);
}
-
- __ bind(done);
-
-#ifndef _LP64
- __ pop(thread);
+ __ movptr(Address(rsp, (--slot) * wordSize), rcx);
+ __ movptr(Address(rsp, (--slot) * wordSize), rdx);
+ __ movptr(Address(rsp, (--slot) * wordSize), rdi);
+ __ movptr(Address(rsp, (--slot) * wordSize), rsi);
+#ifdef _LP64
+ __ movptr(Address(rsp, (--slot) * wordSize), r8);
+ __ movptr(Address(rsp, (--slot) * wordSize), r9);
+ __ movptr(Address(rsp, (--slot) * wordSize), r10);
+ __ movptr(Address(rsp, (--slot) * wordSize), r11);
+ // r12-r15 are callee saved in all calling conventions
#endif
-}
-
-void ShenandoahBarrierSetAssembler::load_reference_barrier_native(MacroAssembler* masm, Register dst, Address src) {
- if (!ShenandoahLoadRefBarrier) {
- return;
- }
-
- Label done;
- Label not_null;
- Label slow_path;
- __ block_comment("load_reference_barrier_native { ");
-
- // null check
- __ testptr(dst, dst);
- __ jcc(Assembler::notZero, not_null);
- __ jmp(done);
- __ bind(not_null);
-
+ assert(slot == 0, "must use all slots");
+ // Shuffle registers such that dst is in c_rarg0 and addr in c_rarg1.
#ifdef _LP64
- Register thread = r15_thread;
+ Register arg0 = c_rarg0, arg1 = c_rarg1;
#else
- Register thread = rcx;
- if (thread == dst) {
- thread = rbx;
- }
- __ push(thread);
- __ get_thread(thread);
-#endif
- assert_different_registers(dst, thread);
-
- Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset()));
- __ testb(gc_state, ShenandoahHeap::EVACUATION);
-#ifndef _LP64
- __ pop(thread);
+ Register arg0 = rdi, arg1 = rsi;
#endif
- __ jccb(Assembler::notZero, slow_path);
- __ jmp(done);
- __ bind(slow_path);
-
- if (dst != rax) {
- __ push(rax);
+ if (dst == arg1) {
+ __ lea(arg0, src);
+ __ xchgptr(arg1, arg0);
+ } else {
+ __ lea(arg1, src);
+ __ movptr(arg0, dst);
}
- __ push(rcx);
- __ push(rdx);
- __ push(rdi);
- __ push(rsi);
-#ifdef _LP64
- __ push(r8);
- __ push(r9);
- __ push(r10);
- __ push(r11);
- __ push(r12);
- __ push(r13);
- __ push(r14);
- __ push(r15);
-#endif
-
- assert_different_registers(dst, rsi);
- __ lea(rsi, src);
save_xmm_registers(masm);
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native), dst, rsi);
+ switch (kind) {
+ case ShenandoahBarrierSet::AccessKind::NORMAL:
+ if (UseCompressedOops) {
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), arg0, arg1);
+ } else {
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), arg0, arg1);
+ }
+ break;
+ case ShenandoahBarrierSet::AccessKind::WEAK:
+ if (UseCompressedOops) {
+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow), arg0, arg1);
+ } else {
+ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), arg0, arg1);
+ }
+ break;
+ case ShenandoahBarrierSet::AccessKind::NATIVE:
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), arg0, arg1);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
restore_xmm_registers(masm);
#ifdef _LP64
- __ pop(r15);
- __ pop(r14);
- __ pop(r13);
- __ pop(r12);
- __ pop(r11);
- __ pop(r10);
- __ pop(r9);
- __ pop(r8);
+ __ movptr(r11, Address(rsp, (slot++) * wordSize));
+ __ movptr(r10, Address(rsp, (slot++) * wordSize));
+ __ movptr(r9, Address(rsp, (slot++) * wordSize));
+ __ movptr(r8, Address(rsp, (slot++) * wordSize));
#endif
- __ pop(rsi);
- __ pop(rdi);
- __ pop(rdx);
- __ pop(rcx);
+ __ movptr(rsi, Address(rsp, (slot++) * wordSize));
+ __ movptr(rdi, Address(rsp, (slot++) * wordSize));
+ __ movptr(rdx, Address(rsp, (slot++) * wordSize));
+ __ movptr(rcx, Address(rsp, (slot++) * wordSize));
if (dst != rax) {
__ movptr(dst, rax);
- __ pop(rax);
+ __ movptr(rax, Address(rsp, (slot++) * wordSize));
}
- __ bind(done);
- __ block_comment("load_reference_barrier_native { ");
+ assert(slot == num_saved_regs, "must use all slots");
+ __ addptr(rsp, num_saved_regs * wordSize);
+
+ __ bind(not_cset);
+
+ if (kind == ShenandoahBarrierSet::AccessKind::NORMAL) {
+ __ pop(tmp2);
+ __ pop(tmp1);
+ }
+
+ __ bind(heap_stable);
+
+ __ block_comment("} load_reference_barrier");
+
+#ifndef _LP64
+ __ pop(thread);
+#endif
}
void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Register dst, Register tmp) {
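
The inlined collection-set test in the NORMAL path above indexes a per-region byte table by the object's region number. A scalar sketch of that lookup (illustrative; the shift amount and the table address come from the Shenandoah heap at runtime):

    #include <cstdint>

    // Model of the inlined cset fast test: shift the object address down by the
    // region size to get a region index, then read one byte from the global
    // in_cset_fast_test table; non-zero means the object is in the collection set.
    bool in_collection_set(uintptr_t obj, const uint8_t* cset_table,
                           unsigned region_size_bytes_shift) {
      return cset_table[obj >> region_size_bytes_shift] != 0;
    }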
@@ -464,16 +456,6 @@ void ShenandoahBarrierSetAssembler::storeval_barrier_impl(MacroAssembler* masm,
}
}
-void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address src) {
- if (ShenandoahLoadRefBarrier) {
- Label done;
- __ testptr(dst, dst);
- __ jcc(Assembler::zero, done);
- load_reference_barrier_not_null(masm, dst, src);
- __ bind(done);
- }
-}
-
//
// Arguments:
//
@@ -504,7 +486,7 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet d
// Preserve src location for LRB
if (dst == src.base() || dst == src.index()) {
- // Use tmp1 for dst if possible, as it is not used in BarrierAssembler::load_at()
+ // Use tmp1 for dst if possible, as it is not used in BarrierAssembler::load_at()
if (tmp1->is_valid() && tmp1 != src.base() && tmp1 != src.index()) {
dst = tmp1;
use_tmp1_for_dst = true;
@@ -517,11 +499,8 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet d
BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread);
- if (ShenandoahBarrierSet::use_load_reference_barrier_native(decorators, type)) {
- load_reference_barrier_native(masm, dst, src);
- } else {
- load_reference_barrier(masm, dst, src);
- }
+ ShenandoahBarrierSet::AccessKind kind = ShenandoahBarrierSet::access_kind(decorators, type);
+ load_reference_barrier(masm, dst, src, kind);
// Move loaded oop to final destination
if (dst != result_dst) {
@@ -638,7 +617,8 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm,
bool exchange, Register tmp1, Register tmp2) {
assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled");
assert(oldval == rax, "must be in rax for implicit use in cmpxchg");
- assert_different_registers(oldval, newval, tmp1, tmp2);
+ assert_different_registers(oldval, tmp1, tmp2);
+ assert_different_registers(newval, tmp1, tmp2);
Label L_success, L_failure;
@@ -870,10 +850,18 @@ void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assemble
__ bind(slow_path);
ce->store_parameter(res, 0);
ce->store_parameter(addr, 1);
- if (stub->is_native()) {
- __ call(RuntimeAddress(bs->load_reference_barrier_native_rt_code_blob()->code_begin()));
- } else {
- __ call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin()));
+ switch (stub->kind()) {
+ case ShenandoahBarrierSet::AccessKind::NORMAL:
+ __ call(RuntimeAddress(bs->load_reference_barrier_normal_rt_code_blob()->code_begin()));
+ break;
+ case ShenandoahBarrierSet::AccessKind::WEAK:
+ __ call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin()));
+ break;
+ case ShenandoahBarrierSet::AccessKind::NATIVE:
+ __ call(RuntimeAddress(bs->load_reference_barrier_native_rt_code_blob()->code_begin()));
+ break;
+ default:
+ ShouldNotReachHere();
}
__ jmp(*stub->continuation());
}
@@ -938,7 +926,7 @@ void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAss
__ epilogue();
}
-void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, bool is_native) {
+void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, ShenandoahBarrierSet::AccessKind kind) {
__ prologue("shenandoah_load_reference_barrier", false);
// arg0 : object to be resolved
@@ -947,20 +935,40 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s
#ifdef _LP64
__ load_parameter(0, c_rarg0);
__ load_parameter(1, c_rarg1);
- if (is_native) {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native), c_rarg0, c_rarg1);
- } else if (UseCompressedOops) {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), c_rarg0, c_rarg1);
- } else {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), c_rarg0, c_rarg1);
+ switch (kind) {
+ case ShenandoahBarrierSet::AccessKind::NORMAL:
+ if (UseCompressedOops) {
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), c_rarg0, c_rarg1);
+ } else {
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), c_rarg0, c_rarg1);
+ }
+ break;
+ case ShenandoahBarrierSet::AccessKind::WEAK:
+ if (UseCompressedOops) {
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow), c_rarg0, c_rarg1);
+ } else {
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), c_rarg0, c_rarg1);
+ }
+ break;
+ case ShenandoahBarrierSet::AccessKind::NATIVE:
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), c_rarg0, c_rarg1);
+ break;
+ default:
+ ShouldNotReachHere();
}
#else
__ load_parameter(0, rax);
__ load_parameter(1, rbx);
- if (is_native) {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native), rax, rbx);
- } else {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rbx);
+ switch (kind) {
+ case ShenandoahBarrierSet::AccessKind::NORMAL:
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rbx);
+ break;
+ case ShenandoahBarrierSet::AccessKind::WEAK:
+ case ShenandoahBarrierSet::AccessKind::NATIVE:
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), rax, rbx);
+ break;
+ default:
+ ShouldNotReachHere();
}
#endif
@@ -972,104 +980,3 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s
#undef __
#endif // COMPILER1
-
-address ShenandoahBarrierSetAssembler::shenandoah_lrb() {
- assert(_shenandoah_lrb != NULL, "need load reference barrier stub");
- return _shenandoah_lrb;
-}
-
-#define __ cgen->assembler()->
-
-/*
- * Incoming parameters:
- * rax: oop
- * rsi: load address
- */
-address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) {
- __ align(CodeEntryAlignment);
- StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb");
- address start = __ pc();
-
- Label slow_path;
-
- // We use RDI, which also serves as argument register for slow call.
- // RAX always holds the src object ptr, except after the slow call,
- // then it holds the result. R8/RBX is used as temporary register.
-
- Register tmp1 = rdi;
- Register tmp2 = LP64_ONLY(r8) NOT_LP64(rbx);
-
- __ push(tmp1);
- __ push(tmp2);
-
- // Check for object being in the collection set.
- __ mov(tmp1, rax);
- __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint());
- __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr());
- __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1));
- __ testbool(tmp2);
- __ jccb(Assembler::notZero, slow_path);
- __ pop(tmp2);
- __ pop(tmp1);
- __ ret(0);
-
- __ bind(slow_path);
-
- __ push(rcx);
- __ push(rdx);
- __ push(rdi);
-#ifdef _LP64
- __ push(r8);
- __ push(r9);
- __ push(r10);
- __ push(r11);
- __ push(r12);
- __ push(r13);
- __ push(r14);
- __ push(r15);
-#endif
- __ push(rbp);
- __ movptr(rbp, rsp);
- __ andptr(rsp, -StackAlignmentInBytes);
- __ push_FPU_state();
- if (UseCompressedOops) {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), rax, rsi);
- } else {
- __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rsi);
- }
- __ pop_FPU_state();
- __ movptr(rsp, rbp);
- __ pop(rbp);
-#ifdef _LP64
- __ pop(r15);
- __ pop(r14);
- __ pop(r13);
- __ pop(r12);
- __ pop(r11);
- __ pop(r10);
- __ pop(r9);
- __ pop(r8);
-#endif
- __ pop(rdi);
- __ pop(rdx);
- __ pop(rcx);
-
- __ pop(tmp2);
- __ pop(tmp1);
- __ ret(0);
-
- return start;
-}
-
-#undef __
-
-void ShenandoahBarrierSetAssembler::barrier_stubs_init() {
- if (ShenandoahLoadRefBarrier) {
- int stub_code_size = 4096;
- ResourceMark rm;
- BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size);
- CodeBuffer buf(bb);
- StubCodeGenerator cgen(&buf);
- _shenandoah_lrb = generate_shenandoah_lrb(&cgen);
- }
-}
diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp
index 60aa3b4600d..108b5670206 100644
--- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp
@@ -27,6 +27,8 @@
#include "asm/macroAssembler.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
+#include "gc/shenandoah/shenandoahBarrierSet.hpp"
+
#ifdef COMPILER1
class LIR_Assembler;
class ShenandoahPreBarrierStub;
@@ -38,8 +40,6 @@ class StubCodeGenerator;
class ShenandoahBarrierSetAssembler: public BarrierSetAssembler {
private:
- static address _shenandoah_lrb;
-
void satb_write_barrier_pre(MacroAssembler* masm,
Register obj,
Register pre_val,
@@ -56,25 +56,18 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler {
bool tosca_live,
bool expand_call);
- void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address src);
-
void storeval_barrier_impl(MacroAssembler* masm, Register dst, Register tmp);
- address generate_shenandoah_lrb(StubCodeGenerator* cgen);
-
public:
- static address shenandoah_lrb();
-
void storeval_barrier(MacroAssembler* masm, Register dst, Register tmp);
#ifdef COMPILER1
void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub);
void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub);
void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm);
- void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, bool is_native);
+ void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, ShenandoahBarrierSet::AccessKind kind);
#endif
- void load_reference_barrier(MacroAssembler* masm, Register dst, Address src);
- void load_reference_barrier_native(MacroAssembler* masm, Register dst, Address src);
+ void load_reference_barrier(MacroAssembler* masm, Register dst, Address src, ShenandoahBarrierSet::AccessKind kind);
void cmpxchg_oop(MacroAssembler* masm,
Register res, Address addr, Register oldval, Register newval,
@@ -87,8 +80,6 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler {
Address dst, Register val, Register tmp1, Register tmp2);
virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env,
Register obj, Register tmp, Label& slowpath);
- virtual void barrier_stubs_init();
-
};
#endif // CPU_X86_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_X86_HPP
diff --git a/src/hotspot/cpu/x86/gc/z/zGlobals_x86.hpp b/src/hotspot/cpu/x86/gc/z/zGlobals_x86.hpp
index 83c8caa6a58..db558d8cb2a 100644
--- a/src/hotspot/cpu/x86/gc/z/zGlobals_x86.hpp
+++ b/src/hotspot/cpu/x86/gc/z/zGlobals_x86.hpp
@@ -24,10 +24,9 @@
#ifndef CPU_X86_GC_Z_ZGLOBALS_X86_HPP
#define CPU_X86_GC_Z_ZGLOBALS_X86_HPP
-const size_t ZPlatformGranuleSizeShift = 21; // 2MB
-const size_t ZPlatformHeapViews = 3;
-const size_t ZPlatformNMethodDisarmedOffset = 4;
-const size_t ZPlatformCacheLineSize = 64;
+const size_t ZPlatformGranuleSizeShift = 21; // 2MB
+const size_t ZPlatformHeapViews = 3;
+const size_t ZPlatformCacheLineSize = 64;
size_t ZPlatformAddressOffsetBits();
size_t ZPlatformAddressMetadataShift();
diff --git a/src/hotspot/cpu/x86/interp_masm_x86.cpp b/src/hotspot/cpu/x86/interp_masm_x86.cpp
index 738771e800a..140dcfc2f06 100644
--- a/src/hotspot/cpu/x86/interp_masm_x86.cpp
+++ b/src/hotspot/cpu/x86/interp_masm_x86.cpp
@@ -605,6 +605,10 @@ void InterpreterMacroAssembler::push_i(Register r) {
push(r);
}
+void InterpreterMacroAssembler::push_i_or_ptr(Register r) {
+ push(r);
+}
+
void InterpreterMacroAssembler::push_f(XMMRegister r) {
subptr(rsp, wordSize);
movflt(Address(rsp, 0), r);
@@ -853,7 +857,7 @@ void InterpreterMacroAssembler::dispatch_base(TosState state,
Label no_safepoint, dispatch;
if (table != safepoint_table && generate_poll) {
NOT_PRODUCT(block_comment("Thread-local Safepoint poll"));
- testb(Address(r15_thread, Thread::polling_page_offset()), SafepointMechanism::poll_bit());
+ testb(Address(r15_thread, Thread::polling_word_offset()), SafepointMechanism::poll_bit());
jccb(Assembler::zero, no_safepoint);
lea(rscratch1, ExternalAddress((address)safepoint_table));
@@ -872,7 +876,7 @@ void InterpreterMacroAssembler::dispatch_base(TosState state,
Label no_safepoint;
const Register thread = rcx;
get_thread(thread);
- testb(Address(thread, Thread::polling_page_offset()), SafepointMechanism::poll_bit());
+ testb(Address(thread, Thread::polling_word_offset()), SafepointMechanism::poll_bit());
jccb(Assembler::zero, no_safepoint);
ArrayAddress dispatch_addr(ExternalAddress((address)safepoint_table), index);
@@ -961,6 +965,7 @@ void InterpreterMacroAssembler::narrow(Register result) {
// remove activation
//
+// Apply stack watermark barrier.
// Unlock the receiver if this is a synchronized method.
 // Unlock any Java monitors from synchronized blocks.
// Remove the activation from the stack.
@@ -987,7 +992,23 @@ void InterpreterMacroAssembler::remove_activation(
const Register rmon = LP64_ONLY(c_rarg1) NOT_LP64(rcx);
// monitor pointers need different register
// because rdx may have the result in it
- NOT_LP64(get_thread(rcx);)
+ NOT_LP64(get_thread(rthread);)
+
+ // The poll below is for the stack watermark barrier. It lets frames that would
+ // normally not be safe to return into be fixed up lazily: a return into such
+ // unsafe territory of the stack first calls InterpreterRuntime::at_unwind.
+ Label slow_path;
+ Label fast_path;
+ safepoint_poll(slow_path, rthread, true /* at_return */, false /* in_nmethod */);
+ jmp(fast_path);
+ bind(slow_path);
+ push(state);
+ set_last_Java_frame(rthread, noreg, rbp, (address)pc());
+ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), rthread);
+ NOT_LP64(get_thread(rthread);) // call_VM clobbered it, restore
+ reset_last_Java_frame(rthread, true);
+ pop(state);
+ bind(fast_path);
// get the value of _do_not_unlock_if_synchronized into rdx
const Address do_not_unlock_if_synchronized(rthread,
@@ -1938,7 +1959,7 @@ void InterpreterMacroAssembler::profile_switch_case(Register index,
void InterpreterMacroAssembler::_interp_verify_oop(Register reg, TosState state, const char* file, int line) {
if (state == atos) {
- MacroAssembler::_verify_oop(reg, "broken oop", file, line);
+ MacroAssembler::_verify_oop_checked(reg, "broken oop", file, line);
}
}
diff --git a/src/hotspot/cpu/x86/interp_masm_x86.hpp b/src/hotspot/cpu/x86/interp_masm_x86.hpp
index 3e2e33278a1..288b1bd1dfe 100644
--- a/src/hotspot/cpu/x86/interp_masm_x86.hpp
+++ b/src/hotspot/cpu/x86/interp_masm_x86.hpp
@@ -139,9 +139,18 @@ class InterpreterMacroAssembler: public MacroAssembler {
// Expression stack
void pop_ptr(Register r = rax);
void pop_i(Register r = rax);
+
+ // On x86, pushing a ptr or an int is semantically identical, but we
+ // maintain a distinction for clarity and for making it easier to change
+ // semantics in the future
void push_ptr(Register r = rax);
void push_i(Register r = rax);
+ // push_i_or_ptr is provided for when explicitly allowing either a ptr or
+ // an int might have some advantage, while still documenting the fact that a
+ // ptr might be pushed to the stack.
+ void push_i_or_ptr(Register r = rax);
+
void push_f(XMMRegister r);
void pop_f(XMMRegister r);
void pop_d(XMMRegister r);
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp
index 8b19ddab7b8..d7fabfbbedb 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp
@@ -112,6 +112,7 @@ void MacroAssembler::cmpklass(Address src1, Metadata* obj) {
cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}
+
void MacroAssembler::cmpklass(Register src1, Metadata* obj) {
cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate());
}
@@ -369,11 +370,6 @@ void MacroAssembler::pushptr(AddressLiteral src) {
}
}
-void MacroAssembler::set_word_if_not_zero(Register dst) {
- xorl(dst, dst);
- set_byte_if_not_zero(dst);
-}
-
static void pass_arg0(MacroAssembler* masm, Register arg) {
masm->push(arg);
}
@@ -713,8 +709,12 @@ void MacroAssembler::movptr(Register dst, ArrayAddress src) {
// src should NEVER be a real pointer. Use AddressLiteral for true pointers
void MacroAssembler::movptr(Address dst, intptr_t src) {
- mov64(rscratch1, src);
- movq(dst, rscratch1);
+ if (is_simm32(src)) {
+ movptr(dst, checked_cast<int32_t>(src));
+ } else {
+ mov64(rscratch1, src);
+ movq(dst, rscratch1);
+ }
}
// These are mostly for initializing NULL
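
The new fast path avoids materializing a 64-bit constant in rscratch1 when the immediate already fits a sign-extended 32-bit field. The fits-in-simm32 test is the usual round-trip check (a standalone sketch of the predicate, not HotSpot's is_simm32 itself):

    #include <cstdint>

    // A 64-bit value fits a sign-extended 32-bit immediate iff sign-extending
    // its low 32 bits reproduces the original value.
    bool fits_simm32(int64_t v) {
      return v == (int64_t)(int32_t)v;
    }
    // fits_simm32(-1) and fits_simm32(0x7fffffffLL) are true;
    // fits_simm32(0x80000000LL) is false, so that case keeps the mov64 + movq path.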
@@ -2495,6 +2495,7 @@ void MacroAssembler::movdqu(XMMRegister dst, Address src) {
void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) {
assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
+ if (dst->encoding() == src->encoding()) return;
Assembler::movdqu(dst, src);
}
@@ -2519,6 +2520,7 @@ void MacroAssembler::vmovdqu(XMMRegister dst, Address src) {
void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) {
assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
+ if (dst->encoding() == src->encoding()) return;
Assembler::vmovdqu(dst, src);
}
@@ -2532,6 +2534,64 @@ void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scrat
}
}
+
+void MacroAssembler::kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg) {
+ if (reachable(src)) {
+ kmovwl(dst, as_Address(src));
+ } else {
+ lea(scratch_reg, src);
+ kmovwl(dst, Address(scratch_reg, 0));
+ }
+}
+
+void MacroAssembler::evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
+ int vector_len, Register scratch_reg) {
+ if (reachable(src)) {
+ if (mask == k0) {
+ Assembler::evmovdqub(dst, as_Address(src), merge, vector_len);
+ } else {
+ Assembler::evmovdqub(dst, mask, as_Address(src), merge, vector_len);
+ }
+ } else {
+ lea(scratch_reg, src);
+ if (mask == k0) {
+ Assembler::evmovdqub(dst, Address(scratch_reg, 0), merge, vector_len);
+ } else {
+ Assembler::evmovdqub(dst, mask, Address(scratch_reg, 0), merge, vector_len);
+ }
+ }
+}
+
+void MacroAssembler::evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
+ int vector_len, Register scratch_reg) {
+ if (reachable(src)) {
+ Assembler::evmovdquw(dst, mask, as_Address(src), merge, vector_len);
+ } else {
+ lea(scratch_reg, src);
+ Assembler::evmovdquw(dst, mask, Address(scratch_reg, 0), merge, vector_len);
+ }
+}
+
+void MacroAssembler::evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
+ int vector_len, Register scratch_reg) {
+ if (reachable(src)) {
+ Assembler::evmovdqul(dst, mask, as_Address(src), merge, vector_len);
+ } else {
+ lea(scratch_reg, src);
+ Assembler::evmovdqul(dst, mask, Address(scratch_reg, 0), merge, vector_len);
+ }
+}
+
+void MacroAssembler::evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge,
+ int vector_len, Register scratch_reg) {
+ if (reachable(src)) {
+ Assembler::evmovdquq(dst, mask, as_Address(src), merge, vector_len);
+ } else {
+ lea(scratch_reg, src);
+ Assembler::evmovdquq(dst, mask, Address(scratch_reg, 0), merge, vector_len);
+ }
+}
+
void MacroAssembler::evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) {
if (reachable(src)) {
Assembler::evmovdquq(dst, as_Address(src), vector_len);
@@ -2699,16 +2759,15 @@ void MacroAssembler::save_rax(Register tmp) {
else if (tmp != rax) mov(tmp, rax);
}
-void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg, Register temp_reg) {
-#ifdef _LP64
- assert(thread_reg == r15_thread, "should be");
-#else
- if (thread_reg == noreg) {
- thread_reg = temp_reg;
- get_thread(thread_reg);
+void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg, bool at_return, bool in_nmethod) {
+ if (at_return) {
+ // Note that when in_nmethod is set, the stack pointer is incremented before the poll. Therefore,
+ // we may safely use rsp instead to perform the stack watermark check.
+ cmpptr(in_nmethod ? rsp : rbp, Address(thread_reg, Thread::polling_word_offset()));
+ jcc(Assembler::above, slow_path);
+ return;
}
-#endif
- testb(Address(thread_reg, Thread::polling_page_offset()), SafepointMechanism::poll_bit());
+ testb(Address(thread_reg, Thread::polling_word_offset()), SafepointMechanism::poll_bit());
jcc(Assembler::notZero, slow_path); // handshake bit set implies poll
}
@@ -3018,6 +3077,98 @@ void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src,
Assembler::vpcmpeqw(dst, nds, src, vector_len);
}
+void MacroAssembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds,
+ AddressLiteral src, int vector_len, Register scratch_reg) {
+ if (reachable(src)) {
+ Assembler::evpcmpeqd(kdst, mask, nds, as_Address(src), vector_len);
+ } else {
+ lea(scratch_reg, src);
+ Assembler::evpcmpeqd(kdst, mask, nds, Address(scratch_reg, 0), vector_len);
+ }
+}
+
+void MacroAssembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
+ int comparison, int vector_len, Register scratch_reg) {
+ if (reachable(src)) {
+ Assembler::evpcmpd(kdst, mask, nds, as_Address(src), comparison, vector_len);
+ } else {
+ lea(scratch_reg, src);
+ Assembler::evpcmpd(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
+ }
+}
+
+void MacroAssembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
+ int comparison, int vector_len, Register scratch_reg) {
+ if (reachable(src)) {
+ Assembler::evpcmpq(kdst, mask, nds, as_Address(src), comparison, vector_len);
+ } else {
+ lea(scratch_reg, src);
+ Assembler::evpcmpq(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
+ }
+}
+
+void MacroAssembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
+ int comparison, int vector_len, Register scratch_reg) {
+ if (reachable(src)) {
+ Assembler::evpcmpb(kdst, mask, nds, as_Address(src), comparison, vector_len);
+ } else {
+ lea(scratch_reg, src);
+ Assembler::evpcmpb(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
+ }
+}
+
+void MacroAssembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
+ int comparison, int vector_len, Register scratch_reg) {
+ if (reachable(src)) {
+ Assembler::evpcmpw(kdst, mask, nds, as_Address(src), comparison, vector_len);
+ } else {
+ lea(scratch_reg, src);
+ Assembler::evpcmpw(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len);
+ }
+}
+
+void MacroAssembler::vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len) {
+ if (width == Assembler::Q) {
+ Assembler::vpcmpCCq(dst, nds, src, cond_encoding, vector_len);
+ } else {
+ Assembler::vpcmpCCbwd(dst, nds, src, cond_encoding, vector_len);
+ }
+}
+
+void MacroAssembler::vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg) {
+ int eq_cond_enc = 0x29;
+ int gt_cond_enc = 0x37;
+ if (width != Assembler::Q) {
+ eq_cond_enc = 0x74 + width;
+ gt_cond_enc = 0x64 + width;
+ }
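+  // Only eq and gt have direct encodings; the remaining predicates are synthesized
+  // by swapping the operands (lt, nlt) and/or inverting the result with an XOR
+  // against an all-ones vector (neq, le, nlt).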
+ switch (cond) {
+ case eq:
+ vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len);
+ break;
+ case neq:
+ vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len);
+ vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
+ break;
+ case le:
+ vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len);
+ vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
+ break;
+ case nlt:
+ vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len);
+ vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg);
+ break;
+ case lt:
+ vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len);
+ break;
+ case nle:
+ vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len);
+ break;
+ default:
+ assert(false, "Should not reach here");
+ }
+}
+
void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
Assembler::vpmovzxbw(dst, src, vector_len);
@@ -3142,6 +3293,16 @@ void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src
}
}
+void MacroAssembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src,
+ bool merge, int vector_len, Register scratch_reg) {
+ if (reachable(src)) {
+ Assembler::evpord(dst, mask, nds, as_Address(src), merge, vector_len);
+ } else {
+ lea(scratch_reg, src);
+ Assembler::evpord(dst, mask, nds, Address(scratch_reg, 0), merge, vector_len);
+ }
+}
+
void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
if (reachable(src)) {
vdivsd(dst, nds, as_Address(src));
@@ -3238,7 +3399,14 @@ void MacroAssembler::vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src,
}
}
-//-------------------------------------------------------------------------------------------
+void MacroAssembler::vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
+ if (reachable(src)) {
+ Assembler::vpermd(dst, nds, as_Address(src), vector_len);
+ } else {
+ lea(scratch_reg, src);
+ Assembler::vpermd(dst, nds, Address(scratch_reg, 0), vector_len);
+ }
+}
void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
@@ -3761,44 +3929,6 @@ void MacroAssembler::vallones(XMMRegister dst, int vector_len) {
}
}
-RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
- Register tmp,
- int offset) {
- intptr_t value = *delayed_value_addr;
- if (value != 0)
- return RegisterOrConstant(value + offset);
-
- // load indirectly to solve generation ordering problem
- movptr(tmp, ExternalAddress((address) delayed_value_addr));
-
-#ifdef ASSERT
- { Label L;
- testptr(tmp, tmp);
- if (WizardMode) {
- const char* buf = NULL;
- {
- ResourceMark rm;
- stringStream ss;
- ss.print("DelayedValue=" INTPTR_FORMAT, delayed_value_addr[1]);
- buf = code_string(ss.as_string());
- }
- jcc(Assembler::notZero, L);
- STOP(buf);
- } else {
- jccb(Assembler::notZero, L);
- hlt();
- }
- bind(L);
- }
-#endif
-
- if (offset != 0)
- addptr(tmp, offset);
-
- return RegisterOrConstant(tmp);
-}
-
-
Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
int extra_slot_offset) {
// cf. TemplateTable::prepare_invoke(), if (load_receiver).
@@ -3820,7 +3950,6 @@ Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
return Address(rsp, scale_reg, scale_factor, offset);
}
-
void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
if (!VerifyOops) return;
@@ -3913,6 +4042,9 @@ class ControlWord {
case 1: rc = "round down"; break;
case 2: rc = "round up "; break;
case 3: rc = "chop "; break;
+ default:
+ rc = NULL; // silence compiler warnings
+ fatal("Unknown rounding control: %d", rounding_control());
};
// precision control
const char* pc;
@@ -3921,6 +4053,9 @@ class ControlWord {
case 1: pc = "reserved"; break;
case 2: pc = "53 bits "; break;
case 3: pc = "64 bits "; break;
+ default:
+ pc = NULL; // silence compiler warnings
+ fatal("Unknown precision control: %d", precision_control());
};
// flags
char f[9];
@@ -5764,7 +5899,7 @@ void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register
bind(VECTOR64_LOOP);
// AVX512 code to compare 64 byte vectors.
- evmovdqub(rymm0, Address(obja, result), Assembler::AVX_512bit);
+ evmovdqub(rymm0, Address(obja, result), false, Assembler::AVX_512bit);
evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit);
kortestql(k7, k7);
jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL); // mismatch
@@ -5783,7 +5918,7 @@ void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register
notq(tmp2);
kmovql(k3, tmp2);
- evmovdqub(rymm0, k3, Address(obja, result), Assembler::AVX_512bit);
+ evmovdqub(rymm0, k3, Address(obja, result), false, Assembler::AVX_512bit);
evpcmpeqb(k7, k3, rymm0, Address(objb, result), Assembler::AVX_512bit);
ktestql(k7, k3);
@@ -7578,7 +7713,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
notl(result);
kmovdl(k3, result);
- evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit);
+ evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
ktestd(k2, k3);
jcc(Assembler::carryClear, return_zero);
@@ -7603,7 +7738,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
negptr(len);
bind(copy_32_loop);
- evmovdquw(tmp1Reg, Address(src, len, Address::times_2), Assembler::AVX_512bit);
+ evmovdquw(tmp1Reg, Address(src, len, Address::times_2), /*merge*/ false, Assembler::AVX_512bit);
evpcmpuw(k2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
kortestdl(k2, k2);
jcc(Assembler::carryClear, return_zero);
@@ -7628,7 +7763,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
kmovdl(k3, result);
- evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit);
+ evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
ktestd(k2, k3);
jcc(Assembler::carryClear, return_zero);
@@ -7773,7 +7908,7 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len
// inflate 32 chars per iter
bind(copy_32_loop);
vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_512bit);
- evmovdquw(Address(dst, len, Address::times_2), tmp1, Assembler::AVX_512bit);
+ evmovdquw(Address(dst, len, Address::times_2), tmp1, /*merge*/ false, Assembler::AVX_512bit);
addptr(len, 32);
jcc(Assembler::notZero, copy_32_loop);
@@ -7788,7 +7923,7 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len
notl(tmp3_aliased);
kmovdl(k2, tmp3_aliased);
evpmovzxbw(tmp1, k2, Address(src, 0), Assembler::AVX_512bit);
- evmovdquw(Address(dst, 0), k2, tmp1, Assembler::AVX_512bit);
+ evmovdquw(Address(dst, 0), k2, tmp1, /*merge*/ true, Assembler::AVX_512bit);
jmp(done);
bind(avx3_threshold);
@@ -7963,6 +8098,7 @@ void MacroAssembler::cache_wbsync(bool is_pre)
sfence();
}
}
+
#endif // _LP64
Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
index 3d009d69945..e7419fc916b 100644
--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp
@@ -583,22 +583,30 @@ class MacroAssembler: public Assembler {
// method handles (JSR 292)
Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
- //----
- void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0
-
// Debugging
// only if +VerifyOops
void _verify_oop(Register reg, const char* s, const char* file, int line);
void _verify_oop_addr(Address addr, const char* s, const char* file, int line);
+ void _verify_oop_checked(Register reg, const char* s, const char* file, int line) {
+ if (VerifyOops) {
+ _verify_oop(reg, s, file, line);
+ }
+ }
+ void _verify_oop_addr_checked(Address reg, const char* s, const char* file, int line) {
+ if (VerifyOops) {
+ _verify_oop_addr(reg, s, file, line);
+ }
+ }
+
// TODO: verify method and klass metadata (compare against vptr?)
void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {}
void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){}
-#define verify_oop(reg) _verify_oop(reg, "broken oop " #reg, __FILE__, __LINE__)
-#define verify_oop_msg(reg, msg) _verify_oop(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__)
-#define verify_oop_addr(addr) _verify_oop_addr(addr, "broken oop addr " #addr, __FILE__, __LINE__)
+#define verify_oop(reg) _verify_oop_checked(reg, "broken oop " #reg, __FILE__, __LINE__)
+#define verify_oop_msg(reg, msg) _verify_oop_checked(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__)
+#define verify_oop_addr(addr) _verify_oop_addr_checked(addr, "broken oop addr " #addr, __FILE__, __LINE__)
#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
@@ -643,13 +651,7 @@ class MacroAssembler: public Assembler {
// Check for reserved stack access in method being exited (for JIT)
void reserved_stack_check();
- virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
- Register tmp,
- int offset);
-
- // If thread_reg is != noreg the code assumes the register passed contains
- // the thread (required on 64 bit).
- void safepoint_poll(Label& slow_path, Register thread_reg, Register temp_reg);
+ void safepoint_poll(Label& slow_path, Register thread_reg, bool at_return, bool in_nmethod);
void verify_tlab();
@@ -1078,15 +1080,59 @@ class MacroAssembler: public Assembler {
void movdqu(XMMRegister dst, Address src);
void movdqu(XMMRegister dst, XMMRegister src);
void movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg = rscratch1);
+
+ void kmovwl(KRegister dst, Register src) { Assembler::kmovwl(dst, src); }
+ void kmovwl(Register dst, KRegister src) { Assembler::kmovwl(dst, src); }
+ void kmovwl(KRegister dst, Address src) { Assembler::kmovwl(dst, src); }
+ void kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
+
// AVX Unaligned forms
void vmovdqu(Address dst, XMMRegister src);
void vmovdqu(XMMRegister dst, Address src);
void vmovdqu(XMMRegister dst, XMMRegister src);
void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
+
+ // AVX512 Unaligned
+ void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
+ void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
+ void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); }
+ void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); }
+ void evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
+
+ void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, src, merge, vector_len); }
+ void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); }
+ void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len) { Assembler::evmovdquw(dst, src, merge, vector_len); }
+ void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); }
+ void evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
+
+ void evmovdqul(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); }
+ void evmovdqul(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); }
+ void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
+ if (dst->encoding() == src->encoding()) return;
+ Assembler::evmovdqul(dst, src, vector_len);
+ }
+ void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); }
+ void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); }
+ void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
+ if (dst->encoding() == src->encoding() && mask == k0) return;
+ Assembler::evmovdqul(dst, mask, src, merge, vector_len);
+ }
+ void evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
+
void evmovdquq(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
- void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
void evmovdquq(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
void evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch);
+ void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) {
+ if (dst->encoding() == src->encoding()) return;
+ Assembler::evmovdquq(dst, src, vector_len);
+ }
+ void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
+ void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); }
+ void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
+ if (dst->encoding() == src->encoding() && mask == k0) return;
+ Assembler::evmovdquq(dst, mask, src, merge, vector_len);
+ }
+ void evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
// Move Aligned Double Quadword
void movdqa(XMMRegister dst, Address src) { Assembler::movdqa(dst, src); }
@@ -1208,6 +1254,30 @@ class MacroAssembler: public Assembler {
void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+ void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg);
+
+ // Vector compares
+ void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ int comparison, int vector_len) { Assembler::evpcmpd(kdst, mask, nds, src, comparison, vector_len); }
+ void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
+ int comparison, int vector_len, Register scratch_reg);
+ void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ int comparison, int vector_len) { Assembler::evpcmpq(kdst, mask, nds, src, comparison, vector_len); }
+ void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
+ int comparison, int vector_len, Register scratch_reg);
+ void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ int comparison, int vector_len) { Assembler::evpcmpb(kdst, mask, nds, src, comparison, vector_len); }
+ void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
+ int comparison, int vector_len, Register scratch_reg);
+ void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
+ int comparison, int vector_len) { Assembler::evpcmpw(kdst, mask, nds, src, comparison, vector_len); }
+ void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src,
+ int comparison, int vector_len, Register scratch_reg);
+
+
+ // Emit comparison instruction for the specified comparison predicate.
+ void vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg);
+ void vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len);
void vpmovzxbw(XMMRegister dst, Address src, int vector_len);
void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vpmovzxbw(dst, src, vector_len); }
@@ -1236,6 +1306,7 @@ class MacroAssembler: public Assembler {
void vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
void vptest(XMMRegister dst, XMMRegister src);
+ void vptest(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vptest(dst, src, vector_len); }
void punpcklbw(XMMRegister dst, XMMRegister src);
void punpcklbw(XMMRegister dst, Address src) { Assembler::punpcklbw(dst, src); }
@@ -1254,6 +1325,8 @@ class MacroAssembler: public Assembler {
void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); }
void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
+ void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src, bool merge, int vector_len, Register scratch_reg);
+
void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); }
void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
@@ -1309,6 +1382,9 @@ class MacroAssembler: public Assembler {
void vpxor(XMMRegister dst, XMMRegister src) { Assembler::vpxor(dst, dst, src, true); }
void vpxor(XMMRegister dst, Address src) { Assembler::vpxor(dst, dst, src, true); }
+ void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpermd(dst, nds, src, vector_len); }
+ void vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg);
+
void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
if (UseAVX > 2 && VM_Version::supports_avx512novl()) {
Assembler::vinserti32x4(dst, dst, src, imm8);
@@ -1727,6 +1803,35 @@ class MacroAssembler: public Assembler {
void cache_wb(Address line);
void cache_wbsync(bool is_pre);
+
+#if COMPILER2_OR_JVMCI
+ void arraycopy_avx3_special_cases(XMMRegister xmm, KRegister mask, Register from,
+ Register to, Register count, int shift,
+ Register index, Register temp,
+ bool use64byteVector, Label& L_entry, Label& L_exit);
+
+ void arraycopy_avx3_special_cases_conjoint(XMMRegister xmm, KRegister mask, Register from,
+ Register to, Register start_index, Register end_index,
+ Register count, int shift, Register temp,
+ bool use64byteVector, Label& L_entry, Label& L_exit);
+
+ void copy64_masked_avx(Register dst, Register src, XMMRegister xmm,
+ KRegister mask, Register length, Register index,
+ Register temp, int shift = Address::times_1, int offset = 0,
+ bool use64byteVector = false);
+
+ void copy32_masked_avx(Register dst, Register src, XMMRegister xmm,
+ KRegister mask, Register length, Register index,
+ Register temp, int shift = Address::times_1, int offset = 0);
+
+ void copy32_avx(Register dst, Register src, Register index, XMMRegister xmm,
+ int shift = Address::times_1, int offset = 0);
+
+ void copy64_avx(Register dst, Register src, Register index, XMMRegister xmm,
+ bool conjoint, int shift = Address::times_1, int offset = 0,
+ bool use64byteVector = false);
+#endif // COMPILER2_OR_JVMCI
+
#endif // _LP64
void vallones(XMMRegister dst, int vector_len);
diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp
new file mode 100644
index 00000000000..4368dee7329
--- /dev/null
+++ b/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp
@@ -0,0 +1,253 @@
+/*
+* Copyright (c) 2020, Intel Corporation.
+*
+* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+*
+* This code is free software; you can redistribute it and/or modify it
+* under the terms of the GNU General Public License version 2 only, as
+* published by the Free Software Foundation.
+*
+* This code is distributed in the hope that it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+* version 2 for more details (a copy is included in the LICENSE file that
+* accompanied this code).
+*
+* You should have received a copy of the GNU General Public License version
+* 2 along with this work; if not, write to the Free Software Foundation,
+* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+*
+* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+* or visit www.oracle.com if you need additional information or have any
+* questions.
+*
+*/
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) /* nothing */
+#else
+#define BLOCK_COMMENT(str) block_comment(str)
+#endif
+
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+
+#ifdef _LP64
+
+#if COMPILER2_OR_JVMCI
+
+void MacroAssembler::arraycopy_avx3_special_cases(XMMRegister xmm, KRegister mask, Register from,
+ Register to, Register count, int shift,
+ Register index, Register temp,
+ bool use64byteVector, Label& L_entry, Label& L_exit) {
+ Label L_entry_64, L_entry_96, L_entry_128;
+ Label L_entry_160, L_entry_192;
+
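+  // size_mat[shift][i] is the element-count equivalent of 32*(i+1) bytes for the
+  // element size selected by 'shift'; each case below handles lengths up to that
+  // many bytes with full 32/64-byte copies plus one masked copy for the tail.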
+ int size_mat[][6] = {
+ /* T_BYTE */ {32 , 64, 96 , 128 , 160 , 192 },
+ /* T_SHORT*/ {16 , 32, 48 , 64 , 80 , 96 },
+ /* T_INT */ {8 , 16, 24 , 32 , 40 , 48 },
+ /* T_LONG */ {4 , 8, 12 , 16 , 20 , 24 }
+ };
+
+ // Case A) Special case for length less than or equal to 32 bytes.
+ cmpq(count, size_mat[shift][0]);
+ jccb(Assembler::greater, L_entry_64);
+ copy32_masked_avx(to, from, xmm, mask, count, index, temp, shift);
+ jmp(L_exit);
+
+ // Case B) Special case for length less than or equal to 64 bytes.
+ BIND(L_entry_64);
+ cmpq(count, size_mat[shift][1]);
+ jccb(Assembler::greater, L_entry_96);
+ copy64_masked_avx(to, from, xmm, mask, count, index, temp, shift, 0, use64byteVector);
+ jmp(L_exit);
+
+ // Case C) Special case for length less than or equal to 96 bytes.
+ BIND(L_entry_96);
+ cmpq(count, size_mat[shift][2]);
+ jccb(Assembler::greater, L_entry_128);
+ copy64_avx(to, from, index, xmm, false, shift, 0, use64byteVector);
+ subq(count, 64 >> shift);
+ copy32_masked_avx(to, from, xmm, mask, count, index, temp, shift, 64);
+ jmp(L_exit);
+
+ // Case D) Special case for length less than or equal to 128 bytes.
+ BIND(L_entry_128);
+ cmpq(count, size_mat[shift][3]);
+ jccb(Assembler::greater, L_entry_160);
+ copy64_avx(to, from, index, xmm, false, shift, 0, use64byteVector);
+ copy32_avx(to, from, index, xmm, shift, 64);
+ subq(count, 96 >> shift);
+ copy32_masked_avx(to, from, xmm, mask, count, index, temp, shift, 96);
+ jmp(L_exit);
+
+ // Case E) Special case for length less than or equal to 160 bytes.
+ BIND(L_entry_160);
+ cmpq(count, size_mat[shift][4]);
+ jccb(Assembler::greater, L_entry_192);
+ copy64_avx(to, from, index, xmm, false, shift, 0, use64byteVector);
+ copy64_avx(to, from, index, xmm, false, shift, 64, use64byteVector);
+ subq(count, 128 >> shift);
+ copy32_masked_avx(to, from, xmm, mask, count, index, temp, shift, 128);
+ jmp(L_exit);
+
+ // Case F) Special case for length less than or equal to 192 bytes.
+ BIND(L_entry_192);
+ cmpq(count, size_mat[shift][5]);
+ jcc(Assembler::greater, L_entry);
+ copy64_avx(to, from, index, xmm, false, shift, 0, use64byteVector);
+ copy64_avx(to, from, index, xmm, false, shift, 64, use64byteVector);
+ copy32_avx(to, from, index, xmm, shift, 128);
+ subq(count, 160 >> shift);
+ copy32_masked_avx(to, from, xmm, mask, count, index, temp, shift, 160);
+ jmp(L_exit);
+}
+
+void MacroAssembler::arraycopy_avx3_special_cases_conjoint(XMMRegister xmm, KRegister mask, Register from,
+ Register to, Register start_index, Register end_index,
+ Register count, int shift, Register temp,
+ bool use64byteVector, Label& L_entry, Label& L_exit) {
+ Label L_entry_64, L_entry_96, L_entry_128;
+ Label L_entry_160, L_entry_192;
+ bool avx3 = MaxVectorSize > 32 && AVX3Threshold == 0;
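+  // For conjoint (potentially overlapping) copies the full-width chunks are copied
+  // from the end of the range via end_index and negative offsets, while the final
+  // masked partial chunk is copied from the front via start_index.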
+
+ int size_mat[][6] = {
+ /* T_BYTE */ {32 , 64, 96 , 128 , 160 , 192 },
+ /* T_SHORT*/ {16 , 32, 48 , 64 , 80 , 96 },
+ /* T_INT */ {8 , 16, 24 , 32 , 40 , 48 },
+ /* T_LONG */ {4 , 8, 12 , 16 , 20 , 24 }
+ };
+
+ // Case A) Special case for length less than or equal to 32 bytes.
+ cmpq(count, size_mat[shift][0]);
+ jccb(Assembler::greater, L_entry_64);
+ copy32_masked_avx(to, from, xmm, mask, count, start_index, temp, shift);
+ jmp(L_exit);
+
+ // Case B) Special case for length less than or equal to 64 bytes.
+ BIND(L_entry_64);
+ cmpq(count, size_mat[shift][1]);
+ jccb(Assembler::greater, L_entry_96);
+ if (avx3) {
+ copy64_masked_avx(to, from, xmm, mask, count, start_index, temp, shift, 0, true);
+ } else {
+ copy32_avx(to, from, end_index, xmm, shift, -32);
+ subq(count, 32 >> shift);
+ copy32_masked_avx(to, from, xmm, mask, count, start_index, temp, shift);
+ }
+ jmp(L_exit);
+
+ // Case C) Special case for length less than or equal to 96 bytes.
+ BIND(L_entry_96);
+ cmpq(count, size_mat[shift][2]);
+ jccb(Assembler::greater, L_entry_128);
+ copy64_avx(to, from, end_index, xmm, true, shift, -64, use64byteVector);
+ subq(count, 64 >> shift);
+ copy32_masked_avx(to, from, xmm, mask, count, start_index, temp, shift);
+ jmp(L_exit);
+
+ // Case D) Special case for length less than or equal to 128 bytes.
+ BIND(L_entry_128);
+ cmpq(count, size_mat[shift][3]);
+ jccb(Assembler::greater, L_entry_160);
+ copy64_avx(to, from, end_index, xmm, true, shift, -64, use64byteVector);
+ copy32_avx(to, from, end_index, xmm, shift, -96);
+ subq(count, 96 >> shift);
+ copy32_masked_avx(to, from, xmm, mask, count, start_index, temp, shift);
+ jmp(L_exit);
+
+ // Case E) Special case for length less than or equal to 160 bytes.
+ BIND(L_entry_160);
+ cmpq(count, size_mat[shift][4]);
+ jccb(Assembler::greater, L_entry_192);
+ copy64_avx(to, from, end_index, xmm, true, shift, -64, use64byteVector);
+ copy64_avx(to, from, end_index, xmm, true, shift, -128, use64byteVector);
+ subq(count, 128 >> shift);
+ copy32_masked_avx(to, from, xmm, mask, count, start_index, temp, shift);
+ jmp(L_exit);
+
+ // Case F) Special case for length less than or equal to 192 bytes.
+ BIND(L_entry_192);
+ cmpq(count, size_mat[shift][5]);
+ jcc(Assembler::greater, L_entry);
+ copy64_avx(to, from, end_index, xmm, true, shift, -64, use64byteVector);
+ copy64_avx(to, from, end_index, xmm, true, shift, -128, use64byteVector);
+ copy32_avx(to, from, end_index, xmm, shift, -160);
+ subq(count, 160 >> shift);
+ copy32_masked_avx(to, from, xmm, mask, count, start_index, temp, shift);
+ jmp(L_exit);
+}
+
+void MacroAssembler::copy64_masked_avx(Register dst, Register src, XMMRegister xmm,
+ KRegister mask, Register length, Register index,
+ Register temp, int shift, int offset,
+ bool use64byteVector) {
+ BasicType type[] = { T_BYTE, T_SHORT, T_INT, T_LONG};
+ assert(MaxVectorSize >= 32, "vector length should be >= 32");
+ if (!use64byteVector) {
+ copy32_avx(dst, src, index, xmm, shift, offset);
+ subptr(length, 32 >> shift);
+ copy32_masked_avx(dst, src, xmm, mask, length, index, temp, shift, offset+32);
+ } else {
+ Address::ScaleFactor scale = (Address::ScaleFactor)(shift);
+ assert(MaxVectorSize == 64, "vector length != 64");
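+    // Build an opmask with the low 'length' bits set (all-ones shifted right by
+    // 64 - length), selecting one mask lane per remaining element.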
+ negptr(length);
+ addq(length, 64);
+ mov64(temp, -1);
+ shrxq(temp, temp, length);
+ kmovql(mask, temp);
+ evmovdqu(xmm, mask, Address(src, index, scale, offset), Assembler::AVX_512bit, type[shift]);
+ evmovdqu(Address(dst, index, scale, offset), mask, xmm, Assembler::AVX_512bit, type[shift]);
+ }
+}
+
+
+void MacroAssembler::copy32_masked_avx(Register dst, Register src, XMMRegister xmm,
+ KRegister mask, Register length, Register index,
+ Register temp, int shift, int offset) {
+ assert(MaxVectorSize >= 32, "vector length should be >= 32");
+ BasicType type[] = { T_BYTE, T_SHORT, T_INT, T_LONG};
+ Address::ScaleFactor scale = (Address::ScaleFactor)(shift);
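+  // Build an opmask with the low 'length' bits set: (1 << length) - 1.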
+ mov64(temp, 1);
+ shlxq(temp, temp, length);
+ decq(temp);
+ kmovql(mask, temp);
+ evmovdqu(xmm, mask, Address(src, index, scale, offset), Assembler::AVX_256bit, type[shift]);
+ evmovdqu(Address(dst, index, scale, offset), mask, xmm, Assembler::AVX_256bit, type[shift]);
+}
+
+
+void MacroAssembler::copy32_avx(Register dst, Register src, Register index, XMMRegister xmm,
+ int shift, int offset) {
+ assert(MaxVectorSize >= 32, "vector length should be >= 32");
+ Address::ScaleFactor scale = (Address::ScaleFactor)(shift);
+ vmovdqu(xmm, Address(src, index, scale, offset));
+ vmovdqu(Address(dst, index, scale, offset), xmm);
+}
+
+
+void MacroAssembler::copy64_avx(Register dst, Register src, Register index, XMMRegister xmm,
+ bool conjoint, int shift, int offset, bool use64byteVector) {
+ assert(MaxVectorSize == 64 || MaxVectorSize == 32, "vector length mismatch");
+ if (!use64byteVector) {
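+    // Split into two 32-byte copies; for conjoint copies the upper half is moved
+    // first, matching the backward copy direction.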
+ if (conjoint) {
+ copy32_avx(dst, src, index, xmm, shift, offset+32);
+ copy32_avx(dst, src, index, xmm, shift, offset);
+ } else {
+ copy32_avx(dst, src, index, xmm, shift, offset);
+ copy32_avx(dst, src, index, xmm, shift, offset+32);
+ }
+ } else {
+ Address::ScaleFactor scale = (Address::ScaleFactor)(shift);
+ evmovdquq(xmm, Address(src, index, scale, offset), Assembler::AVX_512bit);
+ evmovdquq(Address(dst, index, scale, offset), xmm, Assembler::AVX_512bit);
+ }
+}
+
+#endif // COMPILER2_OR_JVMCI
+
+#endif
diff --git a/src/hotspot/cpu/x86/methodHandles_x86.hpp b/src/hotspot/cpu/x86/methodHandles_x86.hpp
index bb333781a62..444d0495666 100644
--- a/src/hotspot/cpu/x86/methodHandles_x86.hpp
+++ b/src/hotspot/cpu/x86/methodHandles_x86.hpp
@@ -27,7 +27,7 @@
// Adapters
enum /* platform_dependent_constants */ {
- adapter_code_size = NOT_LP64(16000 DEBUG_ONLY(+ 25000)) LP64_ONLY(32000 DEBUG_ONLY(+ 150000))
+ adapter_code_size = 4000 DEBUG_ONLY(+ 6000)
};
// Additional helper methods for MethodHandles code generation:
diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp
index 066d1ae98cb..3e2b3a118c7 100644
--- a/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp
+++ b/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp
@@ -37,6 +37,7 @@
#include "memory/resourceArea.hpp"
#include "oops/compiledICHolder.hpp"
#include "oops/klass.inline.hpp"
+#include "prims/methodHandles.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/vframeArray.hpp"
@@ -1213,265 +1214,6 @@ void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_ty
}
}
-
-static void save_or_restore_arguments(MacroAssembler* masm,
- const int stack_slots,
- const int total_in_args,
- const int arg_save_area,
- OopMap* map,
- VMRegPair* in_regs,
- BasicType* in_sig_bt) {
- // if map is non-NULL then the code should store the values,
- // otherwise it should load them.
- int handle_index = 0;
- // Save down double word first
- for ( int i = 0; i < total_in_args; i++) {
- if (in_regs[i].first()->is_XMMRegister() && in_sig_bt[i] == T_DOUBLE) {
- int slot = handle_index * VMRegImpl::slots_per_word + arg_save_area;
- int offset = slot * VMRegImpl::stack_slot_size;
- handle_index += 2;
- assert(handle_index <= stack_slots, "overflow");
- if (map != NULL) {
- __ movdbl(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
- } else {
- __ movdbl(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
- }
- }
- if (in_regs[i].first()->is_Register() && in_sig_bt[i] == T_LONG) {
- int slot = handle_index * VMRegImpl::slots_per_word + arg_save_area;
- int offset = slot * VMRegImpl::stack_slot_size;
- handle_index += 2;
- assert(handle_index <= stack_slots, "overflow");
- if (map != NULL) {
- __ movl(Address(rsp, offset), in_regs[i].first()->as_Register());
- if (in_regs[i].second()->is_Register()) {
- __ movl(Address(rsp, offset + 4), in_regs[i].second()->as_Register());
- }
- } else {
- __ movl(in_regs[i].first()->as_Register(), Address(rsp, offset));
- if (in_regs[i].second()->is_Register()) {
- __ movl(in_regs[i].second()->as_Register(), Address(rsp, offset + 4));
- }
- }
- }
- }
- // Save or restore single word registers
- for ( int i = 0; i < total_in_args; i++) {
- if (in_regs[i].first()->is_Register()) {
- int slot = handle_index++ * VMRegImpl::slots_per_word + arg_save_area;
- int offset = slot * VMRegImpl::stack_slot_size;
- assert(handle_index <= stack_slots, "overflow");
- if (in_sig_bt[i] == T_ARRAY && map != NULL) {
- map->set_oop(VMRegImpl::stack2reg(slot));;
- }
-
- // Value is in an input register pass we must flush it to the stack
- const Register reg = in_regs[i].first()->as_Register();
- switch (in_sig_bt[i]) {
- case T_ARRAY:
- if (map != NULL) {
- __ movptr(Address(rsp, offset), reg);
- } else {
- __ movptr(reg, Address(rsp, offset));
- }
- break;
- case T_BOOLEAN:
- case T_CHAR:
- case T_BYTE:
- case T_SHORT:
- case T_INT:
- if (map != NULL) {
- __ movl(Address(rsp, offset), reg);
- } else {
- __ movl(reg, Address(rsp, offset));
- }
- break;
- case T_OBJECT:
- default: ShouldNotReachHere();
- }
- } else if (in_regs[i].first()->is_XMMRegister()) {
- if (in_sig_bt[i] == T_FLOAT) {
- int slot = handle_index++ * VMRegImpl::slots_per_word + arg_save_area;
- int offset = slot * VMRegImpl::stack_slot_size;
- assert(handle_index <= stack_slots, "overflow");
- if (map != NULL) {
- __ movflt(Address(rsp, offset), in_regs[i].first()->as_XMMRegister());
- } else {
- __ movflt(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset));
- }
- }
- } else if (in_regs[i].first()->is_stack()) {
- if (in_sig_bt[i] == T_ARRAY && map != NULL) {
- int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
- map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
- }
- }
- }
-}
-
-// Registers need to be saved for runtime call
-static Register caller_saved_registers[] = {
- rcx, rdx, rsi, rdi
-};
-
-// Save caller saved registers except r1 and r2
-static void save_registers_except(MacroAssembler* masm, Register r1, Register r2) {
- int reg_len = (int)(sizeof(caller_saved_registers) / sizeof(Register));
- for (int index = 0; index < reg_len; index ++) {
- Register this_reg = caller_saved_registers[index];
- if (this_reg != r1 && this_reg != r2) {
- __ push(this_reg);
- }
- }
-}
-
-// Restore caller saved registers except r1 and r2
-static void restore_registers_except(MacroAssembler* masm, Register r1, Register r2) {
- int reg_len = (int)(sizeof(caller_saved_registers) / sizeof(Register));
- for (int index = reg_len - 1; index >= 0; index --) {
- Register this_reg = caller_saved_registers[index];
- if (this_reg != r1 && this_reg != r2) {
- __ pop(this_reg);
- }
- }
-}
-
-// Pin object, return pinned object or null in rax
-static void gen_pin_object(MacroAssembler* masm,
- Register thread, VMRegPair reg) {
- __ block_comment("gen_pin_object {");
-
- Label is_null;
- Register tmp_reg = rax;
- VMRegPair tmp(tmp_reg->as_VMReg());
- if (reg.first()->is_stack()) {
- // Load the arg up from the stack
- simple_move32(masm, reg, tmp);
- reg = tmp;
- } else {
- __ movl(tmp_reg, reg.first()->as_Register());
- }
- __ testptr(reg.first()->as_Register(), reg.first()->as_Register());
- __ jccb(Assembler::equal, is_null);
-
- // Save registers that may be used by runtime call
- Register arg = reg.first()->is_Register() ? reg.first()->as_Register() : noreg;
- save_registers_except(masm, arg, thread);
-
- __ call_VM_leaf(
- CAST_FROM_FN_PTR(address, SharedRuntime::pin_object),
- thread, reg.first()->as_Register());
-
- // Restore saved registers
- restore_registers_except(masm, arg, thread);
-
- __ bind(is_null);
- __ block_comment("} gen_pin_object");
-}
-
-// Unpin object
-static void gen_unpin_object(MacroAssembler* masm,
- Register thread, VMRegPair reg) {
- __ block_comment("gen_unpin_object {");
- Label is_null;
-
- // temp register
- __ push(rax);
- Register tmp_reg = rax;
- VMRegPair tmp(tmp_reg->as_VMReg());
-
- simple_move32(masm, reg, tmp);
-
- __ testptr(rax, rax);
- __ jccb(Assembler::equal, is_null);
-
- // Save registers that may be used by runtime call
- Register arg = reg.first()->is_Register() ? reg.first()->as_Register() : noreg;
- save_registers_except(masm, arg, thread);
-
- __ call_VM_leaf(
- CAST_FROM_FN_PTR(address, SharedRuntime::unpin_object),
- thread, rax);
-
- // Restore saved registers
- restore_registers_except(masm, arg, thread);
- __ bind(is_null);
- __ pop(rax);
- __ block_comment("} gen_unpin_object");
-}
-
-// Check GCLocker::needs_gc and enter the runtime if it's true. This
-// keeps a new JNI critical region from starting until a GC has been
-// forced. Save down any oops in registers and describe them in an
-// OopMap.
-static void check_needs_gc_for_critical_native(MacroAssembler* masm,
- Register thread,
- int stack_slots,
- int total_c_args,
- int total_in_args,
- int arg_save_area,
-