diff --git a/.github/workflows/submit.yml b/.github/workflows/submit.yml index 382c4e20872..ebe72381738 100644 --- a/.github/workflows/submit.yml +++ b/.github/workflows/submit.yml @@ -9,7 +9,7 @@ on: platforms: description: "Platform(s) to execute on" required: true - default: "Linux x64, Windows x64, macOS x64" + default: "Linux x64, Linux x86, Windows x64, macOS x64" jobs: prerequisites: @@ -18,6 +18,7 @@ jobs: outputs: should_run: ${{ steps.check_submit.outputs.should_run }} bundle_id: ${{ steps.check_bundle_id.outputs.bundle_id }} + platform_linux_x86: ${{ steps.check_platforms.outputs.platform_linux_x86 }} platform_linux_x64: ${{ steps.check_platforms.outputs.platform_linux_x64 }} platform_windows_x64: ${{ steps.check_platforms.outputs.platform_windows_x64 }} platform_macos_x64: ${{ steps.check_platforms.outputs.platform_macos_x64 }} @@ -32,11 +33,13 @@ jobs: id: check_platforms run: | echo "::set-output name=platform_linux_x64::${{ contains(github.event.inputs.platforms, 'linux x64') || (github.event.inputs.platforms == '' && (secrets.JDK_SUBMIT_PLATFORMS == '' || contains(secrets.JDK_SUBMIT_PLATFORMS, 'linux x64'))) }}" + echo "::set-output name=platform_linux_x86::${{ contains(github.event.inputs.platforms, 'linux x86') || (github.event.inputs.platforms == '' && (secrets.JDK_SUBMIT_PLATFORMS == '' || contains(secrets.JDK_SUBMIT_PLATFORMS, 'linux x86'))) }}" echo "::set-output name=platform_windows_x64::${{ contains(github.event.inputs.platforms, 'windows x64') || (github.event.inputs.platforms == '' && (secrets.JDK_SUBMIT_PLATFORMS == '' || contains(secrets.JDK_SUBMIT_PLATFORMS, 'windows x64'))) }}" echo "::set-output name=platform_macos_x64::${{ contains(github.event.inputs.platforms, 'macos x64') || (github.event.inputs.platforms == '' && (secrets.JDK_SUBMIT_PLATFORMS == '' || contains(secrets.JDK_SUBMIT_PLATFORMS, 'macos x64'))) }}" if: steps.check_submit.outputs.should_run != 'false' - name: Determine unique bundle identifier + id: check_bundle_id run: echo "::set-output name=bundle_id::${GITHUB_ACTOR}_${GITHUB_SHA:0:8}" if: steps.check_submit.outputs.should_run != 'false' @@ -113,7 +116,7 @@ jobs: flags: --enable-debug artifact: -debug - flavor: build hotspot no-pch - flags: --disable-precompiled-headers + flags: --enable-debug --disable-precompiled-headers build-target: hotspot - flavor: build hotspot zero flags: --enable-debug --disable-precompiled-headers --with-jvm-variants=zero @@ -348,12 +351,326 @@ jobs: if: always() run: echo "logsuffix=`echo ${{ matrix.test }} | sed -e 's!/!_!'g -e 's! !_!'g`" >> $GITHUB_ENV - - name: Persist test logs + - name: Package test results + if: always() + working-directory: build/run-test-prebuilt/test-results/ + run: > + zip -r9 + "$HOME/linux-x64${{ matrix.artifact }}_testresults_${{ env.logsuffix }}.zip" + . + continue-on-error: true + + - name: Package test support + if: always() + working-directory: build/run-test-prebuilt/test-support/ + run: > + zip -r9 + "$HOME/linux-x64${{ matrix.artifact }}_testsupport_${{ env.logsuffix }}.zip" + . 
+ -i *.jtr + -i */hs_err*.log + -i */replay*.log + continue-on-error: true + + - name: Persist test results + if: always() + uses: actions/upload-artifact@v2 + with: + path: ~/linux-x64${{ matrix.artifact }}_testresults_${{ env.logsuffix }}.zip + continue-on-error: true + + - name: Persist test outputs + if: always() + uses: actions/upload-artifact@v2 + with: + path: ~/linux-x64${{ matrix.artifact }}_testsupport_${{ env.logsuffix }}.zip + continue-on-error: true + + linux_x86_build: + name: Linux x86 + runs-on: "ubuntu-latest" + needs: prerequisites + if: needs.prerequisites.outputs.should_run != 'false' && needs.prerequisites.outputs.platform_linux_x86 != 'false' + + strategy: + fail-fast: false + matrix: + flavor: + - build release + - build debug + include: + - flavor: build debug + flags: --enable-debug + artifact: -debug + + # Reduced 32-bit build uses the same boot JDK as 64-bit build + env: + JDK_VERSION: "${{ fromJson(needs.prerequisites.outputs.dependencies).DEFAULT_VERSION_FEATURE }}" + BOOT_JDK_VERSION: "${{ fromJson(needs.prerequisites.outputs.dependencies).BOOT_JDK_VERSION }}" + BOOT_JDK_FILENAME: "${{ fromJson(needs.prerequisites.outputs.dependencies).LINUX_X64_BOOT_JDK_FILENAME }}" + BOOT_JDK_URL: "${{ fromJson(needs.prerequisites.outputs.dependencies).LINUX_X64_BOOT_JDK_URL }}" + BOOT_JDK_SHA256: "${{ fromJson(needs.prerequisites.outputs.dependencies).LINUX_X64_BOOT_JDK_SHA256 }}" + + steps: + - name: Checkout the source + uses: actions/checkout@v2 + with: + path: jdk + + - name: Restore boot JDK from cache + id: bootjdk + uses: actions/cache@v2 + with: + path: ~/bootjdk/${{ env.BOOT_JDK_VERSION }} + key: bootjdk-${{ runner.os }}-${{ env.BOOT_JDK_VERSION }}-${{ env.BOOT_JDK_SHA256 }}-v1 + + - name: Download boot JDK + run: | + mkdir -p "${HOME}/bootjdk/${BOOT_JDK_VERSION}" + wget -O "${HOME}/bootjdk/${BOOT_JDK_FILENAME}" "${BOOT_JDK_URL}" + echo "${BOOT_JDK_SHA256} ${HOME}/bootjdk/${BOOT_JDK_FILENAME}" | sha256sum -c >/dev/null - + tar -xf "${HOME}/bootjdk/${BOOT_JDK_FILENAME}" -C "${HOME}/bootjdk/${BOOT_JDK_VERSION}" + mv "${HOME}/bootjdk/${BOOT_JDK_VERSION}/"*/* "${HOME}/bootjdk/${BOOT_JDK_VERSION}/" + if: steps.bootjdk.outputs.cache-hit != 'true' + + - name: Restore jtreg artifact + id: jtreg_restore + uses: actions/download-artifact@v2 + with: + name: transient_jtreg_${{ needs.prerequisites.outputs.bundle_id }} + path: ~/jtreg/ + continue-on-error: true + + - name: Restore jtreg artifact (retry) + uses: actions/download-artifact@v2 + with: + name: transient_jtreg_${{ needs.prerequisites.outputs.bundle_id }} + path: ~/jtreg/ + if: steps.jtreg_restore.outcome == 'failure' + + - name: Checkout gtest sources + uses: actions/checkout@v2 + with: + repository: "google/googletest" + ref: "release-${{ fromJson(needs.prerequisites.outputs.dependencies).GTEST_VERSION }}" + path: gtest + + # Roll in the multilib environment and its dependencies. + # Some multilib libraries do not have proper inter-dependencies, so we have to + # install their dependencies manually. 
+ - name: Install dependencies + run: | + sudo dpkg --add-architecture i386 + sudo apt-get update + sudo apt-get install gcc-multilib g++-multilib libfreetype6-dev:i386 libxrandr-dev:i386 libxtst-dev:i386 libtiff-dev:i386 libcupsimage2-dev:i386 libcups2-dev:i386 libasound2-dev:i386 + + - name: Configure + run: > + bash configure + --with-conf-name=linux-x86 + --with-target-bits=32 + ${{ matrix.flags }} + --with-version-opt=${GITHUB_ACTOR}-${GITHUB_SHA} + --with-version-build=0 + --with-boot-jdk=${HOME}/bootjdk/${BOOT_JDK_VERSION} + --with-jtreg=${HOME}/jtreg + --with-gtest=${GITHUB_WORKSPACE}/gtest + --with-default-make-target="product-bundles test-bundles" + --with-zlib=system + --enable-jtreg-failure-handler + working-directory: jdk + + - name: Build + run: make CONF_NAME=linux-x86 ${{ matrix.build-target }} + working-directory: jdk + + - name: Persist test bundles + uses: actions/upload-artifact@v2 + with: + name: transient_jdk-linux-x86${{ matrix.artifact }}_${{ needs.prerequisites.outputs.bundle_id }} + path: | + jdk/build/linux-x86/bundles/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin${{ matrix.artifact }}.tar.gz + jdk/build/linux-x86/bundles/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin-tests${{ matrix.artifact }}.tar.gz + if: matrix.build-target == false + + linux_x86_test: + name: Linux x86 + runs-on: "ubuntu-latest" + needs: + - prerequisites + - linux_x86_build + + strategy: + fail-fast: false + matrix: + test: + - jdk/tier1 part 1 + - jdk/tier1 part 2 + - jdk/tier1 part 3 + - langtools/tier1 + - hs/tier1 common + - hs/tier1 compiler + - hs/tier1 gc + - hs/tier1 runtime + - hs/tier1 serviceability + include: + - test: jdk/tier1 part 1 + suites: test/jdk/:tier1_part1 + - test: jdk/tier1 part 2 + suites: test/jdk/:tier1_part2 + - test: jdk/tier1 part 3 + suites: test/jdk/:tier1_part3 + - test: langtools/tier1 + suites: test/langtools/:tier1 + - test: hs/tier1 common + suites: test/hotspot/jtreg/:tier1_common + artifact: -debug + - test: hs/tier1 compiler + suites: test/hotspot/jtreg/:tier1_compiler + artifact: -debug + - test: hs/tier1 gc + suites: test/hotspot/jtreg/:tier1_gc + artifact: -debug + - test: hs/tier1 runtime + suites: test/hotspot/jtreg/:tier1_runtime + artifact: -debug + - test: hs/tier1 serviceability + suites: test/hotspot/jtreg/:tier1_serviceability + artifact: -debug + + # Reduced 32-bit build uses the same boot JDK as 64-bit build + env: + JDK_VERSION: "${{ fromJson(needs.prerequisites.outputs.dependencies).DEFAULT_VERSION_FEATURE }}" + BOOT_JDK_VERSION: "${{ fromJson(needs.prerequisites.outputs.dependencies).BOOT_JDK_VERSION }}" + BOOT_JDK_FILENAME: "${{ fromJson(needs.prerequisites.outputs.dependencies).LINUX_X64_BOOT_JDK_FILENAME }}" + BOOT_JDK_URL: "${{ fromJson(needs.prerequisites.outputs.dependencies).LINUX_X64_BOOT_JDK_URL }}" + BOOT_JDK_SHA256: "${{ fromJson(needs.prerequisites.outputs.dependencies).LINUX_X64_BOOT_JDK_SHA256 }}" + + steps: + - name: Checkout the source + uses: actions/checkout@v2 + + - name: Restore boot JDK from cache + id: bootjdk + uses: actions/cache@v2 + with: + path: ~/bootjdk/${{ env.BOOT_JDK_VERSION }} + key: bootjdk-${{ runner.os }}-${{ env.BOOT_JDK_VERSION }}-${{ env.BOOT_JDK_SHA256 }}-v1 + + - name: Download boot JDK + run: | + mkdir -p "${HOME}/bootjdk/${BOOT_JDK_VERSION}" + wget -O "${HOME}/bootjdk/${BOOT_JDK_FILENAME}" "${BOOT_JDK_URL}" + echo "${BOOT_JDK_SHA256} ${HOME}/bootjdk/${BOOT_JDK_FILENAME}" | sha256sum -c >/dev/null - + tar -xf "${HOME}/bootjdk/${BOOT_JDK_FILENAME}" -C 
"${HOME}/bootjdk/${BOOT_JDK_VERSION}" + mv "${HOME}/bootjdk/${BOOT_JDK_VERSION}/"*/* "${HOME}/bootjdk/${BOOT_JDK_VERSION}/" + if: steps.bootjdk.outputs.cache-hit != 'true' + + - name: Restore jtreg artifact + id: jtreg_restore + uses: actions/download-artifact@v2 + with: + name: transient_jtreg_${{ needs.prerequisites.outputs.bundle_id }} + path: ~/jtreg/ + continue-on-error: true + + - name: Restore jtreg artifact (retry) + uses: actions/download-artifact@v2 + with: + name: transient_jtreg_${{ needs.prerequisites.outputs.bundle_id }} + path: ~/jtreg/ + if: steps.jtreg_restore.outcome == 'failure' + + - name: Restore build artifacts + id: build_restore + uses: actions/download-artifact@v2 + with: + name: transient_jdk-linux-x86${{ matrix.artifact }}_${{ needs.prerequisites.outputs.bundle_id }} + path: ~/jdk-linux-x86${{ matrix.artifact }} + continue-on-error: true + + - name: Restore build artifacts (retry) + uses: actions/download-artifact@v2 + with: + name: transient_jdk-linux-x86${{ matrix.artifact }}_${{ needs.prerequisites.outputs.bundle_id }} + path: ~/jdk-linux-x86${{ matrix.artifact }} + if: steps.build_restore.outcome == 'failure' + + - name: Unpack jdk + run: | + mkdir -p "${HOME}/jdk-linux-x86${{ matrix.artifact }}/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin${{ matrix.artifact }}" + tar -xf "${HOME}/jdk-linux-x86${{ matrix.artifact }}/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin${{ matrix.artifact }}.tar.gz" -C "${HOME}/jdk-linux-x86${{ matrix.artifact }}/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin${{ matrix.artifact }}" + + - name: Unpack tests + run: | + mkdir -p "${HOME}/jdk-linux-x86${{ matrix.artifact }}/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin-tests${{ matrix.artifact }}" + tar -xf "${HOME}/jdk-linux-x86${{ matrix.artifact }}/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin-tests${{ matrix.artifact }}.tar.gz" -C "${HOME}/jdk-linux-x86${{ matrix.artifact }}/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin-tests${{ matrix.artifact }}" + + - name: Find root of jdk image dir + run: | + imageroot=`find ${HOME}/jdk-linux-x86${{ matrix.artifact }}/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin${{ matrix.artifact }} -name release -type f` + echo "imageroot=`dirname ${imageroot}`" >> $GITHUB_ENV + + - name: Run tests + run: > + JDK_IMAGE_DIR=${{ env.imageroot }} + TEST_IMAGE_DIR=${HOME}/jdk-linux-x86${{ matrix.artifact }}/jdk-${{ env.JDK_VERSION }}-internal+0_linux-x86_bin-tests${{ matrix.artifact }} + BOOT_JDK=${HOME}/bootjdk/${BOOT_JDK_VERSION} + JT_HOME=${HOME}/jtreg + make test-prebuilt + CONF_NAME=run-test-prebuilt + LOG_CMDLINES=true + JTREG_VERBOSE=fail,error,time + TEST="${{ matrix.suites }}" + TEST_OPTS_JAVA_OPTIONS= + JTREG_KEYWORDS="!headful" + JTREG="JAVA_OPTIONS=-XX:-CreateCoredumpOnCrash" + + - name: Check that all tests executed successfully + if: always() + run: > + if ! grep --include=test-summary.txt -lqr build/*/test-results -e "TEST SUCCESS" ; then + cat build/*/test-results/*/text/newfailures.txt ; + exit 1 ; + fi + + - name: Create suitable test log artifact name + if: always() + run: echo "logsuffix=`echo ${{ matrix.test }} | sed -e 's!/!_!'g -e 's! !_!'g`" >> $GITHUB_ENV + + - name: Package test results + if: always() + working-directory: build/run-test-prebuilt/test-results/ + run: > + zip -r9 + "$HOME/linux-x86${{ matrix.artifact }}_testresults_${{ env.logsuffix }}.zip" + . 
+ continue-on-error: true + + - name: Package test support + if: always() + working-directory: build/run-test-prebuilt/test-support/ + run: > + zip -r9 + "$HOME/linux-x86${{ matrix.artifact }}_testsupport_${{ env.logsuffix }}.zip" + . + -i *.jtr + -i */hs_err*.log + -i */replay*.log + continue-on-error: true + + - name: Persist test results if: always() uses: actions/upload-artifact@v2 with: - name: linux-x64${{ matrix.artifact }}_testlogs_${{ env.logsuffix }} - path: build/*/test-results + path: ~/linux-x86${{ matrix.artifact }}_testresults_${{ env.logsuffix }}.zip + continue-on-error: true + + - name: Persist test outputs + if: always() + uses: actions/upload-artifact@v2 + with: + path: ~/linux-x86${{ matrix.artifact }}_testsupport_${{ env.logsuffix }}.zip continue-on-error: true windows_x64_build: @@ -635,12 +952,41 @@ jobs: if: always() run: echo ("logsuffix=" + ("${{ matrix.test }}" -replace "/", "_" -replace " ", "_")) | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 - - name: Persist test logs + - name: Package test results + if: always() + working-directory: build/run-test-prebuilt/test-results/ + run: > + $env:Path = "$HOME\cygwin\cygwin64\bin;$env:Path" ; + zip -r9 + "$HOME/windows-x64${{ matrix.artifact }}_testresults_${{ env.logsuffix }}.zip" + . + continue-on-error: true + + - name: Package test support + if: always() + working-directory: build/run-test-prebuilt/test-support/ + run: > + $env:Path = "$HOME\cygwin\cygwin64\bin;$env:Path" ; + zip -r9 + "$HOME/windows-x64${{ matrix.artifact }}_testsupport_${{ env.logsuffix }}.zip" + . + -i *.jtr + -i */hs_err*.log + -i */replay*.log + continue-on-error: true + + - name: Persist test results if: always() uses: actions/upload-artifact@v2 with: - name: windows-x64${{ matrix.artifact }}_testlogs_${{ env.logsuffix }} - path: build/*/test-results + path: ~/windows-x64${{ matrix.artifact }}_testresults_${{ env.logsuffix }}.zip + continue-on-error: true + + - name: Persist test outputs + if: always() + uses: actions/upload-artifact@v2 + with: + path: ~/windows-x64${{ matrix.artifact }}_testsupport_${{ env.logsuffix }}.zip continue-on-error: true macos_x64_build: @@ -890,12 +1236,39 @@ jobs: if: always() run: echo "logsuffix=`echo ${{ matrix.test }} | sed -e 's!/!_!'g -e 's! !_!'g`" >> $GITHUB_ENV - - name: Persist test logs + - name: Package test results + if: always() + working-directory: build/run-test-prebuilt/test-results/ + run: > + zip -r9 + "$HOME/macos-x64${{ matrix.artifact }}_testresults_${{ env.logsuffix }}.zip" + . + continue-on-error: true + + - name: Package test support + if: always() + working-directory: build/run-test-prebuilt/test-support/ + run: > + zip -r9 + "$HOME/macos-x64${{ matrix.artifact }}_testsupport_${{ env.logsuffix }}.zip" + . 
+ -i *.jtr + -i */hs_err*.log + -i */replay*.log + continue-on-error: true + + - name: Persist test results + if: always() + uses: actions/upload-artifact@v2 + with: + path: ~/macos-x64${{ matrix.artifact }}_testresults_${{ env.logsuffix }}.zip + continue-on-error: true + + - name: Persist test outputs if: always() uses: actions/upload-artifact@v2 with: - name: macos-x64${{ matrix.artifact }}_testlogs_${{ env.logsuffix }} - path: build/*/test-results + path: ~/macos-x64${{ matrix.artifact }}_testsupport_${{ env.logsuffix }}.zip continue-on-error: true artifacts: @@ -904,7 +1277,9 @@ jobs: if: always() continue-on-error: true needs: + - prerequisites - linux_x64_test + - linux_x86_test - windows_x64_test - macos_x64_test diff --git a/.gitignore b/.gitignore index c34d27c8470..cf21c8919cd 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ /dist/ /.idea/ /.vscode/ +/nbproject/ nbproject/private/ /webrev /.src-rev @@ -14,3 +15,4 @@ test/nashorn/lib NashornProfile.txt **/JTreport/** **/JTwork/** +/src/utils/LogCompilation/target/ diff --git a/.hgignore b/.hgignore deleted file mode 100644 index 312ce62a641..00000000000 --- a/.hgignore +++ /dev/null @@ -1,18 +0,0 @@ -^build/ -^dist/ -^.idea/ -^.vscode/ -nbproject/private/ -^webrev -^.src-rev$ -^.jib/ -(^|/)\.DS_Store -(^|/)\.metadata/ -(^|/)\.recommenders/ -test/nashorn/script/external -test/nashorn/lib -NashornProfile.txt -(^|/)JTreport/ -(^|/)JTwork/ -(^|/)\.git/ -^src/utils/hsdis/build/ \ No newline at end of file diff --git a/doc/building.html b/doc/building.html index 5f615f9d4ef..318a24aa840 100644 --- a/doc/building.html +++ b/doc/building.html @@ -78,6 +78,7 @@

Building the JDK

  • Native Libraries
  • Creating And Using Sysroots With qemu-debootstrap
  • Building for ARM/aarch64
  + • Building for musl
  • Verifying the Build
  • Build Performance

    The build does not create new files in that chroot, so it can be reused for multiple builds without additional cleanup.

    Architectures that are known to successfully cross-compile like this are:

    @@ -688,6 +706,15 @@

    Creating And Using Sysroots With qemu-debootstrap

    Additional architectures might be supported by Debian/Ubuntu Ports.

    Building for ARM/aarch64

    A common cross-compilation target is the ARM CPU. When building for ARM, it is useful to set the ABI profile. A number of pre-defined ABI profiles are available using --with-abi-profile: arm-vfp-sflt, arm-vfp-hflt, arm-sflt, armv5-vfp-sflt, armv6-vfp-hflt. Note that soft-float ABIs are no longer properly supported by the JDK.
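
Purely as an illustration (not part of this patch): a minimal sketch of selecting one of the ABI profiles listed above at configure time; the target triplet and sysroot path are assumptions.

```
# Sketch: cross-compile for 32-bit ARM with the hard-float VFP profile.
# arm-linux-gnueabihf and ~/sysroot-arm are placeholder values.
sh ./configure \
    --openjdk-target=arm-linux-gnueabihf \
    --with-abi-profile=arm-vfp-hflt \
    --with-sysroot=~/sysroot-arm
```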

    +

    Building for musl

    +

    Just like it's possible to cross-compile for a different CPU, it's possible to cross-compile for musl libc on a glibc-based build system. A devkit suitable for most target CPU architectures can be obtained from musl.cc. After installing the required packages in the sysroot, configure the build with --openjdk-target:

    +
    sh ./configure --with-jvm-variants=server \
    +--with-boot-jdk=$BOOT_JDK \
    +--with-build-jdk=$BUILD_JDK \
    +--openjdk-target=x86_64-unknown-linux-musl \
    +--with-devkit=$DEVKIT \
    +--with-sysroot=$SYSROOT
    +

    and run make normally.
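
As a quick sanity check (an editor's sketch, not part of the patch; the build directory name is an example), the resulting launcher should request the musl dynamic loader rather than the glibc one:

```
# Inspect the ELF interpreter of the cross-built launcher (example path).
file build/linux-x86_64-server-release/images/jdk/bin/java
# A musl build is expected to report an interpreter such as /lib/ld-musl-x86_64.so.1.
```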

    Verifying the Build

    The build will end up in a directory named like build/linux-arm-normal-server-release.

    Inside this build output directory, the images/jdk will contain the newly built JDK, for your target system.

    diff --git a/doc/building.md b/doc/building.md index 47fa445998d..e0ac5c7b6c7 100644 --- a/doc/building.md +++ b/doc/building.md @@ -273,6 +273,13 @@ For rpm-based distributions (Fedora, Red Hat, etc), try this: sudo yum groupinstall "Development Tools" ``` +For Alpine Linux, aside from basic tooling, install the GNU versions of some +programs: + +``` +sudo apk add build-base bash grep zip +``` + ### AIX Please consult the AIX section of the [Supported Build Platforms]( @@ -431,6 +438,7 @@ rather than bundling the JDK's own copy. libfreetype6-dev`. * To install on an rpm-based Linux, try running `sudo yum install freetype-devel`. + * To install on Alpine Linux, try running `sudo apk add freetype-dev`. Use `--with-freetype-include=` and `--with-freetype-lib=` if `configure` does not automatically locate the platform FreeType files. @@ -445,6 +453,7 @@ your operating system. libcups2-dev`. * To install on an rpm-based Linux, try running `sudo yum install cups-devel`. + * To install on Alpine Linux, try running `sudo apk add cups-dev`. Use `--with-cups=` if `configure` does not properly locate your CUPS files. @@ -458,6 +467,8 @@ Linux. libx11-dev libxext-dev libxrender-dev libxrandr-dev libxtst-dev libxt-dev`. * To install on an rpm-based Linux, try running `sudo yum install libXtst-devel libXt-devel libXrender-devel libXrandr-devel libXi-devel`. + * To install on Alpine Linux, try running `sudo apk add libx11-dev + libxext-dev libxrender-dev libxrandr-dev libxtst-dev libxt-dev`. Use `--with-x=` if `configure` does not properly locate your X11 files. @@ -470,6 +481,7 @@ required on Linux. At least version 0.9.1 of ALSA is required. libasound2-dev`. * To install on an rpm-based Linux, try running `sudo yum install alsa-lib-devel`. + * To install on Alpine Linux, try running `sudo apk add alsa-lib-dev`. Use `--with-alsa=` if `configure` does not properly locate your ALSA files. @@ -484,6 +496,7 @@ Hotspot. libffi-dev`. * To install on an rpm-based Linux, try running `sudo yum install libffi-devel`. + * To install on Alpine Linux, try running `sudo apk add libffi-dev`. Use `--with-libffi=` if `configure` does not properly locate your libffi files. @@ -499,6 +512,7 @@ platforms. At least version 2.69 is required. autoconf`. * To install on an rpm-based Linux, try running `sudo yum install autoconf`. + * To install on Alpine Linux, try running `sudo apk add autoconf`. * To install on macOS, try running `brew install autoconf`. * To install on Windows, try running `/setup-x86_64 -q -P autoconf`. @@ -1072,23 +1086,39 @@ for foreign architectures with native compilation speed. 
For example, cross-compiling to AArch64 from x86_64 could be done like this: * Install cross-compiler on the *build* system: -``` -apt install g++-aarch64-linux-gnu gcc-aarch64-linux-gnu -``` + ``` + apt install g++-aarch64-linux-gnu gcc-aarch64-linux-gnu + ``` * Create chroot on the *build* system, configuring it for *target* system: -``` -sudo qemu-debootstrap --arch=arm64 --verbose \ - --include=fakeroot,build-essential,libx11-dev,libxext-dev,libxrender-dev,libxrandr-dev,libxtst-dev,libxt-dev,libcups2-dev,libfontconfig1-dev,libasound2-dev,libfreetype6-dev,libpng12-dev \ - --resolve-deps jessie /chroots/arm64 http://httpredir.debian.org/debian/ -``` + ``` + sudo qemu-debootstrap \ + --arch=arm64 \ + --verbose \ + --include=fakeroot,symlinks,build-essential,libx11-dev,libxext-dev,libxrender-dev,libxrandr-dev,libxtst-dev,libxt-dev,libcups2-dev,libfontconfig1-dev,libasound2-dev,libfreetype6-dev,libpng-dev \ + --resolve-deps \ + buster \ + ~/sysroot-arm64 \ + http://httpredir.debian.org/debian/ + ``` + + * Make sure the symlinks inside the newly created chroot point to proper locations: + ``` + sudo chroot ~/sysroot-arm64 symlinks -cr . + ``` * Configure and build with newly created chroot as sysroot/toolchain-path: -``` -CC=aarch64-linux-gnu-gcc CXX=aarch64-linux-gnu-g++ sh ./configure --openjdk-target=aarch64-linux-gnu --with-sysroot=/chroots/arm64/ --with-toolchain-path=/chroots/arm64/ -make images -ls build/linux-aarch64-normal-server-release/ -``` + ``` + CC=aarch64-linux-gnu-gcc CXX=aarch64-linux-gnu-g++ sh ./configure \ + --openjdk-target=aarch64-linux-gnu \ + --with-sysroot=~/sysroot-arm64 \ + --with-toolchain-path=~/sysroot-arm64 \ + --with-freetype-lib=~/sysroot-arm64/usr/lib/aarch64-linux-gnu/ \ + --with-freetype-include=~/sysroot-arm64/usr/include/freetype2/ \ + --x-libraries=~/sysroot-arm64/usr/lib/aarch64-linux-gnu/ + make images + ls build/linux-aarch64-server-release/ + ``` The build does not create new files in that chroot, so it can be reused for multiple builds without additional cleanup. @@ -1113,6 +1143,25 @@ available using `--with-abi-profile`: arm-vfp-sflt, arm-vfp-hflt, arm-sflt, armv5-vfp-sflt, armv6-vfp-hflt. Note that soft-float ABIs are no longer properly supported by the JDK. +### Building for musl + +Just like it's possible to cross-compile for a different CPU, it's possible to +cross-compile for musl libc on a glibc-based *build* system. +A devkit suitable for most target CPU architectures can be obtained from +[musl.cc](https://musl.cc). After installing the required packages in the +sysroot, configure the build with `--openjdk-target`: + +``` +sh ./configure --with-jvm-variants=server \ +--with-boot-jdk=$BOOT_JDK \ +--with-build-jdk=$BUILD_JDK \ +--openjdk-target=x86_64-unknown-linux-musl \ +--with-devkit=$DEVKIT \ +--with-sysroot=$SYSROOT +``` + +and run `make` normally. 
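
The musl configure invocation above references several variables it does not define; as a minimal sketch (all paths and the devkit directory name are assumptions, not from the patch), they could be set along these lines before running it:

```
# Placeholder locations for the pieces the musl configure line expects.
export BOOT_JDK=$HOME/jdk-boot                        # an existing JDK used as boot JDK
export BUILD_JDK=$HOME/buildjdk/jdk                   # build JDK matching the sources being built
export DEVKIT=$HOME/devkits/x86_64-linux-musl-native  # devkit downloaded from musl.cc
export SYSROOT=$DEVKIT/x86_64-linux-musl              # sysroot with the required packages installed
```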
+ ### Verifying the Build The build will end up in a directory named like diff --git a/make/Bundles.gmk b/make/Bundles.gmk index b7c8ddbfbe7..b52b5720772 100644 --- a/make/Bundles.gmk +++ b/make/Bundles.gmk @@ -410,17 +410,43 @@ endif ################################################################################ -ifneq ($(filter docs-bundles, $(MAKECMDGOALS)), ) - DOCS_BUNDLE_FILES := $(call FindFiles, $(DOCS_IMAGE_DIR)) +ifneq ($(filter docs-jdk-bundles, $(MAKECMDGOALS)), ) + DOCS_JDK_BUNDLE_FILES := $(call FindFiles, $(DOCS_JDK_IMAGE_DIR)) - $(eval $(call SetupBundleFile, BUILD_DOCS_BUNDLE, \ - BUNDLE_NAME := $(DOCS_BUNDLE_NAME), \ - FILES := $(DOCS_BUNDLE_FILES), \ - BASE_DIRS := $(DOCS_IMAGE_DIR), \ + $(eval $(call SetupBundleFile, BUILD_DOCS_JDK_BUNDLE, \ + BUNDLE_NAME := $(DOCS_JDK_BUNDLE_NAME), \ + FILES := $(DOCS_JDK_BUNDLE_FILES), \ + BASE_DIRS := $(DOCS_JDK_IMAGE_DIR), \ SUBDIR := docs, \ )) - DOCS_TARGETS += $(BUILD_DOCS_BUNDLE) + DOCS_JDK_TARGETS += $(BUILD_DOCS_JDK_BUNDLE) +endif + +ifneq ($(filter docs-javase-bundles, $(MAKECMDGOALS)), ) + DOCS_JAVASE_BUNDLE_FILES := $(call FindFiles, $(DOCS_JAVASE_IMAGE_DIR)) + + $(eval $(call SetupBundleFile, BUILD_DOCS_JAVASE_BUNDLE, \ + BUNDLE_NAME := $(DOCS_JAVASE_BUNDLE_NAME), \ + FILES := $(DOCS_JAVASE_BUNDLE_FILES), \ + BASE_DIRS := $(DOCS_JAVASE_IMAGE_DIR), \ + SUBDIR := docs-javase, \ + )) + + DOCS_JAVASE_TARGETS += $(BUILD_DOCS_JAVASE_BUNDLE) +endif + +ifneq ($(filter docs-reference-bundles, $(MAKECMDGOALS)), ) + DOCS_REFERENCE_BUNDLE_FILES := $(call FindFiles, $(DOCS_REFERENCE_IMAGE_DIR)) + + $(eval $(call SetupBundleFile, BUILD_DOCS_REFERENCE_BUNDLE, \ + BUNDLE_NAME := $(DOCS_REFERENCE_BUNDLE_NAME), \ + FILES := $(DOCS_REFERENCE_BUNDLE_FILES), \ + BASE_DIRS := $(DOCS_REFERENCE_IMAGE_DIR), \ + SUBDIR := docs-reference, \ + )) + + DOCS_REFERENCE_TARGETS += $(BUILD_DOCS_REFERENCE_BUNDLE) endif ################################################################################ @@ -469,9 +495,12 @@ $(eval $(call IncludeCustomExtension, Bundles.gmk)) product-bundles: $(PRODUCT_TARGETS) legacy-bundles: $(LEGACY_TARGETS) test-bundles: $(TEST_TARGETS) -docs-bundles: $(DOCS_TARGETS) +docs-jdk-bundles: $(DOCS_JDK_TARGETS) +docs-javase-bundles: $(DOCS_JAVASE_TARGETS) +docs-reference-bundles: $(DOCS_REFERENCE_TARGETS) static-libs-bundles: $(STATIC_LIBS_TARGETS) jcov-bundles: $(JCOV_TARGETS) -.PHONY: all default product-bundles test-bundles docs-bundles \ +.PHONY: all default product-bundles test-bundles \ + docs-jdk-bundles docs-javase-bundles docs-reference-bundles \ static-libs-bundles jcov-bundles diff --git a/make/CompileJavaModules.gmk b/make/CompileJavaModules.gmk index c4d25c90122..e8997e0da83 100644 --- a/make/CompileJavaModules.gmk +++ b/make/CompileJavaModules.gmk @@ -184,10 +184,6 @@ ifeq ($(call isTargetOs, windows), true) java.desktop_EXCLUDES += com/sun/java/swing/plaf/gtk endif -ifdef BUILD_HEADLESS_ONLY - java.desktop_EXCLUDES += sun/applet -endif - ifeq ($(call isTargetOs, windows macosx), false) java.desktop_EXCLUDE_FILES += sun/awt/AWTCharset.java endif @@ -389,11 +385,11 @@ endif ################################################################################ -jdk.incubator.jpackage_COPY += .gif .png .txt .spec .script .prerm .preinst \ +jdk.jpackage_COPY += .gif .png .txt .spec .script .prerm .preinst \ .postrm .postinst .list .sh .desktop .copyright .control .plist .template \ .icns .scpt .wxs .wxl .wxi .ico .bmp .tiff -jdk.incubator.jpackage_CLEAN += .properties +jdk.jpackage_CLEAN += .properties 
################################################################################ @@ -546,6 +542,10 @@ jdk.jfr_DISABLED_WARNINGS += exports jdk.jfr_COPY := .xsd .xml .dtd jdk.jfr_JAVAC_FLAGS := -XDstringConcat=inline +################################################################################ + +jdk.incubator.vector_DOCLINT += -Xdoclint:all/protected + ################################################################################ # If this is an imported module that has prebuilt classes, only compile # module-info.java. diff --git a/make/CompileModuleTools.gmk b/make/CompileModuleTools.gmk index c6322e5b36e..18cd42f0612 100644 --- a/make/CompileModuleTools.gmk +++ b/make/CompileModuleTools.gmk @@ -33,8 +33,20 @@ include JavaCompilation.gmk TOOLS_CLASSES_DIR := $(BUILDTOOLS_OUTPUTDIR)/tools_jigsaw_classes +# When using an external BUILDJDK, make it possible to shortcut building of +# these tools using the BUILD_JAVAC instead of having to build the complete +# exploded image first. +ifeq ($(EXTERNAL_BUILDJDK), true) + COMPILER := buildjdk + TARGET_RELEASE := $(TARGET_RELEASE_NEWJDK) +else + COMPILER := interim + TARGET_RELEASE := $(TARGET_RELEASE_NEWJDK_UPGRADED) +endif + $(eval $(call SetupJavaCompilation, BUILD_JIGSAW_TOOLS, \ - TARGET_RELEASE := $(TARGET_RELEASE_NEWJDK_UPGRADED), \ + TARGET_RELEASE := $(TARGET_RELEASE), \ + COMPILER := $(COMPILER), \ SRC := $(TOPDIR)/make/jdk/src/classes, \ INCLUDES := build/tools/deps \ build/tools/docs \ diff --git a/make/CompileToolsJdk.gmk b/make/CompileToolsJdk.gmk index a671f934998..2f09476aa67 100644 --- a/make/CompileToolsJdk.gmk +++ b/make/CompileToolsJdk.gmk @@ -56,7 +56,8 @@ $(eval $(call SetupJavaCompilation, BUILD_TOOLS_JDK, \ DISABLED_WARNINGS := options, \ JAVAC_FLAGS := \ --add-exports java.desktop/sun.awt=ALL-UNNAMED \ - --add-exports java.base/sun.text=ALL-UNNAMED, \ + --add-exports java.base/sun.text=ALL-UNNAMED \ + --add-exports java.base/sun.security.util=ALL-UNNAMED, \ )) TARGETS += $(BUILD_TOOLS_JDK) diff --git a/make/Docs.gmk b/make/Docs.gmk index 19e962b79af..1f7a0caf819 100644 --- a/make/Docs.gmk +++ b/make/Docs.gmk @@ -458,7 +458,7 @@ $(eval $(call SetupApiDocsGeneration, JAVASE_API, \ MODULES := $(JAVASE_MODULES), \ SHORT_NAME := $(JAVASE_SHORT_NAME), \ LONG_NAME := $(JAVASE_LONG_NAME), \ - TARGET_DIR := $(IMAGES_OUTPUTDIR)/javase-docs/api, \ + TARGET_DIR := $(DOCS_JAVASE_IMAGE_DIR)/api, \ )) # Targets generated are returned in JAVASE_API_JAVADOC_TARGETS and @@ -476,7 +476,7 @@ $(eval $(call SetupApiDocsGeneration, REFERENCE_API, \ MODULES := $(JAVASE_MODULES), \ SHORT_NAME := $(JAVASE_SHORT_NAME), \ LONG_NAME := $(JAVASE_LONG_NAME), \ - TARGET_DIR := $(IMAGES_OUTPUTDIR)/reference-docs/api, \ + TARGET_DIR := $(DOCS_REFERENCE_IMAGE_DIR)/api, \ JAVADOC_CMD := $(JAVADOC), \ OPTIONS := $(REFERENCE_OPTIONS), \ TAGS := $(REFERENCE_TAGS), \ diff --git a/make/Main.gmk b/make/Main.gmk index 493b795d35a..cdb4be67c56 100644 --- a/make/Main.gmk +++ b/make/Main.gmk @@ -90,7 +90,6 @@ $(eval $(call SetupTarget, buildtools-jdk, \ $(eval $(call SetupTarget, buildtools-modules, \ MAKEFILE := CompileModuleTools, \ - DEPS := exploded-image-base, \ )) $(eval $(call SetupTarget, buildtools-hotspot, \ @@ -339,7 +338,7 @@ $(eval $(call SetupTarget, test-image-demos-jdk, \ $(eval $(call SetupTarget, generate-summary, \ MAKEFILE := GenerateModuleSummary, \ - DEPS := jmods buildtools-modules, \ + DEPS := jmods buildtools-modules runnable-buildjdk, \ )) ################################################################################ @@ -469,7 +468,7 
@@ $(eval $(call SetupTarget, docs-jdk-api-javadoc, \ $(eval $(call SetupTarget, docs-jdk-api-modulegraph, \ MAKEFILE := Docs, \ TARGET := docs-jdk-api-modulegraph, \ - DEPS := exploded-image buildtools-modules, \ + DEPS := buildtools-modules runnable-buildjdk, \ )) $(eval $(call SetupTarget, docs-javase-api-javadoc, \ @@ -480,7 +479,7 @@ $(eval $(call SetupTarget, docs-javase-api-javadoc, \ $(eval $(call SetupTarget, docs-javase-api-modulegraph, \ MAKEFILE := Docs, \ TARGET := docs-javase-api-modulegraph, \ - DEPS := exploded-image buildtools-modules, \ + DEPS := buildtools-modules runnable-buildjdk, \ )) $(eval $(call SetupTarget, docs-reference-api-javadoc, \ @@ -491,7 +490,7 @@ $(eval $(call SetupTarget, docs-reference-api-javadoc, \ $(eval $(call SetupTarget, docs-reference-api-modulegraph, \ MAKEFILE := Docs, \ TARGET := docs-reference-api-modulegraph, \ - DEPS := exploded-image buildtools-modules, \ + DEPS := buildtools-modules runnable-buildjdk, \ )) # The gensrc steps for jdk.jdi create html spec files. @@ -749,12 +748,24 @@ $(eval $(call SetupTarget, test-bundles, \ DEPS := test-image, \ )) -$(eval $(call SetupTarget, docs-bundles, \ +$(eval $(call SetupTarget, docs-jdk-bundles, \ MAKEFILE := Bundles, \ - TARGET := docs-bundles, \ + TARGET := docs-jdk-bundles, \ DEPS := docs-image, \ )) +$(eval $(call SetupTarget, docs-javase-bundles, \ + MAKEFILE := Bundles, \ + TARGET := docs-javase-bundles, \ + DEPS := docs-javase-image, \ +)) + +$(eval $(call SetupTarget, docs-reference-bundles, \ + MAKEFILE := Bundles, \ + TARGET := docs-reference-bundles, \ + DEPS := docs-reference-image, \ +)) + $(eval $(call SetupTarget, static-libs-bundles, \ MAKEFILE := Bundles, \ TARGET := static-libs-bundles, \ @@ -945,10 +956,13 @@ else $(JMOD_TARGETS) $(INTERIM_JMOD_TARGETS): java.base-libs java.base-copy \ java.base-gendata jdk.jlink-launchers java endif - else - # The normal non cross compilation case uses needs to wait for the full + else ifeq ($(EXTERNAL_BUILDJDK), false) + # The normal non cross compilation usecase needs to wait for the full # exploded-image to avoid a race with the optimize target. $(JMOD_TARGETS) $(INTERIM_JMOD_TARGETS): exploded-image + # The buildtools-modules are used for the exploded-image-optimize target, + # but can be built either using the exploded-image or an external BUILDJDK. + buildtools-modules: exploded-image-base endif # All modules include the main license files from java.base. @@ -1069,6 +1083,18 @@ ifneq ($(COMPILE_TYPE), cross) exploded-image: exploded-image-optimize endif +# The runnable-buildjdk target guarantees that the buildjdk is done +# building and ready to be used. The exact set of dependencies it needs +# depends on what kind of buildjdk is used for the current configuration. 
+runnable-buildjdk: +ifeq ($(CREATE_BUILDJDK), true) + ifneq ($(CREATING_BUILDJDK), true) + runnable-buildjdk: create-buildjdk + endif +else ifeq ($(EXTERNAL_BUILDJDK), false) + runnable-buildjdk: exploded-image +endif + create-buildjdk: create-buildjdk-interim-image docs-jdk-api: docs-jdk-api-javadoc @@ -1122,8 +1148,16 @@ ifeq ($(call isTargetOs, macosx), true) legacy-images: mac-legacy-jre-bundle endif -# This target builds the documentation image -docs-image: docs-jdk +# These targets build the various documentation images +docs-jdk-image: docs-jdk +docs-javase-image: docs-javase +docs-reference-image: docs-reference +# The docs-jdk-image is what most users expect to be built +docs-image: docs-jdk-image +all-docs-images: docs-jdk-image docs-javase-image docs-reference-image + +docs-bundles: docs-jdk-bundles +all-docs-bundles: docs-jdk-bundles docs-javase-bundles docs-reference-bundles # This target builds the test image test-image: prepare-test-image test-image-jdk-jtreg-native \ @@ -1156,7 +1190,7 @@ endif ################################################################################ # all-images builds all our deliverables as images. -all-images: product-images test-image docs-image +all-images: product-images test-image all-docs-images # all-bundles packages all our deliverables as tar.gz bundles. all-bundles: product-bundles test-bundles docs-bundles static-libs-bundles @@ -1164,10 +1198,11 @@ all-bundles: product-bundles test-bundles docs-bundles static-libs-bundles ALL_TARGETS += buildtools hotspot hotspot-libs hotspot-gensrc gensrc gendata \ copy java libs static-libs launchers jmods \ jdk.jdwp.agent-gensrc $(ALL_MODULES) demos \ - exploded-image-base exploded-image \ + exploded-image-base exploded-image runnable-buildjdk \ create-buildjdk docs-jdk-api docs-javase-api docs-reference-api docs-jdk \ docs-javase docs-reference docs-javadoc mac-bundles product-images legacy-images \ - docs-image test-image all-images \ + docs-image docs-javase-image docs-reference-image all-docs-images \ + docs-bundles all-docs-bundles test-image all-images \ all-bundles ################################################################################ diff --git a/make/ReleaseFile.gmk b/make/ReleaseFile.gmk index 14ebc9c32ae..0424e2fb623 100644 --- a/make/ReleaseFile.gmk +++ b/make/ReleaseFile.gmk @@ -53,6 +53,7 @@ define create-info-file $(call info-file-item, "JAVA_VERSION_DATE", "$(VERSION_DATE)") $(call info-file-item, "OS_NAME", "$(RELEASE_FILE_OS_NAME)") $(call info-file-item, "OS_ARCH", "$(RELEASE_FILE_OS_ARCH)") + $(call info-file-item, "LIBC", "$(RELEASE_FILE_LIBC)") endef # Param 1 - The file containing the MODULES list diff --git a/make/ToolsJdk.gmk b/make/ToolsJdk.gmk index 296411559a9..45a0cc8c64e 100644 --- a/make/ToolsJdk.gmk +++ b/make/ToolsJdk.gmk @@ -1,5 +1,5 @@ # -# Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved. # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
# # This code is free software; you can redistribute it and/or modify it @@ -68,6 +68,7 @@ TOOL_TZDB = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \ build.tools.tzdb.TzdbZoneRulesCompiler TOOL_BLACKLISTED_CERTS = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \ + --add-exports java.base/sun.security.util=ALL-UNNAMED \ build.tools.blacklistedcertsconverter.BlacklistedCertsConverter TOOL_MAKEJAVASECURITY = $(JAVA_SMALL) -cp $(BUILDTOOLS_OUTPUTDIR)/jdk_tools_classes \ diff --git a/make/autoconf/build-aux/config.guess b/make/autoconf/build-aux/config.guess index b650b5109d0..14f21a25e8f 100644 --- a/make/autoconf/build-aux/config.guess +++ b/make/autoconf/build-aux/config.guess @@ -30,6 +30,17 @@ DIR=`dirname $0` OUT=`. $DIR/autoconf-config.guess` +# Detect C library. +# Use '-gnu' suffix on systems that use glibc. +# Use '-musl' suffix on systems that use the musl libc. +echo $OUT | grep -- -linux- > /dev/null 2> /dev/null +if test $? = 0; then + libc_vendor=`ldd --version 2>&1 | sed -n '1s/.*\(musl\).*/\1/p'` + if [ x"${libc_vendor}" = x"musl" ]; then + OUT=`echo $OUT | sed 's/-gnu/-musl/'` + fi +fi + # Test and fix cygwin on x86_64 echo $OUT | grep 86-pc-cygwin > /dev/null 2> /dev/null if test $? != 0; then diff --git a/make/autoconf/build-aux/config.sub b/make/autoconf/build-aux/config.sub index a36e6690728..d0dd001abdf 100644 --- a/make/autoconf/build-aux/config.sub +++ b/make/autoconf/build-aux/config.sub @@ -29,6 +29,11 @@ DIR=`dirname $0` +if echo $* | grep linux-musl >/dev/null ; then + echo $* + exit +fi + # Allow wsl if echo $* | grep x86_64-pc-wsl >/dev/null ; then echo $* diff --git a/make/autoconf/buildjdk-spec.gmk.in b/make/autoconf/buildjdk-spec.gmk.in index 7134e34bcee..524f35f417c 100644 --- a/make/autoconf/buildjdk-spec.gmk.in +++ b/make/autoconf/buildjdk-spec.gmk.in @@ -54,11 +54,13 @@ IMAGES_OUTPUTDIR := $(patsubst $(OUTPUTDIR)%,$(BUILDJDK_OUTPUTDIR)%,$(IMAGES_OUT OPENJDK_BUILD_CPU_LEGACY := @OPENJDK_BUILD_CPU_LEGACY@ OPENJDK_BUILD_CPU_LEGACY_LIB := @OPENJDK_BUILD_CPU_LEGACY_LIB@ +OPENJDK_BUILD_LIBC := @OPENJDK_BUILD_LIBC@ OPENJDK_TARGET_CPU := @OPENJDK_BUILD_CPU@ OPENJDK_TARGET_CPU_ARCH := @OPENJDK_BUILD_CPU_ARCH@ OPENJDK_TARGET_CPU_BITS := @OPENJDK_BUILD_CPU_BITS@ OPENJDK_TARGET_CPU_ENDIAN := @OPENJDK_BUILD_CPU_ENDIAN@ OPENJDK_TARGET_CPU_LEGACY := @OPENJDK_BUILD_CPU_LEGACY@ +OPENJDK_TARGET_LIBC := @OPENJDK_BUILD_LIBC@ OPENJDK_TARGET_OS_INCLUDE_SUBDIR := @OPENJDK_BUILD_OS_INCLUDE_SUBDIR@ HOTSPOT_TARGET_OS := @HOTSPOT_BUILD_OS@ @@ -66,6 +68,7 @@ HOTSPOT_TARGET_OS_TYPE := @HOTSPOT_BUILD_OS_TYPE@ HOTSPOT_TARGET_CPU := @HOTSPOT_BUILD_CPU@ HOTSPOT_TARGET_CPU_ARCH := @HOTSPOT_BUILD_CPU_ARCH@ HOTSPOT_TARGET_CPU_DEFINE := @HOTSPOT_BUILD_CPU_DEFINE@ +HOTSPOT_TARGET_LIBC := @HOTSPOT_BUILD_LIBC@ CFLAGS_JDKLIB := @OPENJDK_BUILD_CFLAGS_JDKLIB@ CXXFLAGS_JDKLIB := @OPENJDK_BUILD_CXXFLAGS_JDKLIB@ diff --git a/make/autoconf/flags-cflags.m4 b/make/autoconf/flags-cflags.m4 index 588df7f0011..d4738ad6837 100644 --- a/make/autoconf/flags-cflags.m4 +++ b/make/autoconf/flags-cflags.m4 @@ -231,8 +231,14 @@ AC_DEFUN([FLAGS_SETUP_OPTIMIZATION], # -D_FORTIFY_SOURCE=2 hardening option needs optimization (at least -O1) enabled # set for lower O-levels -U_FORTIFY_SOURCE to overwrite previous settings if test "x$OPENJDK_TARGET_OS" = xlinux -a "x$DEBUG_LEVEL" = "xfastdebug"; then - ENABLE_FORTIFY_CFLAGS="-D_FORTIFY_SOURCE=2" DISABLE_FORTIFY_CFLAGS="-U_FORTIFY_SOURCE" + # ASan doesn't work well with _FORTIFY_SOURCE + # See 
https://github.com/google/sanitizers/wiki/AddressSanitizer#faq + if test "x$ASAN_ENABLED" = xyes; then + ENABLE_FORTIFY_CFLAGS="${DISABLE_FORTIFY_CFLAGS}" + else + ENABLE_FORTIFY_CFLAGS="-D_FORTIFY_SOURCE=2" + fi C_O_FLAG_HIGHEST_JVM="${C_O_FLAG_HIGHEST_JVM} ${ENABLE_FORTIFY_CFLAGS}" C_O_FLAG_HIGHEST="${C_O_FLAG_HIGHEST} ${ENABLE_FORTIFY_CFLAGS}" C_O_FLAG_HI="${C_O_FLAG_HI} ${ENABLE_FORTIFY_CFLAGS}" @@ -558,6 +564,11 @@ AC_DEFUN([FLAGS_SETUP_CFLAGS_HELPER], fi fi + OS_CFLAGS="$OS_CFLAGS -DLIBC=$OPENJDK_TARGET_LIBC" + if test "x$OPENJDK_TARGET_LIBC" = xmusl; then + OS_CFLAGS="$OS_CFLAGS -DMUSL_LIBC" + fi + # Where does this really belong?? if test "x$TOOLCHAIN_TYPE" = xgcc || test "x$TOOLCHAIN_TYPE" = xclang; then PICFLAG="-fPIC" @@ -652,16 +663,10 @@ AC_DEFUN([FLAGS_SETUP_CFLAGS_CPU_DEP], $1_DEFINES_CPU_JDK="${$1_DEFINES_CPU_JDK} -DARCH='\"$FLAGS_CPU_LEGACY\"' \ -D$FLAGS_CPU_LEGACY" - if test "x$FLAGS_CPU_BITS" = x64; then - # -D_LP64=1 is only set on linux and mac. Setting on windows causes diff in - # unpack200.exe. - if test "x$FLAGS_OS" = xlinux || test "x$FLAGS_OS" = xmacosx; then - $1_DEFINES_CPU_JDK="${$1_DEFINES_CPU_JDK} -D_LP64=1" - fi - if test "x$FLAGS_OS" != xaix; then - # xlc on AIX defines _LP64=1 by default and issues a warning if we redefine it. - $1_DEFINES_CPU_JVM="${$1_DEFINES_CPU_JVM} -D_LP64=1" - fi + if test "x$FLAGS_CPU_BITS" = x64 && test "x$FLAGS_OS" != xaix; then + # xlc on AIX defines _LP64=1 by default and issues a warning if we redefine it. + $1_DEFINES_CPU_JDK="${$1_DEFINES_CPU_JDK} -D_LP64=1" + $1_DEFINES_CPU_JVM="${$1_DEFINES_CPU_JVM} -D_LP64=1" fi # toolchain dependend, per-cpu diff --git a/make/autoconf/jdk-options.m4 b/make/autoconf/jdk-options.m4 index 8f58db17d4a..a112a78d624 100644 --- a/make/autoconf/jdk-options.m4 +++ b/make/autoconf/jdk-options.m4 @@ -423,7 +423,10 @@ AC_DEFUN_ONCE([JDKOPT_SETUP_ADDRESS_SANITIZER], fi ], IF_ENABLED: [ - ASAN_CFLAGS="-fsanitize=address -fno-omit-frame-pointer" + # ASan is simply incompatible with gcc -Wstringop-truncation. See + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85650 + # It's harmless to be suppressed in clang as well. + ASAN_CFLAGS="-fsanitize=address -Wno-stringop-truncation -fno-omit-frame-pointer" ASAN_LDFLAGS="-fsanitize=address" JVM_CFLAGS="$JVM_CFLAGS $ASAN_CFLAGS" JVM_LDFLAGS="$JVM_LDFLAGS $ASAN_LDFLAGS" diff --git a/make/autoconf/jvm-features.m4 b/make/autoconf/jvm-features.m4 index 04ca7b4e909..5ad791795a7 100644 --- a/make/autoconf/jvm-features.m4 +++ b/make/autoconf/jvm-features.m4 @@ -306,7 +306,7 @@ AC_DEFUN_ONCE([JVM_FEATURES_CHECK_GRAAL], # Graal is only available where JVMCI is available since it requires JVMCI. 
if test "x$OPENJDK_TARGET_CPU" = "xx86_64"; then AC_MSG_RESULT([yes]) - elif test "x$OPENJDK_TARGET_OS-$OPENJDK_TARGET_CPU" = "xlinux-aarch64"; then + elif test "x$OPENJDK_TARGET_CPU" = "xaarch64"; then AC_MSG_RESULT([yes]) else AC_MSG_RESULT([no, $OPENJDK_TARGET_CPU]) @@ -340,7 +340,7 @@ AC_DEFUN_ONCE([JVM_FEATURES_CHECK_JVMCI], AC_MSG_CHECKING([if platform is supported by JVMCI]) if test "x$OPENJDK_TARGET_CPU" = "xx86_64"; then AC_MSG_RESULT([yes]) - elif test "x$OPENJDK_TARGET_OS-$OPENJDK_TARGET_CPU" = "xlinux-aarch64"; then + elif test "x$OPENJDK_TARGET_CPU" = "xaarch64"; then AC_MSG_RESULT([yes]) else AC_MSG_RESULT([no, $OPENJDK_TARGET_CPU]) diff --git a/make/autoconf/libraries.m4 b/make/autoconf/libraries.m4 index 5120918aed2..e6aafe01550 100644 --- a/make/autoconf/libraries.m4 +++ b/make/autoconf/libraries.m4 @@ -43,9 +43,11 @@ AC_DEFUN_ONCE([LIB_DETERMINE_DEPENDENCIES], if test "x$OPENJDK_TARGET_OS" = xwindows || test "x$OPENJDK_TARGET_OS" = xmacosx; then # No X11 support on windows or macosx NEEDS_LIB_X11=false + elif test "x$ENABLE_HEADLESS_ONLY" = xtrue; then + # No X11 support needed when building headless only + NEEDS_LIB_X11=false else - # All other instances need X11, even if building headless only, libawt still - # needs X11 headers. + # All other instances need X11 NEEDS_LIB_X11=true fi diff --git a/make/autoconf/platform.m4 b/make/autoconf/platform.m4 index c0f2446dbd7..2f39d2b0ca7 100644 --- a/make/autoconf/platform.m4 +++ b/make/autoconf/platform.m4 @@ -220,6 +220,24 @@ AC_DEFUN([PLATFORM_EXTRACT_VARS_FROM_OS], esac ]) +# Support macro for PLATFORM_EXTRACT_TARGET_AND_BUILD. +# Converts autoconf style OS name to OpenJDK style, into +# VAR_LIBC. +AC_DEFUN([PLATFORM_EXTRACT_VARS_FROM_LIBC], +[ + case "$1" in + *linux*-musl) + VAR_LIBC=musl + ;; + *linux*-gnu) + VAR_LIBC=gnu + ;; + *) + VAR_LIBC=default + ;; + esac +]) + # Expects $host_os $host_cpu $build_os and $build_cpu # and $with_target_bits to have been setup! # @@ -237,9 +255,10 @@ AC_DEFUN([PLATFORM_EXTRACT_TARGET_AND_BUILD], AC_SUBST(OPENJDK_TARGET_AUTOCONF_NAME) AC_SUBST(OPENJDK_BUILD_AUTOCONF_NAME) - # Convert the autoconf OS/CPU value to our own data, into the VAR_OS/CPU variables. + # Convert the autoconf OS/CPU value to our own data, into the VAR_OS/CPU/LIBC variables. PLATFORM_EXTRACT_VARS_FROM_OS($build_os) PLATFORM_EXTRACT_VARS_FROM_CPU($build_cpu) + PLATFORM_EXTRACT_VARS_FROM_LIBC($build_os) # ..and setup our own variables. (Do this explicitly to facilitate searching) OPENJDK_BUILD_OS="$VAR_OS" if test "x$VAR_OS_TYPE" != x; then @@ -256,6 +275,7 @@ AC_DEFUN([PLATFORM_EXTRACT_TARGET_AND_BUILD], OPENJDK_BUILD_CPU_ARCH="$VAR_CPU_ARCH" OPENJDK_BUILD_CPU_BITS="$VAR_CPU_BITS" OPENJDK_BUILD_CPU_ENDIAN="$VAR_CPU_ENDIAN" + OPENJDK_BUILD_LIBC="$VAR_LIBC" AC_SUBST(OPENJDK_BUILD_OS) AC_SUBST(OPENJDK_BUILD_OS_TYPE) AC_SUBST(OPENJDK_BUILD_OS_ENV) @@ -263,13 +283,20 @@ AC_DEFUN([PLATFORM_EXTRACT_TARGET_AND_BUILD], AC_SUBST(OPENJDK_BUILD_CPU_ARCH) AC_SUBST(OPENJDK_BUILD_CPU_BITS) AC_SUBST(OPENJDK_BUILD_CPU_ENDIAN) + AC_SUBST(OPENJDK_BUILD_LIBC) AC_MSG_CHECKING([openjdk-build os-cpu]) AC_MSG_RESULT([$OPENJDK_BUILD_OS-$OPENJDK_BUILD_CPU]) - # Convert the autoconf OS/CPU value to our own data, into the VAR_OS/CPU variables. + if test "x$OPENJDK_BUILD_OS" = "xlinux"; then + AC_MSG_CHECKING([openjdk-build C library]) + AC_MSG_RESULT([$OPENJDK_BUILD_LIBC]) + fi + + # Convert the autoconf OS/CPU value to our own data, into the VAR_OS/CPU/LIBC variables. 
PLATFORM_EXTRACT_VARS_FROM_OS($host_os) PLATFORM_EXTRACT_VARS_FROM_CPU($host_cpu) + PLATFORM_EXTRACT_VARS_FROM_LIBC($host_os) # ... and setup our own variables. (Do this explicitly to facilitate searching) OPENJDK_TARGET_OS="$VAR_OS" if test "x$VAR_OS_TYPE" != x; then @@ -287,6 +314,7 @@ AC_DEFUN([PLATFORM_EXTRACT_TARGET_AND_BUILD], OPENJDK_TARGET_CPU_BITS="$VAR_CPU_BITS" OPENJDK_TARGET_CPU_ENDIAN="$VAR_CPU_ENDIAN" OPENJDK_TARGET_OS_UPPERCASE=`$ECHO $OPENJDK_TARGET_OS | $TR 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'` + OPENJDK_TARGET_LIBC="$VAR_LIBC" AC_SUBST(OPENJDK_TARGET_OS) AC_SUBST(OPENJDK_TARGET_OS_TYPE) @@ -296,9 +324,15 @@ AC_DEFUN([PLATFORM_EXTRACT_TARGET_AND_BUILD], AC_SUBST(OPENJDK_TARGET_CPU_ARCH) AC_SUBST(OPENJDK_TARGET_CPU_BITS) AC_SUBST(OPENJDK_TARGET_CPU_ENDIAN) + AC_SUBST(OPENJDK_TARGET_LIBC) AC_MSG_CHECKING([openjdk-target os-cpu]) AC_MSG_RESULT([$OPENJDK_TARGET_OS-$OPENJDK_TARGET_CPU]) + + if test "x$OPENJDK_TARGET_OS" = "xlinux"; then + AC_MSG_CHECKING([openjdk-target C library]) + AC_MSG_RESULT([$OPENJDK_TARGET_LIBC]) + fi ]) # Check if a reduced build (32-bit on 64-bit platforms) is requested, and modify behaviour @@ -420,7 +454,13 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], else OPENJDK_$1_CPU_BUNDLE="$OPENJDK_$1_CPU" fi - OPENJDK_$1_BUNDLE_PLATFORM="${OPENJDK_$1_OS_BUNDLE}-${OPENJDK_$1_CPU_BUNDLE}" + + OPENJDK_$1_LIBC_BUNDLE="" + if test "x$OPENJDK_$1_LIBC" = "xmusl"; then + OPENJDK_$1_LIBC_BUNDLE="-$OPENJDK_$1_LIBC" + fi + + OPENJDK_$1_BUNDLE_PLATFORM="${OPENJDK_$1_OS_BUNDLE}-${OPENJDK_$1_CPU_BUNDLE}${OPENJDK_$1_LIBC_BUNDLE}" AC_SUBST(OPENJDK_$1_BUNDLE_PLATFORM) if test "x$COMPILE_TYPE" = "xcross"; then @@ -493,6 +533,9 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], fi AC_SUBST(HOTSPOT_$1_CPU_DEFINE) + HOTSPOT_$1_LIBC=$OPENJDK_$1_LIBC + AC_SUBST(HOTSPOT_$1_LIBC) + # For historical reasons, the OS include directories have odd names. OPENJDK_$1_OS_INCLUDE_SUBDIR="$OPENJDK_TARGET_OS" if test "x$OPENJDK_TARGET_OS" = "xwindows"; then @@ -518,9 +561,11 @@ AC_DEFUN([PLATFORM_SET_RELEASE_FILE_OS_VALUES], RELEASE_FILE_OS_NAME="AIX" fi RELEASE_FILE_OS_ARCH=${OPENJDK_TARGET_CPU} + RELEASE_FILE_LIBC=${OPENJDK_TARGET_LIBC} AC_SUBST(RELEASE_FILE_OS_NAME) AC_SUBST(RELEASE_FILE_OS_ARCH) + AC_SUBST(RELEASE_FILE_LIBC) ]) AC_DEFUN([PLATFORM_SET_MODULE_TARGET_OS_VALUES], diff --git a/make/autoconf/spec.gmk.in b/make/autoconf/spec.gmk.in index 14d7a18a0e8..63dc9a5767d 100644 --- a/make/autoconf/spec.gmk.in +++ b/make/autoconf/spec.gmk.in @@ -80,6 +80,8 @@ OPENJDK_TARGET_CPU_ARCH:=@OPENJDK_TARGET_CPU_ARCH@ OPENJDK_TARGET_CPU_BITS:=@OPENJDK_TARGET_CPU_BITS@ OPENJDK_TARGET_CPU_ENDIAN:=@OPENJDK_TARGET_CPU_ENDIAN@ +OPENJDK_TARGET_LIBC:=@OPENJDK_TARGET_LIBC@ + COMPILE_TYPE:=@COMPILE_TYPE@ # Legacy support @@ -95,6 +97,8 @@ HOTSPOT_TARGET_CPU := @HOTSPOT_TARGET_CPU@ HOTSPOT_TARGET_CPU_ARCH := @HOTSPOT_TARGET_CPU_ARCH@ HOTSPOT_TARGET_CPU_DEFINE := @HOTSPOT_TARGET_CPU_DEFINE@ +HOTSPOT_TARGET_LIBC := @HOTSPOT_TARGET_LIBC@ + OPENJDK_TARGET_BUNDLE_PLATFORM:=@OPENJDK_TARGET_BUNDLE_PLATFORM@ JDK_ARCH_ABI_PROP_NAME := @JDK_ARCH_ABI_PROP_NAME@ @@ -109,6 +113,8 @@ OPENJDK_BUILD_CPU_ARCH:=@OPENJDK_BUILD_CPU_ARCH@ OPENJDK_BUILD_CPU_BITS:=@OPENJDK_BUILD_CPU_BITS@ OPENJDK_BUILD_CPU_ENDIAN:=@OPENJDK_BUILD_CPU_ENDIAN@ +OPENJDK_BUILD_LIBC:=@OPENJDK_BUILD_LIBC@ + OPENJDK_BUILD_OS_INCLUDE_SUBDIR:=@OPENJDK_TARGET_OS_INCLUDE_SUBDIR@ # Target platform value in ModuleTarget class file attribute. 
@@ -117,6 +123,7 @@ OPENJDK_MODULE_TARGET_PLATFORM:=@OPENJDK_MODULE_TARGET_PLATFORM@ # OS_* properties in release file RELEASE_FILE_OS_NAME:=@RELEASE_FILE_OS_NAME@ RELEASE_FILE_OS_ARCH:=@RELEASE_FILE_OS_ARCH@ +RELEASE_FILE_LIBC:=@RELEASE_FILE_LIBC@ SOURCE_DATE := @SOURCE_DATE@ ENABLE_REPRODUCIBLE_BUILD := @ENABLE_REPRODUCIBLE_BUILD@ @@ -637,6 +644,7 @@ JARSIGNER=@FIXPATH@ $(JARSIGNER_CMD) BUILD_JAVA_FLAGS := @BOOTCYCLE_JVM_ARGS_BIG@ BUILD_JAVA=@FIXPATH@ $(BUILD_JDK)/bin/java $(BUILD_JAVA_FLAGS) +BUILD_JAVAC=@FIXPATH@ $(BUILD_JDK)/bin/javac BUILD_JAR=@FIXPATH@ $(BUILD_JDK)/bin/jar # Interim langtools modules and arguments @@ -751,7 +759,6 @@ TAR_SUPPORTS_TRANSFORM:=@TAR_SUPPORTS_TRANSFORM@ # Build setup ENABLE_AOT:=@ENABLE_AOT@ -ENABLE_INTREE_EC:=@ENABLE_INTREE_EC@ USE_EXTERNAL_LIBJPEG:=@USE_EXTERNAL_LIBJPEG@ USE_EXTERNAL_LIBGIF:=@USE_EXTERNAL_LIBGIF@ USE_EXTERNAL_LIBZ:=@USE_EXTERNAL_LIBZ@ @@ -869,10 +876,14 @@ INTERIM_JMODS_DIR := $(SUPPORT_OUTPUTDIR)/interim-jmods INTERIM_IMAGE_DIR := $(SUPPORT_OUTPUTDIR)/interim-image # Docs image -DOCS_IMAGE_SUBDIR := docs -DOCS_IMAGE_DIR = $(IMAGES_OUTPUTDIR)/$(DOCS_IMAGE_SUBDIR) +DOCS_JDK_IMAGE_SUBDIR := docs +DOCS_JDK_IMAGE_DIR = $(IMAGES_OUTPUTDIR)/$(DOCS_JDK_IMAGE_SUBDIR) +DOCS_JAVASE_IMAGE_SUBDIR := docs-javase +DOCS_JAVASE_IMAGE_DIR = $(IMAGES_OUTPUTDIR)/$(DOCS_JAVASE_IMAGE_SUBDIR) +DOCS_REFERENCE_IMAGE_SUBDIR := docs-reference +DOCS_REFERENCE_IMAGE_DIR = $(IMAGES_OUTPUTDIR)/$(DOCS_REFERENCE_IMAGE_SUBDIR) # Output docs directly into image -DOCS_OUTPUTDIR := $(DOCS_IMAGE_DIR) +DOCS_OUTPUTDIR := $(DOCS_JDK_IMAGE_DIR) # Static libs image STATIC_LIBS_IMAGE_SUBDIR := static-libs @@ -915,7 +926,9 @@ JRE_BUNDLE_NAME := jre-$(BASE_NAME)_bin$(DEBUG_PART).$(JDK_BUNDLE_EXTENSION) JDK_SYMBOLS_BUNDLE_NAME := jdk-$(BASE_NAME)_bin$(DEBUG_PART)-symbols.tar.gz TEST_DEMOS_BUNDLE_NAME := jdk-$(BASE_NAME)_bin-tests-demos$(DEBUG_PART).tar.gz TEST_BUNDLE_NAME := jdk-$(BASE_NAME)_bin-tests$(DEBUG_PART).tar.gz -DOCS_BUNDLE_NAME := jdk-$(BASE_NAME)_doc-api-spec$(DEBUG_PART).tar.gz +DOCS_JDK_BUNDLE_NAME := jdk-$(BASE_NAME)_doc-api-spec$(DEBUG_PART).tar.gz +DOCS_JAVASE_BUNDLE_NAME := javase-$(BASE_NAME)_doc-api-spec$(DEBUG_PART).tar.gz +DOCS_REFERENCE_BUNDLE_NAME := jdk-reference-$(BASE_NAME)_doc-api-spec$(DEBUG_PART).tar.gz STATIC_LIBS_BUNDLE_NAME := jdk-$(BASE_NAME)_bin-static-libs$(DEBUG_PART).tar.gz JCOV_BUNDLE_NAME := jdk-jcov-$(BASE_NAME)_bin$(DEBUG_PART).$(JDK_BUNDLE_EXTENSION) @@ -924,7 +937,9 @@ JRE_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(JRE_BUNDLE_NAME) JDK_SYMBOLS_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(JDK_SYMBOLS_BUNDLE_NAME) TEST_DEMOS_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(TEST_DEMOS_BUNDLE_NAME) TEST_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(TEST_BUNDLE_NAME) -DOCS_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(DOCS_BUNDLE_NAME) +DOCS_JDK_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(DOCS_JDK_BUNDLE_NAME) +DOCS_JAVASE_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(DOCS_JAVASE_BUNDLE_NAME) +DOCS_REFERENCE_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(DOCS_REFERENCE_BUNDLE_NAME) JCOV_BUNDLE := $(BUNDLES_OUTPUTDIR)/$(JCOV_BUNDLE_NAME) # This macro is called to allow inclusion of closed source counterparts. 
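
Side note (an editor's sketch under assumed paths, not part of the patch): with the RELEASE_FILE_LIBC plumbing above and the info-file-item addition in ReleaseFile.gmk, the C library a build targets can be read back from the generated release file, and musl bundles additionally gain a "-musl" suffix in their platform string:

```
# Example path; the configuration name depends on the actual build.
grep -E '^(OS_NAME|OS_ARCH|LIBC)=' build/linux-x86_64-server-release/images/jdk/release
```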
diff --git a/make/common/JavaCompilation.gmk b/make/common/JavaCompilation.gmk index e0b1fb003d4..bedb971115b 100644 --- a/make/common/JavaCompilation.gmk +++ b/make/common/JavaCompilation.gmk @@ -202,6 +202,13 @@ define SetupJavaCompilationBody # If unspecified, default to the new jdk we're building $1_TARGET_RELEASE := $$(TARGET_RELEASE_BOOTJDK) endif + else ifeq ($$($1_COMPILER), buildjdk) + $1_JAVAC_CMD := $$(BUILD_JAVAC) + + ifeq ($$($1_TARGET_RELEASE), ) + # If unspecified, default to the new jdk we're building + $1_TARGET_RELEASE := $$(TARGET_RELEASE_NEWJDK) + endif else ifeq ($$($1_COMPILER), interim) # Use java server if it is enabled, and the user does not want a specialized # class path. @@ -304,9 +311,11 @@ define SetupJavaCompilationBody ifneq ($$($1_KEEP_DUPS), true) # Remove duplicate source files by keeping the first found of each duplicate. # This allows for automatic overrides with custom or platform specific versions - # source files. + # source files. Need to call DoubleDollar as we have java classes with '$' in + # their names. $1_SRCS := $$(strip $$(foreach s, $$($1_SRCS), \ - $$(eval relative_src := $$(call remove-prefixes, $$($1_SRC), $$(s))) \ + $$(eval relative_src := $$(call remove-prefixes, $$($1_SRC), \ + $$(call DoubleDollar, $$(s)))) \ $$(if $$($1_$$(relative_src)), \ , \ $$(eval $1_$$(relative_src) := 1) $$(s)))) diff --git a/make/common/JdkNativeCompilation.gmk b/make/common/JdkNativeCompilation.gmk index 21134966dc0..6a963ac2c49 100644 --- a/make/common/JdkNativeCompilation.gmk +++ b/make/common/JdkNativeCompilation.gmk @@ -77,8 +77,10 @@ ifeq ($(STATIC_LIBS), true) FindStaticLib = endif +# Returns the module specific java header dir if it exists. +# Param 1 - module name GetJavaHeaderDir = \ - $(wildcard $(SUPPORT_OUTPUTDIR)/headers/$(strip $1)) + $(if $(strip $1),$(wildcard $(SUPPORT_OUTPUTDIR)/headers/$(strip $1))) # Process a dir description such as "java.base:headers" into a set of proper absolute paths. 
ProcessDir = \ @@ -123,15 +125,27 @@ JDK_RCFLAGS=$(RCFLAGS) \ SetupJdkLibrary = $(NamedParamsMacroTemplate) define SetupJdkLibraryBody ifeq ($$($1_OUTPUT_DIR), ) - $1_OUTPUT_DIR := $$(call FindLibDirForModule, $$(MODULE)) + ifneq ($$(MODULE), ) + $1_OUTPUT_DIR := $$(call FindLibDirForModule, $$(MODULE)) + else + $$(error Must specify OUTPUT_DIR in a MODULE free context) + endif endif ifeq ($$($1_OBJECT_DIR), ) - $1_OBJECT_DIR := $$(SUPPORT_OUTPUTDIR)/native/$$(MODULE)/lib$$($1_NAME) + ifneq ($$(MODULE), ) + $1_OBJECT_DIR := $$(SUPPORT_OUTPUTDIR)/native/$$(MODULE)/lib$$($1_NAME) + else + $$(error Must specify OBJECT_DIR in a MODULE free context) + endif endif ifeq ($$($1_SRC), ) - $1_SRC := $$(call FindSrcDirsForLib, $$(MODULE), $$($1_NAME)) + ifneq ($$(MODULE), ) + $1_SRC := $$(call FindSrcDirsForLib, $$(MODULE), $$($1_NAME)) + else + $$(error Must specify SRC in a MODULE free context) + endif else $1_SRC := $$(foreach dir, $$($1_SRC), $$(call ProcessDir, $$(dir))) endif @@ -165,7 +179,8 @@ define SetupJdkLibraryBody ifneq ($$($1_HEADERS_FROM_SRC), false) $1_SRC_HEADER_FLAGS := $$(addprefix -I, $$(wildcard $$($1_SRC))) endif - # Always add the java header dir + + # Add the module specific java header dir $1_SRC_HEADER_FLAGS += $$(addprefix -I, $$(call GetJavaHeaderDir, $$(MODULE))) ifneq ($$($1_EXTRA_HEADER_DIRS), ) @@ -203,11 +218,19 @@ define SetupJdkExecutableBody $1_TYPE := EXECUTABLE ifeq ($$($1_OUTPUT_DIR), ) - $1_OUTPUT_DIR := $$(call FindExecutableDirForModule, $$(MODULE)) + ifneq ($$(MODULE), ) + $1_OUTPUT_DIR := $$(call FindExecutableDirForModule, $$(MODULE)) + else + $$(error Must specify OUTPUT_DIR in a MODULE free context) + endif endif ifeq ($$($1_OBJECT_DIR), ) - $1_OBJECT_DIR := $$(SUPPORT_OUTPUTDIR)/native/$$(MODULE)/$$($1_NAME) + ifneq ($$(MODULE), ) + $1_OBJECT_DIR := $$(SUPPORT_OUTPUTDIR)/native/$$(MODULE)/$$($1_NAME) + else + $$(error Must specify OBJECT_DIR in a MODULE free context) + endif endif ifeq ($$($1_VERSIONINFO_RESOURCE), ) diff --git a/make/common/Modules.gmk b/make/common/Modules.gmk index 72e19840501..10aacff4726 100644 --- a/make/common/Modules.gmk +++ b/make/common/Modules.gmk @@ -59,6 +59,7 @@ BOOT_MODULES += \ java.security.sasl \ java.xml \ jdk.incubator.foreign \ + jdk.incubator.vector \ jdk.internal.vm.ci \ jdk.jfr \ jdk.management \ @@ -124,7 +125,7 @@ endif JRE_TOOL_MODULES += \ jdk.jdwp.agent \ - jdk.incubator.jpackage \ + jdk.jpackage \ # ################################################################################ @@ -144,7 +145,8 @@ DOCS_MODULES += \ jdk.editpad \ jdk.hotspot.agent \ jdk.httpserver \ - jdk.incubator.jpackage \ + jdk.jpackage \ + jdk.incubator.vector \ jdk.jartool \ jdk.javadoc \ jdk.jcmd \ @@ -226,7 +228,7 @@ endif # jpackage is only on windows, macosx, and linux ifeq ($(call isTargetOs, windows macosx linux), false) - MODULES_FILTER += jdk.incubator.jpackage + MODULES_FILTER += jdk.jpackage endif ################################################################################ diff --git a/make/conf/jib-profiles.js b/make/conf/jib-profiles.js index e70d6617745..36460fee4b4 100644 --- a/make/conf/jib-profiles.js +++ b/make/conf/jib-profiles.js @@ -251,6 +251,8 @@ var getJibProfilesCommon = function (input, data) { configure_args: concat("--enable-jtreg-failure-handler", "--with-exclude-translations=de,es,fr,it,ko,pt_BR,sv,ca,tr,cs,sk,ja_JP_A,ja_JP_HA,ja_JP_HI,ja_JP_I,zh_TW,zh_HK", "--disable-manpages", + "--disable-jvm-feature-aot", + "--disable-jvm-feature-graal", "--disable-jvm-feature-shenandoahgc", versionArgs(input, 
common)) }; @@ -404,12 +406,11 @@ var getJibProfilesProfiles = function (input, common, data) { "linux-x64": { target_os: "linux", target_cpu: "x64", - dependencies: ["devkit", "gtest", "graphviz", "pandoc", "graalunit_lib"], + dependencies: ["devkit", "gtest", "graphviz", "pandoc"], configure_args: concat(common.configure_args_64bit, - "--enable-full-docs", "--with-zlib=system", + "--with-zlib=system", "--disable-dtrace", (isWsl(input) ? [ "--host=x86_64-unknown-linux-gnu", "--build=x86_64-unknown-linux-gnu" ] : [])), - default_make_targets: ["docs-bundles"], }, "linux-x86": { @@ -424,7 +425,7 @@ var getJibProfilesProfiles = function (input, common, data) { "macosx-x64": { target_os: "macosx", target_cpu: "x64", - dependencies: ["devkit", "gtest", "pandoc", "graalunit_lib"], + dependencies: ["devkit", "gtest", "pandoc"], configure_args: concat(common.configure_args_64bit, "--with-zlib=system", "--with-macosx-version-max=10.9.0", // Use system SetFile instead of the one in the devkit as the @@ -435,7 +436,7 @@ var getJibProfilesProfiles = function (input, common, data) { "windows-x64": { target_os: "windows", target_cpu: "x64", - dependencies: ["devkit", "gtest", "pandoc", "graalunit_lib"], + dependencies: ["devkit", "gtest", "pandoc"], configure_args: concat(common.configure_args_64bit), }, @@ -455,8 +456,6 @@ var getJibProfilesProfiles = function (input, common, data) { configure_args: [ "--openjdk-target=aarch64-linux-gnu", "--disable-jvm-feature-jvmci", - "--disable-jvm-feature-graal", - "--disable-jvm-feature-aot", ], }, @@ -680,20 +679,47 @@ var getJibProfilesProfiles = function (input, common, data) { common.debug_profile_artifacts(artifactData[name])); }); - profilesArtifacts = { - "linux-x64": { + buildJdkDep = input.build_os + "-" + input.build_cpu + ".jdk"; + docsProfiles = { + "docs": { + target_os: input.build_os, + target_cpu: input.build_cpu, + dependencies: [ + "boot_jdk", "devkit", "graphviz", "pandoc", buildJdkDep, + ], + configure_args: concat( + "--enable-full-docs", + versionArgs(input, common), + "--with-build-jdk=" + input.get(buildJdkDep, "home_path") + + (input.build_os == "macosx" ? "/Contents/Home" : "") + ), + default_make_targets: ["all-docs-bundles"], artifacts: { doc_api_spec: { - local: "bundles/\\(jdk.*doc-api-spec.tar.gz\\)", + local: "bundles/\\(jdk-" + data.version + ".*doc-api-spec.tar.gz\\)", remote: [ "bundles/common/jdk-" + data.version + "_doc-api-spec.tar.gz", "bundles/common/\\1" ], }, + javase_doc_api_spec: { + local: "bundles/\\(javase-" + data.version + ".*doc-api-spec.tar.gz\\)", + remote: [ + "bundles/common/javase-" + data.version + "_doc-api-spec.tar.gz", + "bundles/common/\\1" + ], + }, + reference_doc_api_spec: { + local: "bundles/\\(jdk-reference-" + data.version + ".*doc-api-spec.tar.gz\\)", + remote: [ + "bundles/common/jdk-reference-" + data.version + "_doc-api-spec.tar.gz", + "bundles/common/\\1" + ], + }, } } }; - profiles = concatObjects(profiles, profilesArtifacts); + profiles = concatObjects(profiles, docsProfiles); // Generate open only profiles for all the main and debug profiles. // Rewrite artifact remote paths by adding "openjdk/GPL". 
@@ -960,7 +986,7 @@ var getJibProfilesDependencies = function (input, common) { var devkit_platform_revisions = { linux_x64: "gcc10.2.0-OL6.4+1.0", - macosx_x64: "Xcode11.3.1-MacOSX10.15+1.0", + macosx_x64: "Xcode11.3.1-MacOSX10.15+1.1", windows_x64: "VS2019-16.7.2+1.0", linux_aarch64: "gcc10.2.0-OL7.6+1.0", linux_arm: "gcc8.2.0-Fedora27+1.0", @@ -1126,15 +1152,6 @@ var getJibProfilesDependencies = function (input, common) { configure_args: "", }, - graalunit_lib: { - organization: common.organization, - ext: "zip", - revision: "619_Apr_12_2018", - module: "graalunit-lib", - configure_args: "--with-graalunit-lib=" + input.get("graalunit_lib", "install_path"), - environment_name: "GRAALUNIT_LIB" - }, - gtest: { organization: common.organization, ext: "tar.gz", diff --git a/make/data/blacklistedcertsconverter/blacklisted.certs.pem b/make/data/blacklistedcertsconverter/blacklisted.certs.pem index 191e94e12a5..688becbc493 100644 --- a/make/data/blacklistedcertsconverter/blacklisted.certs.pem +++ b/make/data/blacklistedcertsconverter/blacklisted.certs.pem @@ -1,8 +1,7 @@ #! java BlacklistedCertsConverter SHA-256 -# The line above must be the first line of the blacklisted.certs.pem -# file inside src/share/lib/security/. It will be ignored if added in -# src/closed/share/lib/security/blacklisted.certs.pem. +# The line above must be the first line of this file. Do not +# remove it. // Subject: CN=Digisign Server ID (Enrich), // OU=457608-K, diff --git a/make/data/tzdata/VERSION b/make/data/tzdata/VERSION index e96a6d78497..94ba7462f2e 100644 --- a/make/data/tzdata/VERSION +++ b/make/data/tzdata/VERSION @@ -21,4 +21,4 @@ # or visit www.oracle.com if you need additional information or have any # questions. # -tzdata2020a +tzdata2020d diff --git a/make/data/tzdata/africa b/make/data/tzdata/africa index 7947bc55b00..e1c3d8929e6 100644 --- a/make/data/tzdata/africa +++ b/make/data/tzdata/africa @@ -87,7 +87,7 @@ # Corrections are welcome. # Algeria -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Algeria 1916 only - Jun 14 23:00s 1:00 S Rule Algeria 1916 1919 - Oct Sun>=1 23:00s 0 - Rule Algeria 1917 only - Mar 24 23:00s 1:00 S @@ -110,10 +110,9 @@ Rule Algeria 1978 only - Mar 24 1:00 1:00 S Rule Algeria 1978 only - Sep 22 3:00 0 - Rule Algeria 1980 only - Apr 25 0:00 1:00 S Rule Algeria 1980 only - Oct 31 2:00 0 - -# Shanks & Pottenger give 0:09:20 for Paris Mean Time; go with Howse's -# more precise 0:09:21. +# See Europe/Paris for PMT-related transitions. # Zone NAME STDOFF RULES FORMAT [UNTIL] -Zone Africa/Algiers 0:12:12 - LMT 1891 Mar 15 0:01 +Zone Africa/Algiers 0:12:12 - LMT 1891 Mar 16 0:09:21 - PMT 1911 Mar 11 # Paris Mean Time 0:00 Algeria WE%sT 1940 Feb 25 2:00 1:00 Algeria CE%sT 1946 Oct 7 @@ -199,7 +198,7 @@ Link Africa/Abidjan Atlantic/St_Helena # St Helena # Egypt was mean noon at the Great Pyramid, 2:04:30.5, but apparently this # did not apply to Cairo, Alexandria, or Port Said. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Egypt 1940 only - Jul 15 0:00 1:00 S Rule Egypt 1940 only - Oct 1 0:00 0 - Rule Egypt 1941 only - Apr 15 0:00 1:00 S @@ -434,7 +433,7 @@ Zone Africa/Cairo 2:05:09 - LMT 1900 Oct # now Ghana observed different DST regimes in different years. For # lack of better info, use Shanks except treat the minus sign as a # typo, and assume DST started in 1920 not 1936. 
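
A note on the recurring one-character header change in these tzdata files: upstream tzdata has dropped the long-unused TYPE field of Rule lines (its value is always "-"), so the column-header comments now read "-" where they used to read "TYPE"; the data columns themselves keep the same layout. Read column by column, a Rule line such as the Algeria entry above breaks down as:

# Rule  NAME     FROM  TO    -  IN   ON  AT      SAVE  LETTER/S
Rule    Algeria  1916  only  -  Jun  14  23:00s  1:00  S
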
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Ghana 1920 1942 - Sep 1 0:00 0:20 - Rule Ghana 1920 1942 - Dec 31 0:00 0 - # Zone NAME STDOFF RULES FORMAT [UNTIL] @@ -524,7 +523,7 @@ Zone Africa/Monrovia -0:43:08 - LMT 1882 # From Paul Eggert (2013-10-25): # For now, assume they're reverting to the pre-2012 rules of permanent UT +02. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Libya 1951 only - Oct 14 2:00 1:00 S Rule Libya 1952 only - Jan 1 0:00 0 - Rule Libya 1953 only - Oct 9 2:00 1:00 S @@ -647,7 +646,7 @@ Zone Africa/Tripoli 0:52:44 - LMT 1920 # "The trial ended on March 29, 2009, when the clocks moved back by one hour # at 2am (or 02:00) local time..." -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Mauritius 1982 only - Oct 10 0:00 1:00 - Rule Mauritius 1983 only - Mar 21 0:00 0 - Rule Mauritius 2008 only - Oct lastSun 2:00 1:00 - @@ -898,17 +897,30 @@ Zone Indian/Mauritius 3:50:00 - LMT 1907 # Port Louis # https://maroc-diplomatique.net/maroc-le-retour-a-lheure-gmt-est-prevu-dimanche-prochain/ # http://aujourdhui.ma/actualite/gmt1-retour-a-lheure-normale-dimanche-prochain-1 # -# From Paul Eggert (2020-04-14): +# From Milamber (2020-05-31) +# In Morocco (where I live), the end of Ramadan (Arabic month) is followed by +# the Eid al-Fitr, and concretely it's 1 or 2 day offs for the people (with +# traditional visiting of family, big lunches/dinners, etc.). So for this +# year the astronomical calculations don't include the following 2 days off in +# the calc. These 2 days fall in a Sunday/Monday, so it's not acceptable by +# people to have a time shift during these 2 days off. Perhaps you can modify +# the (predicted) rules for next years: if the end of Ramadan is a (probable) +# Friday or Saturday (and so the 2 days off are on a weekend), the next time +# shift will be the next weekend. +# +# From Paul Eggert (2020-05-31): # For now, guess that in the future Morocco will fall back at 03:00 # the last Sunday before Ramadan, and spring forward at 02:00 the -# first Sunday after the day after Ramadan. To implement this, -# transition dates for 2021 through 2087 were determined by running -# the following program under GNU Emacs 26.3. -# (let ((islamic-year 1442)) +# first Sunday after two days after Ramadan. To implement this, +# transition dates and times for 2019 through 2087 were determined by +# running the following program under GNU Emacs 26.3. (This algorithm +# also produces the correct transition dates for 2016 through 2018, +# though the times differ due to Morocco's time zone change in 2018.) 
+# (let ((islamic-year 1440)) # (require 'cal-islam) # (while (< islamic-year 1511) # (let ((a (calendar-islamic-to-absolute (list 9 1 islamic-year))) -# (b (1+ (calendar-islamic-to-absolute (list 10 1 islamic-year)))) +# (b (+ 2 (calendar-islamic-to-absolute (list 10 1 islamic-year)))) # (sunday 0)) # (while (/= sunday (mod (setq a (1- a)) 7))) # (while (/= sunday (mod b 7)) @@ -923,7 +935,7 @@ Zone Indian/Mauritius 3:50:00 - LMT 1907 # Port Louis # (car (cdr (cdr b))) (calendar-month-name (car b) t) (car (cdr b))))) # (setq islamic-year (+ 1 islamic-year)))) -# RULE NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Morocco 1939 only - Sep 12 0:00 1:00 - Rule Morocco 1939 only - Nov 19 0:00 0 - Rule Morocco 1940 only - Feb 25 0:00 1:00 - @@ -974,7 +986,7 @@ Rule Morocco 2021 only - May 16 2:00 0 - Rule Morocco 2022 only - Mar 27 3:00 -1:00 - Rule Morocco 2022 only - May 8 2:00 0 - Rule Morocco 2023 only - Mar 19 3:00 -1:00 - -Rule Morocco 2023 only - Apr 23 2:00 0 - +Rule Morocco 2023 only - Apr 30 2:00 0 - Rule Morocco 2024 only - Mar 10 3:00 -1:00 - Rule Morocco 2024 only - Apr 14 2:00 0 - Rule Morocco 2025 only - Feb 23 3:00 -1:00 - @@ -990,7 +1002,7 @@ Rule Morocco 2029 only - Feb 18 2:00 0 - Rule Morocco 2029 only - Dec 30 3:00 -1:00 - Rule Morocco 2030 only - Feb 10 2:00 0 - Rule Morocco 2030 only - Dec 22 3:00 -1:00 - -Rule Morocco 2031 only - Jan 26 2:00 0 - +Rule Morocco 2031 only - Feb 2 2:00 0 - Rule Morocco 2031 only - Dec 14 3:00 -1:00 - Rule Morocco 2032 only - Jan 18 2:00 0 - Rule Morocco 2032 only - Nov 28 3:00 -1:00 - @@ -1006,7 +1018,7 @@ Rule Morocco 2036 only - Nov 23 2:00 0 - Rule Morocco 2037 only - Oct 4 3:00 -1:00 - Rule Morocco 2037 only - Nov 15 2:00 0 - Rule Morocco 2038 only - Sep 26 3:00 -1:00 - -Rule Morocco 2038 only - Oct 31 2:00 0 - +Rule Morocco 2038 only - Nov 7 2:00 0 - Rule Morocco 2039 only - Sep 18 3:00 -1:00 - Rule Morocco 2039 only - Oct 23 2:00 0 - Rule Morocco 2040 only - Sep 2 3:00 -1:00 - @@ -1022,7 +1034,7 @@ Rule Morocco 2044 only - Aug 28 2:00 0 - Rule Morocco 2045 only - Jul 9 3:00 -1:00 - Rule Morocco 2045 only - Aug 20 2:00 0 - Rule Morocco 2046 only - Jul 1 3:00 -1:00 - -Rule Morocco 2046 only - Aug 5 2:00 0 - +Rule Morocco 2046 only - Aug 12 2:00 0 - Rule Morocco 2047 only - Jun 23 3:00 -1:00 - Rule Morocco 2047 only - Jul 28 2:00 0 - Rule Morocco 2048 only - Jun 7 3:00 -1:00 - @@ -1038,7 +1050,7 @@ Rule Morocco 2052 only - Jun 2 2:00 0 - Rule Morocco 2053 only - Apr 13 3:00 -1:00 - Rule Morocco 2053 only - May 25 2:00 0 - Rule Morocco 2054 only - Apr 5 3:00 -1:00 - -Rule Morocco 2054 only - May 10 2:00 0 - +Rule Morocco 2054 only - May 17 2:00 0 - Rule Morocco 2055 only - Mar 28 3:00 -1:00 - Rule Morocco 2055 only - May 2 2:00 0 - Rule Morocco 2056 only - Mar 12 3:00 -1:00 - @@ -1054,7 +1066,7 @@ Rule Morocco 2060 only - Mar 7 2:00 0 - Rule Morocco 2061 only - Jan 16 3:00 -1:00 - Rule Morocco 2061 only - Feb 27 2:00 0 - Rule Morocco 2062 only - Jan 8 3:00 -1:00 - -Rule Morocco 2062 only - Feb 12 2:00 0 - +Rule Morocco 2062 only - Feb 19 2:00 0 - Rule Morocco 2062 only - Dec 31 3:00 -1:00 - Rule Morocco 2063 only - Feb 4 2:00 0 - Rule Morocco 2063 only - Dec 16 3:00 -1:00 - @@ -1070,7 +1082,7 @@ Rule Morocco 2067 only - Dec 11 2:00 0 - Rule Morocco 2068 only - Oct 21 3:00 -1:00 - Rule Morocco 2068 only - Dec 2 2:00 0 - Rule Morocco 2069 only - Oct 13 3:00 -1:00 - -Rule Morocco 2069 only - Nov 17 2:00 0 - +Rule Morocco 2069 only - Nov 24 2:00 0 - Rule Morocco 2070 only - Oct 5 3:00 -1:00 - Rule 
Morocco 2070 only - Nov 9 2:00 0 - Rule Morocco 2071 only - Sep 20 3:00 -1:00 - @@ -1086,7 +1098,7 @@ Rule Morocco 2075 only - Sep 15 2:00 0 - Rule Morocco 2076 only - Jul 26 3:00 -1:00 - Rule Morocco 2076 only - Sep 6 2:00 0 - Rule Morocco 2077 only - Jul 18 3:00 -1:00 - -Rule Morocco 2077 only - Aug 22 2:00 0 - +Rule Morocco 2077 only - Aug 29 2:00 0 - Rule Morocco 2078 only - Jul 10 3:00 -1:00 - Rule Morocco 2078 only - Aug 14 2:00 0 - Rule Morocco 2079 only - Jun 25 3:00 -1:00 - @@ -1096,13 +1108,13 @@ Rule Morocco 2080 only - Jul 21 2:00 0 - Rule Morocco 2081 only - Jun 1 3:00 -1:00 - Rule Morocco 2081 only - Jul 13 2:00 0 - Rule Morocco 2082 only - May 24 3:00 -1:00 - -Rule Morocco 2082 only - Jun 28 2:00 0 - +Rule Morocco 2082 only - Jul 5 2:00 0 - Rule Morocco 2083 only - May 16 3:00 -1:00 - Rule Morocco 2083 only - Jun 20 2:00 0 - Rule Morocco 2084 only - Apr 30 3:00 -1:00 - Rule Morocco 2084 only - Jun 11 2:00 0 - Rule Morocco 2085 only - Apr 22 3:00 -1:00 - -Rule Morocco 2085 only - May 27 2:00 0 - +Rule Morocco 2085 only - Jun 3 2:00 0 - Rule Morocco 2086 only - Apr 14 3:00 -1:00 - Rule Morocco 2086 only - May 19 2:00 0 - Rule Morocco 2087 only - Mar 30 3:00 -1:00 - @@ -1203,7 +1215,7 @@ Link Africa/Maputo Africa/Lusaka # Zambia # Use plain "WAT" and "CAT" for the time zone abbreviations, to be compatible # with Namibia's neighbors. -# RULE NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S # Vanguard section, for zic and other parsers that support negative DST. Rule Namibia 1994 only - Mar 21 0:00 -1:00 WAT Rule Namibia 1994 2017 - Sep Sun>=1 2:00 0 CAT @@ -1326,7 +1338,7 @@ Zone Indian/Mahe 3:41:48 - LMT 1906 Jun # Victoria # See Africa/Nairobi. # South Africa -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule SA 1942 1943 - Sep Sun>=15 2:00 1:00 - Rule SA 1943 1944 - Mar Sun>=15 2:00 0 - # Zone NAME STDOFF RULES FORMAT [UNTIL] @@ -1359,7 +1371,7 @@ Link Africa/Johannesburg Africa/Mbabane # Eswatini # Abdalla of NTC, archived at: # https://mm.icann.org/pipermail/tz/2017-October/025333.html -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Sudan 1970 only - May 1 0:00 1:00 S Rule Sudan 1970 1985 - Oct 15 0:00 0 - Rule Sudan 1971 only - Apr 30 0:00 1:00 S @@ -1447,7 +1459,7 @@ Zone Africa/Juba 2:06:28 - LMT 1931 # http://www.almadenahnews.com/newss/news.php?c=118&id=38036 # http://www.worldtimezone.com/dst_news/dst_news_tunis02.html -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Tunisia 1939 only - Apr 15 23:00s 1:00 S Rule Tunisia 1939 only - Nov 18 23:00s 0 - Rule Tunisia 1940 only - Feb 25 23:00s 1:00 S @@ -1474,9 +1486,7 @@ Rule Tunisia 2005 only - Sep 30 1:00s 0 - Rule Tunisia 2006 2008 - Mar lastSun 2:00s 1:00 S Rule Tunisia 2006 2008 - Oct lastSun 2:00s 0 - -# Shanks & Pottenger give 0:09:20 for Paris Mean Time; go with Howse's -# more precise 0:09:21. -# Shanks & Pottenger say the 1911 switch was on Mar 9; go with Howse's Mar 11. +# See Europe/Paris for PMT-related transitions. 
# Zone NAME STDOFF RULES FORMAT [UNTIL] Zone Africa/Tunis 0:40:44 - LMT 1881 May 12 0:09:21 - PMT 1911 Mar 11 # Paris Mean Time diff --git a/make/data/tzdata/antarctica b/make/data/tzdata/antarctica index 6239f837e9f..509fadc29a9 100644 --- a/make/data/tzdata/antarctica +++ b/make/data/tzdata/antarctica @@ -93,15 +93,30 @@ # Australian Antarctica Division informed us that Casey changed time # zone to UTC+11 in "the morning of 22nd October 2016". +# From Steffen Thorsen (2020-10-02, as corrected): +# Based on information we have received from the Australian Antarctic +# Division, Casey station and Macquarie Island station will move to Tasmanian +# daylight savings time on Sunday 4 October. This will take effect from 0001 +# hrs on Sunday 4 October 2020 and will mean Casey and Macquarie Island will +# be on the same time zone as Hobart. Some past dates too for this 3 hour +# time change back and forth between UTC+8 and UTC+11 for Casey: +# - 2018 Oct 7 4:00 - 2019 Mar 17 3:00 - 2019 Oct 4 3:00 - 2020 Mar 8 3:00 +# and now - 2020 Oct 4 0:01 + # Zone NAME STDOFF RULES FORMAT [UNTIL] -Zone Antarctica/Casey 0 - -00 1969 - 8:00 - +08 2009 Oct 18 2:00 +Zone Antarctica/Casey 0 - -00 1969 + 8:00 - +08 2009 Oct 18 2:00 11:00 - +11 2010 Mar 5 2:00 - 8:00 - +08 2011 Oct 28 2:00 + 8:00 - +08 2011 Oct 28 2:00 11:00 - +11 2012 Feb 21 17:00u - 8:00 - +08 2016 Oct 22 + 8:00 - +08 2016 Oct 22 11:00 - +11 2018 Mar 11 4:00 - 8:00 - +08 + 8:00 - +08 2018 Oct 7 4:00 + 11:00 - +11 2019 Mar 17 3:00 + 8:00 - +08 2019 Oct 4 3:00 + 11:00 - +11 2020 Mar 8 3:00 + 8:00 - +08 2020 Oct 4 0:01 + 11:00 - +11 Zone Antarctica/Davis 0 - -00 1957 Jan 13 7:00 - +07 1964 Nov 0 - -00 1969 Feb @@ -247,7 +262,7 @@ Zone Antarctica/Syowa 0 - -00 1957 Jan 29 # suggested by Bengt-Inge Larsson comment them out for now, and approximate # with only UTC and CEST. Uncomment them when 2014b is more prevalent. # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S #Rule Troll 2005 max - Mar 1 1:00u 1:00 +01 Rule Troll 2005 max - Mar lastSun 1:00u 2:00 +02 #Rule Troll 2005 max - Oct lastSun 1:00u 1:00 +01 diff --git a/make/data/tzdata/asia b/make/data/tzdata/asia index 0700aa46b41..acca6554fa2 100644 --- a/make/data/tzdata/asia +++ b/make/data/tzdata/asia @@ -93,7 +93,7 @@ ############################################################################### # These rules are stolen from the 'europe' file. 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule EUAsia 1981 max - Mar lastSun 1:00u 1:00 S Rule EUAsia 1979 1995 - Sep lastSun 1:00u 0 - Rule EUAsia 1996 max - Oct lastSun 1:00u 0 - @@ -137,7 +137,7 @@ Zone Asia/Kabul 4:36:48 - LMT 1890 # or # (brief) # http://www.worldtimezone.com/dst_news/dst_news_armenia03.html -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Armenia 2011 only - Mar lastSun 2:00s 1:00 - Rule Armenia 2011 only - Oct lastSun 2:00s 0 - # Zone NAME STDOFF RULES FORMAT [UNTIL] @@ -163,7 +163,7 @@ Zone Asia/Yerevan 2:58:00 - LMT 1924 May 2 # http://vestnikkavkaza.net/news/Azerbaijani-Cabinet-of-Ministers-cancels-daylight-saving-time.html # http://en.apa.az/xeber_azerbaijan_abolishes_daylight_savings_ti_240862.html -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Azer 1997 2015 - Mar lastSun 4:00 1:00 - Rule Azer 1997 2015 - Oct lastSun 5:00 0 - # Zone NAME STDOFF RULES FORMAT [UNTIL] @@ -250,7 +250,7 @@ Zone Asia/Baku 3:19:24 - LMT 1924 May 2 # http://www.thedailystar.net/newDesign/latest_news.php?nid=22817 # http://www.worldtimezone.com/dst_news/dst_news_bangladesh06.html -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Dhaka 2009 only - Jun 19 23:00 1:00 - Rule Dhaka 2009 only - Dec 31 24:00 0 - @@ -326,7 +326,7 @@ Zone Asia/Yangon 6:24:47 - LMT 1880 # or Rangoon # generally esteemed a success, it was announced early in 1920 that it would # not be repeated." # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Shang 1919 only - Apr 12 24:00 1:00 D Rule Shang 1919 only - Sep 30 24:00 0 S @@ -422,7 +422,7 @@ Rule Shang 1919 only - Sep 30 24:00 0 S # the Yangtze river delta area during that period of time although the scope # of such use will need to be investigated to determine. # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Shang 1940 only - Jun 1 0:00 1:00 D Rule Shang 1940 only - Oct 12 24:00 0 S Rule Shang 1941 only - Mar 15 0:00 1:00 D @@ -485,7 +485,7 @@ Rule Shang 1948 1949 - Sep 30 24:00 0 S #plan # to begin on 17 April. # http://data.people.com.cn/pic/101p/1988/04/1988041201.jpg -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule PRC 1986 only - May 4 2:00 1:00 D Rule PRC 1986 1991 - Sep Sun>=11 2:00 0 S Rule PRC 1987 1991 - Apr Sun>=11 2:00 1:00 D @@ -869,7 +869,7 @@ Zone Asia/Urumqi 5:50:20 - LMT 1928 # or dates for the 1942 and 1945 transitions. # The Japanese occupation of Hong Kong began 1941-12-25. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule HK 1946 only - Apr 21 0:00 1:00 S Rule HK 1946 only - Dec 1 3:30s 0 - Rule HK 1947 only - Apr 13 3:30s 1:00 S @@ -996,7 +996,7 @@ Zone Asia/Hong_Kong 7:36:42 - LMT 1904 Oct 30 0:36:42 # until 1945-09-21 at 01:00, overriding Shanks & Pottenger. # Likewise, use Yu-Cheng Chuang's data for DST in Taiwan. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Taiwan 1946 only - May 15 0:00 1:00 D Rule Taiwan 1946 only - Oct 1 0:00 0 S Rule Taiwan 1947 only - Apr 15 0:00 1:00 D @@ -1122,7 +1122,7 @@ Zone Asia/Taipei 8:06:00 - LMT 1896 Jan 1 # The 1904 decree says that Macau changed from the meridian of # Fortaleza do Monte, presumably the basis for the 7:34:10 for LMT. 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Macau 1942 1943 - Apr 30 23:00 1:00 - Rule Macau 1942 only - Nov 17 23:00 0 - Rule Macau 1943 only - Sep 30 23:00 0 S @@ -1180,7 +1180,7 @@ Zone Asia/Macau 7:34:10 - LMT 1904 Oct 30 # Cyprus to remain united in time. Cyprus Mail 2017-10-17. # https://cyprus-mail.com/2017/10/17/cyprus-remain-united-time/ -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Cyprus 1975 only - Apr 13 0:00 1:00 S Rule Cyprus 1975 only - Oct 12 0:00 0 - Rule Cyprus 1976 only - May 15 0:00 1:00 S @@ -1557,7 +1557,7 @@ Zone Asia/Jayapura 9:22:48 - LMT 1932 Nov # be changed back to its previous state on the 24 hours of the # thirtieth day of Shahrivar. # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Iran 1978 1980 - Mar 20 24:00 1:00 - Rule Iran 1978 only - Oct 20 24:00 0 - Rule Iran 1979 only - Sep 18 24:00 0 - @@ -1699,7 +1699,7 @@ Zone Asia/Tehran 3:25:44 - LMT 1916 # We have published a short article in English about the change: # https://www.timeanddate.com/news/time/iraq-dumps-daylight-saving.html -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Iraq 1982 only - May 1 0:00 1:00 - Rule Iraq 1982 1984 - Oct 1 0:00 0 - Rule Iraq 1983 only - Mar 31 0:00 1:00 - @@ -1722,6 +1722,10 @@ Zone Asia/Baghdad 2:57:40 - LMT 1890 # Israel +# For more info about the motivation for DST in Israel, see: +# Barak Y. Israel's Daylight Saving Time controversy. Israel Affairs. +# 2020-08-11. https://doi.org/10.1080/13537121.2020.1806564 + # From Ephraim Silverberg (2001-01-11): # # I coined "IST/IDT" circa 1988. Until then there were three @@ -1743,7 +1747,7 @@ Zone Asia/Baghdad 2:57:40 - LMT 1890 # family is from India). # From Shanks & Pottenger: -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Zion 1940 only - Jun 1 0:00 1:00 D Rule Zion 1942 1944 - Nov 1 0:00 0 S Rule Zion 1943 only - Apr 1 2:00 1:00 D @@ -1835,7 +1839,7 @@ Rule Zion 1988 only - Sep 4 0:00 0 S # (except in 2002) is three nights before Yom Kippur [Day of Atonement] # (the eve of the 7th of Tishrei in the lunar Hebrew calendar). -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Zion 1989 only - Apr 30 0:00 1:00 D Rule Zion 1989 only - Sep 3 0:00 0 S Rule Zion 1990 only - Mar 25 0:00 1:00 D @@ -1851,7 +1855,7 @@ Rule Zion 1993 only - Sep 5 0:00 0 S # Ministry of Interior, Jerusalem, Israel. The spokeswoman can be reached by # calling the office directly at 972-2-6701447 or 972-2-6701448. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Zion 1994 only - Apr 1 0:00 1:00 D Rule Zion 1994 only - Aug 28 0:00 0 S Rule Zion 1995 only - Mar 31 0:00 1:00 D @@ -1871,7 +1875,7 @@ Rule Zion 1995 only - Sep 3 0:00 0 S # # where YYYY is the relevant year. 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Zion 1996 only - Mar 15 0:00 1:00 D Rule Zion 1996 only - Sep 16 0:00 0 S Rule Zion 1997 only - Mar 21 0:00 1:00 D @@ -1894,7 +1898,7 @@ Rule Zion 1999 only - Sep 3 2:00 0 S # # ftp://ftp.cs.huji.ac.il/pub/tz/announcements/2000-2004.ps.gz -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Zion 2000 only - Apr 14 2:00 1:00 D Rule Zion 2000 only - Oct 6 1:00 0 S Rule Zion 2001 only - Apr 9 1:00 1:00 D @@ -1916,7 +1920,7 @@ Rule Zion 2004 only - Sep 22 1:00 0 S # # ftp://ftp.cs.huji.ac.il/pub/tz/announcements/2005+beyond.ps -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Zion 2005 2012 - Apr Fri<=1 2:00 1:00 D Rule Zion 2005 only - Oct 9 2:00 0 S Rule Zion 2006 only - Oct 1 2:00 0 S @@ -1936,7 +1940,7 @@ Rule Zion 2012 only - Sep 23 2:00 0 S # As of 2013, DST starts at 02:00 on the Friday before the last Sunday # in March. DST ends at 02:00 on the last Sunday of October. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Zion 2013 max - Mar Fri>=23 2:00 1:00 D Rule Zion 2013 max - Oct lastSun 2:00 0 S @@ -2036,7 +2040,7 @@ Zone Asia/Jerusalem 2:20:54 - LMT 1880 # do in any POSIX or C platform. The "25:00" assumes zic from 2007 or later, # which should be safe now. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Japan 1948 only - May Sat>=1 24:00 1:00 D Rule Japan 1948 1951 - Sep Sat>=8 25:00 0 S Rule Japan 1949 only - Apr Sat>=1 24:00 1:00 D @@ -2113,7 +2117,7 @@ Zone Asia/Tokyo 9:18:59 - LMT 1887 Dec 31 15:00u # From Paul Eggert (2013-12-11): # As Steffen suggested, consider the past 21-month experiment to be DST. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Jordan 1973 only - Jun 6 0:00 1:00 S Rule Jordan 1973 1975 - Oct 1 0:00 0 - Rule Jordan 1974 1977 - May 1 0:00 1:00 S @@ -2439,7 +2443,7 @@ Zone Asia/Oral 3:25:24 - LMT 1924 May 2 # or Ural'sk # Our government cancels daylight saving time 6th of August 2005. # From 2005-08-12 our GMT-offset is +6, w/o any daylight saving. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Kyrgyz 1992 1996 - Apr Sun>=7 0:00s 1:00 - Rule Kyrgyz 1992 1996 - Sep lastSun 0:00 0 - Rule Kyrgyz 1997 2005 - Mar lastSun 2:30 1:00 - @@ -2495,7 +2499,7 @@ Zone Asia/Bishkek 4:58:24 - LMT 1924 May 2 # follow and continued to use GMT+9:00 for interoperability. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule ROK 1948 only - Jun 1 0:00 1:00 D Rule ROK 1948 only - Sep 12 24:00 0 S Rule ROK 1949 only - Apr 3 0:00 1:00 D @@ -2583,7 +2587,7 @@ Zone Asia/Pyongyang 8:23:00 - LMT 1908 Apr 1 # Lebanon -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Lebanon 1920 only - Mar 28 0:00 1:00 S Rule Lebanon 1920 only - Oct 25 0:00 0 - Rule Lebanon 1921 only - Apr 3 0:00 1:00 S @@ -2613,7 +2617,7 @@ Zone Asia/Beirut 2:22:00 - LMT 1880 2:00 Lebanon EE%sT # Malaysia -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule NBorneo 1935 1941 - Sep 14 0:00 0:20 - Rule NBorneo 1935 1941 - Dec 14 0:00 0 - # @@ -2758,7 +2762,7 @@ Zone Indian/Maldives 4:54:00 - LMT 1880 # Malé # September daylight saving time ends. 
Source: # http://zasag.mn/news/view/8969 -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Mongol 1983 1984 - Apr 1 0:00 1:00 - Rule Mongol 1983 only - Oct 1 0:00 0 - # Shanks & Pottenger and IATA SSIM say 1990s switches occurred at 00:00, @@ -2946,7 +2950,7 @@ Zone Asia/Kathmandu 5:41:16 - LMT 1920 # "People laud PM's announcement to end DST" # http://www.app.com.pk/en_/index.php?option=com_content&task=view&id=99374&Itemid=2 -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Pakistan 2002 only - Apr Sun>=2 0:00 1:00 S Rule Pakistan 2002 only - Oct Sun>=2 0:00 0 - Rule Pakistan 2008 only - Jun 1 0:00 1:00 S @@ -3240,15 +3244,42 @@ Zone Asia/Karachi 4:28:12 - LMT 1907 # From Sharef Mustafa (2019-10-18): # Palestine summer time will end on midnight Oct 26th 2019 ... -# http://www.palestinecabinet.gov.ps/website/ar/ViewDetails?ID=43948 # -# From Paul Eggert (2019-04-10): -# For now, guess spring-ahead transitions are March's last Friday at 00:00. +# From Steffen Thorsen (2020-10-20): +# Some sources such as these say, and display on clocks, that DST ended at +# midnight last year... +# https://www.amad.ps/ar/post/320006 +# +# From Tim Parenti (2020-10-20): +# The report of the Palestinian Cabinet meeting of 2019-10-14 confirms +# a decision on (translated): "The start of the winter time in Palestine, by +# delaying the clock by sixty minutes, starting from midnight on Friday / +# Saturday corresponding to 26/10/2019." +# http://www.palestinecabinet.gov.ps/portal/meeting/details/43948 + +# From Sharef Mustafa (2020-10-20): +# As per the palestinian cabinet announcement yesterday , the day light saving +# shall [end] on Oct 24th 2020 at 01:00AM by delaying the clock by 60 minutes. +# http://www.palestinecabinet.gov.ps/portal/Meeting/Details/51584 + +# From Tim Parenti (2020-10-20): +# Predict future fall transitions at 01:00 on the Saturday preceding October's +# last Sunday (i.e., Sat>=24). This is consistent with our predictions since +# 2016, although the time of the change differed slightly in 2019. + +# From Pierre Cashon (2020-10-20): +# The summer time this year started on March 28 at 00:00. +# https://wafa.ps/ar_page.aspx?id=GveQNZa872839351758aGveQNZ +# http://www.palestinecabinet.gov.ps/portal/meeting/details/50284 +# The winter time in 2015 started on October 23 at 01:00. +# https://wafa.ps/ar_page.aspx?id=CgpCdYa670694628582aCgpCdY +# http://www.palestinecabinet.gov.ps/portal/meeting/details/27583 # -# From Tim Parenti (2016-10-19): -# Predict fall transitions on October's last Saturday at 01:00 from now on. +# From Paul Eggert (2019-04-10): +# For now, guess spring-ahead transitions are at 00:00 on the Saturday +# preceding March's last Sunday (i.e., Sat>=24). 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule EgyptAsia 1957 only - May 10 0:00 1:00 S Rule EgyptAsia 1957 1958 - Oct 1 0:00 0 - Rule EgyptAsia 1958 only - May 1 0:00 1:00 S @@ -3262,10 +3293,10 @@ Rule Palestine 2004 only - Oct 1 1:00 0 - Rule Palestine 2005 only - Oct 4 2:00 0 - Rule Palestine 2006 2007 - Apr 1 0:00 1:00 S Rule Palestine 2006 only - Sep 22 0:00 0 - -Rule Palestine 2007 only - Sep Thu>=8 2:00 0 - +Rule Palestine 2007 only - Sep 13 2:00 0 - Rule Palestine 2008 2009 - Mar lastFri 0:00 1:00 S Rule Palestine 2008 only - Sep 1 0:00 0 - -Rule Palestine 2009 only - Sep Fri>=1 1:00 0 - +Rule Palestine 2009 only - Sep 4 1:00 0 - Rule Palestine 2010 only - Mar 26 0:00 1:00 S Rule Palestine 2010 only - Aug 11 0:00 0 - Rule Palestine 2011 only - Apr 1 0:01 1:00 S @@ -3274,12 +3305,16 @@ Rule Palestine 2011 only - Aug 30 0:00 1:00 S Rule Palestine 2011 only - Sep 30 0:00 0 - Rule Palestine 2012 2014 - Mar lastThu 24:00 1:00 S Rule Palestine 2012 only - Sep 21 1:00 0 - -Rule Palestine 2013 only - Sep Fri>=21 0:00 0 - -Rule Palestine 2014 2015 - Oct Fri>=21 0:00 0 - -Rule Palestine 2015 only - Mar lastFri 24:00 1:00 S +Rule Palestine 2013 only - Sep 27 0:00 0 - +Rule Palestine 2014 only - Oct 24 0:00 0 - +Rule Palestine 2015 only - Mar 28 0:00 1:00 S +Rule Palestine 2015 only - Oct 23 1:00 0 - Rule Palestine 2016 2018 - Mar Sat>=24 1:00 1:00 S -Rule Palestine 2016 max - Oct lastSat 1:00 0 - -Rule Palestine 2019 max - Mar lastFri 0:00 1:00 S +Rule Palestine 2016 2018 - Oct Sat>=24 1:00 0 - +Rule Palestine 2019 only - Mar 29 0:00 1:00 S +Rule Palestine 2019 only - Oct Sat>=24 0:00 0 - +Rule Palestine 2020 max - Mar Sat>=24 0:00 1:00 S +Rule Palestine 2020 max - Oct Sat>=24 1:00 0 - # Zone NAME STDOFF RULES FORMAT [UNTIL] Zone Asia/Gaza 2:17:52 - LMT 1900 Oct @@ -3348,7 +3383,7 @@ Zone Asia/Hebron 2:20:23 - LMT 1900 Oct # influence of the sources. There is no current abbreviation for DST, # so use "PDT", the usual American style. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Phil 1936 only - Nov 1 0:00 1:00 D Rule Phil 1937 only - Feb 1 0:00 0 S Rule Phil 1954 only - Apr 12 0:00 1:00 D @@ -3496,7 +3531,7 @@ Zone Asia/Colombo 5:19:24 - LMT 1880 5:30 - +0530 # Syria -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Syria 1920 1923 - Apr Sun>=15 2:00 1:00 S Rule Syria 1920 1923 - Oct Sun>=1 2:00 0 - Rule Syria 1962 only - Apr 29 2:00 1:00 S diff --git a/make/data/tzdata/australasia b/make/data/tzdata/australasia index e66d5ca4d79..1f0fd47959f 100644 --- a/make/data/tzdata/australasia +++ b/make/data/tzdata/australasia @@ -36,7 +36,7 @@ # Please see the notes below for the controversy about "EST" versus "AEST" etc. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Aus 1917 only - Jan 1 0:01 1:00 D Rule Aus 1917 only - Mar 25 2:00 0 S Rule Aus 1942 only - Jan 1 2:00 1:00 D @@ -55,7 +55,7 @@ Zone Australia/Darwin 8:43:20 - LMT 1895 Feb 9:30 Aus AC%sT # Western Australia # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule AW 1974 only - Oct lastSun 2:00s 1:00 D Rule AW 1975 only - Mar Sun>=1 2:00s 0 S Rule AW 1983 only - Oct lastSun 2:00s 1:00 D @@ -93,7 +93,7 @@ Zone Australia/Eucla 8:35:28 - LMT 1895 Dec # applies to all of the Whitsundays. 
# http://www.australia.gov.au/about-australia/australian-story/austn-islands # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule AQ 1971 only - Oct lastSun 2:00s 1:00 D Rule AQ 1972 only - Feb lastSun 2:00s 0 S Rule AQ 1989 1991 - Oct lastSun 2:00s 1:00 D @@ -109,7 +109,7 @@ Zone Australia/Lindeman 9:55:56 - LMT 1895 10:00 Holiday AE%sT # South Australia -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule AS 1971 1985 - Oct lastSun 2:00s 1:00 D Rule AS 1986 only - Oct 19 2:00s 1:00 D Rule AS 1987 2007 - Oct lastSun 2:00s 1:00 D @@ -137,7 +137,7 @@ Zone Australia/Adelaide 9:14:20 - LMT 1895 Feb # http://www.bom.gov.au/climate/averages/tables/dst_times.shtml # says King Island didn't observe DST from WWII until late 1971. # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule AT 1967 only - Oct Sun>=1 2:00s 1:00 D Rule AT 1968 only - Mar lastSun 2:00s 0 S Rule AT 1968 1985 - Oct lastSun 2:00s 1:00 D @@ -170,7 +170,7 @@ Zone Australia/Currie 9:35:28 - LMT 1895 Sep 10:00 AT AE%sT # Victoria -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule AV 1971 1985 - Oct lastSun 2:00s 1:00 D Rule AV 1972 only - Feb lastSun 2:00s 0 S Rule AV 1973 1985 - Mar Sun>=1 2:00s 0 S @@ -191,7 +191,7 @@ Zone Australia/Melbourne 9:39:52 - LMT 1895 Feb 10:00 AV AE%sT # New South Wales -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule AN 1971 1985 - Oct lastSun 2:00s 1:00 D Rule AN 1972 only - Feb 27 2:00s 0 S Rule AN 1973 1981 - Mar Sun>=1 2:00s 0 S @@ -220,7 +220,7 @@ Zone Australia/Broken_Hill 9:25:48 - LMT 1895 Feb 9:30 AS AC%sT # Lord Howe Island -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule LH 1981 1984 - Oct lastSun 2:00 1:00 - Rule LH 1982 1985 - Mar Sun>=1 2:00 0 - Rule LH 1985 only - Oct lastSun 2:00 0:30 - @@ -275,8 +275,9 @@ Zone Antarctica/Macquarie 0 - -00 1899 Nov 10:00 Aus AE%sT 1919 Apr 1 0:00s 0 - -00 1948 Mar 25 10:00 Aus AE%sT 1967 - 10:00 AT AE%sT 2010 Apr 4 3:00 - 11:00 - +11 + 10:00 AT AE%sT 2010 + 10:00 1:00 AEDT 2011 + 10:00 AT AE%sT # Christmas # Zone NAME STDOFF RULES FORMAT [UNTIL] @@ -403,7 +404,20 @@ Zone Indian/Cocos 6:27:40 - LMT 1900 # From Michael Deckers (2019-08-06): # https://www.laws.gov.fj/LawsAsMade/downloadfile/848 -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# From Raymond Kumar (2020-10-08): +# [DST in Fiji] is from December 20th 2020, till 17th January 2021. +# From Alan Mintz (2020-10-08): +# https://www.laws.gov.fj/LawsAsMade/GetFile/1071 +# From Tim Parenti (2020-10-08): +# https://www.fijivillage.com/news/Daylight-saving-from-Dec-20th-this-year-to-Jan-17th-2021-8rf4x5/ +# "Minister for Employment, Parveen Bala says they had never thought of +# stopping daylight saving. He says it was just to decide on when it should +# start and end. Bala says it is a short period..." +# Since the end date is still in line with our ongoing predictions, assume for +# now that the later-than-usual start date is a one-time departure from the +# recent second Sunday in November pattern. 
+ +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Fiji 1998 1999 - Nov Sun>=1 2:00 1:00 - Rule Fiji 1999 2000 - Feb lastSun 3:00 0 - Rule Fiji 2009 only - Nov 29 2:00 1:00 - @@ -414,7 +428,9 @@ Rule Fiji 2012 2013 - Jan Sun>=18 3:00 0 - Rule Fiji 2014 only - Jan Sun>=18 2:00 0 - Rule Fiji 2014 2018 - Nov Sun>=1 2:00 1:00 - Rule Fiji 2015 max - Jan Sun>=12 3:00 0 - -Rule Fiji 2019 max - Nov Sun>=8 2:00 1:00 - +Rule Fiji 2019 only - Nov Sun>=8 2:00 1:00 - +Rule Fiji 2020 only - Dec 20 2:00 1:00 - +Rule Fiji 2021 max - Nov Sun>=8 2:00 1:00 - # Zone NAME STDOFF RULES FORMAT [UNTIL] Zone Pacific/Fiji 11:55:44 - LMT 1915 Oct 26 # Suva 12:00 Fiji +12/+13 @@ -432,7 +448,7 @@ Zone Pacific/Tahiti -9:58:16 - LMT 1912 Oct # Papeete # Guam -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S # http://guamlegislature.com/Public_Laws_5th/PL05-025.pdf # http://documents.guam.gov/wp-content/uploads/E.O.-59-7-Guam-Daylight-Savings-Time-May-6-1959.pdf Rule Guam 1959 only - Jun 27 2:00 1:00 D @@ -543,7 +559,7 @@ Zone Pacific/Nauru 11:07:40 - LMT 1921 Jan 15 # Uaobe 12:00 - +12 # New Caledonia -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule NC 1977 1978 - Dec Sun>=1 0:00 1:00 - Rule NC 1978 1979 - Feb 27 0:00 0 - Rule NC 1996 only - Dec 1 2:00s 1:00 - @@ -558,7 +574,7 @@ Zone Pacific/Noumea 11:05:48 - LMT 1912 Jan 13 # Nouméa # New Zealand -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule NZ 1927 only - Nov 6 2:00 1:00 S Rule NZ 1928 only - Mar 4 2:00 0 M Rule NZ 1928 1933 - Oct Sun>=8 2:00 0:30 S @@ -610,7 +626,7 @@ Link Pacific/Auckland Antarctica/McMurdo # Cook Is # From Shanks & Pottenger: -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Cook 1978 only - Nov 12 0:00 0:30 - Rule Cook 1979 1991 - Mar Sun>=1 0:00 0 - Rule Cook 1979 1990 - Oct lastSun 0:00 0:30 - @@ -755,7 +771,7 @@ Link Pacific/Pago_Pago Pacific/Midway # in US minor outlying islands # That web page currently lists transitions for 2012/3 and 2013/4. # Assume the pattern instituted in 2012 will continue indefinitely. 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule WS 2010 only - Sep lastSun 0:00 1 - Rule WS 2011 only - Apr Sat>=1 4:00 0 - Rule WS 2011 only - Sep lastSat 3:00 1 - @@ -799,7 +815,7 @@ Zone Pacific/Fakaofo -11:24:56 - LMT 1901 13:00 - +13 # Tonga -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Tonga 1999 only - Oct 7 2:00s 1:00 - Rule Tonga 2000 only - Mar 19 2:00s 0 - Rule Tonga 2000 2001 - Nov Sun>=1 2:00 1:00 - @@ -880,7 +896,7 @@ Zone Pacific/Wake 11:06:28 - LMT 1901 # Vanuatu -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Vanuatu 1983 only - Sep 25 0:00 1:00 - Rule Vanuatu 1984 1991 - Mar Sun>=23 0:00 0 - Rule Vanuatu 1984 only - Oct 23 0:00 1:00 - diff --git a/make/data/tzdata/europe b/make/data/tzdata/europe index 8fed2cf5e98..adb260624dc 100644 --- a/make/data/tzdata/europe +++ b/make/data/tzdata/europe @@ -411,7 +411,7 @@ # http://www.irishstatutebook.ie/eli/1926/sro/919/made/en/print # http://www.irishstatutebook.ie/eli/1947/sro/71/made/en/print -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S # Summer Time Act, 1916 Rule GB-Eire 1916 only - May 21 2:00s 1:00 BST Rule GB-Eire 1916 only - Oct 1 2:00s 0 GMT @@ -552,7 +552,7 @@ Link Europe/London Europe/Isle_of_Man # The following is like GB-Eire and EU, except with standard time in # summer and negative daylight saving time in winter. It is for when # negative SAVE values are used. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Eire 1971 only - Oct 31 2:00u -1:00 - Rule Eire 1972 1980 - Mar Sun>=16 2:00u 0 - Rule Eire 1972 1980 - Oct Sun>=23 2:00u -1:00 - @@ -589,7 +589,7 @@ Zone Europe/Dublin -0:25:00 - LMT 1880 Aug 2 # predecessor organization, the European Communities. # For brevity they are called "EU rules" elsewhere in this file. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule EU 1977 1980 - Apr Sun>=1 1:00u 1:00 S Rule EU 1977 only - Sep lastSun 1:00u 0 - Rule EU 1978 only - Oct 1 1:00u 0 - @@ -629,13 +629,13 @@ Rule C-Eur 1944 only - Oct 2 2:00s 0 - # corrected in version 2008d). The circumstantial evidence is simply the # tz database itself, as seen below: # -# Zone Europe/Paris 0:09:21 - LMT 1891 Mar 15 0:01 +# Zone Europe/Paris ... # 0:00 France WE%sT 1945 Sep 16 3:00 # -# Zone Europe/Monaco 0:29:32 - LMT 1891 Mar 15 +# Zone Europe/Monaco ... # 0:00 France WE%sT 1945 Sep 16 3:00 # -# Zone Europe/Belgrade 1:22:00 - LMT 1884 +# Zone Europe/Belgrade ... # 1:00 1:00 CEST 1945 Sep 16 2:00s # # Rule France 1945 only - Sep 16 3:00 0 - @@ -681,7 +681,7 @@ Rule E-Eur 1996 max - Oct lastSun 0:00 0 - # # The 1917-1921 decree URLs are from Alexander Belopolsky (2016-08-23). -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Russia 1917 only - Jul 1 23:00 1:00 MST # Moscow Summer Time # # Decree No. 142 (1917-12-22) http://istmat.info/node/28137 @@ -795,7 +795,7 @@ Zone EET 2:00 EU EE%sT # Albania -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Albania 1940 only - Jun 16 0:00 1:00 S Rule Albania 1942 only - Nov 2 3:00 0 - Rule Albania 1943 only - Mar 29 2:00 1:00 S @@ -849,7 +849,7 @@ Zone Europe/Andorra 0:06:04 - LMT 1901 # In 1946 the end of DST was on Monday, 7 October 1946, at 3:00 am. # Shanks had this right. 
Source: Die Weltpresse, 5. Oktober 1946, page 5. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Austria 1920 only - Apr 5 2:00s 1:00 S Rule Austria 1920 only - Sep 13 2:00s 0 - Rule Austria 1946 only - Apr 14 2:00s 1:00 S @@ -936,7 +936,7 @@ Zone Europe/Minsk 1:50:16 - LMT 1880 # The 1918 rules are listed for completeness; they apply to unoccupied Belgium. # Assume Brussels switched to WET in 1918 when the armistice took effect. # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Belgium 1918 only - Mar 9 0:00s 1:00 S Rule Belgium 1918 1919 - Oct Sat>=1 23:00s 0 - Rule Belgium 1919 only - Mar 1 23:00s 1:00 S @@ -996,7 +996,7 @@ Zone Europe/Brussels 0:17:30 - LMT 1880 # EET -> EETDST is in 03:00 Local time in last Sunday of March ... # EETDST -> EET is in 04:00 Local time in last Sunday of October # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Bulg 1979 only - Mar 31 23:00 1:00 S Rule Bulg 1979 only - Oct 1 1:00 0 - Rule Bulg 1980 1982 - Apr Sat>=1 23:00 1:00 S @@ -1028,7 +1028,7 @@ Zone Europe/Sofia 1:33:16 - LMT 1880 # We know of no English-language name for historical Czech winter time; # abbreviate it as "GMT", as it happened to be GMT. # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Czech 1945 only - Apr Mon>=1 2:00s 1:00 S Rule Czech 1945 only - Oct 1 2:00s 0 - Rule Czech 1946 only - May 6 2:00s 1:00 S @@ -1052,17 +1052,16 @@ Zone Europe/Prague 0:57:44 - LMT 1850 # Denmark, Faroe Islands, and Greenland # From Jesper Nørgaard Welen (2005-04-26): -# http://www.hum.aau.dk/~poe/tid/tine/DanskTid.htm says that the law -# [introducing standard time] was in effect from 1894-01-01.... -# The page http://www.retsinfo.dk/_GETDOCI_/ACCN/A18930008330-REGL +# the law [introducing standard time] was in effect from 1894-01-01.... +# The page https://www.retsinformation.dk/eli/lta/1893/83 # confirms this, and states that the law was put forth 1893-03-29. # # The EU [actually, EEC and Euratom] treaty with effect from 1973: -# http://www.retsinfo.dk/_GETDOCI_/ACCN/A19722110030-REGL +# https://www.retsinformation.dk/eli/lta/1972/21100 # # This provoked a new law from 1974 to make possible summer time changes # in subsequent decrees with the law -# http://www.retsinfo.dk/_GETDOCI_/ACCN/A19740022330-REGL +# https://www.retsinformation.dk/eli/lta/1974/223 # # It seems however that no decree was set forward until 1980. I have # not found any decree, but in another related law, the effecting DST @@ -1074,7 +1073,7 @@ Zone Europe/Prague 0:57:44 - LMT 1850 # The law is about the management of the extra hour, concerning # working hours reported and effect on obligatory-rest rules (which # was suspended on that night): -# http://www.retsinfo.dk/_GETDOCI_/ACCN/C19801120554-REGL +# https://web.archive.org/web/20140104053304/https://www.retsinformation.dk/Forms/R0710.aspx?id=60267 # From Jesper Nørgaard Welen (2005-06-11): # The Herning Folkeblad (1980-09-26) reported that the night between @@ -1084,7 +1083,7 @@ Zone Europe/Prague 0:57:44 - LMT 1850 # Hence the "02:00" of the 1980 law refers to standard time, not # wall-clock time, and so the EU rules were in effect in 1980. 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Denmark 1916 only - May 14 23:00 1:00 S Rule Denmark 1916 only - Sep 30 23:00 0 - Rule Denmark 1940 only - May 15 0:00 1:00 S @@ -1186,7 +1185,7 @@ Zone Atlantic/Faroe -0:27:04 - LMT 1908 Jan 11 # Tórshavn # http://naalakkersuisut.gl/~/media/Nanoq/Files/Attached%20Files/Engelske-tekster/Legislation/Executive%20Order%20National%20Park.rtf # It is their only National Park. # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Thule 1991 1992 - Mar lastSun 2:00 1:00 D Rule Thule 1991 1992 - Sep lastSun 2:00 0 S Rule Thule 1993 2006 - Apr Sun>=1 2:00 1:00 D @@ -1317,7 +1316,7 @@ Zone Europe/Tallinn 1:39:00 - LMT 1880 # From Paul Eggert (2014-06-14): # Go with Oja over Shanks. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Finland 1942 only - Apr 2 24:00 1:00 S Rule Finland 1942 only - Oct 4 1:00 0 - Rule Finland 1981 1982 - Mar lastSun 2:00 1:00 S @@ -1349,10 +1348,58 @@ Link Europe/Helsinki Europe/Mariehamn # Françoise Gauquelin, Problèmes de l'heure résolus en astrologie, # Guy Trédaniel, Paris 1987 +# From Michael Deckers (2020-06-11): +# the law of 1891 +# was published on 1891-03-15, so it could only take force on 1891-03-16. + +# From Michael Deckers (2020-06-10): +# Le Gaulois, 1911-03-11, page 1/6, online at +# https://www.retronews.fr/societe/echo-de-presse/2018/01/29/1911-change-lheure-de-paris +# ... [ Instantly, all pressure driven clock dials halted... Nine minutes and +# twenty-one seconds later the hands resumed their circular motion. ] +# There are also precise reports about how the change was prepared in train +# stations: all the publicly visible clocks stopped at midnight railway time +# (or were covered), only the chief of service had a watch, labeled +# "Heure ancienne", that he kept running until it reached 00:04:21, when +# he announced "Heure nouvelle". See the "Le Petit Journal 1911-03-11". +# https://gallica.bnf.fr/ark:/12148/bpt6k6192911/f1.item.zoom +# +# From Michael Deckers (2020-06-12): +# That "all French clocks stopped" for 00:09:21 is a misreading of French +# newspapers; this sort of adjustment applies only to certain +# remote-controlled clocks ("pendules pneumatiques", of which there existed +# perhaps a dozen in Paris, and which simply could not be set back remotely), +# but not to all the clocks in all French towns and villages. For instance, +# the following story in the "Courrier de Saône-et-Loire" 1911-03-11, page 2: +# only works if legal time was stepped back (was not monotone): ... +# [One can observe that children who had been born at midnight less 5 +# minutes and who had died at midnight of the old time, would turn out to +# be dead before being born, time having been set back and having +# suppressed 9 minutes and 25 seconds of their existence, that is, more +# than they could spend.] +# +# From Paul Eggert (2020-06-12): +# French time in railway stations was legally five minutes behind civil time, +# which explains why railway "old time" ran to 00:04:21 instead of to 00:09:21. +# The law's text (which Michael Deckers noted is at +# ) says only that +# at 1911-03-11 00:00 legal time was that of Paris mean time delayed by +# nine minutes and twenty-one seconds, and does not say how the +# transition from Paris mean time was to occur. +# +# tzdb has no way to represent stopped clocks. 
As the railway practice +# was to keep a watch running on "old time" to decide when to restart +# the other clocks, this could be modeled as a transition for "old time" at +# 00:09:21. However, since the law was ambiguous and clocks outside railway +# stations were probably done haphazardly with the popular impression being +# that the transition was done at 00:00 "old time", simply leave the time +# blank; this causes zic to default to 00:00 "old time" which is good enough. +# Do something similar for the 1891-03-16 transition. There are similar +# problems in Algiers, Monaco and Tunis. # # Shank & Pottenger seem to use '24:00' ambiguously; resolve it with Whitman. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule France 1916 only - Jun 14 23:00s 1:00 S Rule France 1916 1919 - Oct Sun>=1 23:00s 0 - Rule France 1917 only - Mar 24 23:00s 1:00 S @@ -1412,13 +1459,11 @@ Rule France 1945 only - Sep 16 3:00 0 - # go with Excoffier's 28/3/76 0hUT and 25/9/76 23hUT. Rule France 1976 only - Mar 28 1:00 1:00 S Rule France 1976 only - Sep 26 1:00 0 - -# Shanks & Pottenger give 0:09:20 for Paris Mean Time, and Whitman 0:09:05, -# but Howse quotes the actual French legislation as saying 0:09:21. -# Go with Howse. Howse writes that the time in France was officially based +# Howse writes that the time in France was officially based # on PMT-0:09:21 until 1978-08-09, when the time base finally switched to UTC. # Zone NAME STDOFF RULES FORMAT [UNTIL] -Zone Europe/Paris 0:09:21 - LMT 1891 Mar 15 0:01 - 0:09:21 - PMT 1911 Mar 11 0:01 # Paris MT +Zone Europe/Paris 0:09:21 - LMT 1891 Mar 16 + 0:09:21 - PMT 1911 Mar 11 # Paris Mean Time # Shanks & Pottenger give 1940 Jun 14 0:00; go with Excoffier and Le Corre. 0:00 France WE%sT 1940 Jun 14 23:00 # Le Corre says Paris stuck with occupied-France time after the liberation; @@ -1447,7 +1492,7 @@ Zone Europe/Paris 0:09:21 - LMT 1891 Mar 15 0:01 # this was equivalent to UT +03, not +04. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Germany 1946 only - Apr 14 2:00s 1:00 S Rule Germany 1946 only - Oct 7 2:00s 0 - Rule Germany 1947 1949 - Oct Sun>=1 2:00s 0 - @@ -1499,7 +1544,7 @@ Zone Europe/Gibraltar -0:21:24 - LMT 1880 Aug 2 0:00s 1:00 EU CE%sT # Greece -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S # Whitman gives 1932 Jul 5 - Nov 1; go with Shanks & Pottenger. Rule Greece 1932 only - Jul 7 0:00 1:00 S Rule Greece 1932 only - Sep 1 0:00 0 - @@ -1534,38 +1579,73 @@ Zone Europe/Athens 1:34:52 - LMT 1895 Sep 14 2:00 EU EE%sT # Hungary -# From Paul Eggert (2014-07-15): -# Dates for 1916-1945 are taken from: -# Oross A. Jelen a múlt jövője: a nyári időszámítás Magyarországon 1916-1945. -# National Archives of Hungary (2012-10-29). -# http://mnl.gov.hu/a_het_dokumentuma/a_nyari_idoszamitas_magyarorszagon_19161945.html -# This source does not always give times, which are taken from Shanks -# & Pottenger (which disagree about the dates). -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S -Rule Hungary 1918 only - Apr 1 3:00 1:00 S -Rule Hungary 1918 only - Sep 16 3:00 0 - -Rule Hungary 1919 only - Apr 15 3:00 1:00 S -Rule Hungary 1919 only - Nov 24 3:00 0 - + +# From Michael Deckers (2020-06-09): +# an Austrian encyclopedia of railroads of 1913, online at +# http://www.zeno.org/Roell-1912/A/Eisenbahnzeit +# says that the switch [to CET] happened on 1890-11-01. 
+ +# From Géza Nyáry (2020-06-07): +# Data for 1918-1983 are based on the archive database of Library Hungaricana. +# The dates are collected from original, scanned governmental orders, +# bulletins, instructions and public press. +# [See URLs below.] + +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S +# https://library.hungaricana.hu/hu/view/OGYK_RT_1918/?pg=238 +# https://library.hungaricana.hu/hu/view/OGYK_RT_1919/?pg=808 +# https://library.hungaricana.hu/hu/view/OGYK_RT_1920/?pg=201 +Rule Hungary 1918 1919 - Apr 15 2:00 1:00 S +Rule Hungary 1918 1920 - Sep Mon>=15 3:00 0 - +Rule Hungary 1920 only - Apr 5 2:00 1:00 S +# https://library.hungaricana.hu/hu/view/OGYK_RT_1945/?pg=882 Rule Hungary 1945 only - May 1 23:00 1:00 S -Rule Hungary 1945 only - Nov 1 0:00 0 - +Rule Hungary 1945 only - Nov 1 1:00 0 - +# https://library.hungaricana.hu/hu/view/Delmagyarorszag_1946_03/?pg=49 Rule Hungary 1946 only - Mar 31 2:00s 1:00 S -Rule Hungary 1946 1949 - Oct Sun>=1 2:00s 0 - +# https://library.hungaricana.hu/hu/view/Delmagyarorszag_1946_09/?pg=54 +Rule Hungary 1946 only - Oct 7 2:00 0 - +# https://library.hungaricana.hu/hu/view/KulfBelfHirek_1947_04_1__001-123/?pg=90 +# https://library.hungaricana.hu/hu/view/DunantuliNaplo_1947_09/?pg=128 +# https://library.hungaricana.hu/hu/view/KulfBelfHirek_1948_03_3__001-123/?pg=304 +# https://library.hungaricana.hu/hu/view/Zala_1948_09/?pg=64 +# https://library.hungaricana.hu/hu/view/SatoraljaujhelyiLeveltar_ZempleniNepujsag_1948/?pg=53 +# https://library.hungaricana.hu/hu/view/SatoraljaujhelyiLeveltar_ZempleniNepujsag_1948/?pg=160 +# https://library.hungaricana.hu/hu/view/UjSzo_1949_01-04/?pg=102 +# https://library.hungaricana.hu/hu/view/KeletMagyarorszag_1949_03/?pg=96 +# https://library.hungaricana.hu/hu/view/Delmagyarorszag_1949_09/?pg=94 Rule Hungary 1947 1949 - Apr Sun>=4 2:00s 1:00 S -Rule Hungary 1950 only - Apr 17 2:00s 1:00 S -Rule Hungary 1950 only - Oct 23 2:00s 0 - -Rule Hungary 1954 1955 - May 23 0:00 1:00 S -Rule Hungary 1954 1955 - Oct 3 0:00 0 - -Rule Hungary 1956 only - Jun Sun>=1 0:00 1:00 S -Rule Hungary 1956 only - Sep lastSun 0:00 0 - -Rule Hungary 1957 only - Jun Sun>=1 1:00 1:00 S -Rule Hungary 1957 only - Sep lastSun 3:00 0 - -Rule Hungary 1980 only - Apr 6 1:00 1:00 S +Rule Hungary 1947 1949 - Oct Sun>=1 2:00s 0 - +# https://library.hungaricana.hu/hu/view/DTT_KOZL_TanacsokKozlonye_1954/?pg=513 +Rule Hungary 1954 only - May 23 0:00 1:00 S +Rule Hungary 1954 only - Oct 3 0:00 0 - +# https://library.hungaricana.hu/hu/view/DTT_KOZL_TanacsokKozlonye_1955/?pg=398 +Rule Hungary 1955 only - May 22 2:00 1:00 S +Rule Hungary 1955 only - Oct 2 3:00 0 - +# https://library.hungaricana.hu/hu/view/HevesMegyeiNepujsag_1956_06/?pg=0 +# https://library.hungaricana.hu/hu/view/EszakMagyarorszag_1956_06/?pg=6 +# https://library.hungaricana.hu/hu/view/SzolnokMegyeiNeplap_1957_04/?pg=120 +# https://library.hungaricana.hu/hu/view/PestMegyeiHirlap_1957_09/?pg=143 +Rule Hungary 1956 1957 - Jun Sun>=1 2:00 1:00 S +Rule Hungary 1956 1957 - Sep lastSun 3:00 0 - +# https://library.hungaricana.hu/hu/view/DTT_KOZL_TanacsokKozlonye_1980/?pg=189 +Rule Hungary 1980 only - Apr 6 0:00 1:00 S +Rule Hungary 1980 only - Sep 28 1:00 0 - +# https://library.hungaricana.hu/hu/view/DTT_KOZL_TanacsokKozlonye_1980/?pg=1227 +# https://library.hungaricana.hu/hu/view/Delmagyarorszag_1981_01/?pg=79 +# https://library.hungaricana.hu/hu/view/DTT_KOZL_TanacsokKozlonye_1982/?pg=115 +# https://library.hungaricana.hu/hu/view/DTT_KOZL_TanacsokKozlonye_1983/?pg=85 +Rule Hungary 1981 1983 - Mar 
lastSun 0:00 1:00 S +Rule Hungary 1981 1983 - Sep lastSun 1:00 0 - +# # Zone NAME STDOFF RULES FORMAT [UNTIL] -Zone Europe/Budapest 1:16:20 - LMT 1890 Oct +Zone Europe/Budapest 1:16:20 - LMT 1890 Nov 1 1:00 C-Eur CE%sT 1918 - 1:00 Hungary CE%sT 1941 Apr 8 +# https://library.hungaricana.hu/hu/view/OGYK_RT_1941/?pg=1204 +# https://library.hungaricana.hu/hu/view/OGYK_RT_1942/?pg=3955 + 1:00 Hungary CE%sT 1941 Apr 7 23:00 1:00 C-Eur CE%sT 1945 - 1:00 Hungary CE%sT 1980 Sep 28 2:00s + 1:00 Hungary CE%sT 1984 1:00 EU CE%sT # Iceland @@ -1601,7 +1681,7 @@ Zone Europe/Budapest 1:16:20 - LMT 1890 Oct # The information below is taken from the 1988 Almanak; see # http://www.almanak.hi.is/klukkan.html # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Iceland 1917 1919 - Feb 19 23:00 1:00 - Rule Iceland 1917 only - Oct 21 1:00 0 - Rule Iceland 1918 1919 - Nov 16 1:00 0 - @@ -1693,7 +1773,7 @@ Zone Atlantic/Reykjavik -1:28 - LMT 1908 # to 1944-06-04; although Rome was an open city during this period, it # was effectively controlled by Germany. # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Italy 1916 only - Jun 3 24:00 1:00 S Rule Italy 1916 1917 - Sep 30 24:00 0 - Rule Italy 1917 only - Mar 31 24:00 1:00 S @@ -1803,7 +1883,7 @@ Link Europe/Rome Europe/San_Marino # urged Lithuania and Estonia to adopt a similar time policy, but it # appears that they will not do so.... -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Latvia 1989 1996 - Mar lastSun 2:00s 1:00 S Rule Latvia 1989 1996 - Sep lastSun 2:00s 0 - @@ -1896,7 +1976,7 @@ Zone Europe/Vilnius 1:41:16 - LMT 1880 # Luxembourg # Whitman disagrees with most of these dates in minor ways; # go with Shanks & Pottenger. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Lux 1916 only - May 14 23:00 1:00 S Rule Lux 1916 only - Oct 1 1:00 0 - Rule Lux 1917 only - Apr 28 23:00 1:00 S @@ -1937,7 +2017,7 @@ Zone Europe/Luxembourg 0:24:36 - LMT 1904 Jun # From Paul Eggert (2016-10-21): # Assume 1900-1972 was like Rome, overriding Shanks. # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Malta 1973 only - Mar 31 0:00s 1:00 S Rule Malta 1973 only - Sep 29 0:00s 0 - Rule Malta 1974 only - Apr 21 0:00s 1:00 S @@ -2010,7 +2090,7 @@ Zone Europe/Malta 0:58:04 - LMT 1893 Nov 2 0:00s # Valletta # says the 2014-03-30 spring-forward transition was at 02:00 local time. # Guess that since 1997 Moldova has switched one hour before the EU. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Moldova 1997 max - Mar lastSun 2:00 1:00 S Rule Moldova 1997 max - Oct lastSun 3:00 0 - @@ -2028,11 +2108,24 @@ Zone Europe/Chisinau 1:55:20 - LMT 1880 2:00 Moldova EE%sT # Monaco -# Shanks & Pottenger give 0:09:20 for Paris Mean Time; go with Howse's -# more precise 0:09:21. +# +# From Michael Deckers (2020-06-12): +# In the "Journal de Monaco" of 1892-05-24, online at +# https://journaldemonaco.gouv.mc/var/jdm/storage/original/application/b1c67c12c5af11b41ea888fb048e4fe8.pdf +# we read: ... +# [In virtue of a Sovereign Ordinance of the May 13 of the current [year], +# legal time in the Principality will be set to, from the date of June 1, +# 1892 onwards, to the meridian of Paris, as in France.] 
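+# (Illustrative arithmetic, not taken from the ordinance above: together with
+# the Zone lines below, the 1892-06-01 change from Monaco's local mean time of
+# 0:29:32 east of Greenwich to the Paris meridian's 0:09:21 amounts to a step
+# back of 20 minutes 11 seconds.)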
+# In the "Journal de Monaco" of 1911-03-28, online at +# https://journaldemonaco.gouv.mc/var/jdm/storage/original/application/de74ffb7db53d4f599059fe8f0ed482a.pdf +# we read an ordinance of 1911-03-16: ... +# [Legal time in the Principality will be set, from the date of promulgation +# of the present ordinance, to legal time in France.... Consequently, legal +# time will be retarded by 9 minutes and 21 seconds.] +# # Zone NAME STDOFF RULES FORMAT [UNTIL] -Zone Europe/Monaco 0:29:32 - LMT 1891 Mar 15 - 0:09:21 - PMT 1911 Mar 11 # Paris Mean Time +Zone Europe/Monaco 0:29:32 - LMT 1892 Jun 1 + 0:09:21 - PMT 1911 Mar 29 # Paris Mean Time 0:00 France WE%sT 1945 Sep 16 3:00 1:00 France CE%sT 1977 1:00 EU CE%sT @@ -2080,7 +2173,7 @@ Zone Europe/Monaco 0:29:32 - LMT 1891 Mar 15 # The data entries before 1945 are taken from # https://www.staff.science.uu.nl/~gent0113/wettijd/wettijd.htm -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Neth 1916 only - May 1 0:00 1:00 NST # Netherlands Summer Time Rule Neth 1916 only - Oct 1 0:00 0 AMT # Amsterdam Mean Time Rule Neth 1917 only - Apr 16 2:00s 1:00 NST @@ -2117,7 +2210,7 @@ Zone Europe/Amsterdam 0:19:32 - LMT 1835 # Norway # http://met.no/met/met_lex/q_u/sommertid.html (2004-01) agrees with Shanks & # Pottenger. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Norway 1916 only - May 22 1:00 1:00 S Rule Norway 1916 only - Sep 30 0:00 0 - Rule Norway 1945 only - Apr 2 2:00s 1:00 S @@ -2186,7 +2279,7 @@ Link Europe/Oslo Arctic/Longyearbyen # The 1919 dates and times can be found in Tygodnik Urzędowy nr 1 (1919-03-20), # pp 1-2. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Poland 1918 1919 - Sep 16 2:00s 0 - Rule Poland 1919 only - Apr 15 2:00s 1:00 S Rule Poland 1944 only - Apr 3 2:00s 1:00 S @@ -2257,7 +2350,7 @@ Zone Europe/Warsaw 1:24:00 - LMT 1880 # Guess that the Azores changed to EU rules in 1992 (since that's when Portugal # harmonized with EU rules), and that they stayed +0:00 that winter. # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S # DSH writes that despite Decree 1,469 (1915), the change to the clocks was not # done every year, depending on what Spain did, because of railroad schedules. # Go with Shanks & Pottenger. @@ -2370,7 +2463,7 @@ Zone Atlantic/Madeira -1:07:36 - LMT 1884 # Funchal # assume that Romania and Moldova switched to EU rules in 1997, # the same year as Bulgaria. # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Romania 1932 only - May 21 0:00s 1:00 S Rule Romania 1932 1939 - Oct Sun>=1 0:00s 0 - Rule Romania 1933 1939 - Apr Sun>=2 0:00s 1:00 S @@ -3468,14 +3561,14 @@ Link Europe/Prague Europe/Bratislava # fallback transition from the next day's 00:59... to 00:00. # From Michael Deckers (2016-12-15): -# The Royal Decree of 1900-06-26 quoted by Planesas, online at +# The Royal Decree of 1900-07-26 quoted by Planesas, online at # https://www.boe.es/datos/pdfs/BOE//1900/209/A00383-00384.pdf # says in its article 5 (my translation): # These dispositions will enter into force beginning with the # instant at which, according to the time indicated in article 1, # the 1st day of January of 1901 will begin. 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Spain 1918 only - Apr 15 23:00 1:00 S Rule Spain 1918 1919 - Oct 6 24:00s 0 - Rule Spain 1919 only - Apr 6 23:00 1:00 S @@ -3612,7 +3705,7 @@ Zone Europe/Stockholm 1:12:12 - LMT 1879 Jan 1 # By the end of the 18th century clocks and watches became commonplace # and their performance improved enormously. Communities began to keep # mean time in preference to apparent time - Geneva from 1780 .... -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S # From Whitman (who writes "Midnight?"): # Rule Swiss 1940 only - Nov 2 0:00 1:00 S # Rule Swiss 1940 only - Dec 31 0:00 0 - @@ -3699,7 +3792,7 @@ Zone Europe/Stockholm 1:12:12 - LMT 1879 Jan 1 # 1853-07-16, though it probably occurred at some other date in Zurich, and # legal civil time probably changed at still some other transition date. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Swiss 1941 1942 - May Mon>=1 1:00 1:00 S Rule Swiss 1941 1942 - Oct Mon>=1 2:00 0 - # Zone NAME STDOFF RULES FORMAT [UNTIL] @@ -3848,7 +3941,7 @@ Zone Europe/Zurich 0:34:08 - LMT 1853 Jul 16 # See above comment. # Although Google Translate misfires on that source, it looks like # Turkey reversed last month's decision, and so will stay at +03. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Turkey 1916 only - May 1 0:00 1:00 S Rule Turkey 1916 only - Oct 1 0:00 0 - Rule Turkey 1920 only - Mar 28 0:00 1:00 S @@ -4006,7 +4099,7 @@ Zone Europe/Kiev 2:02:04 - LMT 1880 2:00 1:00 EEST 1991 Sep 29 3:00 2:00 E-Eur EE%sT 1995 2:00 EU EE%sT -# Ruthenia used CET 1990/1991. +# Transcarpathia used CET 1990/1991. # "Uzhhorod" is the transliteration of the Rusyn/Ukrainian pronunciation, but # "Uzhgorod" is more common in English. Zone Europe/Uzhgorod 1:29:12 - LMT 1890 Oct diff --git a/make/data/tzdata/leapseconds b/make/data/tzdata/leapseconds index fe8e170ed26..e00b297baed 100644 --- a/make/data/tzdata/leapseconds +++ b/make/data/tzdata/leapseconds @@ -91,11 +91,11 @@ Leap 2016 Dec 31 23:59:60 + S # Any additional leap seconds will come after this. # This Expires line is commented out for now, # so that pre-2020a zic implementations do not reject this file. -#Expires 2020 Dec 28 00:00:00 +#Expires 2021 Jun 28 00:00:00 # POSIX timestamps for the data in this file: #updated 1467936000 (2016-07-08 00:00:00 UTC) -#expires 1609113600 (2020-12-28 00:00:00 UTC) +#expires 1624838400 (2021-06-28 00:00:00 UTC) -# Updated through IERS Bulletin C59 -# File expires on: 28 December 2020 +# Updated through IERS Bulletin C60 +# File expires on: 28 June 2021 diff --git a/make/data/tzdata/northamerica b/make/data/tzdata/northamerica index 60c7addef09..9a70e313c78 100644 --- a/make/data/tzdata/northamerica +++ b/make/data/tzdata/northamerica @@ -193,7 +193,7 @@ # U.S. government action. So even though the "US" rules have changed # in the latest release, other countries won't be affected. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule US 1918 1919 - Mar lastSun 2:00 1:00 D Rule US 1918 1919 - Oct lastSun 2:00 0 S Rule US 1942 only - Feb 9 2:00 1:00 W # War @@ -370,7 +370,7 @@ Zone PST8PDT -8:00 US P%sT # Eastern time (i.e., -4:56:01.6) just before the 1883 switch. Round to the # nearest second. 
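# (Illustrative: rounding -4:56:01.6 to the nearest second gives the -4:56:02
# used for America/New_York's LMT in the Zone line below.)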
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER +# Rule NAME FROM TO - IN ON AT SAVE LETTER Rule NYC 1920 only - Mar lastSun 2:00 1:00 D Rule NYC 1920 only - Oct lastSun 2:00 0 S Rule NYC 1921 1966 - Apr lastSun 2:00 1:00 D @@ -454,7 +454,7 @@ Zone America/New_York -4:56:02 - LMT 1883 Nov 18 12:03:58 # The Tennessean 2007-05-11, republished 2015-04-06. # https://www.tennessean.com/story/insider/extras/2015/04/06/archives-seigenthaler-for-100-years-the-tennessean-had-it-covered/25348545/ -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER +# Rule NAME FROM TO - IN ON AT SAVE LETTER Rule Chicago 1920 only - Jun 13 2:00 1:00 D Rule Chicago 1920 1921 - Oct lastSun 2:00 0 S Rule Chicago 1921 only - Mar lastSun 2:00 1:00 D @@ -523,7 +523,7 @@ Zone America/North_Dakota/Beulah -6:47:07 - LMT 1883 Nov 18 12:12:53 # El Paso Times. 2018-10-24 06:40 -06. # https://www.elpasotimes.com/story/news/local/el-paso/2018/10/24/el-pasoans-were-time-rebels-fought-stay-mountain-zone/1744509002/ # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER +# Rule NAME FROM TO - IN ON AT SAVE LETTER Rule Denver 1920 1921 - Mar lastSun 2:00 1:00 D Rule Denver 1920 only - Oct lastSun 2:00 0 S Rule Denver 1921 only - May 22 2:00 0 S @@ -576,7 +576,7 @@ Zone America/Denver -6:59:56 - LMT 1883 Nov 18 12:00:04 # https://repository.uchastings.edu/cgi/viewcontent.cgi?article=1501&context=ca_ballot_props # https://repository.uchastings.edu/cgi/viewcontent.cgi?article=1636&context=ca_ballot_props # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER +# Rule NAME FROM TO - IN ON AT SAVE LETTER Rule CA 1948 only - Mar 14 2:01 1:00 D Rule CA 1949 only - Jan 1 2:00 0 S Rule CA 1950 1966 - Apr lastSun 1:00 1:00 D @@ -934,7 +934,7 @@ Zone America/Boise -7:44:49 - LMT 1883 Nov 18 12:15:11 # going to switch from Central to Eastern Time on March 11, 2007.... # http://www.indystar.com/apps/pbcs.dll/article?AID=/20070207/LOCAL190108/702070524/0/LOCAL -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER +# Rule NAME FROM TO - IN ON AT SAVE LETTER Rule Indianapolis 1941 only - Jun 22 2:00 1:00 D Rule Indianapolis 1941 1954 - Sep lastSun 2:00 0 S Rule Indianapolis 1946 1954 - Apr lastSun 2:00 1:00 D @@ -953,7 +953,7 @@ Zone America/Indiana/Indianapolis -5:44:38 - LMT 1883 Nov 18 12:15:22 # # Eastern Crawford County, Indiana, left its clocks alone in 1974, # as well as from 1976 through 2005. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER +# Rule NAME FROM TO - IN ON AT SAVE LETTER Rule Marengo 1951 only - Apr lastSun 2:00 1:00 D Rule Marengo 1951 only - Sep lastSun 2:00 0 S Rule Marengo 1954 1960 - Apr lastSun 2:00 1:00 D @@ -972,7 +972,7 @@ Zone America/Indiana/Marengo -5:45:23 - LMT 1883 Nov 18 12:14:37 # Daviess, Dubois, Knox, and Martin Counties, Indiana, # switched from eastern to central time in April 2006, then switched back # in November 2007. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER +# Rule NAME FROM TO - IN ON AT SAVE LETTER Rule Vincennes 1946 only - Apr lastSun 2:00 1:00 D Rule Vincennes 1946 only - Sep lastSun 2:00 0 S Rule Vincennes 1953 1954 - Apr lastSun 2:00 1:00 D @@ -997,7 +997,7 @@ Zone America/Indiana/Vincennes -5:50:07 - LMT 1883 Nov 18 12:09:53 # The Indianapolis News, Friday 27 October 1967 states that Perry County # returned to CST. It went again to EST on 27 April 1969, as documented by the # Indianapolis star of Saturday 26 April. 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER +# Rule NAME FROM TO - IN ON AT SAVE LETTER Rule Perry 1955 only - May 1 0:00 1:00 D Rule Perry 1955 1960 - Sep lastSun 2:00 0 S Rule Perry 1956 1963 - Apr lastSun 2:00 1:00 D @@ -1014,7 +1014,7 @@ Zone America/Indiana/Tell_City -5:47:03 - LMT 1883 Nov 18 12:12:57 # # Pike County, Indiana moved from central to eastern time in 1977, # then switched back in 2006, then switched back again in 2007. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER +# Rule NAME FROM TO - IN ON AT SAVE LETTER Rule Pike 1955 only - May 1 0:00 1:00 D Rule Pike 1955 1960 - Sep lastSun 2:00 0 S Rule Pike 1956 1964 - Apr lastSun 2:00 1:00 D @@ -1035,7 +1035,7 @@ Zone America/Indiana/Petersburg -5:49:07 - LMT 1883 Nov 18 12:10:53 # An article on page A3 of the Sunday, 1991-10-27 Washington Post # notes that Starke County switched from Central time to Eastern time as of # 1991-10-27. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER +# Rule NAME FROM TO - IN ON AT SAVE LETTER Rule Starke 1947 1961 - Apr lastSun 2:00 1:00 D Rule Starke 1947 1954 - Sep lastSun 2:00 0 S Rule Starke 1955 1956 - Oct lastSun 2:00 0 S @@ -1052,7 +1052,7 @@ Zone America/Indiana/Knox -5:46:30 - LMT 1883 Nov 18 12:13:30 # # Pulaski County, Indiana, switched from eastern to central time in # April 2006 and then switched back in March 2007. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER +# Rule NAME FROM TO - IN ON AT SAVE LETTER Rule Pulaski 1946 1960 - Apr lastSun 2:00 1:00 D Rule Pulaski 1946 1954 - Sep lastSun 2:00 0 S Rule Pulaski 1955 1956 - Oct lastSun 2:00 0 S @@ -1094,7 +1094,7 @@ Zone America/Indiana/Vevay -5:40:16 - LMT 1883 Nov 18 12:19:44 # # Part of Kentucky left its clocks alone in 1974. # This also includes Clark, Floyd, and Harrison counties in Indiana. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER +# Rule NAME FROM TO - IN ON AT SAVE LETTER Rule Louisville 1921 only - May 1 2:00 1:00 D Rule Louisville 1921 only - Sep 1 2:00 0 S Rule Louisville 1941 only - Apr lastSun 2:00 1:00 D @@ -1208,7 +1208,7 @@ Zone America/Kentucky/Monticello -5:39:24 - LMT 1883 Nov 18 12:20:36 # election Michigan voters narrowly repealed DST, effective 1969. # # Most of Michigan observed DST from 1973 on, but was a bit late in 1975. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER +# Rule NAME FROM TO - IN ON AT SAVE LETTER Rule Detroit 1948 only - Apr lastSun 2:00 1:00 D Rule Detroit 1948 only - Sep lastSun 2:00 0 S # Zone NAME STDOFF RULES FORMAT [UNTIL] @@ -1225,7 +1225,7 @@ Zone America/Detroit -5:32:11 - LMT 1905 # # Dickinson, Gogebic, Iron, and Menominee Counties, Michigan, # switched from EST to CST/CDT in 1973. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER +# Rule NAME FROM TO - IN ON AT SAVE LETTER Rule Menominee 1946 only - Apr lastSun 2:00 1:00 D Rule Menominee 1946 only - Sep lastSun 2:00 0 S Rule Menominee 1966 only - Apr lastSun 2:00 1:00 D @@ -1395,7 +1395,7 @@ Zone America/Menominee -5:50:27 - LMT 1885 Sep 18 12:00 # Oct 31, to Oct 27, 1918 (and Sunday is a more likely transition day # than Thursday) in all Canadian rulesets. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Canada 1918 only - Apr 14 2:00 1:00 D Rule Canada 1918 only - Oct 27 2:00 0 S Rule Canada 1942 only - Feb 9 2:00 1:00 W # War @@ -1418,7 +1418,7 @@ Rule Canada 2007 max - Nov Sun>=1 2:00 0 S # that follows the rules is the southeast corner, including Port Hope # Simpson and Mary's Harbour, but excluding, say, Black Tickle. 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule StJohns 1917 only - Apr 8 2:00 1:00 D Rule StJohns 1917 only - Sep 17 2:00 0 S # Whitman gives 1919 Apr 5 and 1920 Apr 5; go with Shanks & Pottenger. @@ -1520,7 +1520,7 @@ Zone America/Goose_Bay -4:01:40 - LMT 1884 # Happy Valley-Goose Bay # bill say that it is "accommodating the customs and practices" of those # regions, which suggests that they have always been in-line with Halifax. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Halifax 1916 only - Apr 1 0:00 1:00 D Rule Halifax 1916 only - Oct 1 0:00 0 S Rule Halifax 1920 only - May 9 0:00 1:00 D @@ -1586,7 +1586,7 @@ Zone America/Glace_Bay -3:59:48 - LMT 1902 Jun 15 # clear that this was the case since at least 1993. # For now, assume it started in 1993. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Moncton 1933 1935 - Jun Sun>=8 1:00 1:00 D Rule Moncton 1933 1935 - Sep Sun>=8 1:00 0 S Rule Moncton 1936 1938 - Jun Sun>=1 1:00 1:00 D @@ -1795,7 +1795,7 @@ Zone America/Blanc-Sablon -3:48:28 - LMT 1884 # With some exceptions, the use of daylight saving may be said to be limited # to those cities and towns lying between Quebec city and Windsor, Ont. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Toronto 1919 only - Mar 30 23:30 1:00 D Rule Toronto 1919 only - Oct 26 0:00 0 S Rule Toronto 1920 only - May 2 2:00 1:00 D @@ -1893,7 +1893,7 @@ Zone America/Atikokan -6:06:28 - LMT 1895 # starting 1966. Since 02:00s is clearly correct for 1967 on, assume # it was also 02:00s in 1966. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Winn 1916 only - Apr 23 0:00 1:00 D Rule Winn 1916 only - Sep 17 0:00 0 S Rule Winn 1918 only - Apr 14 2:00 1:00 D @@ -1984,7 +1984,7 @@ Zone America/Winnipeg -6:28:36 - LMT 1887 Jul 16 # long and rather painful to read. # http://www.qp.gov.sk.ca/documents/English/Statutes/Statutes/T14.pdf -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Regina 1918 only - Apr 14 2:00 1:00 D Rule Regina 1918 only - Oct 27 2:00 0 S Rule Regina 1930 1934 - May Sun>=1 0:00 1:00 D @@ -2034,7 +2034,7 @@ Zone America/Swift_Current -7:11:20 - LMT 1905 Sep # Boyer JP. Forcing Choice: The Risky Reward of Referendums. Dundum. 2017. # ISBN 978-1459739123. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Edm 1918 1919 - Apr Sun>=8 2:00 1:00 D Rule Edm 1918 only - Oct 27 2:00 0 S Rule Edm 1919 only - May 27 2:00 0 S @@ -2143,7 +2143,7 @@ Zone America/Edmonton -7:33:52 - LMT 1906 Sep # https://searcharchives.vancouver.ca/daylight-saving-1918-starts-again-july-7-1941-start-d-s-sept-27-end-of-d-s-1941 # We have no further details, so omit them for now. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Vanc 1918 only - Apr 14 2:00 1:00 D Rule Vanc 1918 only - Oct 27 2:00 0 S Rule Vanc 1942 only - Feb 9 2:00 1:00 W # War @@ -2472,7 +2472,19 @@ Zone America/Creston -7:46:04 - LMT 1884 # consistency with nearby Dawson Creek, Creston, and Fort Nelson. # https://yukon.ca/en/news/yukon-end-seasonal-time-change -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# From Andrew G. Smith (2020-09-24): +# Yukon has completed its regulatory change to be on UTC -7 year-round.... 
+# http://www.gov.yk.ca/legislation/regs/oic2020_125.pdf +# What we have done is re-defined Yukon Standard Time, as we are +# authorized to do under section 33 of our Interpretation Act: +# http://www.gov.yk.ca/legislation/acts/interpretation_c.pdf +# +# From Paul Eggert (2020-09-24): +# tzdb uses the obsolete YST abbreviation for standard time in Yukon through +# about 1970, and uses PST for standard time in Yukon since then. Consistent +# with that, use MST for -07, the new standard time in Yukon effective Nov. 1. + +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule NT_YK 1918 only - Apr 14 2:00 1:00 D Rule NT_YK 1918 only - Oct 27 2:00 0 S Rule NT_YK 1919 only - May 25 2:00 1:00 D @@ -2526,12 +2538,12 @@ Zone America/Inuvik 0 - -00 1953 # Inuvik founded Zone America/Whitehorse -9:00:12 - LMT 1900 Aug 20 -9:00 NT_YK Y%sT 1967 May 28 0:00 -8:00 NT_YK P%sT 1980 - -8:00 Canada P%sT 2020 Mar 8 2:00 + -8:00 Canada P%sT 2020 Nov 1 -7:00 - MST Zone America/Dawson -9:17:40 - LMT 1900 Aug 20 -9:00 NT_YK Y%sT 1973 Oct 28 0:00 -8:00 NT_YK P%sT 1980 - -8:00 Canada P%sT 2020 Mar 8 2:00 + -8:00 Canada P%sT 2020 Nov 1 -7:00 - MST @@ -2746,7 +2758,7 @@ Zone America/Dawson -9:17:40 - LMT 1900 Aug 20 # 5- The islands, reefs and keys shall take their timezone from the # longitude they are located at. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Mexico 1939 only - Feb 5 0:00 1:00 D Rule Mexico 1939 only - Jun 25 0:00 0 S Rule Mexico 1940 only - Dec 9 0:00 1:00 D @@ -2951,7 +2963,7 @@ Zone America/Tijuana -7:48:04 - LMT 1922 Jan 1 0:11:56 # rules to sync with the U.S. starting in 2007.... # http://www.jonesbahamas.com/?c=45&a=10412 -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Bahamas 1964 1975 - Oct lastSun 2:00 0 S Rule Bahamas 1964 1975 - Apr lastSun 2:00 1:00 D # Zone NAME STDOFF RULES FORMAT [UNTIL] @@ -2963,7 +2975,7 @@ Zone America/Nassau -5:09:30 - LMT 1912 Mar 2 # For 1899 Milne gives -3:58:29.2; round that. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Barb 1977 only - Jun 12 2:00 1:00 D Rule Barb 1977 1978 - Oct Sun>=1 2:00 0 S Rule Barb 1978 1980 - Apr Sun>=15 2:00 1:00 D @@ -2976,7 +2988,7 @@ Zone America/Barbados -3:58:29 - LMT 1924 # Bridgetown # Belize # Whitman entirely disagrees with Shanks; go with Shanks & Pottenger. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Belize 1918 1942 - Oct Sun>=2 0:00 0:30 -0530 Rule Belize 1919 1943 - Feb Sun>=9 0:00 0 CST Rule Belize 1973 only - Dec 5 0:00 1:00 CDT @@ -3013,7 +3025,7 @@ Zone Atlantic/Bermuda -4:19:18 - LMT 1930 Jan 1 2:00 # Hamilton # Milne gives -5:36:13.3 as San José mean time; round to nearest. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule CR 1979 1980 - Feb lastSun 0:00 1:00 D Rule CR 1979 1980 - Jun Sun>=1 0:00 0 S Rule CR 1991 1992 - Jan Sat>=15 0:00 1:00 D @@ -3187,7 +3199,7 @@ Zone America/Costa_Rica -5:36:13 - LMT 1890 # San José # From Paul Eggert (2012-11-03): # For now, assume the future rule is first Sunday in November. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Cuba 1928 only - Jun 10 0:00 1:00 D Rule Cuba 1928 only - Oct 10 0:00 0 S Rule Cuba 1940 1942 - Jun Sun>=1 0:00 1:00 D @@ -3256,7 +3268,7 @@ Zone America/Havana -5:29:28 - LMT 1890 # decided to revert. 
-# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule DR 1966 only - Oct 30 0:00 1:00 EDT Rule DR 1967 only - Feb 28 0:00 0 EST Rule DR 1969 1973 - Oct lastSun 0:00 0:30 -0430 @@ -3273,7 +3285,7 @@ Zone America/Santo_Domingo -4:39:36 - LMT 1890 # El Salvador -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Salv 1987 1988 - May Sun>=1 0:00 1:00 D Rule Salv 1987 1988 - Sep lastSun 0:00 0 S # There are too many San Salvadors elsewhere, so use America/El_Salvador @@ -3302,7 +3314,7 @@ Zone America/El_Salvador -5:56:48 - LMT 1921 # San Salvador # (2006-04-19), says DST ends at 24:00. See # http://www.sieca.org.gt/Sitio_publico/Energeticos/Doc/Medidas/Cambio_Horario_Nac_190406.pdf -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Guat 1973 only - Nov 25 0:00 1:00 D Rule Guat 1974 only - Feb 24 0:00 0 S Rule Guat 1983 only - May 21 0:00 1:00 D @@ -3383,7 +3395,7 @@ Zone America/Guatemala -6:02:04 - LMT 1918 Oct 5 # I have not been able to find a more authoritative source: # https://www.haitilibre.com/en/news-20319-haiti-notices-time-change-in-haiti.html -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Haiti 1983 only - May 8 0:00 1:00 D Rule Haiti 1984 1987 - Apr lastSun 0:00 1:00 D Rule Haiti 1983 1987 - Oct lastSun 0:00 0 S @@ -3431,7 +3443,7 @@ Zone America/Port-au-Prince -4:49:20 - LMT 1890 # http://www.laprensahn.com/pais_nota.php?id04962=7386 # So it seems that Honduras will not enter DST this year.... -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Hond 1987 1988 - May Sun>=1 0:00 1:00 D Rule Hond 1987 1988 - Sep lastSun 0:00 0 S Rule Hond 2006 only - May Sun>=1 0:00 1:00 D @@ -3522,7 +3534,7 @@ Zone America/Martinique -4:04:20 - LMT 1890 # Fort-de-France # The natural sun time is restored in all the national territory, in that the # time is returned one hour at 01:00 am of October 1 of 2006. # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Nic 1979 1980 - Mar Sun>=16 0:00 1:00 D Rule Nic 1979 1980 - Jun Mon>=23 0:00 0 S Rule Nic 2005 only - Apr 10 0:00 1:00 D diff --git a/make/data/tzdata/pacificnew b/make/data/tzdata/pacificnew deleted file mode 100644 index f19a876372c..00000000000 --- a/make/data/tzdata/pacificnew +++ /dev/null @@ -1,52 +0,0 @@ -# -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -# -# This code is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License version 2 only, as -# published by the Free Software Foundation. Oracle designates this -# particular file as subject to the "Classpath" exception as provided -# by Oracle in the LICENSE file that accompanied this code. -# -# This code is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -# version 2 for more details (a copy is included in the LICENSE file that -# accompanied this code). -# -# You should have received a copy of the GNU General Public License version -# 2 along with this work; if not, write to the Free Software Foundation, -# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
-# -# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -# or visit www.oracle.com if you need additional information or have any -# questions. -# -# tzdb data for proposed US election time (this file is obsolete) - -# This file is in the public domain, so clarified as of -# 2009-05-17 by Arthur David Olson. - -# From Arthur David Olson (1989-04-05): -# On 1989-04-05, the U. S. House of Representatives passed (238-154) a bill -# establishing "Pacific Presidential Election Time"; it was not acted on -# by the Senate or signed into law by the President. -# You might want to change the "PE" (Presidential Election) below to -# "Q" (Quadrennial) to maintain three-character zone abbreviations. -# If you're really conservative, you might want to change it to "D". -# Avoid "L" (Leap Year), which won't be true in 2100. - -# If Presidential Election Time is ever established, replace "XXXX" below -# with the year the law takes effect and uncomment the "##" lines. - -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S -## Rule Twilite XXXX max - Apr Sun>=1 2:00 1:00 D -## Rule Twilite XXXX max uspres Oct lastSun 2:00 1:00 PE -## Rule Twilite XXXX max uspres Nov Sun>=7 2:00 0 S -## Rule Twilite XXXX max nonpres Oct lastSun 2:00 0 S - -# Zone NAME STDOFF RULES/SAVE FORMAT [UNTIL] -## Zone America/Los_Angeles-PET -8:00 US P%sT XXXX -## -8:00 Twilite P%sT - -# For now... -Link America/Los_Angeles US/Pacific-New ## diff --git a/make/data/tzdata/southamerica b/make/data/tzdata/southamerica index 51795f7621b..566dabfadb4 100644 --- a/make/data/tzdata/southamerica +++ b/make/data/tzdata/southamerica @@ -71,7 +71,7 @@ # I am sending modifications to the Argentine time zone table... # AR was chosen because they are the ISO letters that represent Argentina. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Arg 1930 only - Dec 1 0:00 1:00 - Rule Arg 1931 only - Apr 1 0:00 0 - Rule Arg 1931 only - Oct 15 0:00 1:00 - @@ -792,7 +792,7 @@ Zone America/La_Paz -4:32:36 - LMT 1890 # From Paul Eggert (2013-10-17): # For now, assume western Amazonas will change as well. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S # Decree 20,466 (1931-10-01) # Decree 21,896 (1932-01-10) Rule Brazil 1931 only - Oct 3 11:00 1:00 - @@ -1281,7 +1281,7 @@ Zone America/Rio_Branco -4:31:12 - LMT 1914 # For now, assume that they will not revert, # since they have extended the expiration date once already. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Chile 1927 1931 - Sep 1 0:00 1:00 - Rule Chile 1928 1932 - Apr 1 0:00 0 - Rule Chile 1968 only - Nov 3 4:00u 1:00 - @@ -1381,7 +1381,7 @@ Zone Antarctica/Palmer 0 - -00 1965 # Milne gives 4:56:16.4 for Bogotá time in 1899; round to nearest. He writes, # "A variation of fifteen minutes in the public clocks of Bogota is not rare." -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule CO 1992 only - May 3 0:00 1:00 - Rule CO 1993 only - Apr 4 0:00 0 - # Zone NAME STDOFF RULES FORMAT [UNTIL] @@ -1441,7 +1441,7 @@ Link America/Curacao America/Kralendijk # Caribbean Netherlands # (Not one step back), the clocks went back in 1993 and the experiment was not # repeated. For now, assume transitions were at 00:00 local time country-wide. 
# -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Ecuador 1992 only - Nov 28 0:00 1:00 - Rule Ecuador 1993 only - Feb 5 0:00 0 - # @@ -1535,7 +1535,7 @@ Zone Pacific/Galapagos -5:58:24 - LMT 1931 # Puerto Baquerizo Moreno # For now we will assume permanent -03 for the Falklands # until advised differently (to apply for 2012 and beyond, after the 2011 # experiment was apparently successful.) -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Falk 1937 1938 - Sep lastSun 0:00 1:00 - Rule Falk 1938 1942 - Mar Sun>=19 0:00 0 - Rule Falk 1939 only - Oct 1 0:00 1:00 - @@ -1581,7 +1581,7 @@ Zone America/Guyana -3:52:40 - LMT 1915 Mar # Georgetown # No time of the day is established for the adjustment, so people normally # adjust their clocks at 0 hour of the given dates. # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Para 1975 1988 - Oct 1 0:00 1:00 - Rule Para 1975 1978 - Mar 1 0:00 0 - Rule Para 1979 1991 - Apr 1 0:00 0 - @@ -1674,7 +1674,7 @@ Zone America/Asuncion -3:50:40 - LMT 1890 # From Paul Eggert (2006-03-22): # Shanks & Pottenger don't have this transition. Assume 1986 was like 1987. -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Peru 1938 only - Jan 1 0:00 1:00 - Rule Peru 1938 only - Apr 1 0:00 0 - Rule Peru 1938 1939 - Sep lastSun 0:00 1:00 - @@ -1770,7 +1770,7 @@ Link America/Port_of_Spain America/Tortola # Virgin Islands (UK) # https://www.impo.com.uy/diariooficial/1926/03/10/2 # https://www.impo.com.uy/diariooficial/1926/03/18/2 # -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S +# Rule NAME FROM TO - IN ON AT SAVE LETTER/S Rule Uruguay 1923 1925 - Oct 1 0:00 0:30 - Rule Uruguay 1924 1926 - Apr 1 0:00 0 - # From Tim Parenti (2018-02-15): diff --git a/make/data/tzdata/systemv b/make/data/tzdata/systemv deleted file mode 100644 index 9525ec47171..00000000000 --- a/make/data/tzdata/systemv +++ /dev/null @@ -1,62 +0,0 @@ -# -# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -# -# This code is free software; you can redistribute it and/or modify it -# under the terms of the GNU General Public License version 2 only, as -# published by the Free Software Foundation. Oracle designates this -# particular file as subject to the "Classpath" exception as provided -# by Oracle in the LICENSE file that accompanied this code. -# -# This code is distributed in the hope that it will be useful, but WITHOUT -# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -# version 2 for more details (a copy is included in the LICENSE file that -# accompanied this code). -# -# You should have received a copy of the GNU General Public License version -# 2 along with this work; if not, write to the Free Software Foundation, -# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -# -# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -# or visit www.oracle.com if you need additional information or have any -# questions. -# -# tzdb data for System V rules (this file is obsolete) - -# This file is in the public domain, so clarified as of -# 2009-05-17 by Arthur David Olson. - -# Old rules, should the need arise. 
-# No attempt is made to handle Newfoundland, since it cannot be expressed -# using the System V "TZ" scheme (half-hour offset), or anything outside -# North America (no support for non-standard DST start/end dates), nor -# the changes in the DST rules in the US after 1976 (which occurred after -# the old rules were written). -# -# If you need the old rules, uncomment ## lines. -# Compile this *without* leap second correction for true conformance. - -# Rule NAME FROM TO TYPE IN ON AT SAVE LETTER/S -Rule SystemV min 1973 - Apr lastSun 2:00 1:00 D -Rule SystemV min 1973 - Oct lastSun 2:00 0 S -Rule SystemV 1974 only - Jan 6 2:00 1:00 D -Rule SystemV 1974 only - Nov lastSun 2:00 0 S -Rule SystemV 1975 only - Feb 23 2:00 1:00 D -Rule SystemV 1975 only - Oct lastSun 2:00 0 S -Rule SystemV 1976 max - Apr lastSun 2:00 1:00 D -Rule SystemV 1976 max - Oct lastSun 2:00 0 S - -# Zone NAME STDOFF RULES/SAVE FORMAT [UNTIL] -## Zone SystemV/AST4ADT -4:00 SystemV A%sT -## Zone SystemV/EST5EDT -5:00 SystemV E%sT -## Zone SystemV/CST6CDT -6:00 SystemV C%sT -## Zone SystemV/MST7MDT -7:00 SystemV M%sT -## Zone SystemV/PST8PDT -8:00 SystemV P%sT -## Zone SystemV/YST9YDT -9:00 SystemV Y%sT -## Zone SystemV/AST4 -4:00 - AST -## Zone SystemV/EST5 -5:00 - EST -## Zone SystemV/CST6 -6:00 - CST -## Zone SystemV/MST7 -7:00 - MST -## Zone SystemV/PST8 -8:00 - PST -## Zone SystemV/YST9 -9:00 - YST -## Zone SystemV/HST10 -10:00 - HST diff --git a/make/devkit/createJMHBundle.sh b/make/devkit/createJMHBundle.sh index b56950c41ec..b460ee75311 100644 --- a/make/devkit/createJMHBundle.sh +++ b/make/devkit/createJMHBundle.sh @@ -26,7 +26,7 @@ # Create a bundle in the build directory, containing what's needed to # build and run JMH microbenchmarks from the OpenJDK build. -JMH_VERSION=1.21 +JMH_VERSION=1.26 COMMONS_MATH3_VERSION=3.2 JOPT_SIMPLE_VERSION=4.6 diff --git a/make/devkit/createMacosxDevkit.sh b/make/devkit/createMacosxDevkit.sh index 2a7dfe2037b..cd105823366 100644 --- a/make/devkit/createMacosxDevkit.sh +++ b/make/devkit/createMacosxDevkit.sh @@ -91,7 +91,6 @@ EXCLUDE_DIRS=" \ Platforms/AppleTVSimulator.platform \ Platforms/iPhoneSimulator.platform \ Platforms/WatchSimulator.platform \ - Contents/SharedFrameworks/LLDB.framework \ Contents/SharedFrameworks/ModelIO.framework \ Contents/SharedFrameworks/XCSUI.framework \ Contents/SharedFrameworks/SceneKit.framework \ diff --git a/make/devkit/createWindowsDevkit2017.sh b/make/devkit/createWindowsDevkit2017.sh index 91227259bdf..42c13251293 100644 --- a/make/devkit/createWindowsDevkit2017.sh +++ b/make/devkit/createWindowsDevkit2017.sh @@ -138,8 +138,8 @@ cp -r "$VS_INSTALL_DIR/$REDIST_SUBDIR/x86" $DEVKIT_ROOT/VC/redist/ cp $DEVKIT_ROOT/VC/redist/x86/$MSVCR_DLL $DEVKIT_ROOT/VC/bin/x86 cp $DEVKIT_ROOT/VC/redist/x86/$MSVCP_DLL $DEVKIT_ROOT/VC/bin/x86 cp $DEVKIT_ROOT/VC/redist/x64/$MSVCR_DLL $DEVKIT_ROOT/VC/bin/x64 -cp $DEVKIT_ROOT/VC/redist/x64/$MSVCR_DLL $DEVKIT_ROOT/VC/bin/x64 -cp $DEVKIT_ROOT/VC/redist/arm64/$MSVCP_DLL $DEVKIT_ROOT/VC/bin/arm64 +cp $DEVKIT_ROOT/VC/redist/x64/$MSVCP_DLL $DEVKIT_ROOT/VC/bin/x64 +cp $DEVKIT_ROOT/VC/redist/arm64/$MSVCR_DLL $DEVKIT_ROOT/VC/bin/arm64 cp $DEVKIT_ROOT/VC/redist/arm64/$MSVCP_DLL $DEVKIT_ROOT/VC/bin/arm64 ################################################################################ diff --git a/make/hotspot/gensrc/GensrcAdlc.gmk b/make/hotspot/gensrc/GensrcAdlc.gmk index 733658d5d8b..fb7d48f1e27 100644 --- a/make/hotspot/gensrc/GensrcAdlc.gmk +++ b/make/hotspot/gensrc/GensrcAdlc.gmk @@ -138,6 +138,7 @@ ifeq ($(call 
check-jvm-feature, compiler2), true) ifeq ($(HOTSPOT_TARGET_CPU_ARCH), aarch64) AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \ + $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_neon.ad \ $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_sve.ad \ ))) endif diff --git a/make/hotspot/gensrc/GensrcJvmti.gmk b/make/hotspot/gensrc/GensrcJvmti.gmk index 312c8bc737a..b31a6f52292 100644 --- a/make/hotspot/gensrc/GensrcJvmti.gmk +++ b/make/hotspot/gensrc/GensrcJvmti.gmk @@ -106,17 +106,6 @@ $(eval $(call SetupJvmtiGeneration, jvmti.h, jvmtiH.xsl, \ $(eval $(call SetupJvmtiGeneration, jvmti.html, jvmti.xsl, \ -PARAM majorversion $(VERSION_FEATURE))) -JVMTI_BC_SRCDIR := $(TOPDIR)/src/hotspot/share/interpreter/zero - -ifeq ($(call check-jvm-feature, zero), true) - $(eval $(call SetupXslTransform, bytecodeInterpreterWithChecks.cpp, \ - XML_FILE := $(JVMTI_BC_SRCDIR)/bytecodeInterpreterWithChecks.xml, \ - XSL_FILE := $(JVMTI_BC_SRCDIR)/bytecodeInterpreterWithChecks.xsl, \ - OUTPUT_DIR := $(JVMTI_OUTPUTDIR), \ - DEPS := $(JVMTI_BC_SRCDIR)/bytecodeInterpreter.cpp, \ - )) -endif - ################################################################################ # Copy jvmti.h to include dir diff --git a/make/hotspot/lib/CompileJvm.gmk b/make/hotspot/lib/CompileJvm.gmk index 441c09a3853..65edd047571 100644 --- a/make/hotspot/lib/CompileJvm.gmk +++ b/make/hotspot/lib/CompileJvm.gmk @@ -91,11 +91,11 @@ DISABLED_WARNINGS_clang := tautological-compare \ undefined-var-template sometimes-uninitialized unknown-pragmas \ delete-non-virtual-dtor missing-braces char-subscripts \ ignored-qualifiers missing-field-initializers mismatched-tags \ - shift-negative-value + shift-negative-value misleading-indentation DISABLED_WARNINGS_xlc := tautological-compare shift-negative-value -DISABLED_WARNINGS_microsoft := 4100 4127 4201 4244 4291 4351 \ +DISABLED_WARNINGS_microsoft := 4100 4127 4146 4201 4244 4291 4351 \ 4511 4512 4514 4624 4996 ################################################################################ diff --git a/make/hotspot/lib/JvmFeatures.gmk b/make/hotspot/lib/JvmFeatures.gmk index 3647806e1d7..d96d006c5fc 100644 --- a/make/hotspot/lib/JvmFeatures.gmk +++ b/make/hotspot/lib/JvmFeatures.gmk @@ -126,6 +126,7 @@ ifneq ($(call check-jvm-feature, cds), true) dynamicArchive.cpp \ filemap.cpp \ heapShared.cpp \ + lambdaFormInvokers.cpp \ metaspaceShared.cpp \ metaspaceShared_$(HOTSPOT_TARGET_CPU).cpp \ metaspaceShared_$(HOTSPOT_TARGET_CPU_ARCH).cpp \ diff --git a/make/hotspot/symbols/symbols-aix b/make/hotspot/symbols/symbols-aix index 0efd2dba97f..92703573a5f 100644 --- a/make/hotspot/symbols/symbols-aix +++ b/make/hotspot/symbols/symbols-aix @@ -21,7 +21,7 @@ # questions. 
# -JVM_handle_linux_signal +JVM_handle_aix_signal numa_error numa_warn sysThreadAvailableStackWithSlack diff --git a/make/hotspot/symbols/symbols-unix b/make/hotspot/symbols/symbols-unix index 97aa40b970b..1781d84ab94 100644 --- a/make/hotspot/symbols/symbols-unix +++ b/make/hotspot/symbols/symbols-unix @@ -143,14 +143,15 @@ JVM_InternString JVM_Interrupt JVM_InvokeMethod JVM_IsArrayClass -JVM_IsDynamicDumpingEnabled -JVM_IsSharingEnabled +JVM_IsCDSDumpingEnabled JVM_IsConstructorIx +JVM_IsDumpingClassList JVM_IsHiddenClass JVM_IsInterface JVM_IsPrimitiveClass JVM_IsRecord JVM_IsSameClassPackage +JVM_IsSharingEnabled JVM_IsSupportedJNIVersion JVM_IsThreadAlive JVM_IsVMGeneratedMethodIx @@ -158,6 +159,7 @@ JVM_LatestUserDefinedLoader JVM_LoadLibrary JVM_LookupDefineClass JVM_LookupLambdaProxyClassFromArchive +JVM_LogLambdaFormInvoker JVM_MaxMemory JVM_MaxObjectInspectionAge JVM_MonitorNotify @@ -169,11 +171,13 @@ JVM_NativePath JVM_NewArray JVM_NewInstanceFromConstructor JVM_NewMultiArray +JVM_PhantomReferenceRefersTo JVM_RaiseSignal JVM_RawMonitorCreate JVM_RawMonitorDestroy JVM_RawMonitorEnter JVM_RawMonitorExit +JVM_ReferenceRefersTo JVM_RegisterLambdaProxyClassForArchiving JVM_RegisterSignal JVM_ReleaseUTF diff --git a/make/jdk/src/classes/build/tools/blacklistedcertsconverter/BlacklistedCertsConverter.java b/make/jdk/src/classes/build/tools/blacklistedcertsconverter/BlacklistedCertsConverter.java index 54c60eb43d1..653a1db10dd 100644 --- a/make/jdk/src/classes/build/tools/blacklistedcertsconverter/BlacklistedCertsConverter.java +++ b/make/jdk/src/classes/build/tools/blacklistedcertsconverter/BlacklistedCertsConverter.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2013, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,14 +25,24 @@ package build.tools.blacklistedcertsconverter; +import java.io.IOException; +import java.math.BigInteger; import java.security.MessageDigest; +import java.security.PublicKey; import java.security.cert.Certificate; import java.security.cert.CertificateFactory; import java.security.cert.X509Certificate; +import java.security.interfaces.ECPublicKey; +import java.util.ArrayList; +import java.util.Arrays; import java.util.Collection; +import java.util.List; import java.util.Set; import java.util.TreeSet; +import sun.security.util.DerInputStream; +import sun.security.util.DerOutputStream; +import sun.security.util.DerValue; /** * Converts blacklisted.certs.pem from System.in to blacklisted.certs in @@ -75,8 +85,8 @@ public static void main(String[] args) throws Exception { // Output sorted so that it's easy to locate an entry. Set fingerprints = new TreeSet<>(); for (Certificate cert: certs) { - fingerprints.add( - getCertificateFingerPrint(mdAlg, (X509Certificate)cert)); + fingerprints.addAll( + getCertificateFingerPrints(mdAlg, (X509Certificate)cert)); } for (String s: fingerprints) { @@ -97,17 +107,90 @@ private static void byte2hex(byte b, StringBuffer buf) { } /** - * Gets the requested finger print of the certificate. + * Computes the possible fingerprints of the certificate. 
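+     * Both the DER encoding of the signature AlgorithmIdentifier (parameters
+     * encoded as an explicit NULL versus omitted) and, for ECDSA, the
+     * signature value itself ((r, s) versus the equally valid (r, n - s),
+     * where n is the curve order) can vary, so one certificate may have
+     * several distinct encodings and therefore several fingerprints.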
*/ - private static String getCertificateFingerPrint( + private static List getCertificateFingerPrints( String mdAlg, X509Certificate cert) throws Exception { - byte[] encCertInfo = cert.getEncoded(); - MessageDigest md = MessageDigest.getInstance(mdAlg); - byte[] digest = md.digest(encCertInfo); - StringBuffer buf = new StringBuffer(); - for (int i = 0; i < digest.length; i++) { - byte2hex(digest[i], buf); + List fingerprints = new ArrayList<>(); + for (byte[] encoding : altEncodings(cert)) { + MessageDigest md = MessageDigest.getInstance(mdAlg); + byte[] digest = md.digest(encoding); + StringBuffer buf = new StringBuffer(); + for (int i = 0; i < digest.length; i++) { + byte2hex(digest[i], buf); + } + fingerprints.add(buf.toString()); + } + return fingerprints; + } + + private static List altEncodings(X509Certificate c) + throws Exception { + List result = new ArrayList<>(); + + DerValue d = new DerValue(c.getEncoded()); + DerValue[] seq = new DerValue[3]; + // tbsCertificate + seq[0] = d.data.getDerValue(); + // signatureAlgorithm + seq[1] = d.data.getDerValue(); + // signature + seq[2] = d.data.getDerValue(); + + List algIds = Arrays.asList(seq[1], altAlgId(seq[1])); + + List sigs; + PublicKey p = c.getPublicKey(); + if (p instanceof ECPublicKey) { + ECPublicKey ep = (ECPublicKey) p; + BigInteger mod = ep.getParams().getOrder(); + sigs = Arrays.asList(seq[2], altSig(mod, seq[2])); + } else { + sigs = Arrays.asList(seq[2]); + } + + for (DerValue algId : algIds) { + for (DerValue sig : sigs) { + DerOutputStream tmp = new DerOutputStream(); + tmp.putDerValue(seq[0]); + tmp.putDerValue(algId); + tmp.putDerValue(sig); + DerOutputStream tmp2 = new DerOutputStream(); + tmp2.write(DerValue.tag_Sequence, tmp); + result.add(tmp2.toByteArray()); + } + } + return result; + } + + private static DerValue altSig(BigInteger mod, DerValue sig) + throws IOException { + byte[] sigBits = sig.getBitString(); + DerInputStream in = + new DerInputStream(sigBits, 0, sigBits.length, false); + DerValue[] values = in.getSequence(2); + BigInteger r = values[0].getBigInteger(); + BigInteger s = values[1].getBigInteger(); + BigInteger s2 = s.negate().mod(mod); + DerOutputStream out = new DerOutputStream(); + out.putInteger(r); + out.putInteger(s2); + DerOutputStream tmp = new DerOutputStream(); + tmp.putBitString(new DerValue(DerValue.tag_Sequence, + out.toByteArray()).toByteArray()); + return new DerValue(tmp.toByteArray()); + } + + private static DerValue altAlgId(DerValue algId) throws IOException { + DerInputStream in = algId.toDerInputStream(); + DerOutputStream bytes = new DerOutputStream(); + bytes.putOID(in.getOID()); + // encode parameters as NULL if not present or omit if NULL + if (in.available() == 0) { + bytes.putNull(); } - return buf.toString(); + DerOutputStream tmp = new DerOutputStream(); + tmp.write(DerValue.tag_Sequence, bytes); + return new DerValue(tmp.toByteArray()); } } diff --git a/make/jdk/src/classes/build/tools/spp/Spp.java b/make/jdk/src/classes/build/tools/spp/Spp.java index 6921c65667b..2a0cb57bc39 100644 --- a/make/jdk/src/classes/build/tools/spp/Spp.java +++ b/make/jdk/src/classes/build/tools/spp/Spp.java @@ -106,7 +106,7 @@ public static void main(String args[]) throws Exception { static final String LNSEP = System.getProperty("line.separator"); static final String KEY = "([a-zA-Z0-9]+)"; static final String VAR = "([a-zA-Z0-9_\\-]+)"; - static final String TEXT = "([a-zA-Z0-9&;,.<>/#() \\?\\[\\]\\$]+)"; // $ -- hack embedded $var$ + static final String TEXT = 
"([\\p{Print}&&[^{#:}]]+)"; static final int GN_NOT = 1; static final int GN_KEY = 2; @@ -140,6 +140,10 @@ void append(StringBuffer buf, String ln, } } } + if (repl == null) { + System.err.println("Error: undefined variable in line " + ln); + System.exit(-1); + } vardef.appendReplacement(buf, repl); } vardef.appendTail(buf); diff --git a/make/modules/java.base/Copy.gmk b/make/modules/java.base/Copy.gmk index 9071f4e6e37..040b7588ba1 100644 --- a/make/modules/java.base/Copy.gmk +++ b/make/modules/java.base/Copy.gmk @@ -182,12 +182,16 @@ endif ################################################################################ -$(eval $(call SetupCopyFiles, COPY_NET_PROPERTIES, \ - FILES := $(TOPDIR)/src/java.base/share/conf/net.properties, \ - DEST := $(CONF_DST_DIR), \ -)) +NET_PROPERTIES_SRCS := $(TOPDIR)/src/java.base/share/conf/net.properties \ + $(TOPDIR)/src/java.base/$(OPENJDK_TARGET_OS_TYPE)/conf/net.properties + +NET_PROPERTIES_DST := $(CONF_DST_DIR)/net.properties + +$(NET_PROPERTIES_DST): $(NET_PROPERTIES_SRCS) + $(call MakeTargetDir) + $(CAT) $(NET_PROPERTIES_SRCS) > $@ -TARGETS += $(COPY_NET_PROPERTIES) +TARGETS += $(NET_PROPERTIES_DST) ifeq ($(call isTargetOs, linux), true) $(eval $(call SetupCopyFiles, COPY_SDP_CONF, \ diff --git a/make/modules/java.base/gendata/GendataTZDB.gmk b/make/modules/java.base/gendata/GendataTZDB.gmk index 54e6582d81d..1352178694f 100644 --- a/make/modules/java.base/gendata/GendataTZDB.gmk +++ b/make/modules/java.base/gendata/GendataTZDB.gmk @@ -29,7 +29,7 @@ GENDATA_TZDB := # Time zone data file creation # TZDATA_DIR := $(TOPDIR)/make/data/tzdata -TZDATA_TZFILE := africa antarctica asia australasia europe northamerica pacificnew southamerica backward etcetera gmt jdk11_backward +TZDATA_TZFILE := africa antarctica asia australasia europe northamerica southamerica backward etcetera gmt jdk11_backward TZDATA_TZFILES := $(addprefix $(TZDATA_DIR)/,$(TZDATA_TZFILE)) GENDATA_TZDB_DAT := $(SUPPORT_OUTPUTDIR)/modules_libs/$(MODULE)/tzdb.dat diff --git a/make/modules/java.base/lib/CoreLibraries.gmk b/make/modules/java.base/lib/CoreLibraries.gmk index f2b94fe717e..1d5fede2aa8 100644 --- a/make/modules/java.base/lib/CoreLibraries.gmk +++ b/make/modules/java.base/lib/CoreLibraries.gmk @@ -49,7 +49,7 @@ $(eval $(call SetupNativeCompilation, BUILD_LIBFDLIBM, \ CFLAGS_windows_debug := -DLOGGING, \ CFLAGS_aix := -qfloat=nomaf, \ DISABLED_WARNINGS_gcc := sign-compare misleading-indentation array-bounds, \ - DISABLED_WARNINGS_clang := sign-compare, \ + DISABLED_WARNINGS_clang := sign-compare misleading-indentation, \ DISABLED_WARNINGS_microsoft := 4146 4244 4018, \ ARFLAGS := $(ARFLAGS), \ OBJECT_DIR := $(SUPPORT_OUTPUTDIR)/native/$(MODULE)/libfdlibm, \ diff --git a/make/modules/java.desktop/lib/Awt2dLibraries.gmk b/make/modules/java.desktop/lib/Awt2dLibraries.gmk index 7fbd1049f89..3203378d00a 100644 --- a/make/modules/java.desktop/lib/Awt2dLibraries.gmk +++ b/make/modules/java.desktop/lib/Awt2dLibraries.gmk @@ -435,7 +435,6 @@ endif ifeq ($(USE_EXTERNAL_HARFBUZZ), true) LIBHARFBUZZ_LIBS := $(HARFBUZZ_LIBS) else - HARFBUZZ_CFLAGS := -DHAVE_OT -DHAVE_FALLBACK -DHAVE_UCDN -DHAVE_ROUND # This is better than adding EXPORT_ALL_SYMBOLS ifneq ($(filter $(TOOLCHAIN_TYPE), gcc clang), ) @@ -493,7 +492,7 @@ else maybe-uninitialized class-memaccess, \ DISABLED_WARNINGS_clang := unused-value incompatible-pointer-types \ tautological-constant-out-of-range-compare int-to-pointer-cast \ - undef missing-field-initializers, \ + undef missing-field-initializers range-loop-analysis, \ 
DISABLED_WARNINGS_microsoft := 4267 4244 4090 4146 4334 4819 4101 4068 4805 4138, \ LDFLAGS := $(LDFLAGS_JDKLIB) \ $(call SET_SHARED_LIBRARY_ORIGIN), \ diff --git a/make/modules/jdk.javadoc/Gendata.gmk b/make/modules/jdk.javadoc/Gendata.gmk index 5b4485808c7..0ee146a1e21 100644 --- a/make/modules/jdk.javadoc/Gendata.gmk +++ b/make/modules/jdk.javadoc/Gendata.gmk @@ -54,7 +54,7 @@ $(eval $(call SetupJavaCompilation, COMPILE_CREATE_SYMBOLS, \ SRC := $(TOPDIR)/make/langtools/src/classes \ $(TOPDIR)/src/jdk.jdeps/share/classes, \ INCLUDES := build/tools/symbolgenerator com/sun/tools/classfile, \ - BIN := $(BUILDTOOLS_OUTPUTDIR)/create_symbols, \ + BIN := $(BUILDTOOLS_OUTPUTDIR)/create_symbols_javadoc, \ DISABLED_WARNINGS := options, \ JAVAC_FLAGS := \ $(INTERIM_LANGTOOLS_ARGS) \ @@ -71,7 +71,7 @@ $(SUPPORT_OUTPUTDIR)/javadoc-symbols/symbols: \ $(ECHO) Creating javadoc element list $(JAVA_SMALL) $(INTERIM_LANGTOOLS_ARGS) \ $(COMPILECREATESYMBOLS_ADD_EXPORTS) \ - -classpath $(BUILDTOOLS_OUTPUTDIR)/create_symbols \ + -classpath $(BUILDTOOLS_OUTPUTDIR)/create_symbols_javadoc \ build.tools.symbolgenerator.CreateSymbols \ build-javadoc-data \ $(CT_DATA_DESCRIPTION) \ @@ -79,7 +79,7 @@ $(SUPPORT_OUTPUTDIR)/javadoc-symbols/symbols: \ 11 $(JAVA_SMALL) $(INTERIM_LANGTOOLS_ARGS) \ $(COMPILECREATESYMBOLS_ADD_EXPORTS) \ - -classpath $(BUILDTOOLS_OUTPUTDIR)/create_symbols \ + -classpath $(BUILDTOOLS_OUTPUTDIR)/create_symbols_javadoc \ build.tools.symbolgenerator.JavadocElementList \ $(JDK_OUTPUTDIR)/modules/jdk.javadoc/jdk/javadoc/internal/doclets/toolkit/resources/releases/element-list-$(JDK_SOURCE_TARGET_VERSION).txt \ $(JAVADOC_MODULESOURCEPATH) \ diff --git a/make/modules/jdk.incubator.jpackage/Gensrc.gmk b/make/modules/jdk.jpackage/Gensrc.gmk similarity index 93% rename from make/modules/jdk.incubator.jpackage/Gensrc.gmk rename to make/modules/jdk.jpackage/Gensrc.gmk index 5948a80f120..6f3e8b08119 100644 --- a/make/modules/jdk.incubator.jpackage/Gensrc.gmk +++ b/make/modules/jdk.jpackage/Gensrc.gmk @@ -31,7 +31,7 @@ include GensrcCommonJdk.gmk ifeq ($(call isTargetOs, macosx), true) ENTITLEMENTS_SRC_FILE := $(TOPDIR)/make/data/macosxsigning/java.plist ENTITLEMENTS_TARGET_FILE := \ - $(SUPPORT_OUTPUTDIR)/gensrc/$(MODULE)/jdk/incubator/jpackage/internal/resources/entitlements.plist + $(SUPPORT_OUTPUTDIR)/gensrc/$(MODULE)/jdk/jpackage/internal/resources/entitlements.plist $(ENTITLEMENTS_TARGET_FILE): $(ENTITLEMENTS_SRC_FILE) $(call install-file) diff --git a/make/modules/jdk.incubator.jpackage/Launcher.gmk b/make/modules/jdk.jpackage/Launcher.gmk similarity index 95% rename from make/modules/jdk.incubator.jpackage/Launcher.gmk rename to make/modules/jdk.jpackage/Launcher.gmk index 7a25dae733c..8d553d5c107 100644 --- a/make/modules/jdk.incubator.jpackage/Launcher.gmk +++ b/make/modules/jdk.jpackage/Launcher.gmk @@ -26,5 +26,5 @@ include LauncherCommon.gmk $(eval $(call SetupBuildLauncher, jpackage, \ - MAIN_CLASS := jdk.incubator.jpackage.main.Main, \ + MAIN_CLASS := jdk.jpackage.main.Main, \ )) diff --git a/make/modules/jdk.incubator.jpackage/Lib.gmk b/make/modules/jdk.jpackage/Lib.gmk similarity index 91% rename from make/modules/jdk.incubator.jpackage/Lib.gmk rename to make/modules/jdk.jpackage/Lib.gmk index 7ffef99afe4..7dfb70be5a6 100644 --- a/make/modules/jdk.incubator.jpackage/Lib.gmk +++ b/make/modules/jdk.jpackage/Lib.gmk @@ -29,8 +29,8 @@ include LibCommon.gmk JPACKAGE_APPLAUNCHER_SRC := \ - $(call FindSrcDirsForComponent, jdk.incubator.jpackage, applauncher) \ - $(call FindSrcDirsForComponent, 
jdk.incubator.jpackage, common) + $(call FindSrcDirsForComponent, jdk.jpackage, applauncher) \ + $(call FindSrcDirsForComponent, jdk.jpackage, common) ifeq ($(call isTargetOs, windows), true) @@ -42,7 +42,7 @@ else endif -JPACKAGE_OUTPUT_DIR := $(JDK_OUTPUTDIR)/modules/$(MODULE)/jdk/incubator/jpackage/internal/resources +JPACKAGE_OUTPUT_DIR := $(JDK_OUTPUTDIR)/modules/$(MODULE)/jdk/jpackage/internal/resources JPACKAGE_CXXFLAGS_windows := -EHsc -DUNICODE -D_UNICODE # Output app launcher executable in resources dir, and symbols in the object dir @@ -73,7 +73,7 @@ ifeq ($(call isTargetOs, windows), true) $(eval $(call SetupJdkLibrary, BUILD_LIB_JPACKAGE, \ NAME := jpackage, \ OPTIMIZATION := LOW, \ - EXTRA_SRC := jdk.incubator.jpackage:common, \ + EXTRA_SRC := jdk.jpackage:common, \ CXXFLAGS := $(CXXFLAGS_JDKLIB) $(JPACKAGE_CXXFLAGS_windows), \ LDFLAGS := $(LDFLAGS_JDKLIB) $(LDFLAGS_CXX_JDK) \ $(call SET_SHARED_LIBRARY_ORIGIN), \ @@ -99,8 +99,8 @@ ifeq ($(call isTargetOs, windows), true) TARGETS += $(BUILD_LIB_WIXHELPER) JPACKAGE_MSIWRAPPER_SRC := \ - $(call FindSrcDirsForComponent, jdk.incubator.jpackage, msiwrapper) \ - $(call FindSrcDirsForComponent, jdk.incubator.jpackage, common) + $(call FindSrcDirsForComponent, jdk.jpackage, msiwrapper) \ + $(call FindSrcDirsForComponent, jdk.jpackage, common) # Build exe installer wrapper for msi installer $(eval $(call SetupJdkExecutable, BUILD_JPACKAGE_MSIWRAPPER, \ diff --git a/make/scripts/compare.sh b/make/scripts/compare.sh index 25630199a21..5d0e846e755 100644 --- a/make/scripts/compare.sh +++ b/make/scripts/compare.sh @@ -696,7 +696,7 @@ compare_bin_file() { # pdb files. PDB_DIRS="$(ls -d \ {$OTHER,$THIS}/support/modules_{cmds,libs}/{*,*/*} \ - {$OTHER,$THIS}/support/native/jdk.incubator.jpackage/* \ + {$OTHER,$THIS}/support/native/jdk.jpackage/* \ )" export _NT_SYMBOL_PATH="$(echo $PDB_DIRS | tr ' ' ';')" fi diff --git a/make/test/BuildMicrobenchmark.gmk b/make/test/BuildMicrobenchmark.gmk index 3bbbea47b8e..55e5026eb38 100644 --- a/make/test/BuildMicrobenchmark.gmk +++ b/make/test/BuildMicrobenchmark.gmk @@ -90,11 +90,10 @@ $(eval $(call SetupJavaCompilation, BUILD_JDK_MICROBENCHMARK, \ TARGET_RELEASE := $(TARGET_RELEASE_NEWJDK_UPGRADED), \ SMALL_JAVA := false, \ CLASSPATH := $(MICROBENCHMARK_CLASSPATH), \ - DISABLED_WARNINGS := processing rawtypes cast serial preview, \ + DISABLED_WARNINGS := processing rawtypes cast serial, \ SRC := $(MICROBENCHMARK_SRC), \ BIN := $(MICROBENCHMARK_CLASSES), \ JAVA_FLAGS := --add-modules jdk.unsupported --limit-modules java.management, \ - JAVAC_FLAGS := --enable-preview, \ )) $(BUILD_JDK_MICROBENCHMARK): $(JMH_COMPILE_JARS) diff --git a/src/demo/share/java2d/J2DBench/src/j2dbench/tests/cmm/CMMTests.java b/src/demo/share/java2d/J2DBench/src/j2dbench/tests/cmm/CMMTests.java index 210970f6469..3c0f936358c 100644 --- a/src/demo/share/java2d/J2DBench/src/j2dbench/tests/cmm/CMMTests.java +++ b/src/demo/share/java2d/J2DBench/src/j2dbench/tests/cmm/CMMTests.java @@ -73,14 +73,16 @@ public static void init() { ColorSpace.CS_sRGB, ColorSpace.CS_GRAY, ColorSpace.CS_LINEAR_RGB, - ColorSpace.CS_CIEXYZ + ColorSpace.CS_CIEXYZ, + ColorSpace.CS_PYCC }; String[] csNames = new String[]{ "CS_sRGB", "CS_GRAY", "CS_LINEAR_RGB", - "CS_CIEXYZ" + "CS_CIEXYZ", + "CS_PYCC" }; csList = new Option.IntList(cmmOptRoot, diff --git a/src/demo/share/jfc/Notepad/Notepad.java b/src/demo/share/jfc/Notepad/Notepad.java index 3ebe3f07d22..cb4552f94cf 100644 --- a/src/demo/share/jfc/Notepad/Notepad.java +++ 
b/src/demo/share/jfc/Notepad/Notepad.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -60,7 +60,7 @@ * @author Timothy Prinzing */ @SuppressWarnings("serial") -class Notepad extends JPanel { +public class Notepad extends JPanel { protected static Properties properties; private static ResourceBundle resources; diff --git a/src/hotspot/cpu/aarch64/aarch64-asmtest.py b/src/hotspot/cpu/aarch64/aarch64-asmtest.py index 104104b09a9..615fe5e045f 100644 --- a/src/hotspot/cpu/aarch64/aarch64-asmtest.py +++ b/src/hotspot/cpu/aarch64/aarch64-asmtest.py @@ -1,4 +1,7 @@ +import os import random +import subprocess +import sys AARCH64_AS = "as" AARCH64_OBJDUMP = "objdump" @@ -129,6 +132,8 @@ class OperandFactory: _modes = {'x' : GeneralRegister, 'w' : GeneralRegister, + 'b' : FloatRegister, + 'h' : FloatRegister, 's' : FloatRegister, 'd' : FloatRegister, 'z' : FloatZero, @@ -198,16 +203,16 @@ def __init__(self, name, mode): self.isFloat = (mode == 'd') | (mode == 's') if self.isFloat: self.isWord = mode != 'd' - self.asmRegPrefix = ["d", "s"][self.isWord] + self.asmRegPrefix = ["d", "s"][self.isWord] else: self.isWord = mode != 'x' self.asmRegPrefix = ["x", "w"][self.isWord] - + def name(self): return self._name + (self.mode if self.mode != 'x' else '') - + def aname(self): - return (self._name+mode if (mode == 'b' or mode == 'h') + return (self._name+mode if (mode == 'b' or mode == 'h') else self._name) class ThreeRegInstruction(Instruction): @@ -220,17 +225,17 @@ def generate(self): def cstr(self): return (super(ThreeRegInstruction, self).cstr() - + ('%s, %s, %s' + + ('%s, %s, %s' % (self.reg[0], self.reg[1], self.reg[2]))) - + def astr(self): prefix = self.asmRegPrefix return (super(ThreeRegInstruction, self).astr() - + ('%s, %s, %s' + + ('%s, %s, %s' % (self.reg[0].astr(prefix), self.reg[1].astr(prefix), self.reg[2].astr(prefix)))) - + class FourRegInstruction(ThreeRegInstruction): def generate(self): @@ -241,12 +246,12 @@ def generate(self): def cstr(self): return (super(FourRegInstruction, self).cstr() + (', %s' % self.reg[3])) - + def astr(self): prefix = self.asmRegPrefix return (super(FourRegInstruction, self).astr() + (', %s' % self.reg[3].astr(prefix))) - + class TwoRegInstruction(Instruction): def generate(self): @@ -261,17 +266,17 @@ def cstr(self): def astr(self): prefix = self.asmRegPrefix return (super(TwoRegInstruction, self).astr() - + ('%s, %s' + + ('%s, %s' % (self.reg[0].astr(prefix), self.reg[1].astr(prefix)))) - + class TwoRegImmedInstruction(TwoRegInstruction): def generate(self): super(TwoRegImmedInstruction, self).generate() self.immed = random.randint(0, 1<<11 -1) return self - + def cstr(self): return (super(TwoRegImmedInstruction, self).cstr() + ', %su' % self.immed) @@ -301,9 +306,9 @@ def generate(self): self.kind = ShiftKind().generate() self.distance = random.randint(0, (1<<5)-1 if self.isWord else (1<<6)-1) return self - + def cstr(self): - return ('%s, Assembler::%s, %s);' + return ('%s, Assembler::%s, %s);' % (ThreeRegInstruction.cstr(self), self.kind.cstr(), self.distance)) @@ -314,9 +319,9 @@ def astr(self): self.distance)) class AddSubCarryOp(ThreeRegInstruction): - + def cstr(self): - return ('%s);' + return ('%s);' % (ThreeRegInstruction.cstr(self))) class 
AddSubExtendedOp(ThreeRegInstruction): @@ -332,76 +337,75 @@ def generate(self): def cstr(self): return (super(AddSubExtendedOp, self).cstr() - + (", ext::" + AddSubExtendedOp.optNames[self.option] + + (", ext::" + AddSubExtendedOp.optNames[self.option] + ", " + str(self.amount) + ");")) - + def astr(self): return (super(AddSubExtendedOp, self).astr() - + (", " + AddSubExtendedOp.optNames[self.option] + + (", " + AddSubExtendedOp.optNames[self.option] + " #" + str(self.amount))) class AddSubImmOp(TwoRegImmedInstruction): def cstr(self): return super(AddSubImmOp, self).cstr() + ");" - + class LogicalImmOp(AddSubImmOp): # These tables are legal immediate logical operands immediates32 \ - = [0x1, 0x3f, 0x1f0, 0x7e0, - 0x1c00, 0x3ff0, 0x8000, 0x1e000, - 0x3e000, 0x78000, 0xe0000, 0x100000, - 0x1fffe0, 0x3fe000, 0x780000, 0x7ffff8, - 0xff8000, 0x1800180, 0x1fffc00, 0x3c003c0, - 0x3ffff00, 0x7c00000, 0x7fffe00, 0xf000f00, - 0xfffe000, 0x18181818, 0x1ffc0000, 0x1ffffffe, - 0x3f003f00, 0x3fffe000, 0x60006000, 0x7f807f80, - 0x7ffffc00, 0x800001ff, 0x803fffff, 0x9f9f9f9f, - 0xc0000fff, 0xc0c0c0c0, 0xe0000000, 0xe003e003, - 0xe3ffffff, 0xf0000fff, 0xf0f0f0f0, 0xf80000ff, - 0xf83ff83f, 0xfc00007f, 0xfc1fffff, 0xfe0001ff, - 0xfe3fffff, 0xff003fff, 0xff800003, 0xff87ff87, - 0xffc00fff, 0xffe0000f, 0xffefffef, 0xfff1fff1, - 0xfff83fff, 0xfffc0fff, 0xfffe0fff, 0xffff3fff, - 0xffffc007, 0xffffe1ff, 0xfffff80f, 0xfffffe07, + = [0x1, 0x3f, 0x1f0, 0x7e0, + 0x1c00, 0x3ff0, 0x8000, 0x1e000, + 0x3e000, 0x78000, 0xe0000, 0x100000, + 0x1fffe0, 0x3fe000, 0x780000, 0x7ffff8, + 0xff8000, 0x1800180, 0x1fffc00, 0x3c003c0, + 0x3ffff00, 0x7c00000, 0x7fffe00, 0xf000f00, + 0xfffe000, 0x18181818, 0x1ffc0000, 0x1ffffffe, + 0x3f003f00, 0x3fffe000, 0x60006000, 0x7f807f80, + 0x7ffffc00, 0x800001ff, 0x803fffff, 0x9f9f9f9f, + 0xc0000fff, 0xc0c0c0c0, 0xe0000000, 0xe003e003, + 0xe3ffffff, 0xf0000fff, 0xf0f0f0f0, 0xf80000ff, + 0xf83ff83f, 0xfc00007f, 0xfc1fffff, 0xfe0001ff, + 0xfe3fffff, 0xff003fff, 0xff800003, 0xff87ff87, + 0xffc00fff, 0xffe0000f, 0xffefffef, 0xfff1fff1, + 0xfff83fff, 0xfffc0fff, 0xfffe0fff, 0xffff3fff, + 0xffffc007, 0xffffe1ff, 0xfffff80f, 0xfffffe07, 0xffffffbf, 0xfffffffd] immediates \ - = [0x1, 0x1f80, 0x3fff0, 0x3ffffc, - 0x3fe0000, 0x1ffc0000, 0xf8000000, 0x3ffffc000, - 0xffffffe00, 0x3ffffff800, 0xffffc00000, 0x3f000000000, - 0x7fffffff800, 0x1fe000001fe0, 0x3ffffff80000, 0xc00000000000, - 0x1ffc000000000, 0x3ffff0003ffff, 0x7ffffffe00000, 0xfffffffffc000, - 0x1ffffffffffc00, 0x3fffffffffff00, 0x7ffffffffffc00, 0xffffffffff8000, - 0x1ffffffff800000, 0x3fffffc03fffffc, 0x7fffc0000000000, 0xff80ff80ff80ff8, - 0x1c00000000000000, 0x1fffffffffff0000, 0x3fffff803fffff80, 0x7fc000007fc00000, - 0x8000000000000000, 0x803fffff803fffff, 0xc000007fc000007f, 0xe00000000000ffff, - 0xe3ffffffffffffff, 0xf007f007f007f007, 0xf80003ffffffffff, 0xfc000003fc000003, - 0xfe000000007fffff, 0xff00000000007fff, 0xff800000000003ff, 0xffc00000000000ff, - 0xffe00000000003ff, 0xfff0000000003fff, 0xfff80000001fffff, 0xfffc0000fffc0000, - 0xfffe003fffffffff, 0xffff3fffffffffff, 0xffffc0000007ffff, 0xffffe01fffffe01f, - 0xfffff800000007ff, 0xfffffc0fffffffff, 0xffffff00003fffff, 0xffffffc0000007ff, - 0xfffffff0000001ff, 0xfffffffc00003fff, 0xffffffff07ffffff, 0xffffffffe003ffff, + = [0x1, 0x1f80, 0x3fff0, 0x3ffffc, + 0x3fe0000, 0x1ffc0000, 0xf8000000, 0x3ffffc000, + 0xffffffe00, 0x3ffffff800, 0xffffc00000, 0x3f000000000, + 0x7fffffff800, 0x1fe000001fe0, 0x3ffffff80000, 0xc00000000000, + 0x1ffc000000000, 0x3ffff0003ffff, 0x7ffffffe00000, 
0xfffffffffc000, + 0x1ffffffffffc00, 0x3fffffffffff00, 0x7ffffffffffc00, 0xffffffffff8000, + 0x1ffffffff800000, 0x3fffffc03fffffc, 0x7fffc0000000000, 0xff80ff80ff80ff8, + 0x1c00000000000000, 0x1fffffffffff0000, 0x3fffff803fffff80, 0x7fc000007fc00000, + 0x8000000000000000, 0x803fffff803fffff, 0xc000007fc000007f, 0xe00000000000ffff, + 0xe3ffffffffffffff, 0xf007f007f007f007, 0xf80003ffffffffff, 0xfc000003fc000003, + 0xfe000000007fffff, 0xff00000000007fff, 0xff800000000003ff, 0xffc00000000000ff, + 0xffe00000000003ff, 0xfff0000000003fff, 0xfff80000001fffff, 0xfffc0000fffc0000, + 0xfffe003fffffffff, 0xffff3fffffffffff, 0xffffc0000007ffff, 0xffffe01fffffe01f, + 0xfffff800000007ff, 0xfffffc0fffffffff, 0xffffff00003fffff, 0xffffffc0000007ff, + 0xfffffff0000001ff, 0xfffffffc00003fff, 0xffffffff07ffffff, 0xffffffffe003ffff, 0xfffffffffc01ffff, 0xffffffffffc00003, 0xfffffffffffc000f, 0xffffffffffffe07f] def generate(self): AddSubImmOp.generate(self) self.immed = \ self.immediates32[random.randint(0, len(self.immediates32)-1)] \ - if self.isWord \ - else \ - self.immediates[random.randint(0, len(self.immediates)-1)] - + if self.isWord else \ + self.immediates[random.randint(0, len(self.immediates)-1)] + return self - + def astr(self): return (super(TwoRegImmedInstruction, self).astr() + ', #0x%x' % self.immed) def cstr(self): return super(AddSubImmOp, self).cstr() + "ll);" - + class MultiOp(): def multipleForms(self): @@ -422,9 +426,9 @@ def astr(self): return Instruction.astr(self) + "%s" class RegAndAbsOp(MultiOp, Instruction): - + def multipleForms(self): - if self.name() == "adrp": + if self.name() == "adrp": # We can only test one form of adrp because anything other # than "adrp ." requires relocs in the assembler output return 1 @@ -434,11 +438,11 @@ def generate(self): Instruction.generate(self) self.reg = GeneralRegister().generate() return self - + def cstr(self): if self.name() == "adrp": return "__ _adrp(" + "%s, %s);" % (self.reg, "%s") - return (super(RegAndAbsOp, self).cstr() + return (super(RegAndAbsOp, self).cstr() + "%s, %s);" % (self.reg, "%s")) def astr(self): @@ -446,14 +450,14 @@ def astr(self): + self.reg.astr(self.asmRegPrefix) + ", %s") class RegImmAbsOp(RegAndAbsOp): - + def cstr(self): return (Instruction.cstr(self) + "%s, %s, %s);" % (self.reg, self.immed, "%s")) def astr(self): return (Instruction.astr(self) - + ("%s, #%s, %s" + + ("%s, #%s, %s" % (self.reg.astr(self.asmRegPrefix), self.immed, "%s"))) def generate(self): @@ -462,7 +466,7 @@ def generate(self): return self class MoveWideImmOp(RegImmAbsOp): - + def multipleForms(self): return 0 @@ -472,8 +476,8 @@ def cstr(self): def astr(self): return (Instruction.astr(self) - + ("%s, #%s, lsl %s" - % (self.reg.astr(self.asmRegPrefix), + + ("%s, #%s, lsl %s" + % (self.reg.astr(self.asmRegPrefix), self.immed, self.shift))) def generate(self): @@ -486,7 +490,7 @@ def generate(self): return self class BitfieldOp(TwoRegInstruction): - + def cstr(self): return (Instruction.cstr(self) + ("%s, %s, %s, %s);" @@ -513,16 +517,16 @@ def generate(self): def cstr(self): return (ThreeRegInstruction.cstr(self) + (", %s);" % self.lsb)) - + def astr(self): return (ThreeRegInstruction.astr(self) + (", #%s" % self.lsb)) - + class CondBranchOp(MultiOp, Instruction): def cstr(self): return "__ br(Assembler::" + self.name() + ", %s);" - + def astr(self): return "b." 
+ self.name() + "\t%s" @@ -530,10 +534,10 @@ class ImmOp(Instruction): def cstr(self): return "%s%s);" % (Instruction.cstr(self), self.immed) - + def astr(self): return Instruction.astr(self) + "#" + str(self.immed) - + def generate(self): self.immed = random.randint(0, 1<<16 -1) return self @@ -542,6 +546,8 @@ class Op(Instruction): def cstr(self): return Instruction.cstr(self) + ");" + def astr(self): + return self.aname(); class SystemOp(Instruction): @@ -573,11 +579,11 @@ def generate(self): return self def cstr(self): - return (super(ConditionalCompareOp, self).cstr() + ", " + return (super(ConditionalCompareOp, self).cstr() + ", " + "Assembler::" + conditionCodes[self.cond] + ");") def astr(self): - return (super(ConditionalCompareOp, self).astr() + + return (super(ConditionalCompareOp, self).astr() + ", " + conditionCodes[self.cond]) class ConditionalCompareImmedOp(Instruction): @@ -596,33 +602,33 @@ def cstr(self): + "Assembler::" + conditionCodes[self.cond] + ");") def astr(self): - return (Instruction.astr(self) - + self.reg.astr(self.asmRegPrefix) + return (Instruction.astr(self) + + self.reg.astr(self.asmRegPrefix) + ", #" + str(self.immed) + ", #" + str(self.immed2) + ", " + conditionCodes[self.cond]) class TwoRegOp(TwoRegInstruction): - + def cstr(self): return TwoRegInstruction.cstr(self) + ");" class ThreeRegOp(ThreeRegInstruction): - + def cstr(self): return ThreeRegInstruction.cstr(self) + ");" class FourRegMulOp(FourRegInstruction): - + def cstr(self): return FourRegInstruction.cstr(self) + ");" def astr(self): isMaddsub = self.name().startswith("madd") | self.name().startswith("msub") midPrefix = self.asmRegPrefix if isMaddsub else "w" - return (Instruction.astr(self) - + self.reg[0].astr(self.asmRegPrefix) - + ", " + self.reg[1].astr(midPrefix) + return (Instruction.astr(self) + + self.reg[0].astr(self.asmRegPrefix) + + ", " + self.reg[1].astr(midPrefix) + ", " + self.reg[2].astr(midPrefix) + ", " + self.reg[3].astr(self.asmRegPrefix)) @@ -638,8 +644,8 @@ def cstr(self): + "Assembler::" + conditionCodes[self.cond] + ");") def astr(self): - return (ThreeRegInstruction.astr(self) - + ", " + conditionCodes[self.cond]) + return (ThreeRegInstruction.astr(self) + + ", " + conditionCodes[self.cond]) class LoadStoreExclusiveOp(InstructionWithModes): @@ -651,7 +657,7 @@ def astr(self): result = self.aname() + '\t' regs = list(self.regs) index = regs.pop() # The last reg is the index register - prefix = ('x' if (self.mode == 'x') + prefix = ('x' if (self.mode == 'x') & ((self.name().startswith("ld")) | (self.name().startswith("stlr"))) # Ewww :-( else 'w') @@ -698,17 +704,17 @@ def aname(self): return self._name class Address(object): - + base_plus_unscaled_offset, pre, post, base_plus_reg, \ base_plus_scaled_offset, pcrel, post_reg, base_only = range(8) - kinds = ["base_plus_unscaled_offset", "pre", "post", "base_plus_reg", + kinds = ["base_plus_unscaled_offset", "pre", "post", "base_plus_reg", "base_plus_scaled_offset", "pcrel", "post_reg", "base_only"] extend_kinds = ["uxtw", "lsl", "sxtw", "sxtx"] @classmethod def kindToStr(cls, i): return cls.kinds[i] - + def generate(self, kind, shift_distance): self.kind = kind self.base = GeneralRegister().generate() @@ -738,7 +744,7 @@ def __str__(self): Address.pcrel: "", Address.base_plus_reg: "Address(%s, %s, Address::%s(%s))" \ % (self.base, self.index, self.extend_kind, self.shift_distance), - Address.base_plus_scaled_offset: + Address.base_plus_scaled_offset: "Address(%s, %s)" % (self.base, self.offset) } [self.kind] if (self.kind 
== Address.pcrel): result = ["__ pc()", "back", "forth"][self.offset] @@ -758,7 +764,7 @@ def astr(self, prefix): Address.base_only: "[%s]" % (self.base.astr(prefix)), Address.pcrel: "", Address.base_plus_reg: "[%s, %s, %s #%s]" \ - % (self.base.astr(prefix), self.index.astr(extend_prefix), + % (self.base.astr(prefix), self.index.astr(extend_prefix), self.extend_kind, self.shift_distance), Address.base_plus_scaled_offset: \ "[%s, %s]" \ @@ -767,7 +773,7 @@ def astr(self, prefix): if (self.kind == Address.pcrel): result = [".", "back", "forth"][self.offset] return result - + class LoadStoreOp(InstructionWithModes): def __init__(self, args): @@ -822,14 +828,14 @@ def aname(self): class LoadStorePairOp(InstructionWithModes): numRegs = 2 - + def __init__(self, args): name, self.asmname, self.kind, mode = args InstructionWithModes.__init__(self, name, mode) self.offset = random.randint(-1<<4, 1<<4-1) << 4 - + def generate(self): - self.reg = [OperandFactory.create(self.mode).generate() + self.reg = [OperandFactory.create(self.mode).generate() for i in range(self.numRegs)] self.base = OperandFactory.create('x').generate() kindStr = Address.kindToStr(self.kind); @@ -846,8 +852,8 @@ def astr(self): address = ["[%s, #%s]", "[%s, #%s]!", "[%s], #%s"][self.kind] address = address % (self.base.astr('x'), self.offset) result = "%s\t%s, %s, %s" \ - % (self.asmname, - self.reg[0].astr(self.asmRegPrefix), + % (self.asmname, + self.reg[0].astr(self.asmRegPrefix), self.reg[1].astr(self.asmRegPrefix), address) return result @@ -875,7 +881,7 @@ def __init__(self, args): Instruction.__init__(self, name) def generate(self): - self.reg = [OperandFactory.create(self.modes[i]).generate() + self.reg = [OperandFactory.create(self.modes[i]).generate() for i in range(self.numRegs)] return self @@ -884,7 +890,7 @@ def cstr(self): return (formatStr % tuple([Instruction.cstr(self)] + [str(self.reg[i]) for i in range(self.numRegs)])) # Yowza - + def astr(self): formatStr = "%s%s" + ''.join([", %s" for i in range(1, self.numRegs)]) return (formatStr @@ -985,7 +991,7 @@ def astr(self): moreReg + [str(self.reg[2]) + self._width.astr()]) -class LdStSIMDOp(Instruction): +class LdStNEONOp(Instruction): def __init__(self, args): self._name, self.regnum, self.arrangement, self.addresskind = args @@ -1004,7 +1010,7 @@ def generate(self): return self def cstr(self): - buf = super(LdStSIMDOp, self).cstr() + str(self._firstSIMDreg) + buf = super(LdStNEONOp, self).cstr() + str(self._firstSIMDreg) current = self._firstSIMDreg for cnt in range(1, self.regnum): buf = '%s, %s' % (buf, current.nextReg()) @@ -1022,6 +1028,57 @@ def astr(self): def aname(self): return self._name +class NEONReduceInstruction(Instruction): + def __init__(self, args): + self._name, self.insname, self.arrangement = args + + def generate(self): + current = FloatRegister().generate() + self.dstSIMDreg = current + self.srcSIMDreg = current.nextReg() + return self + + def cstr(self): + buf = Instruction.cstr(self) + str(self.dstSIMDreg) + buf = '%s, __ T%s, %s);' % (buf, self.arrangement, self.srcSIMDreg) + return buf + + def astr(self): + buf = '%s\t%s' % (self.insname, self.dstSIMDreg.astr(self.arrangement[-1].lower())) + buf = '%s, %s.%s' % (buf, self.srcSIMDreg, self.arrangement) + return buf + + def aname(self): + return self._name + +class CommonNEONInstruction(Instruction): + def __init__(self, args): + self._name, self.insname, self.arrangement = args + + def generate(self): + self._firstSIMDreg = FloatRegister().generate() + return self + + def cstr(self): 
+ buf = Instruction.cstr(self) + str(self._firstSIMDreg) + buf = '%s, __ T%s' % (buf, self.arrangement) + current = self._firstSIMDreg + for cnt in range(1, self.numRegs): + buf = '%s, %s' % (buf, current.nextReg()) + current = current.nextReg() + return '%s);' % (buf) + + def astr(self): + buf = '%s\t%s.%s' % (self.insname, self._firstSIMDreg, self.arrangement) + current = self._firstSIMDreg + for cnt in range(1, self.numRegs): + buf = '%s, %s.%s' % (buf, current.nextReg(), self.arrangement) + current = current.nextReg() + return buf + + def aname(self): + return self._name + class SHA512SIMDOp(Instruction): def generate(self): @@ -1053,6 +1110,44 @@ def astr(self): + ('\t%s, %s, %s.2D' % (self.reg[0].astr("q"), self.reg[1].astr("q"), self.reg[2].astr("v")))) +class SHA3SIMDOp(Instruction): + + def generate(self): + if ((self._name == 'eor3') or (self._name == 'bcax')): + self.reg = [FloatRegister().generate(), FloatRegister().generate(), + FloatRegister().generate(), FloatRegister().generate()] + else: + self.reg = [FloatRegister().generate(), FloatRegister().generate(), + FloatRegister().generate()] + if (self._name == 'xar'): + self.imm6 = random.randint(0, 63) + return self + + def cstr(self): + if ((self._name == 'eor3') or (self._name == 'bcax')): + return (super(SHA3SIMDOp, self).cstr() + + ('%s, __ T16B, %s, %s, %s);' % (self.reg[0], self.reg[1], self.reg[2], self.reg[3]))) + elif (self._name == 'rax1'): + return (super(SHA3SIMDOp, self).cstr() + + ('%s, __ T2D, %s, %s);' % (self.reg[0], self.reg[1], self.reg[2]))) + else: + return (super(SHA3SIMDOp, self).cstr() + + ('%s, __ T2D, %s, %s, %s);' % (self.reg[0], self.reg[1], self.reg[2], self.imm6))) + + def astr(self): + if ((self._name == 'eor3') or (self._name == 'bcax')): + return (super(SHA3SIMDOp, self).astr() + + ('\t%s.16B, %s.16B, %s.16B, %s.16B' % (self.reg[0].astr("v"), self.reg[1].astr("v"), + self.reg[2].astr("v"), self.reg[3].astr("v")))) + elif (self._name == 'rax1'): + return (super(SHA3SIMDOp, self).astr() + + ('\t%s.2D, %s.2D, %s.2D') % (self.reg[0].astr("v"), self.reg[1].astr("v"), + self.reg[2].astr("v"))) + else: + return (super(SHA3SIMDOp, self).astr() + + ('\t%s.2D, %s.2D, %s.2D, #%s') % (self.reg[0].astr("v"), self.reg[1].astr("v"), + self.reg[2].astr("v"), self.imm6)) + class LSEOp(Instruction): def __init__(self, args): self._name, self.asmname, self.size, self.suffix = args @@ -1097,6 +1192,12 @@ def aname(self): def cname(self): return self._cname +class TwoRegNEONOp(CommonNEONInstruction): + numRegs = 2 + +class ThreeRegNEONOp(TwoRegNEONOp): + numRegs = 3 + class SpecialCases(Instruction): def __init__(self, data): self._name = data[0] @@ -1129,6 +1230,7 @@ def generate(kind, names): outfile = open("aarch64ops.s", "w") +# To minimize the changes of assembler test code random.seed(0) print "// BEGIN Generated code -- do not edit" @@ -1139,18 +1241,18 @@ def generate(kind, names): outfile.write("back:\n") -generate (ArithOp, +generate (ArithOp, [ "add", "sub", "adds", "subs", "addw", "subw", "addsw", "subsw", "and", "orr", "eor", "ands", - "andw", "orrw", "eorw", "andsw", - "bic", "orn", "eon", "bics", + "andw", "orrw", "eorw", "andsw", + "bic", "orn", "eon", "bics", "bicw", "ornw", "eonw", "bicsw" ]) -generate (AddSubImmOp, +generate (AddSubImmOp, [ "addw", "addsw", "subw", "subsw", "add", "adds", "sub", "subs"]) -generate (LogicalImmOp, +generate (LogicalImmOp, [ "andw", "orrw", "eorw", "andsw", "and", "orr", "eor", "ands"]) @@ -1191,26 +1293,26 @@ def generate(kind, names): ["stxp", mode, 4], ["stlxp", 
mode, 4]]) for kind in range(6): - print "\n// " + Address.kindToStr(kind), + sys.stdout.write("\n// " + Address.kindToStr(kind)) if kind != Address.pcrel: - generate (LoadStoreOp, - [["str", "str", kind, "x"], ["str", "str", kind, "w"], + generate (LoadStoreOp, + [["str", "str", kind, "x"], ["str", "str", kind, "w"], ["str", "strb", kind, "b"], ["str", "strh", kind, "h"], - ["ldr", "ldr", kind, "x"], ["ldr", "ldr", kind, "w"], + ["ldr", "ldr", kind, "x"], ["ldr", "ldr", kind, "w"], ["ldr", "ldrb", kind, "b"], ["ldr", "ldrh", kind, "h"], - ["ldrsb", "ldrsb", kind, "x"], ["ldrsh", "ldrsh", kind, "x"], + ["ldrsb", "ldrsb", kind, "x"], ["ldrsh", "ldrsh", kind, "x"], ["ldrsh", "ldrsh", kind, "w"], ["ldrsw", "ldrsw", kind, "x"], - ["ldr", "ldr", kind, "d"], ["ldr", "ldr", kind, "s"], - ["str", "str", kind, "d"], ["str", "str", kind, "s"], + ["ldr", "ldr", kind, "d"], ["ldr", "ldr", kind, "s"], + ["str", "str", kind, "d"], ["str", "str", kind, "s"], ]) else: - generate (LoadStoreOp, + generate (LoadStoreOp, [["ldr", "ldr", kind, "x"], ["ldr", "ldr", kind, "w"]]) - + for kind in (Address.base_plus_unscaled_offset, Address.pcrel, Address.base_plus_reg, \ Address.base_plus_scaled_offset): - generate (LoadStoreOp, + generate (LoadStoreOp, [["prfm", "prfm\tPLDL1KEEP,", kind, "x"]]) generate(AddSubCarryOp, ["adcw", "adcsw", "sbcw", "sbcsw", "adc", "adcs", "sbc", "sbcs"]) @@ -1219,32 +1321,32 @@ def generate(kind, names): generate(ConditionalCompareOp, ["ccmnw", "ccmpw", "ccmn", "ccmp"]) generate(ConditionalCompareImmedOp, ["ccmnw", "ccmpw", "ccmn", "ccmp"]) -generate(ConditionalSelectOp, +generate(ConditionalSelectOp, ["cselw", "csincw", "csinvw", "csnegw", "csel", "csinc", "csinv", "csneg"]) -generate(TwoRegOp, - ["rbitw", "rev16w", "revw", "clzw", "clsw", "rbit", +generate(TwoRegOp, + ["rbitw", "rev16w", "revw", "clzw", "clsw", "rbit", "rev16", "rev32", "rev", "clz", "cls"]) -generate(ThreeRegOp, - ["udivw", "sdivw", "lslvw", "lsrvw", "asrvw", "rorvw", "udiv", "sdiv", +generate(ThreeRegOp, + ["udivw", "sdivw", "lslvw", "lsrvw", "asrvw", "rorvw", "udiv", "sdiv", "lslv", "lsrv", "asrv", "rorv", "umulh", "smulh"]) -generate(FourRegMulOp, +generate(FourRegMulOp, ["maddw", "msubw", "madd", "msub", "smaddl", "smsubl", "umaddl", "umsubl"]) -generate(ThreeRegFloatOp, - [["fmuls", "sss"], ["fdivs", "sss"], ["fadds", "sss"], ["fsubs", "sss"], +generate(ThreeRegFloatOp, + [["fmuls", "sss"], ["fdivs", "sss"], ["fadds", "sss"], ["fsubs", "sss"], ["fmuls", "sss"], - ["fmuld", "ddd"], ["fdivd", "ddd"], ["faddd", "ddd"], ["fsubd", "ddd"], + ["fmuld", "ddd"], ["fdivd", "ddd"], ["faddd", "ddd"], ["fsubd", "ddd"], ["fmuld", "ddd"]]) -generate(FourRegFloatOp, - [["fmadds", "ssss"], ["fmsubs", "ssss"], ["fnmadds", "ssss"], ["fnmadds", "ssss"], +generate(FourRegFloatOp, + [["fmadds", "ssss"], ["fmsubs", "ssss"], ["fnmadds", "ssss"], ["fnmadds", "ssss"], ["fmaddd", "dddd"], ["fmsubd", "dddd"], ["fnmaddd", "dddd"], ["fnmaddd", "dddd"],]) -generate(TwoRegFloatOp, - [["fmovs", "ss"], ["fabss", "ss"], ["fnegs", "ss"], ["fsqrts", "ss"], +generate(TwoRegFloatOp, + [["fmovs", "ss"], ["fabss", "ss"], ["fnegs", "ss"], ["fsqrts", "ss"], ["fcvts", "ds"], - ["fmovd", "dd"], ["fabsd", "dd"], ["fnegd", "dd"], ["fsqrtd", "dd"], + ["fmovd", "dd"], ["fabsd", "dd"], ["fnegd", "dd"], ["fsqrtd", "dd"], ["fcvtd", "sd"], ]) @@ -1255,18 +1357,18 @@ def generate(kind, names): ["fmovs", "fmov", "ws"], ["fmovd", "fmov", "xd"], ["fmovs", "fmov", "sw"], ["fmovd", "fmov", "dx"]]) -generate(TwoRegFloatOp, [["fcmps", "ss"], ["fcmpd", "dd"], 
+generate(TwoRegFloatOp, [["fcmps", "ss"], ["fcmpd", "dd"], ["fcmps", "sz"], ["fcmpd", "dz"]]) for kind in range(3): generate(LoadStorePairOp, [["stp", "stp", kind, "w"], ["ldp", "ldp", kind, "w"], - ["ldpsw", "ldpsw", kind, "x"], + ["ldpsw", "ldpsw", kind, "x"], ["stp", "stp", kind, "x"], ["ldp", "ldp", kind, "x"] ]) generate(LoadStorePairOp, [["stnp", "stnp", 0, "w"], ["ldnp", "ldnp", 0, "w"], ["stnp", "stnp", 0, "x"], ["ldnp", "ldnp", 0, "x"]]) -generate(LdStSIMDOp, [["ld1", 1, "8B", Address.base_only], +generate(LdStNEONOp, [["ld1", 1, "8B", Address.base_only], ["ld1", 2, "16B", Address.post], ["ld1", 3, "1D", Address.post_reg], ["ld1", 4, "8H", Address.post], @@ -1290,7 +1392,92 @@ def generate(kind, names): ["ld4r", 4, "2S", Address.post_reg], ]) -generate(SHA512SIMDOp, ["sha512h", "sha512h2", "sha512su0", "sha512su1"]) +generate(NEONReduceInstruction, + [["addv", "addv", "8B"], ["addv", "addv", "16B"], + ["addv", "addv", "4H"], ["addv", "addv", "8H"], + ["addv", "addv", "4S"], + ["smaxv", "smaxv", "8B"], ["smaxv", "smaxv", "16B"], + ["smaxv", "smaxv", "4H"], ["smaxv", "smaxv", "8H"], + ["smaxv", "smaxv", "4S"], ["fmaxv", "fmaxv", "4S"], + ["sminv", "sminv", "8B"], ["sminv", "sminv", "16B"], + ["sminv", "sminv", "4H"], ["sminv", "sminv", "8H"], + ["sminv", "sminv", "4S"], ["fminv", "fminv", "4S"], + ]) + +generate(TwoRegNEONOp, + [["absr", "abs", "8B"], ["absr", "abs", "16B"], + ["absr", "abs", "4H"], ["absr", "abs", "8H"], + ["absr", "abs", "2S"], ["absr", "abs", "4S"], + ["absr", "abs", "2D"], + ["fabs", "fabs", "2S"], ["fabs", "fabs", "4S"], + ["fabs", "fabs", "2D"], + ["fneg", "fneg", "2S"], ["fneg", "fneg", "4S"], + ["fneg", "fneg", "2D"], + ["fsqrt", "fsqrt", "2S"], ["fsqrt", "fsqrt", "4S"], + ["fsqrt", "fsqrt", "2D"], + ["notr", "not", "8B"], ["notr", "not", "16B"], + ]) + +generate(ThreeRegNEONOp, + [["andr", "and", "8B"], ["andr", "and", "16B"], + ["orr", "orr", "8B"], ["orr", "orr", "16B"], + ["eor", "eor", "8B"], ["eor", "eor", "16B"], + ["addv", "add", "8B"], ["addv", "add", "16B"], + ["addv", "add", "4H"], ["addv", "add", "8H"], + ["addv", "add", "2S"], ["addv", "add", "4S"], + ["addv", "add", "2D"], + ["fadd", "fadd", "2S"], ["fadd", "fadd", "4S"], + ["fadd", "fadd", "2D"], + ["subv", "sub", "8B"], ["subv", "sub", "16B"], + ["subv", "sub", "4H"], ["subv", "sub", "8H"], + ["subv", "sub", "2S"], ["subv", "sub", "4S"], + ["subv", "sub", "2D"], + ["fsub", "fsub", "2S"], ["fsub", "fsub", "4S"], + ["fsub", "fsub", "2D"], + ["mulv", "mul", "8B"], ["mulv", "mul", "16B"], + ["mulv", "mul", "4H"], ["mulv", "mul", "8H"], + ["mulv", "mul", "2S"], ["mulv", "mul", "4S"], + ["fmul", "fmul", "2S"], ["fmul", "fmul", "4S"], + ["fmul", "fmul", "2D"], + ["mlav", "mla", "4H"], ["mlav", "mla", "8H"], + ["mlav", "mla", "2S"], ["mlav", "mla", "4S"], + ["fmla", "fmla", "2S"], ["fmla", "fmla", "4S"], + ["fmla", "fmla", "2D"], + ["mlsv", "mls", "4H"], ["mlsv", "mls", "8H"], + ["mlsv", "mls", "2S"], ["mlsv", "mls", "4S"], + ["fmls", "fmls", "2S"], ["fmls", "fmls", "4S"], + ["fmls", "fmls", "2D"], + ["fdiv", "fdiv", "2S"], ["fdiv", "fdiv", "4S"], + ["fdiv", "fdiv", "2D"], + ["maxv", "smax", "8B"], ["maxv", "smax", "16B"], + ["maxv", "smax", "4H"], ["maxv", "smax", "8H"], + ["maxv", "smax", "2S"], ["maxv", "smax", "4S"], + ["fmax", "fmax", "2S"], ["fmax", "fmax", "4S"], + ["fmax", "fmax", "2D"], + ["minv", "smin", "8B"], ["minv", "smin", "16B"], + ["minv", "smin", "4H"], ["minv", "smin", "8H"], + ["minv", "smin", "2S"], ["minv", "smin", "4S"], + ["fmin", "fmin", "2S"], ["fmin", "fmin", "4S"], + 
["fmin", "fmin", "2D"], + ["cmeq", "cmeq", "8B"], ["cmeq", "cmeq", "16B"], + ["cmeq", "cmeq", "4H"], ["cmeq", "cmeq", "8H"], + ["cmeq", "cmeq", "2S"], ["cmeq", "cmeq", "4S"], + ["cmeq", "cmeq", "2D"], + ["fcmeq", "fcmeq", "2S"], ["fcmeq", "fcmeq", "4S"], + ["fcmeq", "fcmeq", "2D"], + ["cmgt", "cmgt", "8B"], ["cmgt", "cmgt", "16B"], + ["cmgt", "cmgt", "4H"], ["cmgt", "cmgt", "8H"], + ["cmgt", "cmgt", "2S"], ["cmgt", "cmgt", "4S"], + ["cmgt", "cmgt", "2D"], + ["fcmgt", "fcmgt", "2S"], ["fcmgt", "fcmgt", "4S"], + ["fcmgt", "fcmgt", "2D"], + ["cmge", "cmge", "8B"], ["cmge", "cmge", "16B"], + ["cmge", "cmge", "4H"], ["cmge", "cmge", "8H"], + ["cmge", "cmge", "2S"], ["cmge", "cmge", "4S"], + ["cmge", "cmge", "2D"], + ["fcmge", "fcmge", "2S"], ["fcmge", "fcmge", "4S"], + ["fcmge", "fcmge", "2D"], + ]) generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);", "ccmn\txzr, xzr, #3, LE"], ["ccmnw", "__ ccmnw(zr, zr, 5u, Assembler::EQ);", "ccmn\twzr, wzr, #5, EQ"], @@ -1344,9 +1531,9 @@ def generate(kind, names): ]) print "\n// FloatImmediateOp" -for float in ("2.0", "2.125", "4.0", "4.25", "8.0", "8.5", "16.0", "17.0", "0.125", - "0.1328125", "0.25", "0.265625", "0.5", "0.53125", "1.0", "1.0625", - "-2.0", "-2.125", "-4.0", "-4.25", "-8.0", "-8.5", "-16.0", "-17.0", +for float in ("2.0", "2.125", "4.0", "4.25", "8.0", "8.5", "16.0", "17.0", "0.125", + "0.1328125", "0.25", "0.265625", "0.5", "0.53125", "1.0", "1.0625", + "-2.0", "-2.125", "-4.0", "-4.25", "-8.0", "-8.5", "-16.0", "-17.0", "-0.125", "-0.1328125", "-0.25", "-0.265625", "-0.5", "-0.53125", "-1.0", "-1.0625"): astr = "fmov d0, #" + float cstr = "__ fmovd(v0, " + float + ");" @@ -1366,6 +1553,11 @@ def generate(kind, names): ["ldumin", "ldumin", size, suffix], ["ldumax", "ldumax", size, suffix]]); +# ARMv8.2A +generate(SHA3SIMDOp, ["bcax", "eor3", "rax1", "xar"]) + +generate(SHA512SIMDOp, ["sha512h", "sha512h2", "sha512su0", "sha512su1"]) + generate(SVEVectorOp, [["add", "ZZZ"], ["sub", "ZZZ"], ["fadd", "ZZZ"], @@ -1414,16 +1606,11 @@ def generate(kind, names): outfile.close() -import subprocess -import sys - -# compile for sve with 8.1 and sha2 because of lse atomics and sha512 crypto extension. -subprocess.check_call([AARCH64_AS, "-march=armv8.1-a+sha2+sve", "aarch64ops.s", "-o", "aarch64ops.o"]) +# compile for sve with 8.2 and sha3 because of SHA3 crypto extension. +subprocess.check_call([AARCH64_AS, "-march=armv8.2-a+sha3+sve", "aarch64ops.s", "-o", "aarch64ops.o"]) print -print "/*", -sys.stdout.flush() -subprocess.check_call([AARCH64_OBJDUMP, "-d", "aarch64ops.o"]) +print "/*" print "*/" subprocess.check_call([AARCH64_OBJCOPY, "-O", "binary", "-j", ".text", "aarch64ops.o", "aarch64ops.bin"]) @@ -1444,4 +1631,7 @@ def generate(kind, names): print "\n };" print "// END Generated code -- do not edit" +infile.close() +for f in ["aarch64ops.s", "aarch64ops.o", "aarch64ops.bin"]: + os.remove(f) diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index ede4040491e..ff82cd08cc1 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -614,9 +614,7 @@ alloc_class chunk3(RFLAGS); // Several register classes are automatically defined based upon information in // this architecture description. 
// 1) reg_class inline_cache_reg ( /* as def'd in frame section */ ) -// 2) reg_class compiler_method_reg ( /* as def'd in frame section */ ) -// 2) reg_class interpreter_method_reg ( /* as def'd in frame section */ ) -// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) +// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) // // Class for all 32 bit general purpose registers @@ -1755,7 +1753,9 @@ int MachCallDynamicJavaNode::ret_addr_offset() int MachCallRuntimeNode::ret_addr_offset() { // for generated stubs the call will be - // far_call(addr) + // bl(addr) + // or with far branches + // bl(trampoline_stub) // for real runtime callouts it will be six instructions // see aarch64_enc_java_to_runtime // adr(rscratch2, retaddr) @@ -1764,7 +1764,7 @@ int MachCallRuntimeNode::ret_addr_offset() { // blr(rscratch1) CodeBlob *cb = CodeCache::find_blob(_entry_point); if (cb) { - return MacroAssembler::far_branch_size(); + return 1 * NativeInstruction::instruction_size; } else { return 6 * NativeInstruction::instruction_size; } @@ -1966,9 +1966,10 @@ void MachEpilogNode::format(PhaseRegAlloc *ra_, outputStream *st) const { } if (do_polling() && C->is_method_compilation()) { - st->print("# touch polling page\n\t"); - st->print("ldr rscratch1, [rthread],#polling_page_offset\n\t"); - st->print("ldr zr, [rscratch1]"); + st->print("# test polling word\n\t"); + st->print("ldr rscratch1, [rthread],#%d\n\t", in_bytes(JavaThread::polling_word_offset())); + st->print("cmp sp, rscratch1\n\t"); + st->print("bhi #slow_path"); } } #endif @@ -1985,7 +1986,13 @@ void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { } if (do_polling() && C->is_method_compilation()) { - __ fetch_and_read_polling_page(rscratch1, relocInfo::poll_return_type); + Label dummy_label; + Label* code_stub = &dummy_label; + if (!C->output()->in_scratch_emit_size()) { + code_stub = &C->output()->safepoint_poll_table()->add_safepoint(__ offset()); + } + __ relocate(relocInfo::poll_return_type); + __ safepoint_poll(*code_stub, true /* at_return */, false /* acquire */, true /* in_nmethod */); } } @@ -2403,6 +2410,12 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType break; case Op_MulVL: return false; + case Op_VectorLoadShuffle: + case Op_VectorRearrange: + if (vlen < 4) { + return false; + } + break; default: break; } @@ -2414,6 +2427,10 @@ const bool Matcher::has_predicated_vectors(void) { return UseSVE > 0; } +bool Matcher::supports_vector_variable_shifts(void) { + return true; +} + const int Matcher::float_pressure(int default_pressure_threshold) { return default_pressure_threshold; } @@ -2459,11 +2476,18 @@ const int Matcher::min_vector_size(const BasicType bt) { if ((UseSVE > 0) && (MaxVectorSize >= 16)) { // Currently vector length less than SVE vector register size is not supported. return max_size; - } else { - // For the moment limit the vector size to 8 bytes with NEON. + } else { // NEON + // Limit the vector size to 8 bytes int size = 8 / type2aelembytes(bt); + if (bt == T_BYTE) { + // To support vector api shuffle/rearrange. + size = 4; + } else if (bt == T_BOOLEAN) { + // To support vector api load/store mask. + size = 2; + } if (size < 2) size = 2; - return size; + return MIN2(size,max_size); } } @@ -2482,6 +2506,9 @@ const uint Matcher::vector_ideal_reg(int len) { return Op_VecA; } switch(len) { + // For 16-bit/32-bit mask vector, reuse VecD. 
+ case 2: + case 4: case 8: return Op_VecD; case 16: return Op_VecX; } @@ -2581,11 +2608,6 @@ const bool Matcher::rematerialize_float_constants = false; // C code as the Java calling convention forces doubles to be aligned. const bool Matcher::misaligned_doubles_ok = true; -// No-op on amd64 -void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { - Unimplemented(); -} - // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. const bool Matcher::strict_fp_requires_explicit_rounding = false; @@ -3124,6 +3146,12 @@ encode %{ // END Non-volatile memory access // Vector loads and stores + enc_class aarch64_enc_ldrvH(vecD dst, memory mem) %{ + FloatRegister dst_reg = as_FloatRegister($dst$$reg); + loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::H, + $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{ FloatRegister dst_reg = as_FloatRegister($dst$$reg); loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S, @@ -3142,6 +3170,12 @@ encode %{ $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} + enc_class aarch64_enc_strvH(vecD src, memory mem) %{ + FloatRegister src_reg = as_FloatRegister($src$$reg); + loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::H, + $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + %} + enc_class aarch64_enc_strvS(vecD src, memory mem) %{ FloatRegister src_reg = as_FloatRegister($src$$reg); loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S, @@ -3733,12 +3767,19 @@ encode %{ if (!_method) { // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf); + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } } else { int method_index = resolved_method_index(cbuf); RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index) : static_call_Relocation::spec(method_index); call = __ trampoline_call(Address(addr, rspec), &cbuf); - + if (call == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } // Emit stub for static call address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); if (stub == NULL) { @@ -3746,10 +3787,8 @@ encode %{ return; } } - if (call == NULL) { - ciEnv::current()->record_failure("CodeCache is full"); - return; - } else if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) { + + if (UseSVE > 0 && Compile::current()->max_vector_size() >= 16) { // Only non uncommon_trap calls need to reinitialize ptrue. if (uncommon_trap_request() == 0) { __ reinitialize_ptrue(); @@ -4051,9 +4090,6 @@ frame %{ // Inline Cache Register or Method for I2C. inline_cache_reg(R12); - // Method Register when calling interpreter. 
- interpreter_method_reg(R12); - // Number of stack slots consumed by locking an object sync_stack_slots(2); @@ -4245,6 +4281,26 @@ operand immI_31() interface(CONST_INTER); %} +operand immI_2() +%{ + predicate(n->get_int() == 2); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + +operand immI_4() +%{ + predicate(n->get_int() == 4); + match(ConI); + + op_cost(0); + format %{ %} + interface(CONST_INTER); +%} + operand immI_8() %{ predicate(n->get_int() == 8); @@ -5621,16 +5677,6 @@ operand inline_cache_RegP(iRegP reg) interface(REG_INTER); %} -operand interpreter_method_RegP(iRegP reg) -%{ - constraint(ALLOC_IN_RC(method_reg)); // interpreter_method_reg - match(reg); - match(iRegPNoSp); - op_cost(0); - format %{ %} - interface(REG_INTER); -%} - // Thread Register operand thread_RegP(iRegP reg) %{ @@ -11215,6 +11261,7 @@ instruct rShiftL_reg_imm(iRegLNoSp dst, iRegL src1, immI src2) %{ %} // BEGIN This section of the file is automatically generated. Do not edit -------------- +// This section is generated from aarch64_ad.m4 // This pattern is automatically generated from aarch64_ad.m4 @@ -14685,7 +14732,11 @@ instruct clearArray_reg_reg(iRegL_R11 cnt, iRegP_R10 base, Universe dummy, rFlag format %{ "ClearArray $cnt, $base" %} ins_encode %{ - __ zero_words($base$$Register, $cnt$$Register); + address tpc = __ zero_words($base$$Register, $cnt$$Register); + if (tpc == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } %} ins_pipe(pipe_class_memory); @@ -15963,8 +16014,8 @@ instruct CallStaticJavaDirect(method meth) format %{ "call,static $meth \t// ==> " %} - ins_encode( aarch64_enc_java_static_call(meth), - aarch64_enc_call_epilog ); + ins_encode(aarch64_enc_java_static_call(meth), + aarch64_enc_call_epilog); ins_pipe(pipe_class_call); %} @@ -15982,8 +16033,8 @@ instruct CallDynamicJavaDirect(method meth) format %{ "CALL,dynamic $meth \t// ==> " %} - ins_encode( aarch64_enc_java_dynamic_call(meth), - aarch64_enc_call_epilog ); + ins_encode(aarch64_enc_java_dynamic_call(meth), + aarch64_enc_call_epilog); ins_pipe(pipe_class_call); %} @@ -16369,15 +16420,16 @@ instruct string_indexof_conUL(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, ins_pipe(pipe_class_memory); %} -instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, +instruct string_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, iRegINoSp tmp3, rFlagsReg cr) %{ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); + predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U); effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); - format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %} + format %{ "StringUTF16 IndexOf char[] $str1,$cnt1,$ch -> $result" %} ins_encode %{ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, @@ -16387,6 +16439,25 @@ instruct string_indexofU_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, ins_pipe(pipe_class_memory); %} +instruct stringL_indexof_char(iRegP_R1 str1, iRegI_R2 cnt1, iRegI_R3 ch, + iRegI_R0 result, iRegINoSp tmp1, iRegINoSp tmp2, + iRegINoSp tmp3, rFlagsReg cr) +%{ + match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); + predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); + effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, + TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); + + format %{ "StringLatin1 IndexOf char[] $str1,$cnt1,$ch -> $result" %} + + ins_encode %{ + __ 
stringL_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, + $result$$Register, $tmp1$$Register, $tmp2$$Register, + $tmp3$$Register); + %} + ins_pipe(pipe_class_memory); +%} + instruct string_equalsL(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt, iRegI_R0 result, rFlagsReg cr) %{ @@ -16429,10 +16500,14 @@ instruct array_equalsB(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result, format %{ "Array Equals $ary1,ary2 -> $result // KILL $tmp" %} ins_encode %{ - __ arrays_equals($ary1$$Register, $ary2$$Register, - $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, - $result$$Register, $tmp$$Register, 1); - %} + address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register, + $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, + $result$$Register, $tmp$$Register, 1); + if (tpc == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } + %} ins_pipe(pipe_class_memory); %} @@ -16446,9 +16521,13 @@ instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result, format %{ "Array Equals $ary1,ary2 -> $result // KILL $tmp" %} ins_encode %{ - __ arrays_equals($ary1$$Register, $ary2$$Register, - $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, - $result$$Register, $tmp$$Register, 2); + address tpc = __ arrays_equals($ary1$$Register, $ary2$$Register, + $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, + $result$$Register, $tmp$$Register, 2); + if (tpc == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } %} ins_pipe(pipe_class_memory); %} @@ -16459,7 +16538,11 @@ instruct has_negatives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg c effect(USE_KILL ary1, USE_KILL len, KILL cr); format %{ "has negatives byte[] $ary1,$len -> $result" %} ins_encode %{ - __ has_negatives($ary1$$Register, $len$$Register, $result$$Register); + address tpc = __ has_negatives($ary1$$Register, $len$$Register, $result$$Register); + if (tpc == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } %} ins_pipe( pipe_slow ); %} @@ -16492,8 +16575,13 @@ instruct string_inflate(Universe dummy, iRegP_R0 src, iRegP_R1 dst, iRegI_R2 len format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %} ins_encode %{ - __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, - $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register); + address tpc = __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, + $tmp1$$FloatRegister, $tmp2$$FloatRegister, + $tmp3$$FloatRegister, $tmp4$$Register); + if (tpc == NULL) { + ciEnv::current()->record_failure("CodeCache is full"); + return; + } %} ins_pipe(pipe_class_memory); %} @@ -16821,6 +16909,7 @@ instruct replicate2D(vecX dst, vRegD src) instruct reduce_add2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp, iRegINoSp tmp2) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (AddReductionVI isrc vsrc)); ins_cost(INSN_COST); effect(TEMP tmp, TEMP tmp2); @@ -16840,6 +16929,7 @@ instruct reduce_add2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp, instruct reduce_add4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iRegINoSp itmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (AddReductionVI isrc vsrc)); ins_cost(INSN_COST); effect(TEMP vtmp, TEMP itmp); @@ -16858,6 +16948,7 @@ instruct reduce_add4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iReg instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) %{ + 
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MulReductionVI isrc vsrc)); ins_cost(INSN_COST); effect(TEMP tmp, TEMP dst); @@ -16877,6 +16968,7 @@ instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp, iRegINoSp itmp) %{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (MulReductionVI isrc vsrc)); ins_cost(INSN_COST); effect(TEMP vtmp, TEMP itmp, TEMP dst); @@ -17958,8 +18050,7 @@ instruct vabs2F(vecD dst, vecD src) ins_cost(INSN_COST * 3); format %{ "fabs $dst,$src\t# vector (2S)" %} ins_encode %{ - __ fabs(as_FloatRegister($dst$$reg), __ T2S, - as_FloatRegister($src$$reg)); + __ fabs(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg)); %} ins_pipe(vunop_fp64); %} @@ -17971,8 +18062,7 @@ instruct vabs4F(vecX dst, vecX src) ins_cost(INSN_COST * 3); format %{ "fabs $dst,$src\t# vector (4S)" %} ins_encode %{ - __ fabs(as_FloatRegister($dst$$reg), __ T4S, - as_FloatRegister($src$$reg)); + __ fabs(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg)); %} ins_pipe(vunop_fp128); %} @@ -17984,8 +18074,7 @@ instruct vabs2D(vecX dst, vecX src) ins_cost(INSN_COST * 3); format %{ "fabs $dst,$src\t# vector (2D)" %} ins_encode %{ - __ fabs(as_FloatRegister($dst$$reg), __ T2D, - as_FloatRegister($src$$reg)); + __ fabs(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg)); %} ins_pipe(vunop_fp128); %} @@ -18126,7 +18215,8 @@ instruct vxor16B(vecX dst, vecX src1, vecX src2) // ------------------------------ Shift --------------------------------------- instruct vshiftcnt8B(vecD dst, iRegIorL2I cnt) %{ - predicate(n->as_Vector()->length_in_bytes() == 8); + predicate(n->as_Vector()->length_in_bytes() == 4 || + n->as_Vector()->length_in_bytes() == 8); match(Set dst (LShiftCntV cnt)); match(Set dst (RShiftCntV cnt)); format %{ "dup $dst, $cnt\t# shift count vector (8B)" %} @@ -18834,6 +18924,216 @@ instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{ ins_pipe(vshift128_imm); %} +instruct vsraa8B_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVB dst (RShiftVB src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "ssra $dst, $src, $shift\t# vector (8B)" %} + ins_encode %{ + int sh = (int)$shift$$constant; + if (sh >= 8) sh = 7; + __ ssra(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), sh); + %} + ins_pipe(vshift64_imm); +%} + +instruct vsraa16B_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AddVB dst (RShiftVB src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "ssra $dst, $src, $shift\t# vector (16B)" %} + ins_encode %{ + int sh = (int)$shift$$constant; + if (sh >= 8) sh = 7; + __ ssra(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), sh); + %} + ins_pipe(vshift128_imm); +%} + +instruct vsraa4S_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVS dst (RShiftVS src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "ssra $dst, $src, $shift\t# vector (4H)" %} + ins_encode %{ + int sh = (int)$shift$$constant; + if (sh >= 16) sh = 15; + __ ssra(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src$$reg), sh); + %} + ins_pipe(vshift64_imm); +%} + +instruct vsraa8S_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set 
dst (AddVS dst (RShiftVS src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "ssra $dst, $src, $shift\t# vector (8H)" %} + ins_encode %{ + int sh = (int)$shift$$constant; + if (sh >= 16) sh = 15; + __ ssra(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src$$reg), sh); + %} + ins_pipe(vshift128_imm); +%} + +instruct vsraa2I_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVI dst (RShiftVI src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "ssra $dst, $src, $shift\t# vector (2S)" %} + ins_encode %{ + __ ssra(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg), + (int)$shift$$constant); + %} + ins_pipe(vshift64_imm); +%} + +instruct vsraa4I_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVI dst (RShiftVI src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "ssra $dst, $src, $shift\t# vector (4S)" %} + ins_encode %{ + __ ssra(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src$$reg), + (int)$shift$$constant); + %} + ins_pipe(vshift128_imm); +%} + +instruct vsraa2L_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVL dst (RShiftVL src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "ssra $dst, $src, $shift\t# vector (2D)" %} + ins_encode %{ + __ ssra(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg), + (int)$shift$$constant); + %} + ins_pipe(vshift128_imm); +%} + +instruct vsrla8B_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVB dst (URShiftVB src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "usra $dst, $src, $shift\t# vector (8B)" %} + ins_encode %{ + int sh = (int)$shift$$constant; + if (sh >= 8) { + __ eor(as_FloatRegister($src$$reg), __ T8B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ usra(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), sh); + } + %} + ins_pipe(vshift64_imm); +%} + +instruct vsrla16B_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (AddVB dst (URShiftVB src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "usra $dst, $src, $shift\t# vector (16B)" %} + ins_encode %{ + int sh = (int)$shift$$constant; + if (sh >= 8) { + __ eor(as_FloatRegister($src$$reg), __ T16B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ usra(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), sh); + } + %} + ins_pipe(vshift128_imm); +%} + +instruct vsrla4S_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVS dst (URShiftVS src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "usra $dst, $src, $shift\t# vector (4H)" %} + ins_encode %{ + int sh = (int)$shift$$constant; + if (sh >= 16) { + __ eor(as_FloatRegister($src$$reg), __ T8B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ ushr(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src$$reg), sh); + } + %} + ins_pipe(vshift64_imm); +%} + +instruct vsrla8S_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (AddVS dst (URShiftVS src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "usra $dst, $src, $shift\t# vector (8H)" %} + ins_encode %{ + int sh = (int)$shift$$constant; + if (sh >= 16) { + __ 
eor(as_FloatRegister($src$$reg), __ T16B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ usra(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src$$reg), sh); + } + %} + ins_pipe(vshift128_imm); +%} + +instruct vsrla2I_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVI dst (URShiftVI src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "usra $dst, $src, $shift\t# vector (2S)" %} + ins_encode %{ + __ usra(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg), + (int)$shift$$constant); + %} + ins_pipe(vshift64_imm); +%} + +instruct vsrla4I_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (AddVI dst (URShiftVI src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "usra $dst, $src, $shift\t# vector (4S)" %} + ins_encode %{ + __ usra(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src$$reg), + (int)$shift$$constant); + %} + ins_pipe(vshift128_imm); +%} + +instruct vsrla2L_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVL dst (URShiftVL src (RShiftCntV shift)))); + ins_cost(INSN_COST); + format %{ "usra $dst, $src, $shift\t# vector (2D)" %} + ins_encode %{ + __ usra(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src$$reg), + (int)$shift$$constant); + %} + ins_pipe(vshift128_imm); +%} + instruct vmax2F(vecD dst, vecD src1, vecD src2) %{ predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); @@ -18950,12 +19250,12 @@ instruct vpopcount4I(vecX dst, vecX src) %{ "uaddlp $dst, $dst\t# vector (8H)" %} ins_encode %{ - __ cnt(as_FloatRegister($dst$$reg), __ T16B, - as_FloatRegister($src$$reg)); - __ uaddlp(as_FloatRegister($dst$$reg), __ T16B, - as_FloatRegister($dst$$reg)); - __ uaddlp(as_FloatRegister($dst$$reg), __ T8H, - as_FloatRegister($dst$$reg)); + __ cnt(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg)); + __ uaddlp(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($dst$$reg)); + __ uaddlp(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($dst$$reg)); %} ins_pipe(pipe_class_default); %} @@ -18969,12 +19269,12 @@ instruct vpopcount2I(vecD dst, vecD src) %{ "uaddlp $dst, $dst\t# vector (4H)" %} ins_encode %{ - __ cnt(as_FloatRegister($dst$$reg), __ T8B, - as_FloatRegister($src$$reg)); - __ uaddlp(as_FloatRegister($dst$$reg), __ T8B, - as_FloatRegister($dst$$reg)); - __ uaddlp(as_FloatRegister($dst$$reg), __ T4H, - as_FloatRegister($dst$$reg)); + __ cnt(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg)); + __ uaddlp(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($dst$$reg)); + __ uaddlp(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($dst$$reg)); %} ins_pipe(pipe_class_default); %} diff --git a/src/hotspot/cpu/aarch64/aarch64_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_ad.m4 index 5893f451459..ac1b6dfec65 100644 --- a/src/hotspot/cpu/aarch64/aarch64_ad.m4 +++ b/src/hotspot/cpu/aarch64/aarch64_ad.m4 @@ -1,4 +1,4 @@ -dnl Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. +dnl Copyright (c) 2019, 2020, Red Hat Inc. All rights reserved. dnl DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
dnl dnl This code is free software; you can redistribute it and/or modify it @@ -19,10 +19,14 @@ dnl Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA dnl or visit www.oracle.com if you need additional information or have any dnl questions. dnl -dnl -dnl Process this file with m4 aarch64_ad.m4 to generate the arithmetic -dnl and shift patterns patterns used in aarch64.ad. dnl +dnl Process this file with m4 aarch64_ad.m4 to generate instructions used in +dnl aarch64.ad: +dnl 1. the arithmetic +dnl 2. shift patterns +dnl +// BEGIN This section of the file is automatically generated. Do not edit -------------- +// This section is generated from aarch64_ad.m4 dnl define(`ORL2I', `ifelse($1,I,orL2I)') dnl diff --git a/src/hotspot/cpu/aarch64/aarch64_neon.ad b/src/hotspot/cpu/aarch64/aarch64_neon.ad new file mode 100644 index 00000000000..33b1a869cc3 --- /dev/null +++ b/src/hotspot/cpu/aarch64/aarch64_neon.ad @@ -0,0 +1,3456 @@ +// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2020, Arm Limited. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + +// This file is automatically generated by running "m4 aarch64_neon_ad.m4". 
Do not edit ---- + +// AArch64 NEON Architecture Description File + +// ====================VECTOR INSTRUCTIONS================================== + +// ------------------------------ Load/store/reinterpret ----------------------- + +// Load vector (16 bits) +instruct loadV2(vecD dst, memory mem) +%{ + predicate(n->as_LoadVector()->memory_size() == 2); + match(Set dst (LoadVector mem)); + ins_cost(4 * INSN_COST); + format %{ "ldrh $dst,$mem\t# vector (16 bits)" %} + ins_encode( aarch64_enc_ldrvH(dst, mem) ); + ins_pipe(vload_reg_mem64); +%} + +// Store Vector (16 bits) +instruct storeV2(vecD src, memory mem) +%{ + predicate(n->as_StoreVector()->memory_size() == 2); + match(Set mem (StoreVector mem src)); + ins_cost(4 * INSN_COST); + format %{ "strh $mem,$src\t# vector (16 bits)" %} + ins_encode( aarch64_enc_strvH(src, mem) ); + ins_pipe(vstore_reg_mem64); +%} + +instruct reinterpretD(vecD dst) +%{ + predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 && + n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); + match(Set dst (VectorReinterpret dst)); + ins_cost(0); + format %{ " # reinterpret $dst" %} + ins_encode %{ + // empty + %} + ins_pipe(pipe_class_empty); +%} + +instruct reinterpretX(vecX dst) +%{ + predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 && + n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16); + match(Set dst (VectorReinterpret dst)); + ins_cost(0); + format %{ " # reinterpret $dst" %} + ins_encode %{ + // empty + %} + ins_pipe(pipe_class_empty); +%} + +instruct reinterpretD2X(vecX dst, vecD src) +%{ + predicate(n->bottom_type()->is_vect()->length_in_bytes() == 16 && + n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 8); + match(Set dst (VectorReinterpret src)); + ins_cost(INSN_COST); + format %{ " # reinterpret $dst,$src" %} + ins_encode %{ + // If register is the same, then move is not needed. + if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) { + __ orr(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } + %} + ins_pipe(vlogical64); +%} + +instruct reinterpretX2D(vecD dst, vecX src) +%{ + predicate(n->bottom_type()->is_vect()->length_in_bytes() == 8 && + n->in(1)->bottom_type()->is_vect()->length_in_bytes() == 16); + match(Set dst (VectorReinterpret src)); + ins_cost(INSN_COST); + format %{ " # reinterpret $dst,$src" %} + ins_encode %{ + // If register is the same, then move is not needed. 
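+    // Only the low 64 bits of $src are meaningful for the narrower vecD result,
+    // so a 64-bit (T8B) orr is sufficient to copy them.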
+ if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) { + __ orr(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } + %} + ins_pipe(vlogical64); +%} + +// ------------------------------ Vector cast ------------------------------- + +instruct vcvt4Bto4S(vecD dst, vecD src) +%{ + predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorCastB2X src)); + format %{ "sxtl $dst, T8H, $src, T8B\t# convert 4B to 4S vector" %} + ins_encode %{ + __ sxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B); + %} + ins_pipe(pipe_class_default); +%} + +instruct vcvt8Bto8S(vecX dst, vecD src) +%{ + predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorCastB2X src)); + format %{ "sxtl $dst, T8H, $src, T8B\t# convert 8B to 8S vector" %} + ins_encode %{ + __ sxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B); + %} + ins_pipe(pipe_class_default); +%} + +instruct vcvt4Sto4B(vecD dst, vecD src) +%{ + predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorCastS2X src)); + format %{ "xtn $dst, T8B, $src, T8H\t# convert 4S to 4B vector" %} + ins_encode %{ + __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg), __ T8H); + %} + ins_pipe(pipe_class_default); +%} + +instruct vcvt8Sto8B(vecD dst, vecX src) +%{ + predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorCastS2X src)); + format %{ "xtn $dst, T8B, $src, T8H\t# convert 8S to 8B vector" %} + ins_encode %{ + __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg), __ T8H); + %} + ins_pipe(pipe_class_default); +%} + +instruct vcvt4Sto4I(vecX dst, vecD src) +%{ + predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (VectorCastS2X src)); + format %{ "sxtl $dst, T4S, $src, T4H\t# convert 4S to 4I vector" %} + ins_encode %{ + __ sxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg), __ T4H); + %} + ins_pipe(pipe_class_default); +%} + +instruct vcvt4Ito4S(vecD dst, vecX src) +%{ + predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorCastI2X src)); + format %{ "xtn $dst, T4H, $src, T4S\t# convert 4I to 4S vector" %} + ins_encode %{ + __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($src$$reg), __ T4S); + %} + ins_pipe(pipe_class_default); +%} + +instruct vcvt2Ito2L(vecX dst, vecD src) +%{ + predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (VectorCastI2X src)); + format %{ "sxtl $dst, T2D, $src, T2S\t# convert 2I to 2L vector" %} + ins_encode %{ + __ sxtl(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg), __ T2S); + %} + ins_pipe(pipe_class_default); +%} + +instruct vcvt2Lto2I(vecD dst, vecX src) +%{ + predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (VectorCastL2X src)); + format %{ "xtn $dst, T2S, $src, T2D\t# convert 2L to 2I vector" %} + ins_encode %{ + __ xtn(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg), __ T2D); + %} + ins_pipe(pipe_class_default); +%} + +instruct vcvt4Bto4I(vecX dst, 
vecD src) +%{ + predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (VectorCastB2X src)); + format %{ "sxtl $dst, T8H, $src, T8B\n\t" + "sxtl $dst, T4S, $dst, T4H\t# convert 4B to 4I vector" + %} + ins_encode %{ + __ sxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B); + __ sxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvt4Ito4B(vecD dst, vecX src) +%{ + predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorCastI2X src)); + format %{ "xtn $dst, T4H, $src, T4S\n\t" + "xtn $dst, T8B, $dst, T8H\t# convert 4I to 4B vector" + %} + ins_encode %{ + __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($src$$reg), __ T4S); + __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg), __ T8H); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvt4Bto4F(vecX dst, vecD src) +%{ + predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorCastB2X src)); + format %{ "sxtl $dst, T8H, $src, T8B\n\t" + "sxtl $dst, T4S, $dst, T4H\n\t" + "scvtfv T4S, $dst, $dst\t# convert 4B to 4F vector" + %} + ins_encode %{ + __ sxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B); + __ sxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H); + __ scvtfv(__ T4S, as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvt4Sto4F(vecX dst, vecD src) +%{ + predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorCastS2X src)); + format %{ "sxtl $dst, T4S, $src, T4H\n\t" + "scvtfv T4S, $dst, $dst\t# convert 4S to 4F vector" + %} + ins_encode %{ + __ sxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($src$$reg), __ T4H); + __ scvtfv(__ T4S, as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvt2Ito2D(vecX dst, vecD src) +%{ + predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (VectorCastI2X src)); + format %{ "sxtl $dst, T2D, $src, T2S\n\t" + "scvtfv T2D, $dst, $dst\t# convert 2I to 2D vector" + %} + ins_encode %{ + __ sxtl(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg), __ T2S); + __ scvtfv(__ T2D, as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcvt2Ito2F(vecD dst, vecD src) +%{ + predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorCastI2X src)); + format %{ "scvtfv T2S, $dst, $src\t# convert 2I to 2F vector" %} + ins_encode %{ + __ scvtfv(__ T2S, as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vcvt4Ito4F(vecX dst, vecX src) +%{ + predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorCastI2X src)); + format %{ "scvtfv T4S, $dst, $src\t# convert 4I to 4F vector" %} + ins_encode %{ + __ scvtfv(__ T4S, as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vcvt2Lto2D(vecX dst, vecX src) +%{ + predicate(n->as_Vector()->length() == 2 && 
n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (VectorCastL2X src)); + format %{ "scvtfv T2D, $dst, $src\t# convert 2L to 2D vector" %} + ins_encode %{ + __ scvtfv(__ T2D, as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vcvt2Fto2D(vecX dst, vecD src) +%{ + predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (VectorCastF2X src)); + format %{ "fcvtl $dst, T2D, $src, T2S\t# convert 2F to 2D vector" %} + ins_encode %{ + __ fcvtl(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($src$$reg), __ T2S); + %} + ins_pipe(pipe_class_default); +%} + +instruct vcvt2Dto2F(vecD dst, vecX src) +%{ + predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorCastD2X src)); + format %{ "fcvtn $dst, T2S, $src, T2D\t# convert 2D to 2F vector" %} + ins_encode %{ + __ fcvtn(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg), __ T2D); + %} + ins_pipe(pipe_class_default); +%} + +instruct vcvt2Lto2F(vecD dst, vecX src) +%{ + predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorCastL2X src)); + format %{ "scvtfv T2D, $dst, $src\n\t" + "fcvtn $dst, T2S, $dst, T2D\t# convert 2L to 2F vector" + %} + ins_encode %{ + __ scvtfv(__ T2D, as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); + __ fcvtn(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($dst$$reg), __ T2D); + %} + ins_pipe(pipe_slow); +%} + +// ------------------------------ Reduction ------------------------------- + +instruct reduce_add8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (AddReductionVI isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "addv $tmp, T8B, $vsrc\n\t" + "smov $dst, $tmp, B, 0\n\t" + "addw $dst, $dst, $isrc\n\t" + "sxtb $dst, $dst\t# add reduction8B" + %} + ins_encode %{ + __ addv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($vsrc$$reg)); + __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0); + __ addw($dst$$Register, $dst$$Register, $isrc$$Register); + __ sxtb($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_add16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (AddReductionVI isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "addv $tmp, T16B, $vsrc\n\t" + "smov $dst, $tmp, B, 0\n\t" + "addw $dst, $dst, $isrc\n\t" + "sxtb $dst, $dst\t# add reduction16B" + %} + ins_encode %{ + __ addv(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($vsrc$$reg)); + __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0); + __ addw($dst$$Register, $dst$$Register, $isrc$$Register); + __ sxtb($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_add4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (AddReductionVI isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "addv $tmp, T4H, $vsrc\n\t" + "smov $dst, $tmp, H, 0\n\t" + "addw $dst, $dst, $isrc\n\t" + "sxth $dst, $dst\t# add reduction4S" + %} + ins_encode %{ + __ 
addv(as_FloatRegister($tmp$$reg), __ T4H, as_FloatRegister($vsrc$$reg)); + __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ H, 0); + __ addw($dst$$Register, $dst$$Register, $isrc$$Register); + __ sxth($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_add8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (AddReductionVI isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "addv $tmp, T8H, $vsrc\n\t" + "smov $dst, $tmp, H, 0\n\t" + "addw $dst, $dst, $isrc\n\t" + "sxth $dst, $dst\t# add reduction8S" + %} + ins_encode %{ + __ addv(as_FloatRegister($tmp$$reg), __ T8H, as_FloatRegister($vsrc$$reg)); + __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ H, 0); + __ addw($dst$$Register, $dst$$Register, $isrc$$Register); + __ sxth($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_add2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, vecX tmp) +%{ + match(Set dst (AddReductionVL isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "addpd $tmp, $vsrc\n\t" + "umov $dst, $tmp, D, 0\n\t" + "add $dst, $isrc, $dst\t# add reduction2L" + %} + ins_encode %{ + __ addpd(as_FloatRegister($tmp$$reg), as_FloatRegister($vsrc$$reg)); + __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ D, 0); + __ add($dst$$Register, $isrc$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_mul8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD vtmp1, vecD vtmp2, iRegINoSp itmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (MulReductionVI isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp); + format %{ "ins $vtmp1, S, $vsrc, 0, 1\n\t" + "mulv $vtmp1, T8B, $vtmp1, $vsrc\n\t" + "ins $vtmp2, H, $vtmp1, 0, 1\n\t" + "mulv $vtmp2, T8B, $vtmp2, $vtmp1\n\t" + "umov $itmp, $vtmp2, B, 0\n\t" + "mulw $dst, $itmp, $isrc\n\t" + "sxtb $dst, $dst\n\t" + "umov $itmp, $vtmp2, B, 1\n\t" + "mulw $dst, $itmp, $dst\n\t" + "sxtb $dst, $dst\t# mul reduction8B" + %} + ins_encode %{ + __ ins(as_FloatRegister($vtmp1$$reg), __ S, + as_FloatRegister($vsrc$$reg), 0, 1); + __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B, + as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg)); + __ ins(as_FloatRegister($vtmp2$$reg), __ H, + as_FloatRegister($vtmp1$$reg), 0, 1); + __ mulv(as_FloatRegister($vtmp2$$reg), __ T8B, + as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg)); + __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 0); + __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register); + __ sxtb($dst$$Register, $dst$$Register); + __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 1); + __ mulw($dst$$Register, $itmp$$Register, $dst$$Register); + __ sxtb($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_mul16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp1, vecX vtmp2, iRegINoSp itmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (MulReductionVI isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp); + format %{ "ins $vtmp1, D, $vsrc, 0, 1\n\t" + "mulv $vtmp1, T8B, $vtmp1, $vsrc\n\t" + "ins $vtmp2, S, $vtmp1, 0, 1\n\t" + "mulv $vtmp1, T8B, $vtmp2, $vtmp1\n\t" + "ins $vtmp2, H, $vtmp1, 0, 1\n\t" + "mulv $vtmp2, T8B, $vtmp2, $vtmp1\n\t" + "umov 
$itmp, $vtmp2, B, 0\n\t" + "mulw $dst, $itmp, $isrc\n\t" + "sxtb $dst, $dst\n\t" + "umov $itmp, $vtmp2, B, 1\n\t" + "mulw $dst, $itmp, $dst\n\t" + "sxtb $dst, $dst\t# mul reduction16B" + %} + ins_encode %{ + __ ins(as_FloatRegister($vtmp1$$reg), __ D, + as_FloatRegister($vsrc$$reg), 0, 1); + __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B, + as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg)); + __ ins(as_FloatRegister($vtmp2$$reg), __ S, + as_FloatRegister($vtmp1$$reg), 0, 1); + __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B, + as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg)); + __ ins(as_FloatRegister($vtmp2$$reg), __ H, + as_FloatRegister($vtmp1$$reg), 0, 1); + __ mulv(as_FloatRegister($vtmp2$$reg), __ T8B, + as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg)); + __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 0); + __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register); + __ sxtb($dst$$Register, $dst$$Register); + __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 1); + __ mulw($dst$$Register, $itmp$$Register, $dst$$Register); + __ sxtb($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_mul4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD vtmp, iRegINoSp itmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (MulReductionVI isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP vtmp, TEMP itmp); + format %{ "ins $vtmp, S, $vsrc, 0, 1\n\t" + "mulv $vtmp, T4H, $vtmp, $vsrc\n\t" + "umov $itmp, $vtmp, H, 0\n\t" + "mulw $dst, $itmp, $isrc\n\t" + "sxth $dst, $dst\n\t" + "umov $itmp, $vtmp, H, 1\n\t" + "mulw $dst, $itmp, $dst\n\t" + "sxth $dst, $dst\t# mul reduction4S" + %} + ins_encode %{ + __ ins(as_FloatRegister($vtmp$$reg), __ S, + as_FloatRegister($vsrc$$reg), 0, 1); + __ mulv(as_FloatRegister($vtmp$$reg), __ T4H, + as_FloatRegister($vtmp$$reg), as_FloatRegister($vsrc$$reg)); + __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ H, 0); + __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register); + __ sxth($dst$$Register, $dst$$Register); + __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ H, 1); + __ mulw($dst$$Register, $itmp$$Register, $dst$$Register); + __ sxth($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_mul8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp1, vecX vtmp2, iRegINoSp itmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (MulReductionVI isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp); + format %{ "ins $vtmp1, D, $vsrc, 0, 1\n\t" + "mulv $vtmp1, T4H, $vtmp1, $vsrc\n\t" + "ins $vtmp2, S, $vtmp1, 0, 1\n\t" + "mulv $vtmp2, T4H, $vtmp2, $vtmp1\n\t" + "umov $itmp, $vtmp2, H, 0\n\t" + "mulw $dst, $itmp, $isrc\n\t" + "sxth $dst, $dst\n\t" + "umov $itmp, $vtmp2, H, 1\n\t" + "mulw $dst, $itmp, $dst\n\t" + "sxth $dst, $dst\t# mul reduction8S" + %} + ins_encode %{ + __ ins(as_FloatRegister($vtmp1$$reg), __ D, + as_FloatRegister($vsrc$$reg), 0, 1); + __ mulv(as_FloatRegister($vtmp1$$reg), __ T4H, + as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg)); + __ ins(as_FloatRegister($vtmp2$$reg), __ S, + as_FloatRegister($vtmp1$$reg), 0, 1); + __ mulv(as_FloatRegister($vtmp2$$reg), __ T4H, + as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg)); + __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ H, 0); + __ mulw($dst$$Register, $itmp$$Register, 
$isrc$$Register); + __ sxth($dst$$Register, $dst$$Register); + __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ H, 1); + __ mulw($dst$$Register, $itmp$$Register, $dst$$Register); + __ sxth($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_mul2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp) +%{ + match(Set dst (MulReductionVL isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, D, 0\n\t" + "mul $dst, $isrc, $tmp\n\t" + "umov $tmp, $vsrc, D, 1\n\t" + "mul $dst, $dst, $tmp\t# mul reduction2L" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0); + __ mul($dst$$Register, $isrc$$Register, $tmp$$Register); + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 1); + __ mul($dst$$Register, $dst$$Register, $tmp$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_max8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (MaxReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); + format %{ "smaxv $tmp, T8B, $vsrc\n\t" + "smov $dst, $tmp, B, 0\n\t" + "cmpw $dst, $isrc\n\t" + "cselw $dst, $dst, $isrc GT\t# max reduction8B" + %} + ins_encode %{ + __ smaxv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($vsrc$$reg)); + __ smov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ B, 0); + __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg)); + __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::GT); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_max16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (MaxReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); + format %{ "smaxv $tmp, T16B, $vsrc\n\t" + "smov $dst, $tmp, B, 0\n\t" + "cmpw $dst, $isrc\n\t" + "cselw $dst, $dst, $isrc GT\t# max reduction16B" + %} + ins_encode %{ + __ smaxv(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($vsrc$$reg)); + __ smov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ B, 0); + __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg)); + __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::GT); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_max4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (MaxReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); + format %{ "smaxv $tmp, T4H, $vsrc\n\t" + "smov $dst, $tmp, H, 0\n\t" + "cmpw $dst, $isrc\n\t" + "cselw $dst, $dst, $isrc GT\t# max reduction4S" + %} + ins_encode %{ + __ smaxv(as_FloatRegister($tmp$$reg), __ T4H, as_FloatRegister($vsrc$$reg)); + __ smov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ H, 0); + __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg)); + __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::GT); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_max8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (MaxReductionV isrc vsrc)); + ins_cost(INSN_COST); + 
effect(TEMP_DEF dst, TEMP tmp, KILL cr); + format %{ "smaxv $tmp, T8H, $vsrc\n\t" + "smov $dst, $tmp, H, 0\n\t" + "cmpw $dst, $isrc\n\t" + "cselw $dst, $dst, $isrc GT\t# max reduction8S" + %} + ins_encode %{ + __ smaxv(as_FloatRegister($tmp$$reg), __ T8H, as_FloatRegister($vsrc$$reg)); + __ smov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ H, 0); + __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg)); + __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::GT); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_max4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (MaxReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); + format %{ "smaxv $tmp, T4S, $vsrc\n\t" + "umov $dst, $tmp, S, 0\n\t" + "cmpw $dst, $isrc\n\t" + "cselw $dst, $dst, $isrc GT\t# max reduction4I" + %} + ins_encode %{ + __ smaxv(as_FloatRegister($tmp$$reg), __ T4S, as_FloatRegister($vsrc$$reg)); + __ umov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ S, 0); + __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg)); + __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::GT); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_min8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (MinReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); + format %{ "sminv $tmp, T8B, $vsrc\n\t" + "smov $dst, $tmp, B, 0\n\t" + "cmpw $dst, $isrc\n\t" + "cselw $dst, $dst, $isrc LT\t# min reduction8B" + %} + ins_encode %{ + __ sminv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($vsrc$$reg)); + __ smov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ B, 0); + __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg)); + __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::LT); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_min16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (MinReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); + format %{ "sminv $tmp, T16B, $vsrc\n\t" + "smov $dst, $tmp, B, 0\n\t" + "cmpw $dst, $isrc\n\t" + "cselw $dst, $dst, $isrc LT\t# min reduction16B" + %} + ins_encode %{ + __ sminv(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($vsrc$$reg)); + __ smov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ B, 0); + __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg)); + __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::LT); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_min4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD tmp, rFlagsReg cr) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (MinReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); + format %{ "sminv $tmp, T4H, $vsrc\n\t" + "smov $dst, $tmp, H, 0\n\t" + "cmpw $dst, $isrc\n\t" + "cselw $dst, $dst, $isrc LT\t# min reduction4S" + %} + ins_encode %{ + __ sminv(as_FloatRegister($tmp$$reg), __ T4H, as_FloatRegister($vsrc$$reg)); + __ smov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ H, 
0); + __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg)); + __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::LT); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_min8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (MinReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); + format %{ "sminv $tmp, T8H, $vsrc\n\t" + "smov $dst, $tmp, H, 0\n\t" + "cmpw $dst, $isrc\n\t" + "cselw $dst, $dst, $isrc LT\t# min reduction8S" + %} + ins_encode %{ + __ sminv(as_FloatRegister($tmp$$reg), __ T8H, as_FloatRegister($vsrc$$reg)); + __ smov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ H, 0); + __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg)); + __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::LT); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_min4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX tmp, rFlagsReg cr) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (MinReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); + format %{ "sminv $tmp, T4S, $vsrc\n\t" + "umov $dst, $tmp, S, 0\n\t" + "cmpw $dst, $isrc\n\t" + "cselw $dst, $dst, $isrc LT\t# min reduction4I" + %} + ins_encode %{ + __ sminv(as_FloatRegister($tmp$$reg), __ T4S, as_FloatRegister($vsrc$$reg)); + __ umov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ S, 0); + __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg)); + __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::LT); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_max2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecX tmp, rFlagsReg cr) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (MaxReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); + format %{ "dup $tmp, T2D, $vsrc\n\t" + "smaxv $tmp, T4S, $tmp\n\t" + "umov $dst, $tmp, S, 0\n\t" + "cmpw $dst, $isrc\n\t" + "cselw $dst, $dst, $isrc GT\t# max reduction2I" + %} + ins_encode %{ + __ dup(as_FloatRegister($tmp$$reg), __ T2D, as_FloatRegister($vsrc$$reg)); + __ smaxv(as_FloatRegister($tmp$$reg), __ T4S, as_FloatRegister($tmp$$reg)); + __ umov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ S, 0); + __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg)); + __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::GT); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_min2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecX tmp, rFlagsReg cr) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (MinReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); + format %{ "dup $tmp, T2D, $vsrc\n\t" + "sminv $tmp, T4S, $tmp\n\t" + "umov $dst, $tmp, S, 0\n\t" + "cmpw $dst, $isrc\n\t" + "cselw $dst, $dst, $isrc LT\t# min reduction2I" + %} + ins_encode %{ + __ dup(as_FloatRegister($tmp$$reg), __ T2D, as_FloatRegister($vsrc$$reg)); + __ sminv(as_FloatRegister($tmp$$reg), __ T4S, as_FloatRegister($tmp$$reg)); + __ umov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ S, 0); + __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg)); + __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), 
Assembler::LT); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_max2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp, rFlagsReg cr) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (MaxReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); + format %{ "umov $tmp, $vsrc, D, 0\n\t" + "cmp $isrc,$tmp\n\t" + "csel $dst, $isrc, $tmp GT\n\t" + "umov $tmp, $vsrc, D, 1\n\t" + "cmp $dst, $tmp\n\t" + "csel $dst, $dst, $tmp GT\t# max reduction2L" + %} + ins_encode %{ + __ umov(as_Register($tmp$$reg), as_FloatRegister($vsrc$$reg), __ D, 0); + __ cmp(as_Register($isrc$$reg), as_Register($tmp$$reg)); + __ csel(as_Register($dst$$reg), as_Register($isrc$$reg), as_Register($tmp$$reg), Assembler::GT); + __ umov(as_Register($tmp$$reg), as_FloatRegister($vsrc$$reg), __ D, 1); + __ cmp(as_Register($dst$$reg), as_Register($tmp$$reg)); + __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($tmp$$reg), Assembler::GT); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_min2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp, rFlagsReg cr) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (MinReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); + format %{ "umov $tmp, $vsrc, D, 0\n\t" + "cmp $isrc,$tmp\n\t" + "csel $dst, $isrc, $tmp LT\n\t" + "umov $tmp, $vsrc, D, 1\n\t" + "cmp $dst, $tmp\n\t" + "csel $dst, $dst, $tmp LT\t# min reduction2L" + %} + ins_encode %{ + __ umov(as_Register($tmp$$reg), as_FloatRegister($vsrc$$reg), __ D, 0); + __ cmp(as_Register($isrc$$reg), as_Register($tmp$$reg)); + __ csel(as_Register($dst$$reg), as_Register($isrc$$reg), as_Register($tmp$$reg), Assembler::LT); + __ umov(as_Register($tmp$$reg), as_FloatRegister($vsrc$$reg), __ D, 1); + __ cmp(as_Register($dst$$reg), as_Register($tmp$$reg)); + __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($tmp$$reg), Assembler::LT); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_and8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (AndReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, S, 0\n\t" + "umov $dst, $vsrc, S, 1\n\t" + "andw $dst, $dst, $tmp\n\t" + "andw $dst, $dst, $dst, LSR #16\n\t" + "andw $dst, $dst, $dst, LSR #8\n\t" + "andw $dst, $isrc, $dst\n\t" + "sxtb $dst, $dst\t# and reduction8B" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0); + __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ S, 1); + __ andw($dst$$Register, $dst$$Register, $tmp$$Register); + __ andw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16); + __ andw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8); + __ andw($dst$$Register, $isrc$$Register, $dst$$Register); + __ sxtb($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_orr8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (OrReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, S, 0\n\t" + "umov $dst, $vsrc, S, 1\n\t" + "orrw $dst, $dst, $tmp\n\t" + "orrw $dst, $dst, $dst, LSR #16\n\t" + "orrw $dst, $dst, $dst, LSR #8\n\t" + "orrw $dst, $isrc, $dst\n\t" + 
"sxtb $dst, $dst\t# orr reduction8B" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0); + __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ S, 1); + __ orrw($dst$$Register, $dst$$Register, $tmp$$Register); + __ orrw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16); + __ orrw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8); + __ orrw($dst$$Register, $isrc$$Register, $dst$$Register); + __ sxtb($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_eor8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (XorReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, S, 0\n\t" + "umov $dst, $vsrc, S, 1\n\t" + "eorw $dst, $dst, $tmp\n\t" + "eorw $dst, $dst, $dst, LSR #16\n\t" + "eorw $dst, $dst, $dst, LSR #8\n\t" + "eorw $dst, $isrc, $dst\n\t" + "sxtb $dst, $dst\t# eor reduction8B" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0); + __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ S, 1); + __ eorw($dst$$Register, $dst$$Register, $tmp$$Register); + __ eorw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16); + __ eorw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8); + __ eorw($dst$$Register, $isrc$$Register, $dst$$Register); + __ sxtb($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_and16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (AndReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, D, 0\n\t" + "umov $dst, $vsrc, D, 1\n\t" + "andr $dst, $dst, $tmp\n\t" + "andr $dst, $dst, $dst, LSR #32\n\t" + "andw $dst, $dst, $dst, LSR #16\n\t" + "andw $dst, $dst, $dst, LSR #8\n\t" + "andw $dst, $isrc, $dst\n\t" + "sxtb $dst, $dst\t# and reduction16B" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0); + __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1); + __ andr($dst$$Register, $dst$$Register, $tmp$$Register); + __ andr($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32); + __ andw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16); + __ andw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8); + __ andw($dst$$Register, $isrc$$Register, $dst$$Register); + __ sxtb($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_orr16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (OrReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, D, 0\n\t" + "umov $dst, $vsrc, D, 1\n\t" + "orr $dst, $dst, $tmp\n\t" + "orr $dst, $dst, $dst, LSR #32\n\t" + "orrw $dst, $dst, $dst, LSR #16\n\t" + "orrw $dst, $dst, $dst, LSR #8\n\t" + "orrw $dst, $isrc, $dst\n\t" + "sxtb $dst, $dst\t# orr reduction16B" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0); + __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1); + __ orr ($dst$$Register, $dst$$Register, $tmp$$Register); + __ orr ($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32); + 
__ orrw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16); + __ orrw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8); + __ orrw($dst$$Register, $isrc$$Register, $dst$$Register); + __ sxtb($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_eor16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (XorReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, D, 0\n\t" + "umov $dst, $vsrc, D, 1\n\t" + "eor $dst, $dst, $tmp\n\t" + "eor $dst, $dst, $dst, LSR #32\n\t" + "eorw $dst, $dst, $dst, LSR #16\n\t" + "eorw $dst, $dst, $dst, LSR #8\n\t" + "eorw $dst, $isrc, $dst\n\t" + "sxtb $dst, $dst\t# eor reduction16B" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0); + __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1); + __ eor ($dst$$Register, $dst$$Register, $tmp$$Register); + __ eor ($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32); + __ eorw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16); + __ eorw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8); + __ eorw($dst$$Register, $isrc$$Register, $dst$$Register); + __ sxtb($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_and4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (AndReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, S, 0\n\t" + "umov $dst, $vsrc, S, 1\n\t" + "andw $dst, $dst, $tmp\n\t" + "andw $dst, $dst, $dst, LSR #16\n\t" + "andw $dst, $isrc, $dst\n\t" + "sxth $dst, $dst\t# and reduction4S" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0); + __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ S, 1); + __ andw($dst$$Register, $dst$$Register, $tmp$$Register); + __ andw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16); + __ andw($dst$$Register, $isrc$$Register, $dst$$Register); + __ sxth($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_orr4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (OrReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, S, 0\n\t" + "umov $dst, $vsrc, S, 1\n\t" + "orrw $dst, $dst, $tmp\n\t" + "orrw $dst, $dst, $dst, LSR #16\n\t" + "orrw $dst, $isrc, $dst\n\t" + "sxth $dst, $dst\t# orr reduction4S" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0); + __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ S, 1); + __ orrw($dst$$Register, $dst$$Register, $tmp$$Register); + __ orrw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16); + __ orrw($dst$$Register, $isrc$$Register, $dst$$Register); + __ sxth($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_eor4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (XorReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, S, 0\n\t" 
+ "umov $dst, $vsrc, S, 1\n\t" + "eorw $dst, $dst, $tmp\n\t" + "eorw $dst, $dst, $dst, LSR #16\n\t" + "eorw $dst, $isrc, $dst\n\t" + "sxth $dst, $dst\t# eor reduction4S" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0); + __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ S, 1); + __ eorw($dst$$Register, $dst$$Register, $tmp$$Register); + __ eorw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16); + __ eorw($dst$$Register, $isrc$$Register, $dst$$Register); + __ sxth($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_and8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (AndReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, D, 0\n\t" + "umov $dst, $vsrc, D, 1\n\t" + "andr $dst, $dst, $tmp\n\t" + "andr $dst, $dst, $dst, LSR #32\n\t" + "andw $dst, $dst, $dst, LSR #16\n\t" + "andw $dst, $isrc, $dst\n\t" + "sxth $dst, $dst\t# and reduction8S" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0); + __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1); + __ andr($dst$$Register, $dst$$Register, $tmp$$Register); + __ andr($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32); + __ andw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16); + __ andw($dst$$Register, $isrc$$Register, $dst$$Register); + __ sxth($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_orr8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (OrReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, D, 0\n\t" + "umov $dst, $vsrc, D, 1\n\t" + "orr $dst, $dst, $tmp\n\t" + "orr $dst, $dst, $dst, LSR #32\n\t" + "orrw $dst, $dst, $dst, LSR #16\n\t" + "orrw $dst, $isrc, $dst\n\t" + "sxth $dst, $dst\t# orr reduction8S" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0); + __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1); + __ orr ($dst$$Register, $dst$$Register, $tmp$$Register); + __ orr ($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32); + __ orrw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16); + __ orrw($dst$$Register, $isrc$$Register, $dst$$Register); + __ sxth($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_eor8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (XorReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, D, 0\n\t" + "umov $dst, $vsrc, D, 1\n\t" + "eor $dst, $dst, $tmp\n\t" + "eor $dst, $dst, $dst, LSR #32\n\t" + "eorw $dst, $dst, $dst, LSR #16\n\t" + "eorw $dst, $isrc, $dst\n\t" + "sxth $dst, $dst\t# eor reduction8S" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0); + __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1); + __ eor ($dst$$Register, $dst$$Register, $tmp$$Register); + __ eor ($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32); + __ eorw($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16); + __ 
eorw($dst$$Register, $isrc$$Register, $dst$$Register); + __ sxth($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_and2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (AndReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, S, 0\n\t" + "andw $dst, $tmp, $isrc\n\t" + "umov $tmp, $vsrc, S, 1\n\t" + "andw $dst, $tmp, $dst\t# and reduction2I" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0); + __ andw($dst$$Register, $tmp$$Register, $isrc$$Register); + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 1); + __ andw($dst$$Register, $tmp$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_orr2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (OrReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, S, 0\n\t" + "orrw $dst, $tmp, $isrc\n\t" + "umov $tmp, $vsrc, S, 1\n\t" + "orrw $dst, $tmp, $dst\t# orr reduction2I" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0); + __ orrw($dst$$Register, $tmp$$Register, $isrc$$Register); + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 1); + __ orrw($dst$$Register, $tmp$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_eor2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (XorReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, S, 0\n\t" + "eorw $dst, $tmp, $isrc\n\t" + "umov $tmp, $vsrc, S, 1\n\t" + "eorw $dst, $tmp, $dst\t# eor reduction2I" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0); + __ eorw($dst$$Register, $tmp$$Register, $isrc$$Register); + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 1); + __ eorw($dst$$Register, $tmp$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_and4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (AndReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, D, 0\n\t" + "umov $dst, $vsrc, D, 1\n\t" + "andr $dst, $dst, $tmp\n\t" + "andr $dst, $dst, $dst, LSR #32\n\t" + "andw $dst, $isrc, $dst\t# and reduction4I" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0); + __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1); + __ andr($dst$$Register, $dst$$Register, $tmp$$Register); + __ andr($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32); + __ andw($dst$$Register, $isrc$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_orr4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (OrReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, D, 0\n\t" + "umov $dst, $vsrc, D, 1\n\t" + "orr $dst, $dst, $tmp\n\t" + "orr $dst, $dst, $dst, LSR #32\n\t" + "orrw $dst, $isrc, 
$dst\t# orr reduction4I" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0); + __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1); + __ orr ($dst$$Register, $dst$$Register, $tmp$$Register); + __ orr ($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32); + __ orrw($dst$$Register, $isrc$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_eor4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (XorReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, D, 0\n\t" + "umov $dst, $vsrc, D, 1\n\t" + "eor $dst, $dst, $tmp\n\t" + "eor $dst, $dst, $dst, LSR #32\n\t" + "eorw $dst, $isrc, $dst\t# eor reduction4I" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0); + __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1); + __ eor ($dst$$Register, $dst$$Register, $tmp$$Register); + __ eor ($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32); + __ eorw($dst$$Register, $isrc$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_and2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (AndReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, D, 0\n\t" + "andr $dst, $isrc, $tmp\n\t" + "umov $tmp, $vsrc, D, 1\n\t" + "andr $dst, $dst, $tmp\t# and reduction2L" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0); + __ andr($dst$$Register, $isrc$$Register, $tmp$$Register); + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 1); + __ andr($dst$$Register, $dst$$Register, $tmp$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_orr2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (OrReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, D, 0\n\t" + "orr $dst, $isrc, $tmp\n\t" + "umov $tmp, $vsrc, D, 1\n\t" + "orr $dst, $dst, $tmp\t# orr reduction2L" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0); + __ orr ($dst$$Register, $isrc$$Register, $tmp$$Register); + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 1); + __ orr ($dst$$Register, $dst$$Register, $tmp$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_eor2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (XorReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, D, 0\n\t" + "eor $dst, $isrc, $tmp\n\t" + "umov $tmp, $vsrc, D, 1\n\t" + "eor $dst, $dst, $tmp\t# eor reduction2L" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0); + __ eor ($dst$$Register, $isrc$$Register, $tmp$$Register); + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 1); + __ eor ($dst$$Register, $dst$$Register, $tmp$$Register); + %} + ins_pipe(pipe_slow); +%} + +// ------------------------------ Vector insert --------------------------------- + +instruct insert8B(vecD dst, vecD src, iRegIorL2I val, immI idx) +%{ + 
predicate(n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorInsert (Binary src val) idx)); + ins_cost(INSN_COST); + format %{ "orr $dst, T8B, $src, $src\n\t" + "mov $dst, T8B, $idx, $val\t# insert into vector(8B)" %} + ins_encode %{ + if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) { + __ orr(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); + } + __ mov(as_FloatRegister($dst$$reg), __ T8B, $idx$$constant, $val$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct insert16B(vecX dst, vecX src, iRegIorL2I val, immI idx) +%{ + predicate(n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorInsert (Binary src val) idx)); + ins_cost(INSN_COST); + format %{ "orr $dst, T16B, $src, $src\n\t" + "mov $dst, T16B, $idx, $val\t# insert into vector(16B)" %} + ins_encode %{ + if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) { + __ orr(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); + } + __ mov(as_FloatRegister($dst$$reg), __ T16B, $idx$$constant, $val$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct insert4S(vecD dst, vecD src, iRegIorL2I val, immI idx) +%{ + predicate(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorInsert (Binary src val) idx)); + ins_cost(INSN_COST); + format %{ "orr $dst, T8B, $src, $src\n\t" + "mov $dst, T4H, $idx, $val\t# insert into vector(4S)" %} + ins_encode %{ + if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) { + __ orr(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); + } + __ mov(as_FloatRegister($dst$$reg), __ T4H, $idx$$constant, $val$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct insert8S(vecX dst, vecX src, iRegIorL2I val, immI idx) +%{ + predicate(n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorInsert (Binary src val) idx)); + ins_cost(INSN_COST); + format %{ "orr $dst, T16B, $src, $src\n\t" + "mov $dst, T8H, $idx, $val\t# insert into vector(8S)" %} + ins_encode %{ + if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) { + __ orr(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); + } + __ mov(as_FloatRegister($dst$$reg), __ T8H, $idx$$constant, $val$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct insert2I(vecD dst, vecD src, iRegIorL2I val, immI idx) +%{ + predicate(n->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (VectorInsert (Binary src val) idx)); + ins_cost(INSN_COST); + format %{ "orr $dst, T8B, $src, $src\n\t" + "mov $dst, T2S, $idx, $val\t# insert into vector(2I)" %} + ins_encode %{ + if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) { + __ orr(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); + } + __ mov(as_FloatRegister($dst$$reg), __ T2S, $idx$$constant, $val$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct insert4I(vecX dst, vecX src, iRegIorL2I val, immI idx) +%{ + predicate(n->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (VectorInsert (Binary src val) idx)); + ins_cost(INSN_COST); + format %{ "orr $dst, T16B, $src, $src\n\t" + "mov $dst, T4S, $idx, $val\t# insert into vector(4I)" %} + ins_encode %{ + if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) { + __ orr(as_FloatRegister($dst$$reg), __ T16B, + 
as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); + } + __ mov(as_FloatRegister($dst$$reg), __ T4S, $idx$$constant, $val$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct insert2L(vecX dst, vecX src, iRegL val, immI idx) +%{ + predicate(n->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (VectorInsert (Binary src val) idx)); + ins_cost(INSN_COST); + format %{ "orr $dst, T16B, $src, $src\n\t" + "mov $dst, T2D, $idx, $val\t# insert into vector(2L)" %} + ins_encode %{ + if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) { + __ orr(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); + } + __ mov(as_FloatRegister($dst$$reg), __ T2D, $idx$$constant, $val$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct insert2F(vecD dst, vecD src, vRegF val, immI idx) +%{ + predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorInsert (Binary src val) idx)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst); + format %{ "orr $dst, T8B, $src, $src\n\t" + "ins $dst, S, $val, $idx, 0\t# insert into vector(2F)" %} + ins_encode %{ + __ orr(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); + __ ins(as_FloatRegister($dst$$reg), __ S, + as_FloatRegister($val$$reg), $idx$$constant, 0); + %} + ins_pipe(pipe_slow); +%} + +instruct insert4F(vecX dst, vecX src, vRegF val, immI idx) +%{ + predicate(n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorInsert (Binary src val) idx)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst); + format %{ "orr $dst, T16B, $src, $src\n\t" + "ins $dst, S, $val, $idx, 0\t# insert into vector(4F)" %} + ins_encode %{ + __ orr(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); + __ ins(as_FloatRegister($dst$$reg), __ S, + as_FloatRegister($val$$reg), $idx$$constant, 0); + %} + ins_pipe(pipe_slow); +%} + +instruct insert2D(vecX dst, vecX src, vRegD val, immI idx) +%{ + predicate(n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (VectorInsert (Binary src val) idx)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst); + format %{ "orr $dst, T16B, $src, $src\n\t" + "ins $dst, D, $val, $idx, 0\t# insert into vector(2D)" %} + ins_encode %{ + __ orr(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); + __ ins(as_FloatRegister($dst$$reg), __ D, + as_FloatRegister($val$$reg), $idx$$constant, 0); + %} + ins_pipe(pipe_slow); +%} + +// ------------------------------ Vector extract --------------------------------- + +instruct extract8B(iRegINoSp dst, vecD src, immI idx) +%{ + predicate(n->in(1)->bottom_type()->is_vect()->length() == 8); + match(Set dst (ExtractB src idx)); + ins_cost(INSN_COST); + format %{ "smov $dst, $src, B, $idx\t# extract from vector(8B)" %} + ins_encode %{ + __ smov($dst$$Register, as_FloatRegister($src$$reg), __ B, $idx$$constant); + %} + ins_pipe(pipe_class_default); +%} + +instruct extract16B(iRegINoSp dst, vecX src, immI idx) +%{ + predicate(n->in(1)->bottom_type()->is_vect()->length() == 16); + match(Set dst (ExtractB src idx)); + ins_cost(INSN_COST); + format %{ "smov $dst, $src, B, $idx\t# extract from vector(16B)" %} + ins_encode %{ + __ smov($dst$$Register, as_FloatRegister($src$$reg), __ B, $idx$$constant); + %} + ins_pipe(pipe_class_default); +%} + +instruct extract4S(iRegINoSp dst, vecD src, immI idx) +%{ + 
predicate(n->in(1)->bottom_type()->is_vect()->length() == 4); + match(Set dst (ExtractS src idx)); + ins_cost(INSN_COST); + format %{ "smov $dst, $src, H, $idx\t# extract from vector(4S)" %} + ins_encode %{ + __ smov($dst$$Register, as_FloatRegister($src$$reg), __ H, $idx$$constant); + %} + ins_pipe(pipe_class_default); +%} + +instruct extract8S(iRegINoSp dst, vecX src, immI idx) +%{ + predicate(n->in(1)->bottom_type()->is_vect()->length() == 8); + match(Set dst (ExtractS src idx)); + ins_cost(INSN_COST); + format %{ "smov $dst, $src, H, $idx\t# extract from vector(8S)" %} + ins_encode %{ + __ smov($dst$$Register, as_FloatRegister($src$$reg), __ H, $idx$$constant); + %} + ins_pipe(pipe_class_default); +%} + +instruct extract2I(iRegINoSp dst, vecD src, immI idx) +%{ + predicate(n->in(1)->bottom_type()->is_vect()->length() == 2); + match(Set dst (ExtractI src idx)); + ins_cost(INSN_COST); + format %{ "umov $dst, $src, S, $idx\t# extract from vector(2I)" %} + ins_encode %{ + __ umov($dst$$Register, as_FloatRegister($src$$reg), __ S, $idx$$constant); + %} + ins_pipe(pipe_class_default); +%} + +instruct extract4I(iRegINoSp dst, vecX src, immI idx) +%{ + predicate(n->in(1)->bottom_type()->is_vect()->length() == 4); + match(Set dst (ExtractI src idx)); + ins_cost(INSN_COST); + format %{ "umov $dst, $src, S, $idx\t# extract from vector(4I)" %} + ins_encode %{ + __ umov($dst$$Register, as_FloatRegister($src$$reg), __ S, $idx$$constant); + %} + ins_pipe(pipe_class_default); +%} + +instruct extract2L(iRegLNoSp dst, vecX src, immI idx) +%{ + predicate(n->in(1)->bottom_type()->is_vect()->length() == 2); + match(Set dst (ExtractL src idx)); + ins_cost(INSN_COST); + format %{ "umov $dst, $src, D, $idx\t# extract from vector(2L)" %} + ins_encode %{ + __ umov($dst$$Register, as_FloatRegister($src$$reg), __ D, $idx$$constant); + %} + ins_pipe(pipe_class_default); +%} + +instruct extract2F(vRegF dst, vecD src, immI idx) +%{ + predicate(n->in(1)->bottom_type()->is_vect()->length() == 2); + match(Set dst (ExtractF src idx)); + ins_cost(INSN_COST); + format %{ "ins $dst, S, $src, 0, $idx\t# extract from vector(2F)" %} + ins_encode %{ + __ ins(as_FloatRegister($dst$$reg), __ S, + as_FloatRegister($src$$reg), 0, $idx$$constant); + %} + ins_pipe(pipe_class_default); +%} + +instruct extract4F(vRegF dst, vecX src, immI idx) +%{ + predicate(n->in(1)->bottom_type()->is_vect()->length() == 4); + match(Set dst (ExtractF src idx)); + ins_cost(INSN_COST); + format %{ "ins $dst, S, $src, 0, $idx\t# extract from vector(4F)" %} + ins_encode %{ + __ ins(as_FloatRegister($dst$$reg), __ S, + as_FloatRegister($src$$reg), 0, $idx$$constant); + %} + ins_pipe(pipe_class_default); +%} + +instruct extract2D(vRegD dst, vecX src, immI idx) +%{ + predicate(n->in(1)->bottom_type()->is_vect()->length() == 2); + match(Set dst (ExtractD src idx)); + ins_cost(INSN_COST); + format %{ "ins $dst, D, $src, 0, $idx\t# extract from vector(2D)" %} + ins_encode %{ + __ ins(as_FloatRegister($dst$$reg), __ D, + as_FloatRegister($src$$reg), 0, $idx$$constant); + %} + ins_pipe(pipe_class_default); +%} + +// ------------------------------ Vector comparison --------------------------------- + +instruct vcmeq8B(vecD dst, vecD src1, vecD src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 8 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmeq $dst, $src1, $src2\t# vector cmp (8B)" %} + 
ins_cost(INSN_COST); + ins_encode %{ + __ cmeq(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vcmeq16B(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 16 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmeq $dst, $src1, $src2\t# vector cmp (16B)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmeq(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmeq4S(vecD dst, vecD src1, vecD src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 4 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmeq $dst, $src1, $src2\t# vector cmp (4S)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmeq(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vcmeq8S(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 8 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmeq $dst, $src1, $src2\t# vector cmp (8S)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmeq(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmeq2I(vecD dst, vecD src1, vecD src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 2 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmeq $dst, $src1, $src2\t# vector cmp (2I)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmeq(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vcmeq4I(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 4 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmeq $dst, $src1, $src2\t# vector cmp (4I)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmeq(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmeq2L(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 2 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmeq $dst, $src1, $src2\t# vector cmp (2L)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmeq(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmeq2F(vecD dst, vecD src1, vecD src2, immI cond) +%{ + 
predicate(n->as_Vector()->length() == 2 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "fcmeq $dst, $src1, $src2\t# vector cmp (2F)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ fcmeq(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vcmeq4F(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 4 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "fcmeq $dst, $src1, $src2\t# vector cmp (4F)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ fcmeq(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmeq2D(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 2 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::eq && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "fcmeq $dst, $src1, $src2\t# vector cmp (2D)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ fcmeq(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmgt8B(vecD dst, vecD src1, vecD src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 8 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmgt $dst, $src1, $src2\t# vector cmp (8B)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmgt(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vcmgt16B(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 16 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmgt $dst, $src1, $src2\t# vector cmp (16B)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmgt(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmgt4S(vecD dst, vecD src1, vecD src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 4 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmgt $dst, $src1, $src2\t# vector cmp (4S)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmgt(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vcmgt8S(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 8 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmgt 
$dst, $src1, $src2\t# vector cmp (8S)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmgt(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmgt2I(vecD dst, vecD src1, vecD src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 2 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmgt $dst, $src1, $src2\t# vector cmp (2I)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmgt(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vcmgt4I(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 4 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmgt $dst, $src1, $src2\t# vector cmp (4I)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmgt(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmgt2L(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 2 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmgt $dst, $src1, $src2\t# vector cmp (2L)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmgt(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmgt2F(vecD dst, vecD src1, vecD src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 2 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "fcmgt $dst, $src1, $src2\t# vector cmp (2F)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ fcmgt(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vcmgt4F(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 4 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "fcmgt $dst, $src1, $src2\t# vector cmp (4F)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ fcmgt(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmgt2D(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 2 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::gt && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "fcmgt $dst, $src1, $src2\t# vector cmp (2D)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ fcmgt(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmge8B(vecD dst, vecD src1, vecD 
src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 8 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmge $dst, $src1, $src2\t# vector cmp (8B)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmge(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vcmge16B(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 16 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmge $dst, $src1, $src2\t# vector cmp (16B)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmge(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmge4S(vecD dst, vecD src1, vecD src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 4 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmge $dst, $src1, $src2\t# vector cmp (4S)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmge(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vcmge8S(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 8 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmge $dst, $src1, $src2\t# vector cmp (8S)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmge(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmge2I(vecD dst, vecD src1, vecD src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 2 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmge $dst, $src1, $src2\t# vector cmp (2I)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmge(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vcmge4I(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 4 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmge $dst, $src1, $src2\t# vector cmp (4I)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmge(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmge2L(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 2 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format 
%{ "cmge $dst, $src1, $src2\t# vector cmp (2L)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmge(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmge2F(vecD dst, vecD src1, vecD src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 2 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "fcmge $dst, $src1, $src2\t# vector cmp (2F)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ fcmge(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vcmge4F(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 4 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "fcmge $dst, $src1, $src2\t# vector cmp (4F)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ fcmge(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmge2D(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 2 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::ge && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "fcmge $dst, $src1, $src2\t# vector cmp (2D)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ fcmge(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmne8B(vecD dst, vecD src1, vecD src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 8 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmeq $dst, $src1, $src2\n\t# vector cmp (8B)" + "not $dst, $dst\t" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmeq(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ notr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcmne16B(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 16 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmeq $dst, $src1, $src2\n\t# vector cmp (16B)" + "not $dst, $dst\t" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmeq(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ notr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcmne4S(vecD dst, vecD src1, vecD src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 4 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmeq $dst, $src1, $src2\n\t# vector cmp (4S)" + 
"not $dst, $dst\t" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmeq(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ notr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcmne8S(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 8 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmeq $dst, $src1, $src2\n\t# vector cmp (8S)" + "not $dst, $dst\t" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmeq(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ notr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcmne2I(vecD dst, vecD src1, vecD src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 2 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmeq $dst, $src1, $src2\n\t# vector cmp (2I)" + "not $dst, $dst\t" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmeq(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ notr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcmne4I(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 4 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmeq $dst, $src1, $src2\n\t# vector cmp (4I)" + "not $dst, $dst\t" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmeq(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ notr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcmne2L(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 2 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmeq $dst, $src1, $src2\n\t# vector cmp (2L)" + "not $dst, $dst\t" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmeq(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ notr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcmne2F(vecD dst, vecD src1, vecD src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 2 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "fcmeq $dst, $src1, $src2\n\t# vector cmp (2F)" + "not $dst, $dst\t" %} + ins_cost(INSN_COST); + ins_encode %{ + __ fcmeq(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ notr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcmne4F(vecX 
dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 4 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "fcmeq $dst, $src1, $src2\n\t# vector cmp (4F)" + "not $dst, $dst\t" %} + ins_cost(INSN_COST); + ins_encode %{ + __ fcmeq(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ notr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcmne2D(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 2 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "fcmeq $dst, $src1, $src2\n\t# vector cmp (2D)" + "not $dst, $dst\t" %} + ins_cost(INSN_COST); + ins_encode %{ + __ fcmeq(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ notr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct vcmlt8B(vecD dst, vecD src1, vecD src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 8 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmgt $dst, $src2, $src1\t# vector cmp (8B)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmgt(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vcmlt16B(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 16 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmgt $dst, $src2, $src1\t# vector cmp (16B)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmgt(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmlt4S(vecD dst, vecD src1, vecD src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 4 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmgt $dst, $src2, $src1\t# vector cmp (4S)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmgt(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vcmlt8S(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 8 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmgt $dst, $src2, $src1\t# vector cmp (8S)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmgt(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmlt2I(vecD dst, vecD src1, vecD src2, immI cond) +%{ + 
predicate(n->as_Vector()->length() == 2 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmgt $dst, $src2, $src1\t# vector cmp (2I)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmgt(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vcmlt4I(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 4 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmgt $dst, $src2, $src1\t# vector cmp (4I)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmgt(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmlt2L(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 2 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmgt $dst, $src2, $src1\t# vector cmp (2L)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmgt(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmlt2F(vecD dst, vecD src1, vecD src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 2 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "fcmgt $dst, $src2, $src1\t# vector cmp (2F)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ fcmgt(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vcmlt4F(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 4 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "fcmgt $dst, $src2, $src1\t# vector cmp (4F)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ fcmgt(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmlt2D(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 2 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::lt && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "fcmgt $dst, $src2, $src1\t# vector cmp (2D)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ fcmgt(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmle8B(vecD dst, vecD src1, vecD src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 8 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmge $dst, 
$src2, $src1\t# vector cmp (8B)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmge(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vcmle16B(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 16 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmge $dst, $src2, $src1\t# vector cmp (16B)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmge(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmle4S(vecD dst, vecD src1, vecD src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 4 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmge $dst, $src2, $src1\t# vector cmp (4S)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmge(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vcmle8S(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 8 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmge $dst, $src2, $src1\t# vector cmp (8S)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmge(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmle2I(vecD dst, vecD src1, vecD src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 2 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmge $dst, $src2, $src1\t# vector cmp (2I)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmge(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vcmle4I(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 4 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmge $dst, $src2, $src1\t# vector cmp (4I)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmge(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmle2L(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 2 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "cmge $dst, $src2, $src1\t# vector cmp (2L)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ cmge(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmle2F(vecD dst, vecD src1, vecD src2, immI 
cond) +%{ + predicate(n->as_Vector()->length() == 2 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "fcmge $dst, $src2, $src1\t# vector cmp (2F)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ fcmge(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vcmle4F(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 4 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "fcmge $dst, $src2, $src1\t# vector cmp (4F)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ fcmge(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vcmle2D(vecX dst, vecX src1, vecX src2, immI cond) +%{ + predicate(n->as_Vector()->length() == 2 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::le && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "fcmge $dst, $src2, $src1\t# vector cmp (2D)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ fcmge(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(vdop128); +%} + +// ------------------------------ Vector mul ----------------------------------- + +instruct vmul2L(vecX dst, vecX src1, vecX src2, iRegLNoSp tmp1, iRegLNoSp tmp2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVL src1 src2)); + ins_cost(INSN_COST); + effect(TEMP tmp1, TEMP tmp2); + format %{ "umov $tmp1, $src1, D, 0\n\t" + "umov $tmp2, $src2, D, 0\n\t" + "mul $tmp2, $tmp2, $tmp1\n\t" + "mov $dst, T2D, 0, $tmp2\t# insert into vector(2L)\n\t" + "umov $tmp1, $src1, D, 1\n\t" + "umov $tmp2, $src2, D, 1\n\t" + "mul $tmp2, $tmp2, $tmp1\n\t" + "mov $dst, T2D, 1, $tmp2\t# insert into vector(2L)\n\t" + %} + ins_encode %{ + __ umov($tmp1$$Register, as_FloatRegister($src1$$reg), __ D, 0); + __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ D, 0); + __ mul(as_Register($tmp2$$reg), as_Register($tmp2$$reg), as_Register($tmp1$$reg)); + __ mov(as_FloatRegister($dst$$reg), __ T2D, 0, $tmp2$$Register); + __ umov($tmp1$$Register, as_FloatRegister($src1$$reg), __ D, 1); + __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ D, 1); + __ mul(as_Register($tmp2$$reg), as_Register($tmp2$$reg), as_Register($tmp1$$reg)); + __ mov(as_FloatRegister($dst$$reg), __ T2D, 1, $tmp2$$Register); + %} + ins_pipe(pipe_slow); +%} + +// --------------------------------- Vector not -------------------------------- + +instruct vnot2I(vecD dst, vecD src, immI_M1 m1) +%{ + predicate(n->as_Vector()->length_in_bytes() == 8); + match(Set dst (XorV src (ReplicateB m1))); + match(Set dst (XorV src (ReplicateS m1))); + match(Set dst (XorV src (ReplicateI m1))); + ins_cost(INSN_COST); + format %{ "not $dst, $src\t# vector (8B)" %} + ins_encode %{ + __ notr(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vnot4I(vecX dst, vecX src, immI_M1 m1) +%{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (XorV src (ReplicateB m1))); + match(Set dst (XorV src 
(ReplicateS m1))); + match(Set dst (XorV src (ReplicateI m1))); + ins_cost(INSN_COST); + format %{ "not $dst, $src\t# vector (16B)" %} + ins_encode %{ + __ notr(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct vnot2L(vecX dst, vecX src, immL_M1 m1) +%{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (XorV src (ReplicateL m1))); + ins_cost(INSN_COST); + format %{ "not $dst, $src\t# vector (16B)" %} + ins_encode %{ + __ notr(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +// ------------------------------ Vector max/min ------------------------------- + +instruct vmax8B(vecD dst, vecD src1, vecD src2) +%{ + predicate((n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) && + n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (MaxV src1 src2)); + ins_cost(INSN_COST); + format %{ "maxv $dst, $src1, $src2\t# vector (8B)" %} + ins_encode %{ + __ maxv(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vmax16B(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (MaxV src1 src2)); + ins_cost(INSN_COST); + format %{ "maxv $dst, $src1, $src2\t# vector (16B)" %} + ins_encode %{ + __ maxv(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vmax4S(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (MaxV src1 src2)); + ins_cost(INSN_COST); + format %{ "maxv $dst, $src1, $src2\t# vector (4S)" %} + ins_encode %{ + __ maxv(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vmax8S(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (MaxV src1 src2)); + ins_cost(INSN_COST); + format %{ "maxv $dst, $src1, $src2\t# vector (8S)" %} + ins_encode %{ + __ maxv(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vmax2I(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (MaxV src1 src2)); + ins_cost(INSN_COST); + format %{ "maxv $dst, $src1, $src2\t# vector (2I)" %} + ins_encode %{ + __ maxv(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vmax4I(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (MaxV src1 src2)); + ins_cost(INSN_COST); + format %{ "maxv $dst, $src1, $src2\t# vector (4I)" %} + ins_encode %{ + __ maxv(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vmin8B(vecD dst, vecD src1, vecD src2) +%{ + predicate((n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) && + n->bottom_type()->is_vect()->element_basic_type() == 
T_BYTE); + match(Set dst (MinV src1 src2)); + ins_cost(INSN_COST); + format %{ "minv $dst, $src1, $src2\t# vector (8B)" %} + ins_encode %{ + __ minv(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vmin16B(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (MinV src1 src2)); + ins_cost(INSN_COST); + format %{ "minv $dst, $src1, $src2\t# vector (16B)" %} + ins_encode %{ + __ minv(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vmin4S(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (MinV src1 src2)); + ins_cost(INSN_COST); + format %{ "minv $dst, $src1, $src2\t# vector (4S)" %} + ins_encode %{ + __ minv(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vmin8S(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (MinV src1 src2)); + ins_cost(INSN_COST); + format %{ "minv $dst, $src1, $src2\t# vector (8S)" %} + ins_encode %{ + __ minv(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vmin2I(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (MinV src1 src2)); + ins_cost(INSN_COST); + format %{ "minv $dst, $src1, $src2\t# vector (2I)" %} + ins_encode %{ + __ minv(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop64); +%} + +instruct vmin4I(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst (MinV src1 src2)); + ins_cost(INSN_COST); + format %{ "minv $dst, $src1, $src2\t# vector (4I)" %} + ins_encode %{ + __ minv(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + + +instruct vmax2L(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (MaxV src1 src2)); + ins_cost(INSN_COST); + effect(TEMP dst); + format %{ "cmgt $dst, $src1, $src2\t# vector (2L)\n\t" + "bsl $dst, $src1, $src2\t# vector (16B)" %} + ins_encode %{ + __ cmgt(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ bsl(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop128); +%} + +instruct vmin2L(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst (MinV src1 src2)); + ins_cost(INSN_COST); + effect(TEMP dst); + format %{ "cmgt $dst, $src1, $src2\t# vector (2L)\n\t" + "bsl $dst, $src2, $src1\t# vector (16B)" %} + ins_encode %{ + __ cmgt(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ 
bsl(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(vdop128); +%} + +// --------------------------------- blend (bsl) ---------------------------- + +instruct vbsl8B(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length_in_bytes() == 8); + match(Set dst (VectorBlend (Binary src1 src2) dst)); + ins_cost(INSN_COST); + format %{ "bsl $dst, $src2, $src1\t# vector (8B)" %} + ins_encode %{ + __ bsl(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(vlogical64); +%} + +instruct vbsl16B(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length_in_bytes() == 16); + match(Set dst (VectorBlend (Binary src1 src2) dst)); + ins_cost(INSN_COST); + format %{ "bsl $dst, $src2, $src1\t# vector (16B)" %} + ins_encode %{ + __ bsl(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(vlogical128); +%} + +// --------------------------------- Load/store Mask ---------------------------- + +instruct loadmask8B(vecD dst, vecD src ) +%{ + predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorLoadMask src )); + ins_cost(INSN_COST); + format %{ "negr $dst, $src\t# load mask (8B to 8B)" %} + ins_encode %{ + __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct loadmask16B(vecX dst, vecX src ) +%{ + predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorLoadMask src )); + ins_cost(INSN_COST); + format %{ "negr $dst, $src\t# load mask (16B to 16B)" %} + ins_encode %{ + __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct storemask8B(vecD dst, vecD src , immI_1 size) +%{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (VectorStoreMask src size)); + ins_cost(INSN_COST); + format %{ "negr $dst, $src\t# store mask (8B to 8B)" %} + ins_encode %{ + __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct storemask16B(vecX dst, vecX src , immI_1 size) +%{ + predicate(n->as_Vector()->length() == 16); + match(Set dst (VectorStoreMask src size)); + ins_cost(INSN_COST); + format %{ "negr $dst, $src\t# store mask (16B to 16B)" %} + ins_encode %{ + __ negr(as_FloatRegister($dst$$reg), __ T16B, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct loadmask4S(vecD dst, vecD src ) +%{ + predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorLoadMask src )); + ins_cost(INSN_COST); + format %{ "uxtl $dst, $src\n\t" + "negr $dst, $dst\t# load mask (4B to 4H)" %} + ins_encode %{ + __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B); + __ negr(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct loadmask8S(vecX dst, vecD src ) +%{ + predicate(n->as_Vector()->length() == 8 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorLoadMask src )); + ins_cost(INSN_COST); + format %{ "uxtl $dst, $src\n\t" + "negr $dst, $dst\t# load mask (8B to 8H)" %} + ins_encode %{ + __ uxtl(as_FloatRegister($dst$$reg), __ T8H, 
as_FloatRegister($src$$reg), __ T8B); + __ negr(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct storemask4S(vecD dst, vecD src , immI_2 size) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (VectorStoreMask src size)); + ins_cost(INSN_COST); + format %{ "xtn $dst, $src\n\t" + "negr $dst, $dst\t# store mask (4H to 4B)" %} + ins_encode %{ + __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg), __ T8H); + __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct storemask8S(vecD dst, vecX src , immI_2 size) +%{ + predicate(n->as_Vector()->length() == 8); + match(Set dst (VectorStoreMask src size)); + ins_cost(INSN_COST); + format %{ "xtn $dst, $src\n\t" + "negr $dst, $dst\t# store mask (8H to 8B)" %} + ins_encode %{ + __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($src$$reg), __ T8H); + __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct loadmask2I(vecD dst, vecD src ) +%{ + predicate(n->as_Vector()->length() == 2 && + (n->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set dst (VectorLoadMask src )); + ins_cost(INSN_COST); + format %{ "uxtl $dst, $src\t# 2B to 2H\n\t" + "uxtl $dst, $dst\t# 2H to 2S\n\t" + "negr $dst, $dst\t# load mask (2B to 2S)" %} + ins_encode %{ + __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B); + __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H); + __ negr(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct loadmask4I(vecX dst, vecD src ) +%{ + predicate(n->as_Vector()->length() == 4 && + (n->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set dst (VectorLoadMask src )); + ins_cost(INSN_COST); + format %{ "uxtl $dst, $src\t# 4B to 4H\n\t" + "uxtl $dst, $dst\t# 4H to 4S\n\t" + "negr $dst, $dst\t# load mask (4B to 4S)" %} + ins_encode %{ + __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B); + __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H); + __ negr(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct storemask2I(vecD dst, vecD src , immI_4 size) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (VectorStoreMask src size)); + ins_cost(INSN_COST); + format %{ "xtn $dst, $src\t# 2S to 2H\n\t" + "xtn $dst, $dst\t# 2H to 2B\n\t" + "negr $dst, $dst\t# store mask (2S to 2B)" %} + ins_encode %{ + __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($src$$reg), __ T4S); + __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg), __ T8H); + __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct storemask4I(vecD dst, vecX src , immI_4 size) +%{ + predicate(n->as_Vector()->length() == 4); + match(Set dst (VectorStoreMask src size)); + ins_cost(INSN_COST); + format %{ "xtn $dst, $src\t# 4S to 4H\n\t" + "xtn $dst, $dst\t# 4H to 4B\n\t" + "negr $dst, $dst\t# store mask (4S to 4B)" %} + ins_encode %{ + __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($src$$reg), __ T4S); + __ xtn(as_FloatRegister($dst$$reg), __ T8B, 
as_FloatRegister($dst$$reg), __ T8H); + __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct loadmask2L(vecX dst, vecD src) +%{ + predicate(n->as_Vector()->length() == 2 && + (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); + match(Set dst (VectorLoadMask src)); + ins_cost(INSN_COST); + format %{ "uxtl $dst, $src\t# 2B to 2S\n\t" + "uxtl $dst, $dst\t# 2S to 2I\n\t" + "uxtl $dst, $dst\t# 2I to 2L\n\t" + "neg $dst, $dst\t# load mask (2B to 2L)" %} + ins_encode %{ + __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B); + __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H); + __ uxtl(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($dst$$reg), __ T2S); + __ negr(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct storemask2L(vecD dst, vecX src, immI_8 size) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (VectorStoreMask src size)); + ins_cost(INSN_COST); + format %{ "xtn $dst, $src\t# 2L to 2I\n\t" + "xtn $dst, $dst\t# 2I to 2S\n\t" + "xtn $dst, $dst\t# 2S to 2B\n\t" + "neg $dst, $dst\t# store mask (2L to 2B)" %} + ins_encode %{ + __ xtn(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg), __ T2D); + __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($dst$$reg), __ T4S); + __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg), __ T8H); + __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +//-------------------------------- LOAD_IOTA_INDICES---------------------------------- + +instruct loadcon8B(vecD dst, immI0 src) +%{ + predicate((n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8) && + n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorLoadConst src)); + ins_cost(INSN_COST); + format %{ "ldr $dst, CONSTANT_MEMORY\t# load iota indices" %} + ins_encode %{ + __ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices())); + __ ldrd(as_FloatRegister($dst$$reg), rscratch1); + %} + ins_pipe(pipe_class_memory); +%} + +instruct loadcon16B(vecX dst, immI0 src) +%{ + predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorLoadConst src)); + ins_cost(INSN_COST); + format %{ "ldr $dst, CONSTANT_MEMORY\t# load iota indices" %} + ins_encode %{ + __ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices())); + __ ldrq(as_FloatRegister($dst$$reg), rscratch1); + %} + ins_pipe(pipe_class_memory); +%} + +//-------------------------------- LOAD_SHUFFLE ---------------------------------- + +instruct loadshuffle8B(vecD dst, vecD src) +%{ + predicate(n->as_Vector()->length() == 8 && + n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorLoadShuffle src)); + ins_cost(INSN_COST); + format %{ "mov $dst, $src\t# get 8B shuffle" %} + ins_encode %{ + __ orr(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct loadshuffle16B(vecX dst, vecX src) +%{ + predicate(n->as_Vector()->length() == 16 && + n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorLoadShuffle src)); + ins_cost(INSN_COST); + 
format %{ "mov $dst, $src\t# get 16B shuffle" %} + ins_encode %{ + __ orr(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + +instruct loadshuffle4S(vecD dst, vecD src) +%{ + predicate(n->as_Vector()->length() == 4 && + n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorLoadShuffle src)); + ins_cost(INSN_COST); + format %{ "uxtl $dst, $src\t# 4B to 4H" %} + ins_encode %{ + __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B); + %} + ins_pipe(pipe_class_default); +%} + +instruct loadshuffle8S(vecX dst, vecD src) +%{ + predicate(n->as_Vector()->length() == 8 && + n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorLoadShuffle src)); + ins_cost(INSN_COST); + format %{ "uxtl $dst, $src\t# 8B to 8H" %} + ins_encode %{ + __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B); + %} + ins_pipe(pipe_class_default); +%} + +instruct loadshuffle4I(vecX dst, vecD src) +%{ + predicate(n->as_Vector()->length() == 4 && + (n->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set dst (VectorLoadShuffle src)); + ins_cost(INSN_COST); + format %{ "uxtl $dst, $src\t# 4B to 4H \n\t" + "uxtl $dst, $dst\t# 4H to 4S" %} + ins_encode %{ + __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B); + __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H); + %} + ins_pipe(pipe_slow); +%} + +//-------------------------------- Rearrange ------------------------------------- +// Here is an example that rearranges a NEON vector with 4 ints: +// Rearrange V1 int[a0, a1, a2, a3] to V2 int[a2, a3, a0, a1] +// 1. Get the indices of V1 and store them as Vi byte[0, 1, 2, 3]. +// 2. Convert Vi byte[0, 1, 2, 3] to the indices of V2 and also store them as Vi byte[2, 3, 0, 1]. +// 3. Unsigned extend Long Vi from byte[2, 3, 0, 1] to int[2, 3, 0, 1]. +// 4. Multiply Vi int[2, 3, 0, 1] with constant int[0x04040404, 0x04040404, 0x04040404, 0x04040404] +// and get tbl base Vm int[0x08080808, 0x0c0c0c0c, 0x00000000, 0x04040404]. +// 5. Add Vm with constant int[0x03020100, 0x03020100, 0x03020100, 0x03020100] +// and get tbl index Vm int[0x0b0a0908, 0x0f0e0d0c, 0x03020100, 0x07060504] +// 6. Use Vm as index register, and use V1 as table register. +// Then get V2 as the result by tbl NEON instructions. +// Notes: +// Step 1 matches VectorLoadConst. +// Step 3 matches VectorLoadShuffle. +// Step 4, 5, 6 match VectorRearrange. +// For VectorRearrange short/int, the reason why such complex calculation is +// required is because NEON tbl supports bytes table only, so for short/int, we +// need to lookup 2/4 bytes as a group. For VectorRearrange long, we use bsl +// to implement rearrange. 
+ +instruct rearrange8B(vecD dst, vecD src, vecD shuffle) +%{ + predicate(n->as_Vector()->length() == 8 && + n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorRearrange src shuffle)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst); + format %{ "tbl $dst, {$dst}, $shuffle\t# rearrange 8B" %} + ins_encode %{ + __ tbl(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), 1, as_FloatRegister($shuffle$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct rearrange16B(vecX dst, vecX src, vecX shuffle) +%{ + predicate(n->as_Vector()->length() == 16 && + n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorRearrange src shuffle)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst); + format %{ "tbl $dst, {$dst}, $shuffle\t# rearrange 16B" %} + ins_encode %{ + __ tbl(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), 1, as_FloatRegister($shuffle$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct rearrange4S(vecD dst, vecD src, vecD shuffle, vecD tmp0, vecD tmp1) +%{ + predicate(n->as_Vector()->length() == 4 && + n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorRearrange src shuffle)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp0, TEMP tmp1); + format %{ "mov $tmp0, CONSTANT\t# constant 0x0202020202020202\n\t" + "mov $tmp1, CONSTANT\t# constant 0x0100010001000100\n\t" + "mulv $dst, T4H, $shuffle, $tmp0\n\t" + "addv $dst, T8B, $dst, $tmp1\n\t" + "tbl $dst, {$src}, $dst\t# rearrange 4S" %} + ins_encode %{ + __ mov(as_FloatRegister($tmp0$$reg), __ T8B, 0x02); + __ mov(as_FloatRegister($tmp1$$reg), __ T4H, 0x0100); + __ mulv(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($shuffle$$reg), as_FloatRegister($tmp0$$reg)); + __ addv(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($dst$$reg), as_FloatRegister($tmp1$$reg)); + __ tbl(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), 1, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct rearrange8S(vecX dst, vecX src, vecX shuffle, vecX tmp0, vecX tmp1) +%{ + predicate(n->as_Vector()->length() == 8 && + n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorRearrange src shuffle)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp0, TEMP tmp1); + format %{ "mov $tmp0, CONSTANT\t# constant 0x0202020202020202\n\t" + "mov $tmp1, CONSTANT\t# constant 0x0100010001000100\n\t" + "mulv $dst, T8H, $shuffle, $tmp0\n\t" + "addv $dst, T16B, $dst, $tmp1\n\t" + "tbl $dst, {$src}, $dst\t# rearrange 8S" %} + ins_encode %{ + __ mov(as_FloatRegister($tmp0$$reg), __ T16B, 0x02); + __ mov(as_FloatRegister($tmp1$$reg), __ T8H, 0x0100); + __ mulv(as_FloatRegister($dst$$reg), __ T8H, + as_FloatRegister($shuffle$$reg), as_FloatRegister($tmp0$$reg)); + __ addv(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($dst$$reg), as_FloatRegister($tmp1$$reg)); + __ tbl(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), 1, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct rearrange4I(vecX dst, vecX src, vecX shuffle, vecX tmp0, vecX tmp1) +%{ + predicate(n->as_Vector()->length() == 4 && + (n->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set dst (VectorRearrange src shuffle)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp0, TEMP tmp1); + format %{ "mov $tmp0, CONSTANT\t# constant 0x0404040404040404\n\t" + "mov $tmp1, 
CONSTANT\t# constant 0x0302010003020100\n\t" + "mulv $dst, T8H, $shuffle, $tmp0\n\t" + "addv $dst, T16B, $dst, $tmp1\n\t" + "tbl $dst, {$src}, $dst\t# rearrange 4I" %} + ins_encode %{ + __ mov(as_FloatRegister($tmp0$$reg), __ T16B, 0x04); + __ mov(as_FloatRegister($tmp1$$reg), __ T4S, 0x03020100); + __ mulv(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($shuffle$$reg), as_FloatRegister($tmp0$$reg)); + __ addv(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($dst$$reg), as_FloatRegister($tmp1$$reg)); + __ tbl(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), 1, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +//-------------------------------- Anytrue/alltrue ----------------------------- + +instruct anytrue_in_mask8B(iRegINoSp dst, vecD src1, vecD src2, vecD tmp, rFlagsReg cr) +%{ + predicate(static_cast(n)->get_predicate() == BoolTest::ne); + match(Set dst (VectorTest src1 src2 )); + ins_cost(INSN_COST); + effect(TEMP tmp, KILL cr); + format %{ "addv $tmp, T8B, $src1\t# src1 and src2 are the same\n\t" + "umov $dst, $tmp, B, 0\n\t" + "cmp $dst, 0\n\t" + "cset $dst" %} + ins_encode %{ + __ addv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($src1$$reg)); + __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0); + __ cmpw($dst$$Register, zr); + __ csetw($dst$$Register, Assembler::NE); + %} + ins_pipe(pipe_slow); +%} + +instruct anytrue_in_mask16B(iRegINoSp dst, vecX src1, vecX src2, vecX tmp, rFlagsReg cr) +%{ + predicate(static_cast(n)->get_predicate() == BoolTest::ne); + match(Set dst (VectorTest src1 src2 )); + ins_cost(INSN_COST); + effect(TEMP tmp, KILL cr); + format %{ "addv $tmp, T16B, $src1\t# src1 and src2 are the same\n\t" + "umov $dst, $tmp, B, 0\n\t" + "cmp $dst, 0\n\t" + "cset $dst" %} + ins_encode %{ + __ addv(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($src1$$reg)); + __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0); + __ cmpw($dst$$Register, zr); + __ csetw($dst$$Register, Assembler::NE); + %} + ins_pipe(pipe_slow); +%} + +instruct alltrue_in_mask8B(iRegINoSp dst, vecD src1, vecD src2, vecD tmp, rFlagsReg cr) +%{ + predicate(static_cast(n)->get_predicate() == BoolTest::overflow); + match(Set dst (VectorTest src1 src2 )); + ins_cost(INSN_COST); + effect(TEMP tmp, KILL cr); + format %{ "andr $tmp, T8B, $src1, $src2\t# src2 is maskAllTrue\n\t" + "notr $tmp, T8B, $tmp\n\t" + "addv $tmp, T8B, $tmp\n\t" + "umov $dst, $tmp, B, 0\n\t" + "cmp $dst, 0\n\t" + "cset $dst" %} + ins_encode %{ + __ andr(as_FloatRegister($tmp$$reg), __ T8B, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ notr(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($tmp$$reg)); + __ addv(as_FloatRegister($tmp$$reg), __ T8B, as_FloatRegister($tmp$$reg)); + __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0); + __ cmpw($dst$$Register, zr); + __ csetw($dst$$Register, Assembler::EQ); + %} + ins_pipe(pipe_slow); +%} + +instruct alltrue_in_mask16B(iRegINoSp dst, vecX src1, vecX src2, vecX tmp, rFlagsReg cr) +%{ + predicate(static_cast(n)->get_predicate() == BoolTest::overflow); + match(Set dst (VectorTest src1 src2 )); + ins_cost(INSN_COST); + effect(TEMP tmp, KILL cr); + format %{ "andr $tmp, T16B, $src1, $src2\t# src2 is maskAllTrue\n\t" + "notr $tmp, T16B, $tmp\n\t" + "addv $tmp, T16B, $tmp\n\t" + "umov $dst, $tmp, B, 0\n\t" + "cmp $dst, 0\n\t" + "cset $dst" %} + ins_encode %{ + __ andr(as_FloatRegister($tmp$$reg), __ T16B, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ 
notr(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($tmp$$reg)); + __ addv(as_FloatRegister($tmp$$reg), __ T16B, as_FloatRegister($tmp$$reg)); + __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0); + __ cmpw($dst$$Register, zr); + __ csetw($dst$$Register, Assembler::EQ); + %} + ins_pipe(pipe_slow); +%} diff --git a/src/hotspot/cpu/aarch64/aarch64_neon_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_neon_ad.m4 new file mode 100644 index 00000000000..0b1dc5cb7c6 --- /dev/null +++ b/src/hotspot/cpu/aarch64/aarch64_neon_ad.m4 @@ -0,0 +1,1424 @@ +// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2020, Arm Limited. All rights reserved. +// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +// +// This code is free software; you can redistribute it and/or modify it +// under the terms of the GNU General Public License version 2 only, as +// published by the Free Software Foundation. +// +// This code is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +// version 2 for more details (a copy is included in the LICENSE file that +// accompanied this code). +// +// You should have received a copy of the GNU General Public License version +// 2 along with this work; if not, write to the Free Software Foundation, +// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +// +// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +// or visit www.oracle.com if you need additional information or have any +// questions. +// +// + +dnl Generate the warning +// This file is automatically generated by running "m4 aarch64_neon_ad.m4". 
Do not edit ---- +dnl + +// AArch64 NEON Architecture Description File + +dnl +define(`ORL2I', `ifelse($1,I,orL2I)')dnl +dnl +define(`error', `__program__:__file__:__line__: Invalid argument ``$1''m4exit(`1')')dnl +dnl +define(`iTYPE2SIMD', +`ifelse($1, `B', `B', + $1, `S', `H', + $1, `I', `S', + $1, `L', `D', + `error($1)')')dnl +dnl +define(`fTYPE2SIMD', +`ifelse($1, `F', `S', + $1, `D', `D', + `error($1)')')dnl +dnl +define(`TYPE2DATATYPE', +`ifelse($1, `B', `BYTE', + $1, `S', `SHORT', + $1, `I', `INT', + $1, `L', `LONG', + $1, `F', `FLOAT', + $1, `D', `DOUBLE', + `error($1)')')dnl +dnl +// ====================VECTOR INSTRUCTIONS================================== + +// ------------------------------ Load/store/reinterpret ----------------------- + +// Load vector (16 bits) +instruct loadV2(vecD dst, memory mem) +%{ + predicate(n->as_LoadVector()->memory_size() == 2); + match(Set dst (LoadVector mem)); + ins_cost(4 * INSN_COST); + format %{ "ldrh $dst,$mem\t# vector (16 bits)" %} + ins_encode( aarch64_enc_ldrvH(dst, mem) ); + ins_pipe(vload_reg_mem64); +%} + +// Store Vector (16 bits) +instruct storeV2(vecD src, memory mem) +%{ + predicate(n->as_StoreVector()->memory_size() == 2); + match(Set mem (StoreVector mem src)); + ins_cost(4 * INSN_COST); + format %{ "strh $mem,$src\t# vector (16 bits)" %} + ins_encode( aarch64_enc_strvH(src, mem) ); + ins_pipe(vstore_reg_mem64); +%} +dnl +define(`REINTERPRET', ` +instruct reinterpret$1`'(vec$1 dst) +%{ + predicate(n->bottom_type()->is_vect()->length_in_bytes() == $2 && + n->in(1)->bottom_type()->is_vect()->length_in_bytes() == $2); + match(Set dst (VectorReinterpret dst)); + ins_cost(0); + format %{ " # reinterpret $dst" %} + ins_encode %{ + // empty + %} + ins_pipe(pipe_class_empty); +%}')dnl +dnl $1 $2 +REINTERPRET(D, 8) +REINTERPRET(X, 16) +dnl +define(`REINTERPRET_X', ` +instruct reinterpret$1`'2$2`'(vec$2 dst, vec$1 src) +%{ + predicate(n->bottom_type()->is_vect()->length_in_bytes() == $3 && + n->in(1)->bottom_type()->is_vect()->length_in_bytes() == $4); + match(Set dst (VectorReinterpret src)); + ins_cost(INSN_COST); + format %{ " # reinterpret $dst,$src" %} + ins_encode %{ + // If register is the same, then move is not needed. 
+ if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) { + __ orr(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } + %} + ins_pipe(vlogical64); +%}')dnl +dnl $1 $2 $3 $4 +REINTERPRET_X(D, X, 16, 8) +REINTERPRET_X(X, D, 8, 16) +dnl + +// ------------------------------ Vector cast ------------------------------- +dnl +define(`VECTOR_CAST_I2I', ` +instruct vcvt$1$2to$1$3`'(vec$4 dst, vec$5 src) +%{ + predicate(n->as_Vector()->length() == $1 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3)); + match(Set dst (VectorCast$2`'2X src)); + format %{ "$6 $dst, T$8, $src, T$7\t# convert $1$2 to $1$3 vector" %} + ins_encode %{ + __ $6(as_FloatRegister($dst$$reg), __ T$8, as_FloatRegister($src$$reg), __ T$7); + %} + ins_pipe(pipe_class_default); +%}')dnl +dnl $1 $2 $3 $4 $5 $6 $7 $8 +VECTOR_CAST_I2I(4, B, S, D, D, sxtl, 8B, 8H) +VECTOR_CAST_I2I(8, B, S, X, D, sxtl, 8B, 8H) +VECTOR_CAST_I2I(4, S, B, D, D, xtn, 8H, 8B) +VECTOR_CAST_I2I(8, S, B, D, X, xtn, 8H, 8B) +VECTOR_CAST_I2I(4, S, I, X, D, sxtl, 4H, 4S) +VECTOR_CAST_I2I(4, I, S, D, X, xtn, 4S, 4H) +VECTOR_CAST_I2I(2, I, L, X, D, sxtl, 2S, 2D) +VECTOR_CAST_I2I(2, L, I, D, X, xtn, 2D, 2S) +dnl +define(`VECTOR_CAST_B2I', ` +instruct vcvt4$1to4$2`'(vec$3 dst, vec$4 src) +%{ + predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2)); + match(Set dst (VectorCast$1`'2X src)); + format %{ "$5 $dst, T$7, $src, T$6\n\t" + "$5 $dst, T$9, $dst, T$8\t# convert 4$1 to 4$2 vector" + %} + ins_encode %{ + __ $5(as_FloatRegister($dst$$reg), __ T$7, as_FloatRegister($src$$reg), __ T$6); + __ $5(as_FloatRegister($dst$$reg), __ T$9, as_FloatRegister($dst$$reg), __ T$8); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl $1 $2 $3 $4 $5 $6 $7 $8 $9 +VECTOR_CAST_B2I(B, I, X, D, sxtl, 8B, 8H, 4H, 4S) +VECTOR_CAST_B2I(I, B, D, X, xtn, 4S, 4H, 8H, 8B) + +instruct vcvt4Bto4F(vecX dst, vecD src) +%{ + predicate(n->as_Vector()->length() == 4 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorCastB2X src)); + format %{ "sxtl $dst, T8H, $src, T8B\n\t" + "sxtl $dst, T4S, $dst, T4H\n\t" + "scvtfv T4S, $dst, $dst\t# convert 4B to 4F vector" + %} + ins_encode %{ + __ sxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B); + __ sxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H); + __ scvtfv(__ T4S, as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} +dnl +define(`VECTOR_CAST_I2F_L', ` +instruct vcvt$1$2to$1$3`'(vecX dst, vecD src) +%{ + predicate(n->as_Vector()->length() == $1 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3)); + match(Set dst (VectorCast$2`'2X src)); + format %{ "sxtl $dst, T$5, $src, T$4\n\t" + "scvtfv T$5, $dst, $dst\t# convert $1$2 to $1$3 vector" + %} + ins_encode %{ + __ sxtl(as_FloatRegister($dst$$reg), __ T$5, as_FloatRegister($src$$reg), __ T$4); + __ scvtfv(__ T$5, as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl $1 $2 $3 $4 $5 +VECTOR_CAST_I2F_L(4, S, F, 4H, 4S) +VECTOR_CAST_I2F_L(2, I, D, 2S, 2D) +dnl +define(`VECTOR_CAST_I2F', ` +instruct vcvt$1$2to$1$3`'(vec$4 dst, vec$4 src) +%{ + predicate(n->as_Vector()->length() == $1 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3)); + match(Set dst (VectorCast$2`'2X src)); + format %{ "scvtfv T$5, $dst, $src\t# convert $1$2 to $1$3 
vector" %} + ins_encode %{ + __ scvtfv(__ T$5, as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%}')dnl +dnl $1 $2 $3 $4 $5 +VECTOR_CAST_I2F(2, I, F, D, 2S) +VECTOR_CAST_I2F(4, I, F, X, 4S) +VECTOR_CAST_I2F(2, L, D, X, 2D) +dnl +define(`VECTOR_CAST_F2F', ` +instruct vcvt2$1to2$2`'(vec$3 dst, vec$4 src) +%{ + predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2)); + match(Set dst (VectorCast$1`'2X src)); + format %{ "$5 $dst, T$7, $src, T$6\t# convert 2$1 to 2$2 vector" %} + ins_encode %{ + __ $5(as_FloatRegister($dst$$reg), __ T$7, as_FloatRegister($src$$reg), __ T$6); + %} + ins_pipe(pipe_class_default); +%}')dnl +dnl $1 $2 $3 $4 $5 $6 $7 +VECTOR_CAST_F2F(F, D, X, D, fcvtl, 2S, 2D) +VECTOR_CAST_F2F(D, F, D, X, fcvtn, 2D, 2S) +dnl + +instruct vcvt2Lto2F(vecD dst, vecX src) +%{ + predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT); + match(Set dst (VectorCastL2X src)); + format %{ "scvtfv T2D, $dst, $src\n\t" + "fcvtn $dst, T2S, $dst, T2D\t# convert 2L to 2F vector" + %} + ins_encode %{ + __ scvtfv(__ T2D, as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg)); + __ fcvtn(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($dst$$reg), __ T2D); + %} + ins_pipe(pipe_slow); +%} + +// ------------------------------ Reduction ------------------------------- +dnl +define(`REDUCE_ADD_BORS', ` +instruct reduce_add$1$2`'(iRegINoSp dst, iRegIorL2I isrc, vec$3 vsrc, vec$3 tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2)); + match(Set dst (AddReductionVI isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "addv $tmp, T$1`'iTYPE2SIMD($2), $vsrc\n\t" + "smov $dst, $tmp, iTYPE2SIMD($2), 0\n\t" + "addw $dst, $dst, $isrc\n\t" + "sxt$4 $dst, $dst\t# add reduction$1$2" + %} + ins_encode %{ + __ addv(as_FloatRegister($tmp$$reg), __ T$1`'iTYPE2SIMD($2), as_FloatRegister($vsrc$$reg)); + __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ iTYPE2SIMD($2), 0); + __ addw($dst$$Register, $dst$$Register, $isrc$$Register); + __ sxt$4($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl $1 $2 $3 $4 +REDUCE_ADD_BORS(8, B, D, b) +REDUCE_ADD_BORS(16, B, X, b) +REDUCE_ADD_BORS(4, S, D, h) +REDUCE_ADD_BORS(8, S, X, h) +dnl + +instruct reduce_add2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, vecX tmp) +%{ + match(Set dst (AddReductionVL isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "addpd $tmp, $vsrc\n\t" + "umov $dst, $tmp, D, 0\n\t" + "add $dst, $isrc, $dst\t# add reduction2L" + %} + ins_encode %{ + __ addpd(as_FloatRegister($tmp$$reg), as_FloatRegister($vsrc$$reg)); + __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ D, 0); + __ add($dst$$Register, $isrc$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_mul8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD vtmp1, vecD vtmp2, iRegINoSp itmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (MulReductionVI isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp); + format %{ "ins $vtmp1, S, $vsrc, 0, 1\n\t" + "mulv $vtmp1, T8B, $vtmp1, $vsrc\n\t" + "ins $vtmp2, H, $vtmp1, 0, 1\n\t" + "mulv $vtmp2, T8B, $vtmp2, $vtmp1\n\t" + "umov $itmp, $vtmp2, B, 0\n\t" + "mulw $dst, $itmp, $isrc\n\t" + "sxtb $dst, $dst\n\t" + "umov $itmp, $vtmp2, B, 1\n\t" + "mulw 
$dst, $itmp, $dst\n\t" + "sxtb $dst, $dst\t# mul reduction8B" + %} + ins_encode %{ + __ ins(as_FloatRegister($vtmp1$$reg), __ S, + as_FloatRegister($vsrc$$reg), 0, 1); + __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B, + as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg)); + __ ins(as_FloatRegister($vtmp2$$reg), __ H, + as_FloatRegister($vtmp1$$reg), 0, 1); + __ mulv(as_FloatRegister($vtmp2$$reg), __ T8B, + as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg)); + __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 0); + __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register); + __ sxtb($dst$$Register, $dst$$Register); + __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 1); + __ mulw($dst$$Register, $itmp$$Register, $dst$$Register); + __ sxtb($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_mul16B(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp1, vecX vtmp2, iRegINoSp itmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (MulReductionVI isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp); + format %{ "ins $vtmp1, D, $vsrc, 0, 1\n\t" + "mulv $vtmp1, T8B, $vtmp1, $vsrc\n\t" + "ins $vtmp2, S, $vtmp1, 0, 1\n\t" + "mulv $vtmp1, T8B, $vtmp2, $vtmp1\n\t" + "ins $vtmp2, H, $vtmp1, 0, 1\n\t" + "mulv $vtmp2, T8B, $vtmp2, $vtmp1\n\t" + "umov $itmp, $vtmp2, B, 0\n\t" + "mulw $dst, $itmp, $isrc\n\t" + "sxtb $dst, $dst\n\t" + "umov $itmp, $vtmp2, B, 1\n\t" + "mulw $dst, $itmp, $dst\n\t" + "sxtb $dst, $dst\t# mul reduction16B" + %} + ins_encode %{ + __ ins(as_FloatRegister($vtmp1$$reg), __ D, + as_FloatRegister($vsrc$$reg), 0, 1); + __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B, + as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg)); + __ ins(as_FloatRegister($vtmp2$$reg), __ S, + as_FloatRegister($vtmp1$$reg), 0, 1); + __ mulv(as_FloatRegister($vtmp1$$reg), __ T8B, + as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg)); + __ ins(as_FloatRegister($vtmp2$$reg), __ H, + as_FloatRegister($vtmp1$$reg), 0, 1); + __ mulv(as_FloatRegister($vtmp2$$reg), __ T8B, + as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg)); + __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 0); + __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register); + __ sxtb($dst$$Register, $dst$$Register); + __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ B, 1); + __ mulw($dst$$Register, $itmp$$Register, $dst$$Register); + __ sxtb($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_mul4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecD vtmp, iRegINoSp itmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (MulReductionVI isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP vtmp, TEMP itmp); + format %{ "ins $vtmp, S, $vsrc, 0, 1\n\t" + "mulv $vtmp, T4H, $vtmp, $vsrc\n\t" + "umov $itmp, $vtmp, H, 0\n\t" + "mulw $dst, $itmp, $isrc\n\t" + "sxth $dst, $dst\n\t" + "umov $itmp, $vtmp, H, 1\n\t" + "mulw $dst, $itmp, $dst\n\t" + "sxth $dst, $dst\t# mul reduction4S" + %} + ins_encode %{ + __ ins(as_FloatRegister($vtmp$$reg), __ S, + as_FloatRegister($vsrc$$reg), 0, 1); + __ mulv(as_FloatRegister($vtmp$$reg), __ T4H, + as_FloatRegister($vtmp$$reg), as_FloatRegister($vsrc$$reg)); + __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ H, 0); + __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register); + __ sxth($dst$$Register, 
$dst$$Register); + __ umov($itmp$$Register, as_FloatRegister($vtmp$$reg), __ H, 1); + __ mulw($dst$$Register, $itmp$$Register, $dst$$Register); + __ sxth($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_mul8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, vecX vtmp1, vecX vtmp2, iRegINoSp itmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (MulReductionVI isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP vtmp1, TEMP vtmp2, TEMP itmp); + format %{ "ins $vtmp1, D, $vsrc, 0, 1\n\t" + "mulv $vtmp1, T4H, $vtmp1, $vsrc\n\t" + "ins $vtmp2, S, $vtmp1, 0, 1\n\t" + "mulv $vtmp2, T4H, $vtmp2, $vtmp1\n\t" + "umov $itmp, $vtmp2, H, 0\n\t" + "mulw $dst, $itmp, $isrc\n\t" + "sxth $dst, $dst\n\t" + "umov $itmp, $vtmp2, H, 1\n\t" + "mulw $dst, $itmp, $dst\n\t" + "sxth $dst, $dst\t# mul reduction8S" + %} + ins_encode %{ + __ ins(as_FloatRegister($vtmp1$$reg), __ D, + as_FloatRegister($vsrc$$reg), 0, 1); + __ mulv(as_FloatRegister($vtmp1$$reg), __ T4H, + as_FloatRegister($vtmp1$$reg), as_FloatRegister($vsrc$$reg)); + __ ins(as_FloatRegister($vtmp2$$reg), __ S, + as_FloatRegister($vtmp1$$reg), 0, 1); + __ mulv(as_FloatRegister($vtmp2$$reg), __ T4H, + as_FloatRegister($vtmp2$$reg), as_FloatRegister($vtmp1$$reg)); + __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ H, 0); + __ mulw($dst$$Register, $itmp$$Register, $isrc$$Register); + __ sxth($dst$$Register, $dst$$Register); + __ umov($itmp$$Register, as_FloatRegister($vtmp2$$reg), __ H, 1); + __ mulw($dst$$Register, $itmp$$Register, $dst$$Register); + __ sxth($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_mul2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp) +%{ + match(Set dst (MulReductionVL isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, D, 0\n\t" + "mul $dst, $isrc, $tmp\n\t" + "umov $tmp, $vsrc, D, 1\n\t" + "mul $dst, $dst, $tmp\t# mul reduction2L" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0); + __ mul($dst$$Register, $isrc$$Register, $tmp$$Register); + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 1); + __ mul($dst$$Register, $dst$$Register, $tmp$$Register); + %} + ins_pipe(pipe_slow); +%} +dnl +define(`REDUCE_MAX_MIN_INT', ` +instruct reduce_$1$2$3`'(iRegINoSp dst, iRegIorL2I isrc, vec$4 vsrc, vec$4 tmp, rFlagsReg cr) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3)); + match(Set dst ($5ReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); + format %{ "s$1v $tmp, T$2`'iTYPE2SIMD($3), $vsrc\n\t" + "$6mov $dst, $tmp, iTYPE2SIMD($3), 0\n\t" + "cmpw $dst, $isrc\n\t" + "cselw $dst, $dst, $isrc $7\t# $1 reduction$2$3" + %} + ins_encode %{ + __ s$1v(as_FloatRegister($tmp$$reg), __ T$2`'iTYPE2SIMD($3), as_FloatRegister($vsrc$$reg)); + __ $6mov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ iTYPE2SIMD($3), 0); + __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg)); + __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::$7); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl $1 $2 $3 $4 $5 $6 $7 +REDUCE_MAX_MIN_INT(max, 8, B, D, Max, s, GT) +REDUCE_MAX_MIN_INT(max, 16, B, X, Max, s, GT) +REDUCE_MAX_MIN_INT(max, 4, S, D, Max, s, GT) +REDUCE_MAX_MIN_INT(max, 8, S, X, Max, s, GT) +REDUCE_MAX_MIN_INT(max, 4, I, X, Max, u, GT) +REDUCE_MAX_MIN_INT(min, 8, B, D, Min, s, LT) 
+REDUCE_MAX_MIN_INT(min, 16, B, X, Min, s, LT) +REDUCE_MAX_MIN_INT(min, 4, S, D, Min, s, LT) +REDUCE_MAX_MIN_INT(min, 8, S, X, Min, s, LT) +REDUCE_MAX_MIN_INT(min, 4, I, X, Min, u, LT) +dnl +define(`REDUCE_MAX_MIN_2I', ` +instruct reduce_$1`'2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, vecX tmp, rFlagsReg cr) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst ($2ReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); + format %{ "dup $tmp, T2D, $vsrc\n\t" + "s$1v $tmp, T4S, $tmp\n\t" + "umov $dst, $tmp, S, 0\n\t" + "cmpw $dst, $isrc\n\t" + "cselw $dst, $dst, $isrc $3\t# $1 reduction2I" + %} + ins_encode %{ + __ dup(as_FloatRegister($tmp$$reg), __ T2D, as_FloatRegister($vsrc$$reg)); + __ s$1v(as_FloatRegister($tmp$$reg), __ T4S, as_FloatRegister($tmp$$reg)); + __ umov(as_Register($dst$$reg), as_FloatRegister($tmp$$reg), __ S, 0); + __ cmpw(as_Register($dst$$reg), as_Register($isrc$$reg)); + __ cselw(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($isrc$$reg), Assembler::$3); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl $1 $2 $3 +REDUCE_MAX_MIN_2I(max, Max, GT) +REDUCE_MAX_MIN_2I(min, Min, LT) +dnl +define(`REDUCE_MAX_MIN_2L', ` +instruct reduce_$1`'2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp, rFlagsReg cr) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst ($2ReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp, KILL cr); + format %{ "umov $tmp, $vsrc, D, 0\n\t" + "cmp $isrc,$tmp\n\t" + "csel $dst, $isrc, $tmp $3\n\t" + "umov $tmp, $vsrc, D, 1\n\t" + "cmp $dst, $tmp\n\t" + "csel $dst, $dst, $tmp $3\t# $1 reduction2L" + %} + ins_encode %{ + __ umov(as_Register($tmp$$reg), as_FloatRegister($vsrc$$reg), __ D, 0); + __ cmp(as_Register($isrc$$reg), as_Register($tmp$$reg)); + __ csel(as_Register($dst$$reg), as_Register($isrc$$reg), as_Register($tmp$$reg), Assembler::$3); + __ umov(as_Register($tmp$$reg), as_FloatRegister($vsrc$$reg), __ D, 1); + __ cmp(as_Register($dst$$reg), as_Register($tmp$$reg)); + __ csel(as_Register($dst$$reg), as_Register($dst$$reg), as_Register($tmp$$reg), Assembler::$3); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl $1 $2 $3 +REDUCE_MAX_MIN_2L(max, Max, GT) +REDUCE_MAX_MIN_2L(min, Min, LT) +dnl +define(`REDUCE_LOGIC_OP_8B', ` +instruct reduce_$1`'8B(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst ($2ReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, S, 0\n\t" + "umov $dst, $vsrc, S, 1\n\t" + "$1w $dst, $dst, $tmp\n\t" + "$1w $dst, $dst, $dst, LSR #16\n\t" + "$1w $dst, $dst, $dst, LSR #8\n\t" + "$1w $dst, $isrc, $dst\n\t" + "sxtb $dst, $dst\t# $1 reduction8B" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0); + __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ S, 1); + __ $1w($dst$$Register, $dst$$Register, $tmp$$Register); + __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16); + __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8); + __ $1w($dst$$Register, $isrc$$Register, $dst$$Register); + __ sxtb($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl $1 $2 +REDUCE_LOGIC_OP_8B(and, And) +REDUCE_LOGIC_OP_8B(orr, Or) +REDUCE_LOGIC_OP_8B(eor, Xor) +define(`REDUCE_LOGIC_OP_16B', ` +instruct reduce_$1`'16B(iRegINoSp dst, 
iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst ($2ReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, D, 0\n\t" + "umov $dst, $vsrc, D, 1\n\t" + "$3 $dst, $dst, $tmp\n\t" + "$3 $dst, $dst, $dst, LSR #32\n\t" + "$1w $dst, $dst, $dst, LSR #16\n\t" + "$1w $dst, $dst, $dst, LSR #8\n\t" + "$1w $dst, $isrc, $dst\n\t" + "sxtb $dst, $dst\t# $1 reduction16B" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0); + __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1); + __ $3($dst$$Register, $dst$$Register, $tmp$$Register); + __ $3($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32); + __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16); + __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 8); + __ $1w($dst$$Register, $isrc$$Register, $dst$$Register); + __ sxtb($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl $1 $2 $3 +REDUCE_LOGIC_OP_16B(and, And, andr) +REDUCE_LOGIC_OP_16B(orr, Or, orr ) +REDUCE_LOGIC_OP_16B(eor, Xor, eor ) +dnl +define(`REDUCE_LOGIC_OP_4S', ` +instruct reduce_$1`'4S(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst ($2ReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, S, 0\n\t" + "umov $dst, $vsrc, S, 1\n\t" + "$1w $dst, $dst, $tmp\n\t" + "$1w $dst, $dst, $dst, LSR #16\n\t" + "$1w $dst, $isrc, $dst\n\t" + "sxth $dst, $dst\t# $1 reduction4S" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0); + __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ S, 1); + __ $1w($dst$$Register, $dst$$Register, $tmp$$Register); + __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16); + __ $1w($dst$$Register, $isrc$$Register, $dst$$Register); + __ sxth($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl $1 $2 +REDUCE_LOGIC_OP_4S(and, And) +REDUCE_LOGIC_OP_4S(orr, Or) +REDUCE_LOGIC_OP_4S(eor, Xor) +dnl +define(`REDUCE_LOGIC_OP_8S', ` +instruct reduce_$1`'8S(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst ($2ReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, D, 0\n\t" + "umov $dst, $vsrc, D, 1\n\t" + "$3 $dst, $dst, $tmp\n\t" + "$3 $dst, $dst, $dst, LSR #32\n\t" + "$1w $dst, $dst, $dst, LSR #16\n\t" + "$1w $dst, $isrc, $dst\n\t" + "sxth $dst, $dst\t# $1 reduction8S" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0); + __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1); + __ $3($dst$$Register, $dst$$Register, $tmp$$Register); + __ $3($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32); + __ $1w($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 16); + __ $1w($dst$$Register, $isrc$$Register, $dst$$Register); + __ sxth($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl $1 $2 $3 +REDUCE_LOGIC_OP_8S(and, And, andr) +REDUCE_LOGIC_OP_8S(orr, Or, orr ) +REDUCE_LOGIC_OP_8S(eor, Xor, eor ) +dnl +define(`REDUCE_LOGIC_OP_2I', ` +instruct reduce_$1`'2I(iRegINoSp dst, iRegIorL2I isrc, vecD vsrc, iRegINoSp tmp) +%{ + 
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst ($2ReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, S, 0\n\t" + "$1w $dst, $tmp, $isrc\n\t" + "umov $tmp, $vsrc, S, 1\n\t" + "$1w $dst, $tmp, $dst\t# $1 reduction2I" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 0); + __ $1w($dst$$Register, $tmp$$Register, $isrc$$Register); + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ S, 1); + __ $1w($dst$$Register, $tmp$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl $1 $2 +REDUCE_LOGIC_OP_2I(and, And) +REDUCE_LOGIC_OP_2I(orr, Or) +REDUCE_LOGIC_OP_2I(eor, Xor) +dnl +define(`REDUCE_LOGIC_OP_4I', ` +instruct reduce_$1`'4I(iRegINoSp dst, iRegIorL2I isrc, vecX vsrc, iRegINoSp tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); + match(Set dst ($2ReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, D, 0\n\t" + "umov $dst, $vsrc, D, 1\n\t" + "$3 $dst, $dst, $tmp\n\t" + "$3 $dst, $dst, $dst, LSR #32\n\t" + "$1w $dst, $isrc, $dst\t# $1 reduction4I" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0); + __ umov($dst$$Register, as_FloatRegister($vsrc$$reg), __ D, 1); + __ $3($dst$$Register, $dst$$Register, $tmp$$Register); + __ $3($dst$$Register, $dst$$Register, $dst$$Register, Assembler::LSR, 32); + __ $1w($dst$$Register, $isrc$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl $1 $2 $3 +REDUCE_LOGIC_OP_4I(and, And, andr) +REDUCE_LOGIC_OP_4I(orr, Or, orr ) +REDUCE_LOGIC_OP_4I(eor, Xor, eor ) +dnl +define(`REDUCE_LOGIC_OP_2L', ` +instruct reduce_$1`'2L(iRegLNoSp dst, iRegL isrc, vecX vsrc, iRegLNoSp tmp) +%{ + predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst ($2ReductionV isrc vsrc)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp); + format %{ "umov $tmp, $vsrc, D, 0\n\t" + "$3 $dst, $isrc, $tmp\n\t" + "umov $tmp, $vsrc, D, 1\n\t" + "$3 $dst, $dst, $tmp\t# $1 reduction2L" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 0); + __ $3($dst$$Register, $isrc$$Register, $tmp$$Register); + __ umov($tmp$$Register, as_FloatRegister($vsrc$$reg), __ D, 1); + __ $3($dst$$Register, $dst$$Register, $tmp$$Register); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl $1 $2 $3 +REDUCE_LOGIC_OP_2L(and, And, andr) +REDUCE_LOGIC_OP_2L(orr, Or, orr ) +REDUCE_LOGIC_OP_2L(eor, Xor, eor ) +dnl + +// ------------------------------ Vector insert --------------------------------- +define(`VECTOR_INSERT_I', ` +instruct insert$1$2`'(vec$3 dst, vec$3 src, iReg$4`'ORL2I($4) val, immI idx) +%{ + predicate(n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2)); + match(Set dst (VectorInsert (Binary src val) idx)); + ins_cost(INSN_COST); + format %{ "orr $dst, T$5, $src, $src\n\t" + "mov $dst, T$1`'iTYPE2SIMD($2), $idx, $val\t# insert into vector($1$2)" %} + ins_encode %{ + if (as_FloatRegister($dst$$reg) != as_FloatRegister($src$$reg)) { + __ orr(as_FloatRegister($dst$$reg), __ T$5, + as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); + } + __ mov(as_FloatRegister($dst$$reg), __ T$1`'iTYPE2SIMD($2), $idx$$constant, $val$$Register); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl $1 $2 $3 $4 $5 +VECTOR_INSERT_I(8, B, D, I, 8B) +VECTOR_INSERT_I(16, B, X, I, 16B) +VECTOR_INSERT_I(4, S, D, I, 8B) +VECTOR_INSERT_I(8, S, X, I, 
16B) +VECTOR_INSERT_I(2, I, D, I, 8B) +VECTOR_INSERT_I(4, I, X, I, 16B) +VECTOR_INSERT_I(2, L, X, L, 16B) +dnl +define(`VECTOR_INSERT_F', ` +instruct insert$1`'(vec$2 dst, vec$2 src, vReg$3 val, immI idx) +%{ + predicate(n->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3)); + match(Set dst (VectorInsert (Binary src val) idx)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst); + format %{ "orr $dst, T$4, $src, $src\n\t" + "ins $dst, $5, $val, $idx, 0\t# insert into vector($1)" %} + ins_encode %{ + __ orr(as_FloatRegister($dst$$reg), __ T$4, + as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); + __ ins(as_FloatRegister($dst$$reg), __ $5, + as_FloatRegister($val$$reg), $idx$$constant, 0); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl $1 $2 $3 $4 $5 +VECTOR_INSERT_F(2F, D, F, 8B, S) +VECTOR_INSERT_F(4F, X, F, 16B, S) +VECTOR_INSERT_F(2D, X, D, 16B, D) +dnl + +// ------------------------------ Vector extract --------------------------------- +define(`VECTOR_EXTRACT_I', ` +instruct extract$1$2`'(iReg$3NoSp dst, vec$4 src, immI idx) +%{ + predicate(n->in(1)->bottom_type()->is_vect()->length() == $1); + match(Set dst (Extract$2 src idx)); + ins_cost(INSN_COST); + format %{ "$5mov $dst, $src, $6, $idx\t# extract from vector($1$2)" %} + ins_encode %{ + __ $5mov($dst$$Register, as_FloatRegister($src$$reg), __ $6, $idx$$constant); + %} + ins_pipe(pipe_class_default); +%}')dnl +dnl $1 $2 $3 $4 $5 $6 +VECTOR_EXTRACT_I(8, B, I, D, s, B) +VECTOR_EXTRACT_I(16, B, I, X, s, B) +VECTOR_EXTRACT_I(4, S, I, D, s, H) +VECTOR_EXTRACT_I(8, S, I, X, s, H) +VECTOR_EXTRACT_I(2, I, I, D, u, S) +VECTOR_EXTRACT_I(4, I, I, X, u, S) +VECTOR_EXTRACT_I(2, L, L, X, u, D) +dnl +define(`VECTOR_EXTRACT_F', ` +instruct extract$1$2`'(vReg$2 dst, vec$3 src, immI idx) +%{ + predicate(n->in(1)->bottom_type()->is_vect()->length() == $1); + match(Set dst (Extract$2 src idx)); + ins_cost(INSN_COST); + format %{ "ins $dst, $4, $src, 0, $idx\t# extract from vector($1$2)" %} + ins_encode %{ + __ ins(as_FloatRegister($dst$$reg), __ $4, + as_FloatRegister($src$$reg), 0, $idx$$constant); + %} + ins_pipe(pipe_class_default); +%}')dnl +dnl $1 $2 $3 $4 +VECTOR_EXTRACT_F(2, F, D, S) +VECTOR_EXTRACT_F(4, F, X, S) +VECTOR_EXTRACT_F(2, D, X, D) +dnl + +// ------------------------------ Vector comparison --------------------------------- +define(`VECTOR_CMP_EQ_GT_GE', ` +instruct vcm$1$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2, immI cond) +%{ + predicate(n->as_Vector()->length() == $2 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::$1 && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3)); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "$6cm$1 $dst, $src1, $src2\t# vector cmp ($2$3)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ $6cm$1(as_FloatRegister($dst$$reg), __ T$2$5, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop$7); +%}')dnl +dnl $1 $2 $3 $4 $5 $6 $7 +VECTOR_CMP_EQ_GT_GE(eq, 8, B, D, B, , 64) +VECTOR_CMP_EQ_GT_GE(eq, 16,B, X, B, , 128) +VECTOR_CMP_EQ_GT_GE(eq, 4, S, D, H, , 64) +VECTOR_CMP_EQ_GT_GE(eq, 8, S, X, H, , 128) +VECTOR_CMP_EQ_GT_GE(eq, 2, I, D, S, , 64) +VECTOR_CMP_EQ_GT_GE(eq, 4, I, X, S, , 128) +VECTOR_CMP_EQ_GT_GE(eq, 2, L, X, D, , 128) +VECTOR_CMP_EQ_GT_GE(eq, 2, F, D, S, f, 64) +VECTOR_CMP_EQ_GT_GE(eq, 4, F, X, S, f, 128) +VECTOR_CMP_EQ_GT_GE(eq, 2, D, X, D, f, 128) +VECTOR_CMP_EQ_GT_GE(gt, 8, B, D, B, , 64) +VECTOR_CMP_EQ_GT_GE(gt, 16,B, X, B, , 128) +VECTOR_CMP_EQ_GT_GE(gt, 4, S, D, H, , 64) 
+VECTOR_CMP_EQ_GT_GE(gt, 8, S, X, H, , 128) +VECTOR_CMP_EQ_GT_GE(gt, 2, I, D, S, , 64) +VECTOR_CMP_EQ_GT_GE(gt, 4, I, X, S, , 128) +VECTOR_CMP_EQ_GT_GE(gt, 2, L, X, D, , 128) +VECTOR_CMP_EQ_GT_GE(gt, 2, F, D, S, f, 64) +VECTOR_CMP_EQ_GT_GE(gt, 4, F, X, S, f, 128) +VECTOR_CMP_EQ_GT_GE(gt, 2, D, X, D, f, 128) +VECTOR_CMP_EQ_GT_GE(ge, 8, B, D, B, , 64) +VECTOR_CMP_EQ_GT_GE(ge, 16,B, X, B, , 128) +VECTOR_CMP_EQ_GT_GE(ge, 4, S, D, H, , 64) +VECTOR_CMP_EQ_GT_GE(ge, 8, S, X, H, , 128) +VECTOR_CMP_EQ_GT_GE(ge, 2, I, D, S, , 64) +VECTOR_CMP_EQ_GT_GE(ge, 4, I, X, S, , 128) +VECTOR_CMP_EQ_GT_GE(ge, 2, L, X, D, , 128) +VECTOR_CMP_EQ_GT_GE(ge, 2, F, D, S, f, 64) +VECTOR_CMP_EQ_GT_GE(ge, 4, F, X, S, f, 128) +VECTOR_CMP_EQ_GT_GE(ge, 2, D, X, D, f, 128) +dnl +define(`VECTOR_CMP_NE', ` +instruct vcmne$1$2`'(vec$3 dst, vec$3 src1, vec$3 src2, immI cond) +%{ + predicate(n->as_Vector()->length() == $1 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::ne && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($2)); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "$5cmeq $dst, $src1, $src2\n\t# vector cmp ($1$2)" + "not $dst, $dst\t" %} + ins_cost(INSN_COST); + ins_encode %{ + __ $5cmeq(as_FloatRegister($dst$$reg), __ T$1$4, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ notr(as_FloatRegister($dst$$reg), __ T$6, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl $1 $2 $3 $4 $5 $6 +VECTOR_CMP_NE(8, B, D, B, , 8B) +VECTOR_CMP_NE(16,B, X, B, , 16B) +VECTOR_CMP_NE(4, S, D, H, , 8B) +VECTOR_CMP_NE(8, S, X, H, , 16B) +VECTOR_CMP_NE(2, I, D, S, , 8B) +VECTOR_CMP_NE(4, I, X, S, , 16B) +VECTOR_CMP_NE(2, L, X, D, , 16B) +VECTOR_CMP_NE(2, F, D, S, f, 8B) +VECTOR_CMP_NE(4, F, X, S, f, 16B) +VECTOR_CMP_NE(2, D, X, D, f, 16B) +dnl +define(`VECTOR_CMP_LT_LE', ` +instruct vcm$1$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2, immI cond) +%{ + predicate(n->as_Vector()->length() == $2 && + n->as_VectorMaskCmp()->get_predicate() == BoolTest::$1 && + n->in(1)->in(1)->bottom_type()->is_vect()->element_basic_type() == T_`'TYPE2DATATYPE($3)); + match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); + format %{ "$6cm$7 $dst, $src2, $src1\t# vector cmp ($2$3)" %} + ins_cost(INSN_COST); + ins_encode %{ + __ $6cm$7(as_FloatRegister($dst$$reg), __ T$2$5, + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(vdop$8); +%}')dnl +dnl $1 $2 $3 $4 $5 $6 $7 $8 +VECTOR_CMP_LT_LE(lt, 8, B, D, B, , gt, 64) +VECTOR_CMP_LT_LE(lt, 16,B, X, B, , gt, 128) +VECTOR_CMP_LT_LE(lt, 4, S, D, H, , gt, 64) +VECTOR_CMP_LT_LE(lt, 8, S, X, H, , gt, 128) +VECTOR_CMP_LT_LE(lt, 2, I, D, S, , gt, 64) +VECTOR_CMP_LT_LE(lt, 4, I, X, S, , gt, 128) +VECTOR_CMP_LT_LE(lt, 2, L, X, D, , gt, 128) +VECTOR_CMP_LT_LE(lt, 2, F, D, S, f, gt, 64) +VECTOR_CMP_LT_LE(lt, 4, F, X, S, f, gt, 128) +VECTOR_CMP_LT_LE(lt, 2, D, X, D, f, gt, 128) +VECTOR_CMP_LT_LE(le, 8, B, D, B, , ge, 64) +VECTOR_CMP_LT_LE(le, 16,B, X, B, , ge, 128) +VECTOR_CMP_LT_LE(le, 4, S, D, H, , ge, 64) +VECTOR_CMP_LT_LE(le, 8, S, X, H, , ge, 128) +VECTOR_CMP_LT_LE(le, 2, I, D, S, , ge, 64) +VECTOR_CMP_LT_LE(le, 4, I, X, S, , ge, 128) +VECTOR_CMP_LT_LE(le, 2, L, X, D, , ge, 128) +VECTOR_CMP_LT_LE(le, 2, F, D, S, f, ge, 64) +VECTOR_CMP_LT_LE(le, 4, F, X, S, f, ge, 128) +VECTOR_CMP_LT_LE(le, 2, D, X, D, f, ge, 128) +dnl + +// ------------------------------ Vector mul ----------------------------------- + +instruct vmul2L(vecX dst, vecX src1, vecX src2, iRegLNoSp tmp1, iRegLNoSp tmp2) +%{ + 
predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVL src1 src2)); + ins_cost(INSN_COST); + effect(TEMP tmp1, TEMP tmp2); + format %{ "umov $tmp1, $src1, D, 0\n\t" + "umov $tmp2, $src2, D, 0\n\t" + "mul $tmp2, $tmp2, $tmp1\n\t" + "mov $dst, T2D, 0, $tmp2\t# insert into vector(2L)\n\t" + "umov $tmp1, $src1, D, 1\n\t" + "umov $tmp2, $src2, D, 1\n\t" + "mul $tmp2, $tmp2, $tmp1\n\t" + "mov $dst, T2D, 1, $tmp2\t# insert into vector(2L)\n\t" + %} + ins_encode %{ + __ umov($tmp1$$Register, as_FloatRegister($src1$$reg), __ D, 0); + __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ D, 0); + __ mul(as_Register($tmp2$$reg), as_Register($tmp2$$reg), as_Register($tmp1$$reg)); + __ mov(as_FloatRegister($dst$$reg), __ T2D, 0, $tmp2$$Register); + __ umov($tmp1$$Register, as_FloatRegister($src1$$reg), __ D, 1); + __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ D, 1); + __ mul(as_Register($tmp2$$reg), as_Register($tmp2$$reg), as_Register($tmp1$$reg)); + __ mov(as_FloatRegister($dst$$reg), __ T2D, 1, $tmp2$$Register); + %} + ins_pipe(pipe_slow); +%} + +// --------------------------------- Vector not -------------------------------- +dnl +define(`MATCH_RULE', `ifelse($1, I, +`match(Set dst (XorV src (ReplicateB m1))); + match(Set dst (XorV src (ReplicateS m1))); + match(Set dst (XorV src (ReplicateI m1)));', +`match(Set dst (XorV src (ReplicateL m1)));')')dnl +dnl +define(`VECTOR_NOT', ` +instruct vnot$1$2`'(vec$3 dst, vec$3 src, imm$2_M1 m1) +%{ + predicate(n->as_Vector()->length_in_bytes() == $4); + MATCH_RULE($2) + ins_cost(INSN_COST); + format %{ "not $dst, $src\t# vector ($5)" %} + ins_encode %{ + __ notr(as_FloatRegister($dst$$reg), __ T$5, + as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%}')dnl +dnl $1 $2 $3 $4 $5 +VECTOR_NOT(2, I, D, 8, 8B) +VECTOR_NOT(4, I, X, 16, 16B) +VECTOR_NOT(2, L, X, 16, 16B) +undefine(MATCH_RULE) +dnl +// ------------------------------ Vector max/min ------------------------------- +dnl +define(`PREDICATE', `ifelse($1, 8B, +`predicate((n->as_Vector()->length() == 4 || n->as_Vector()->length() == 8) && + n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);', +`predicate(n->as_Vector()->length() == $2 && n->bottom_type()->is_vect()->element_basic_type() == T_$3);')')dnl +dnl +define(`VECTOR_MAX_MIN_INT', ` +instruct v$1$2$3`'(vec$4 dst, vec$4 src1, vec$4 src2) +%{ + PREDICATE(`$2$3', $2, TYPE2DATATYPE($3)) + match(Set dst ($5V src1 src2)); + ins_cost(INSN_COST); + format %{ "$1v $dst, $src1, $src2\t# vector ($2$3)" %} + ins_encode %{ + __ $1v(as_FloatRegister($dst$$reg), __ T$2`'iTYPE2SIMD($3), + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(vdop$6); +%}')dnl +dnl $1 $2 $3 $4 $5 $6 +VECTOR_MAX_MIN_INT(max, 8, B, D, Max, 64) +VECTOR_MAX_MIN_INT(max, 16, B, X, Max, 128) +VECTOR_MAX_MIN_INT(max, 4, S, D, Max, 64) +VECTOR_MAX_MIN_INT(max, 8, S, X, Max, 128) +VECTOR_MAX_MIN_INT(max, 2, I, D, Max, 64) +VECTOR_MAX_MIN_INT(max, 4, I, X, Max, 128) +VECTOR_MAX_MIN_INT(min, 8, B, D, Min, 64) +VECTOR_MAX_MIN_INT(min, 16, B, X, Min, 128) +VECTOR_MAX_MIN_INT(min, 4, S, D, Min, 64) +VECTOR_MAX_MIN_INT(min, 8, S, X, Min, 128) +VECTOR_MAX_MIN_INT(min, 2, I, D, Min, 64) +VECTOR_MAX_MIN_INT(min, 4, I, X, Min, 128) +undefine(PREDICATE) +dnl +define(`VECTOR_MAX_MIN_LONG', ` +instruct v$1`'2L`'(vecX dst, vecX src1, vecX src2) +%{ + predicate(n->as_Vector()->length() == 2 && n->bottom_type()->is_vect()->element_basic_type() == T_LONG); + match(Set dst ($2V src1 src2)); + ins_cost(INSN_COST); + effect(TEMP 
dst); + format %{ "cmgt $dst, $src1, $src2\t# vector (2L)\n\t" + "bsl $dst, $$3, $$4\t# vector (16B)" %} + ins_encode %{ + __ cmgt(as_FloatRegister($dst$$reg), __ T2D, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ bsl(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($$3$$reg), as_FloatRegister($$4$$reg)); + %} + ins_pipe(vdop128); +%}')dnl +dnl $1 $2 $3 $4 +VECTOR_MAX_MIN_LONG(max, Max, src1, src2) +VECTOR_MAX_MIN_LONG(min, Min, src2, src1) +dnl + +// --------------------------------- blend (bsl) ---------------------------- +dnl +define(`VECTOR_BSL', ` +instruct vbsl$1B`'(vec$2 dst, vec$2 src1, vec$2 src2) +%{ + predicate(n->as_Vector()->length_in_bytes() == $1); + match(Set dst (VectorBlend (Binary src1 src2) dst)); + ins_cost(INSN_COST); + format %{ "bsl $dst, $src2, $src1\t# vector ($1B)" %} + ins_encode %{ + __ bsl(as_FloatRegister($dst$$reg), __ T$1B, + as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); + %} + ins_pipe(vlogical$3); +%}')dnl +dnl $1 $2 $3 +VECTOR_BSL(8, D, 64) +VECTOR_BSL(16, X, 128) +dnl + +// --------------------------------- Load/store Mask ---------------------------- +dnl +define(`PREDICATE', `ifelse($1, load, +`predicate(n->as_Vector()->length() == $2 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);', +`predicate(n->as_Vector()->length() == $2);')')dnl +dnl +define(`VECTOR_LOAD_STORE_MASK_B', ` +instruct $1mask$2B`'(vec$3 dst, vec$3 src $5 $6) +%{ + PREDICATE($1, $2) + match(Set dst (Vector$4Mask src $6)); + ins_cost(INSN_COST); + format %{ "negr $dst, $src\t# $1 mask ($2B to $2B)" %} + ins_encode %{ + __ negr(as_FloatRegister($dst$$reg), __ T$2B, as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%}')dnl +dnl $1 $2 $3 $4 $5 $6 +VECTOR_LOAD_STORE_MASK_B(load, 8, D, Load) +VECTOR_LOAD_STORE_MASK_B(load, 16, X, Load) +VECTOR_LOAD_STORE_MASK_B(store, 8, D, Store, `, immI_1', size) +VECTOR_LOAD_STORE_MASK_B(store, 16, X, Store, `, immI_1', size) +undefine(PREDICATE)dnl +dnl +define(`PREDICATE', `ifelse($1, load, +`predicate(n->as_Vector()->length() == $2 && n->bottom_type()->is_vect()->element_basic_type() == T_SHORT);', +`predicate(n->as_Vector()->length() == $2);')')dnl +dnl +define(`VECTOR_LOAD_STORE_MASK_S', ` +instruct $1mask$2S`'(vec$3 dst, vec$4 src $9 $10) +%{ + PREDICATE($1, $2) + match(Set dst (Vector$5Mask src $10)); + ins_cost(INSN_COST); + format %{ "$6 $dst, $src\n\t" + "negr $dst, $dst\t# $1 mask ($2$7 to $2$8)" %} + ins_encode %{ + __ $6(as_FloatRegister($dst$$reg), __ T8$8, as_FloatRegister($src$$reg), __ T8$7); + __ negr(as_FloatRegister($dst$$reg), __ T8$8, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl $1 $2 $3 $4 $5 $6 $7 $8 $9 $10 +VECTOR_LOAD_STORE_MASK_S(load, 4, D, D, Load, uxtl, B, H) +VECTOR_LOAD_STORE_MASK_S(load, 8, X, D, Load, uxtl, B, H) +VECTOR_LOAD_STORE_MASK_S(store, 4, D, D, Store, xtn, H, B, `, immI_2', size) +VECTOR_LOAD_STORE_MASK_S(store, 8, D, X, Store, xtn, H, B, `, immI_2', size) +undefine(PREDICATE)dnl +dnl +define(`PREDICATE', `ifelse($1, load, +`predicate(n->as_Vector()->length() == $2 && + (n->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT));', +`predicate(n->as_Vector()->length() == $2);')')dnl +dnl +define(`VECTOR_LOAD_STORE_MASK_I', ` +instruct $1mask$2I`'(vec$3 dst, vec$4 src $12 $13) +%{ + PREDICATE($1, $2) + match(Set dst (Vector$5Mask src $13)); + ins_cost(INSN_COST); + format %{ "$6 $dst, $src\t# $2$7 to $2$8\n\t" + "$6 $dst, $dst\t# $2$8 to 
$2$9\n\t" + "negr $dst, $dst\t# $1 mask ($2$7 to $2$9)" %} + ins_encode %{ + __ $6(as_FloatRegister($dst$$reg), __ T$10$8, as_FloatRegister($src$$reg), __ T$10$7); + __ $6(as_FloatRegister($dst$$reg), __ T$11$9, as_FloatRegister($dst$$reg), __ T$11$8); + __ negr(as_FloatRegister($dst$$reg), __ T$11$9, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl $1 $2 $3 $4 $5 $6 $7 $8 $9 $10$11 $12 $13 +VECTOR_LOAD_STORE_MASK_I(load, 2, D, D, Load, uxtl, B, H, S, 8, 4) +VECTOR_LOAD_STORE_MASK_I(load, 4, X, D, Load, uxtl, B, H, S, 8, 4) +VECTOR_LOAD_STORE_MASK_I(store, 2, D, D, Store, xtn, S, H, B, 4, 8, `, immI_4', size) +VECTOR_LOAD_STORE_MASK_I(store, 4, D, X, Store, xtn, S, H, B, 4, 8, `, immI_4', size) +undefine(PREDICATE) +dnl +instruct loadmask2L(vecX dst, vecD src) +%{ + predicate(n->as_Vector()->length() == 2 && + (n->bottom_type()->is_vect()->element_basic_type() == T_LONG || + n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE)); + match(Set dst (VectorLoadMask src)); + ins_cost(INSN_COST); + format %{ "uxtl $dst, $src\t# 2B to 2S\n\t" + "uxtl $dst, $dst\t# 2S to 2I\n\t" + "uxtl $dst, $dst\t# 2I to 2L\n\t" + "neg $dst, $dst\t# load mask (2B to 2L)" %} + ins_encode %{ + __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B); + __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H); + __ uxtl(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($dst$$reg), __ T2S); + __ negr(as_FloatRegister($dst$$reg), __ T2D, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +instruct storemask2L(vecD dst, vecX src, immI_8 size) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (VectorStoreMask src size)); + ins_cost(INSN_COST); + format %{ "xtn $dst, $src\t# 2L to 2I\n\t" + "xtn $dst, $dst\t# 2I to 2S\n\t" + "xtn $dst, $dst\t# 2S to 2B\n\t" + "neg $dst, $dst\t# store mask (2L to 2B)" %} + ins_encode %{ + __ xtn(as_FloatRegister($dst$$reg), __ T2S, as_FloatRegister($src$$reg), __ T2D); + __ xtn(as_FloatRegister($dst$$reg), __ T4H, as_FloatRegister($dst$$reg), __ T4S); + __ xtn(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg), __ T8H); + __ negr(as_FloatRegister($dst$$reg), __ T8B, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +//-------------------------------- LOAD_IOTA_INDICES---------------------------------- +dnl +define(`PREDICATE', `ifelse($1, 8, +`predicate((n->as_Vector()->length() == 2 || n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8) && + n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);', +`predicate(n->as_Vector()->length() == 16 && n->bottom_type()->is_vect()->element_basic_type() == T_BYTE);')')dnl +dnl +define(`VECTOR_LOAD_CON', ` +instruct loadcon$1B`'(vec$2 dst, immI0 src) +%{ + PREDICATE($1) + match(Set dst (VectorLoadConst src)); + ins_cost(INSN_COST); + format %{ "ldr $dst, CONSTANT_MEMORY\t# load iota indices" %} + ins_encode %{ + __ lea(rscratch1, ExternalAddress(StubRoutines::aarch64::vector_iota_indices())); + __ ldr$3(as_FloatRegister($dst$$reg), rscratch1); + %} + ins_pipe(pipe_class_memory); +%}')dnl +dnl $1 $2 $3 +VECTOR_LOAD_CON(8, D, d) +VECTOR_LOAD_CON(16, X, q) +undefine(PREDICATE) +dnl +//-------------------------------- LOAD_SHUFFLE ---------------------------------- +dnl +define(`VECTOR_LOAD_SHUFFLE_B', ` +instruct loadshuffle$1B`'(vec$2 dst, vec$2 src) +%{ + predicate(n->as_Vector()->length() == $1 && + n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst 
(VectorLoadShuffle src)); + ins_cost(INSN_COST); + format %{ "mov $dst, $src\t# get $1B shuffle" %} + ins_encode %{ + __ orr(as_FloatRegister($dst$$reg), __ T$1B, + as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%}')dnl +dnl $1 $2 +VECTOR_LOAD_SHUFFLE_B(8, D) +VECTOR_LOAD_SHUFFLE_B(16, X) +dnl +define(`VECTOR_LOAD_SHUFFLE_S', ` +instruct loadshuffle$1S`'(vec$2 dst, vec$3 src) +%{ + predicate(n->as_Vector()->length() == $1 && + n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorLoadShuffle src)); + ins_cost(INSN_COST); + format %{ "uxtl $dst, $src\t# $1B to $1H" %} + ins_encode %{ + __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B); + %} + ins_pipe(pipe_class_default); +%}')dnl +dnl $1 $2 $3 +VECTOR_LOAD_SHUFFLE_S(4, D, D) +VECTOR_LOAD_SHUFFLE_S(8, X, D) +dnl + +instruct loadshuffle4I(vecX dst, vecD src) +%{ + predicate(n->as_Vector()->length() == 4 && + (n->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set dst (VectorLoadShuffle src)); + ins_cost(INSN_COST); + format %{ "uxtl $dst, $src\t# 4B to 4H \n\t" + "uxtl $dst, $dst\t# 4H to 4S" %} + ins_encode %{ + __ uxtl(as_FloatRegister($dst$$reg), __ T8H, as_FloatRegister($src$$reg), __ T8B); + __ uxtl(as_FloatRegister($dst$$reg), __ T4S, as_FloatRegister($dst$$reg), __ T4H); + %} + ins_pipe(pipe_slow); +%} + +//-------------------------------- Rearrange ------------------------------------- +// Here is an example that rearranges a NEON vector with 4 ints: +// Rearrange V1 int[a0, a1, a2, a3] to V2 int[a2, a3, a0, a1] +// 1. Get the indices of V1 and store them as Vi byte[0, 1, 2, 3]. +// 2. Convert Vi byte[0, 1, 2, 3] to the indices of V2 and also store them as Vi byte[2, 3, 0, 1]. +// 3. Unsigned extend Long Vi from byte[2, 3, 0, 1] to int[2, 3, 0, 1]. +// 4. Multiply Vi int[2, 3, 0, 1] with constant int[0x04040404, 0x04040404, 0x04040404, 0x04040404] +// and get tbl base Vm int[0x08080808, 0x0c0c0c0c, 0x00000000, 0x04040404]. +// 5. Add Vm with constant int[0x03020100, 0x03020100, 0x03020100, 0x03020100] +// and get tbl index Vm int[0x0b0a0908, 0x0f0e0d0c, 0x03020100, 0x07060504] +// 6. Use Vm as index register, and use V1 as table register. +// Then get V2 as the result by tbl NEON instructions. +// Notes: +// Step 1 matches VectorLoadConst. +// Step 3 matches VectorLoadShuffle. +// Step 4, 5, 6 match VectorRearrange. +// For VectorRearrange short/int, the reason why such complex calculation is +// required is because NEON tbl supports bytes table only, so for short/int, we +// need to lookup 2/4 bytes as a group. For VectorRearrange long, we use bsl +// to implement rearrange. 
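
For the short variant generated by VECTOR_REARRANGE_S below, the same scheme specializes to an element size of 2: the multiplier becomes 0x0202 per halfword and the additive iota becomes 0x0100. The following scalar C++ sketch (editorial, not part of the patch; the name tbl_indices_4S is hypothetical) models the resulting byte indices under that assumption.

#include <array>
#include <cstdint>

// Same scheme as for ints, specialized to 4 short lanes: mulv by 0x0202
// replicates 2 * shuffle[lane] into both bytes of the halfword, and addv of
// 0x0100 adds the within-element byte offset, matching the constants used by
// VECTOR_REARRANGE_S.
std::array<uint8_t, 8> tbl_indices_4S(const std::array<uint8_t, 4>& shuffle) {
  std::array<uint8_t, 8> idx{};
  for (int lane = 0; lane < 4; lane++) {
    for (int b = 0; b < 2; b++) {
      idx[2 * lane + b] = static_cast<uint8_t>(2 * shuffle[lane] + b);
    }
  }
  return idx;  // e.g. {2, 3, 0, 1} -> 04 05 06 07 00 01 02 03
}
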
+define(`VECTOR_REARRANGE_B', ` +instruct rearrange$1B`'(vec$2 dst, vec$2 src, vec$2 shuffle) +%{ + predicate(n->as_Vector()->length() == $1 && + n->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (VectorRearrange src shuffle)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst); + format %{ "tbl $dst, {$dst}, $shuffle\t# rearrange $1B" %} + ins_encode %{ + __ tbl(as_FloatRegister($dst$$reg), __ T$1B, + as_FloatRegister($src$$reg), 1, as_FloatRegister($shuffle$$reg)); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl $1 $2 +VECTOR_REARRANGE_B(8, D) +VECTOR_REARRANGE_B(16, X) +dnl +define(`VECTOR_REARRANGE_S', ` +instruct rearrange$1S`'(vec$2 dst, vec$2 src, vec$2 shuffle, vec$2 tmp0, vec$2 tmp1) +%{ + predicate(n->as_Vector()->length() == $1 && + n->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (VectorRearrange src shuffle)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp0, TEMP tmp1); + format %{ "mov $tmp0, CONSTANT\t# constant 0x0202020202020202\n\t" + "mov $tmp1, CONSTANT\t# constant 0x0100010001000100\n\t" + "mulv $dst, T$1H, $shuffle, $tmp0\n\t" + "addv $dst, T$3B, $dst, $tmp1\n\t" + "tbl $dst, {$src}, $dst\t# rearrange $1S" %} + ins_encode %{ + __ mov(as_FloatRegister($tmp0$$reg), __ T$3B, 0x02); + __ mov(as_FloatRegister($tmp1$$reg), __ T$1H, 0x0100); + __ mulv(as_FloatRegister($dst$$reg), __ T$1H, + as_FloatRegister($shuffle$$reg), as_FloatRegister($tmp0$$reg)); + __ addv(as_FloatRegister($dst$$reg), __ T$3B, + as_FloatRegister($dst$$reg), as_FloatRegister($tmp1$$reg)); + __ tbl(as_FloatRegister($dst$$reg), __ T$3B, + as_FloatRegister($src$$reg), 1, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl $1 $2 $3 +VECTOR_REARRANGE_S(4, D, 8) +VECTOR_REARRANGE_S(8, X, 16) + +instruct rearrange4I(vecX dst, vecX src, vecX shuffle, vecX tmp0, vecX tmp1) +%{ + predicate(n->as_Vector()->length() == 4 && + (n->bottom_type()->is_vect()->element_basic_type() == T_INT || + n->bottom_type()->is_vect()->element_basic_type() == T_FLOAT)); + match(Set dst (VectorRearrange src shuffle)); + ins_cost(INSN_COST); + effect(TEMP_DEF dst, TEMP tmp0, TEMP tmp1); + format %{ "mov $tmp0, CONSTANT\t# constant 0x0404040404040404\n\t" + "mov $tmp1, CONSTANT\t# constant 0x0302010003020100\n\t" + "mulv $dst, T8H, $shuffle, $tmp0\n\t" + "addv $dst, T16B, $dst, $tmp1\n\t" + "tbl $dst, {$src}, $dst\t# rearrange 4I" %} + ins_encode %{ + __ mov(as_FloatRegister($tmp0$$reg), __ T16B, 0x04); + __ mov(as_FloatRegister($tmp1$$reg), __ T4S, 0x03020100); + __ mulv(as_FloatRegister($dst$$reg), __ T4S, + as_FloatRegister($shuffle$$reg), as_FloatRegister($tmp0$$reg)); + __ addv(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($dst$$reg), as_FloatRegister($tmp1$$reg)); + __ tbl(as_FloatRegister($dst$$reg), __ T16B, + as_FloatRegister($src$$reg), 1, as_FloatRegister($dst$$reg)); + %} + ins_pipe(pipe_slow); +%} + +//-------------------------------- Anytrue/alltrue ----------------------------- +dnl +define(`ANYTRUE_IN_MASK', ` +instruct anytrue_in_mask$1B`'(iRegINoSp dst, vec$2 src1, vec$2 src2, vec$2 tmp, rFlagsReg cr) +%{ + predicate(static_cast(n)->get_predicate() == BoolTest::ne); + match(Set dst (VectorTest src1 src2 )); + ins_cost(INSN_COST); + effect(TEMP tmp, KILL cr); + format %{ "addv $tmp, T$1B, $src1\t# src1 and src2 are the same\n\t" + "umov $dst, $tmp, B, 0\n\t" + "cmp $dst, 0\n\t" + "cset $dst" %} + ins_encode %{ + __ addv(as_FloatRegister($tmp$$reg), __ T$1B, as_FloatRegister($src1$$reg)); + __ umov($dst$$Register, 
as_FloatRegister($tmp$$reg), __ B, 0); + __ cmpw($dst$$Register, zr); + __ csetw($dst$$Register, Assembler::NE); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl $1 $2 +ANYTRUE_IN_MASK(8, D) +ANYTRUE_IN_MASK(16, X) +dnl +define(`ALLTRUE_IN_MASK', ` +instruct alltrue_in_mask$1B`'(iRegINoSp dst, vec$2 src1, vec$2 src2, vec$2 tmp, rFlagsReg cr) +%{ + predicate(static_cast(n)->get_predicate() == BoolTest::overflow); + match(Set dst (VectorTest src1 src2 )); + ins_cost(INSN_COST); + effect(TEMP tmp, KILL cr); + format %{ "andr $tmp, T$1B, $src1, $src2\t# src2 is maskAllTrue\n\t" + "notr $tmp, T$1B, $tmp\n\t" + "addv $tmp, T$1B, $tmp\n\t" + "umov $dst, $tmp, B, 0\n\t" + "cmp $dst, 0\n\t" + "cset $dst" %} + ins_encode %{ + __ andr(as_FloatRegister($tmp$$reg), __ T$1B, + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ notr(as_FloatRegister($tmp$$reg), __ T$1B, as_FloatRegister($tmp$$reg)); + __ addv(as_FloatRegister($tmp$$reg), __ T$1B, as_FloatRegister($tmp$$reg)); + __ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0); + __ cmpw($dst$$Register, zr); + __ csetw($dst$$Register, Assembler::EQ); + %} + ins_pipe(pipe_slow); +%}')dnl +dnl $1 $2 +ALLTRUE_IN_MASK(8, D) +ALLTRUE_IN_MASK(16, X) +dnl diff --git a/src/hotspot/cpu/aarch64/aarch64_sve.ad b/src/hotspot/cpu/aarch64/aarch64_sve.ad index 90442c7b8b6..f34d4890c70 100644 --- a/src/hotspot/cpu/aarch64/aarch64_sve.ad +++ b/src/hotspot/cpu/aarch64/aarch64_sve.ad @@ -159,6 +159,31 @@ source %{ case Op_ExtractL: case Op_ExtractS: case Op_ExtractUB: + // Vector API specific + case Op_AndReductionV: + case Op_OrReductionV: + case Op_XorReductionV: + case Op_MaxReductionV: + case Op_MinReductionV: + case Op_LoadVectorGather: + case Op_StoreVectorScatter: + case Op_VectorBlend: + case Op_VectorCast: + case Op_VectorCastB2X: + case Op_VectorCastD2X: + case Op_VectorCastF2X: + case Op_VectorCastI2X: + case Op_VectorCastL2X: + case Op_VectorCastS2X: + case Op_VectorInsert: + case Op_VectorLoadConst: + case Op_VectorLoadMask: + case Op_VectorLoadShuffle: + case Op_VectorMaskCmp: + case Op_VectorRearrange: + case Op_VectorReinterpret: + case Op_VectorStoreMask: + case Op_VectorTest: return false; default: return true; @@ -846,9 +871,49 @@ instruct vpopcountI(vReg dst, vReg src) %{ // vector add reduction +instruct reduce_addB(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 && + n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_BYTE); + match(Set dst (AddReductionVI src1 src2)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(SVE_COST); + format %{ "sve_uaddv $tmp, $src2\t# vector (sve) (B)\n\t" + "smov $dst, $tmp, B, 0\n\t" + "addw $dst, $dst, $src1\n\t" + "sxtb $dst, $dst\t # add reduction B" %} + ins_encode %{ + __ sve_uaddv(as_FloatRegister($tmp$$reg), __ B, + ptrue, as_FloatRegister($src2$$reg)); + __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ B, 0); + __ addw($dst$$Register, $dst$$Register, $src1$$Register); + __ sxtb($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + +instruct reduce_addS(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 && + n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_SHORT); + match(Set dst (AddReductionVI src1 src2)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(SVE_COST); + format %{ "sve_uaddv $tmp, $src2\t# vector (sve) (H)\n\t" + "smov $dst, $tmp, H, 0\n\t" + "addw $dst, $dst, 
$src1\n\t" + "sxth $dst, $dst\t # add reduction H" %} + ins_encode %{ + __ sve_uaddv(as_FloatRegister($tmp$$reg), __ H, + ptrue, as_FloatRegister($src2$$reg)); + __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ H, 0); + __ addw($dst$$Register, $dst$$Register, $src1$$Register); + __ sxth($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%} + instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 && - (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT)); + n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT); match(Set dst (AddReductionVI src1 src2)); effect(TEMP_DEF dst, TEMP tmp); ins_cost(SVE_COST); @@ -866,7 +931,7 @@ instruct reduce_addI(iRegINoSp dst, iRegIorL2I src1, vReg src2, vRegD tmp) %{ instruct reduce_addL(iRegLNoSp dst, iRegL src1, vReg src2, vRegD tmp) %{ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 && - (n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG)); + n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG); match(Set dst (AddReductionVL src1 src2)); effect(TEMP_DEF dst, TEMP tmp); ins_cost(SVE_COST); @@ -1264,7 +1329,7 @@ instruct vlsrL(vReg dst, vReg shift) %{ instruct vasrB_imm(vReg dst, vReg src, immI shift) %{ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); - match(Set dst (RShiftVB src shift)); + match(Set dst (RShiftVB src (RShiftCntV shift))); ins_cost(SVE_COST); format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (B)" %} ins_encode %{ @@ -1283,7 +1348,7 @@ instruct vasrB_imm(vReg dst, vReg src, immI shift) %{ instruct vasrS_imm(vReg dst, vReg src, immI shift) %{ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); - match(Set dst (RShiftVS src shift)); + match(Set dst (RShiftVS src (RShiftCntV shift))); ins_cost(SVE_COST); format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (H)" %} ins_encode %{ @@ -1302,7 +1367,7 @@ instruct vasrS_imm(vReg dst, vReg src, immI shift) %{ instruct vasrI_imm(vReg dst, vReg src, immI shift) %{ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); - match(Set dst (RShiftVI src shift)); + match(Set dst (RShiftVI src (RShiftCntV shift))); ins_cost(SVE_COST); format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (S)" %} ins_encode %{ @@ -1320,7 +1385,7 @@ instruct vasrI_imm(vReg dst, vReg src, immI shift) %{ instruct vasrL_imm(vReg dst, vReg src, immI shift) %{ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); - match(Set dst (RShiftVL src shift)); + match(Set dst (RShiftVL src (RShiftCntV shift))); ins_cost(SVE_COST); format %{ "sve_asr $dst, $src, $shift\t# vector (sve) (D)" %} ins_encode %{ @@ -1338,7 +1403,7 @@ instruct vasrL_imm(vReg dst, vReg src, immI shift) %{ instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); - match(Set dst (URShiftVB src shift)); + match(Set dst (URShiftVB src (RShiftCntV shift))); ins_cost(SVE_COST); format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (B)" %} ins_encode %{ @@ -1361,7 +1426,7 @@ instruct vlsrB_imm(vReg dst, vReg src, immI shift) %{ instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); - match(Set dst (URShiftVS src shift)); + match(Set dst (URShiftVS src (RShiftCntV shift))); ins_cost(SVE_COST); format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (H)" %} ins_encode %{ @@ -1371,7 +1436,7 @@ instruct vlsrS_imm(vReg dst, vReg src, 
immI shift) %{ as_FloatRegister($src$$reg)); return; } - if (con >= 8) { + if (con >= 16) { __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); return; @@ -1384,7 +1449,7 @@ instruct vlsrS_imm(vReg dst, vReg src, immI shift) %{ instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); - match(Set dst (URShiftVI src shift)); + match(Set dst (URShiftVI src (RShiftCntV shift))); ins_cost(SVE_COST); format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (S)" %} ins_encode %{ @@ -1402,7 +1467,7 @@ instruct vlsrI_imm(vReg dst, vReg src, immI shift) %{ instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); - match(Set dst (URShiftVL src shift)); + match(Set dst (URShiftVL src (RShiftCntV shift))); ins_cost(SVE_COST); format %{ "sve_lsr $dst, $src, $shift\t# vector (sve) (D)" %} ins_encode %{ @@ -1420,7 +1485,7 @@ instruct vlsrL_imm(vReg dst, vReg src, immI shift) %{ instruct vlslB_imm(vReg dst, vReg src, immI shift) %{ predicate(UseSVE > 0 && n->as_Vector()->length() >= 16); - match(Set dst (LShiftVB src shift)); + match(Set dst (LShiftVB src (LShiftCntV shift))); ins_cost(SVE_COST); format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (B)" %} ins_encode %{ @@ -1438,12 +1503,12 @@ instruct vlslB_imm(vReg dst, vReg src, immI shift) %{ instruct vlslS_imm(vReg dst, vReg src, immI shift) %{ predicate(UseSVE > 0 && n->as_Vector()->length() >= 8); - match(Set dst (LShiftVS src shift)); + match(Set dst (LShiftVS src (LShiftCntV shift))); ins_cost(SVE_COST); format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (H)" %} ins_encode %{ int con = (int)$shift$$constant; - if (con >= 8) { + if (con >= 16) { __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); return; @@ -1456,7 +1521,7 @@ instruct vlslS_imm(vReg dst, vReg src, immI shift) %{ instruct vlslI_imm(vReg dst, vReg src, immI shift) %{ predicate(UseSVE > 0 && n->as_Vector()->length() >= 4); - match(Set dst (LShiftVI src shift)); + match(Set dst (LShiftVI src (LShiftCntV shift))); ins_cost(SVE_COST); format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (S)" %} ins_encode %{ @@ -1469,7 +1534,7 @@ instruct vlslI_imm(vReg dst, vReg src, immI shift) %{ instruct vlslL_imm(vReg dst, vReg src, immI shift) %{ predicate(UseSVE > 0 && n->as_Vector()->length() >= 2); - match(Set dst (LShiftVL src shift)); + match(Set dst (LShiftVL src (LShiftCntV shift))); ins_cost(SVE_COST); format %{ "sve_lsl $dst, $src, $shift\t# vector (sve) (D)" %} ins_encode %{ diff --git a/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 index 7bb76cc5941..7fe0861a717 100644 --- a/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 +++ b/src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 @@ -146,6 +146,31 @@ source %{ case Op_ExtractL: case Op_ExtractS: case Op_ExtractUB: + // Vector API specific + case Op_AndReductionV: + case Op_OrReductionV: + case Op_XorReductionV: + case Op_MaxReductionV: + case Op_MinReductionV: + case Op_LoadVectorGather: + case Op_StoreVectorScatter: + case Op_VectorBlend: + case Op_VectorCast: + case Op_VectorCastB2X: + case Op_VectorCastD2X: + case Op_VectorCastF2X: + case Op_VectorCastI2X: + case Op_VectorCastL2X: + case Op_VectorCastS2X: + case Op_VectorInsert: + case Op_VectorLoadConst: + case Op_VectorLoadMask: + case Op_VectorLoadShuffle: + case Op_VectorMaskCmp: + case Op_VectorRearrange: + case Op_VectorReinterpret: + case 
Op_VectorStoreMask: + case Op_VectorTest: return false; default: return true; @@ -507,15 +532,38 @@ instruct vpopcountI(vReg dst, vReg src) %{ __ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg)); %} ins_pipe(pipe_slow); -%} +%}dnl +dnl +dnl REDUCE_ADD_EXT($1, $2, $3, $4, $5, $6, $7 ) +dnl REDUCE_ADD_EXT(insn_name, op_name, reg_dst, reg_src, size, elem_type, insn1) +define(`REDUCE_ADD_EXT', ` +instruct $1($3 dst, $4 src1, vReg src2, vRegD tmp) %{ + predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 && + n->in(2)->bottom_type()->is_vect()->element_basic_type() == $6); + match(Set dst ($2 src1 src2)); + effect(TEMP_DEF dst, TEMP tmp); + ins_cost(SVE_COST); + format %{ "sve_uaddv $tmp, $src2\t# vector (sve) ($5)\n\t" + "smov $dst, $tmp, $5, 0\n\t" + "addw $dst, $dst, $src1\n\t" + "$7 $dst, $dst\t # add reduction $5" %} + ins_encode %{ + __ sve_uaddv(as_FloatRegister($tmp$$reg), __ $5, + ptrue, as_FloatRegister($src2$$reg)); + __ smov($dst$$Register, as_FloatRegister($tmp$$reg), __ $5, 0); + __ addw($dst$$Register, $dst$$Register, $src1$$Register); + __ $7($dst$$Register, $dst$$Register); + %} + ins_pipe(pipe_slow); +%}')dnl dnl dnl REDUCE_ADD($1, $2, $3, $4, $5, $6, $7 ) dnl REDUCE_ADD(insn_name, op_name, reg_dst, reg_src, size, elem_type, insn1) define(`REDUCE_ADD', ` instruct $1($3 dst, $4 src1, vReg src2, vRegD tmp) %{ predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 && - ELEMENT_SHORT_CHAR($6, n->in(2))); + n->in(2)->bottom_type()->is_vect()->element_basic_type() == $6); match(Set dst ($2 src1 src2)); effect(TEMP_DEF dst, TEMP tmp); ins_cost(SVE_COST); @@ -545,8 +593,10 @@ instruct $1($3 src1_dst, vReg src2) %{ %} ins_pipe(pipe_slow); %}')dnl -dnl + // vector add reduction +REDUCE_ADD_EXT(reduce_addB, AddReductionVI, iRegINoSp, iRegIorL2I, B, T_BYTE, sxtb) +REDUCE_ADD_EXT(reduce_addS, AddReductionVI, iRegINoSp, iRegIorL2I, H, T_SHORT, sxth) REDUCE_ADD(reduce_addI, AddReductionVI, iRegINoSp, iRegIorL2I, S, T_INT, addw) REDUCE_ADD(reduce_addL, AddReductionVL, iRegLNoSp, iRegL, D, T_LONG, add) REDUCE_ADDF(reduce_addF, AddReductionVF, vRegF, S) @@ -677,14 +727,14 @@ instruct $1(vReg dst, vReg shift) %{ ins_pipe(pipe_slow); %}')dnl dnl -dnl VSHIFT_IMM_UNPREDICATE($1, $2, $3, $4, $5 ) -dnl VSHIFT_IMM_UNPREDICATE(insn_name, op_name, size, min_vec_len, insn) +dnl VSHIFT_IMM_UNPREDICATE($1, $2, $3, $4, $5, $6 ) +dnl VSHIFT_IMM_UNPREDICATE(insn_name, op_name, op_name2, size, min_vec_len, insn) define(`VSHIFT_IMM_UNPREDICATE', ` instruct $1(vReg dst, vReg src, immI shift) %{ - predicate(UseSVE > 0 && n->as_Vector()->length() >= $4); - match(Set dst ($2 src shift)); + predicate(UseSVE > 0 && n->as_Vector()->length() >= $5); + match(Set dst ($2 src ($3 shift))); ins_cost(SVE_COST); - format %{ "$5 $dst, $src, $shift\t# vector (sve) ($3)" %} + format %{ "$6 $dst, $src, $shift\t# vector (sve) ($4)" %} ins_encode %{ int con = (int)$shift$$constant;dnl ifelse(eval(index(`$1', `vasr') == 0 || index(`$1', `vlsr') == 0), 1, ` @@ -693,16 +743,21 @@ ifelse(eval(index(`$1', `vasr') == 0 || index(`$1', `vlsr') == 0), 1, ` as_FloatRegister($src$$reg)); return; }')dnl -ifelse(eval(index(`$1', `vasr') == 0), 1, `ifelse(eval(index(`$3', `B') == 0), 1, ` - if (con >= 8) con = 7;')ifelse(eval(index(`$3', `H') == 0), 1, ` +ifelse(eval(index(`$1', `vasr') == 0), 1, `ifelse(eval(index(`$4', `B') == 0), 1, ` + if (con >= 8) con = 7;')ifelse(eval(index(`$4', `H') == 0), 1, ` if (con >= 16) con = 15;')')dnl 
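+dnl Explanatory note: an arithmetic right shift by the element width or more
+dnl still only replicates the sign bit, so the asr cases above clamp the
+dnl immediate to element_size - 1; a logical or left shift by that much would
+dnl clear every lane, which is why the lsr/lsl branches below zero $dst via
+dnl sve_eor($dst, $src, $src) and return without emitting a shift.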
-ifelse(eval((index(`$1', `vlsl') == 0 || index(`$1', `vlsr') == 0) && (index(`$3', `B') == 0 || index(`$3', `H') == 0)), 1, ` +ifelse(eval(index(`$1', `vlsl') == 0 || index(`$1', `vlsr') == 0), 1, `ifelse(eval(index(`$4', `B') == 0), 1, ` if (con >= 8) { __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), as_FloatRegister($src$$reg)); return; - }') - __ $5(as_FloatRegister($dst$$reg), __ $3, + }')ifelse(eval(index(`$4', `H') == 0), 1, ` + if (con >= 16) { + __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + return; + }')') + __ $6(as_FloatRegister($dst$$reg), __ $4, as_FloatRegister($src$$reg), con); %} ins_pipe(pipe_slow); @@ -736,18 +791,18 @@ VSHIFT_TRUE_PREDICATE(vlsrB, URShiftVB, B, 16, sve_lsr) VSHIFT_TRUE_PREDICATE(vlsrS, URShiftVS, H, 8, sve_lsr) VSHIFT_TRUE_PREDICATE(vlsrI, URShiftVI, S, 4, sve_lsr) VSHIFT_TRUE_PREDICATE(vlsrL, URShiftVL, D, 2, sve_lsr) -VSHIFT_IMM_UNPREDICATE(vasrB_imm, RShiftVB, B, 16, sve_asr) -VSHIFT_IMM_UNPREDICATE(vasrS_imm, RShiftVS, H, 8, sve_asr) -VSHIFT_IMM_UNPREDICATE(vasrI_imm, RShiftVI, S, 4, sve_asr) -VSHIFT_IMM_UNPREDICATE(vasrL_imm, RShiftVL, D, 2, sve_asr) -VSHIFT_IMM_UNPREDICATE(vlsrB_imm, URShiftVB, B, 16, sve_lsr) -VSHIFT_IMM_UNPREDICATE(vlsrS_imm, URShiftVS, H, 8, sve_lsr) -VSHIFT_IMM_UNPREDICATE(vlsrI_imm, URShiftVI, S, 4, sve_lsr) -VSHIFT_IMM_UNPREDICATE(vlsrL_imm, URShiftVL, D, 2, sve_lsr) -VSHIFT_IMM_UNPREDICATE(vlslB_imm, LShiftVB, B, 16, sve_lsl) -VSHIFT_IMM_UNPREDICATE(vlslS_imm, LShiftVS, H, 8, sve_lsl) -VSHIFT_IMM_UNPREDICATE(vlslI_imm, LShiftVI, S, 4, sve_lsl) -VSHIFT_IMM_UNPREDICATE(vlslL_imm, LShiftVL, D, 2, sve_lsl) +VSHIFT_IMM_UNPREDICATE(vasrB_imm, RShiftVB, RShiftCntV, B, 16, sve_asr) +VSHIFT_IMM_UNPREDICATE(vasrS_imm, RShiftVS, RShiftCntV, H, 8, sve_asr) +VSHIFT_IMM_UNPREDICATE(vasrI_imm, RShiftVI, RShiftCntV, S, 4, sve_asr) +VSHIFT_IMM_UNPREDICATE(vasrL_imm, RShiftVL, RShiftCntV, D, 2, sve_asr) +VSHIFT_IMM_UNPREDICATE(vlsrB_imm, URShiftVB, RShiftCntV, B, 16, sve_lsr) +VSHIFT_IMM_UNPREDICATE(vlsrS_imm, URShiftVS, RShiftCntV, H, 8, sve_lsr) +VSHIFT_IMM_UNPREDICATE(vlsrI_imm, URShiftVI, RShiftCntV, S, 4, sve_lsr) +VSHIFT_IMM_UNPREDICATE(vlsrL_imm, URShiftVL, RShiftCntV, D, 2, sve_lsr) +VSHIFT_IMM_UNPREDICATE(vlslB_imm, LShiftVB, LShiftCntV, B, 16, sve_lsl) +VSHIFT_IMM_UNPREDICATE(vlslS_imm, LShiftVS, LShiftCntV, H, 8, sve_lsl) +VSHIFT_IMM_UNPREDICATE(vlslI_imm, LShiftVI, LShiftCntV, S, 4, sve_lsl) +VSHIFT_IMM_UNPREDICATE(vlslL_imm, LShiftVL, LShiftCntV, D, 2, sve_lsl) VSHIFT_COUNT(vshiftcntB, B, 16, T_BYTE) VSHIFT_COUNT(vshiftcntS, H, 8, T_SHORT) VSHIFT_COUNT(vshiftcntI, S, 4, T_INT) diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp index 29f63ba69a4..c7fac2836b7 100644 --- a/src/hotspot/cpu/aarch64/assembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/assembler_aarch64.cpp @@ -590,7 +590,7 @@ void entry(CodeBuffer *cb) { __ stnp(r23, r29, Address(r12, 32)); // stnp x23, x29, [x12, #32] __ ldnp(r0, r6, Address(r21, -80)); // ldnp x0, x6, [x21, #-80] -// LdStSIMDOp +// LdStNEONOp __ ld1(v15, __ T8B, Address(r26)); // ld1 {v15.8B}, [x26] __ ld1(v23, v24, __ T16B, Address(__ post(r11, 32))); // ld1 {v23.16B, v24.16B}, [x11], 32 __ ld1(v8, v9, v10, __ T1D, Address(__ post(r23, r7))); // ld1 {v8.1D, v9.1D, v10.1D}, [x23], x7 @@ -614,11 +614,146 @@ void entry(CodeBuffer *cb) { __ ld4r(v0, v1, v2, v3, __ T4H, Address(__ post(r26, 8))); // ld4r {v0.4H, v1.4H, v2.4H, v3.4H}, [x26], 8 __ ld4r(v12, v13, v14, v15, __ 
T2S, Address(__ post(r25, r2))); // ld4r {v12.2S, v13.2S, v14.2S, v15.2S}, [x25], x2 -// SHA512SIMDOp - __ sha512h(v22, __ T2D, v27, v4); // sha512h q22, q27, v4.2D - __ sha512h2(v7, __ T2D, v6, v1); // sha512h2 q7, q6, v1.2D - __ sha512su0(v26, __ T2D, v15); // sha512su0 v26.2D, v15.2D - __ sha512su1(v2, __ T2D, v13, v13); // sha512su1 v2.2D, v13.2D, v13.2D +// NEONReduceInstruction + __ addv(v22, __ T8B, v23); // addv b22, v23.8B + __ addv(v27, __ T16B, v28); // addv b27, v28.16B + __ addv(v4, __ T4H, v5); // addv h4, v5.4H + __ addv(v7, __ T8H, v8); // addv h7, v8.8H + __ addv(v6, __ T4S, v7); // addv s6, v7.4S + __ smaxv(v1, __ T8B, v2); // smaxv b1, v2.8B + __ smaxv(v26, __ T16B, v27); // smaxv b26, v27.16B + __ smaxv(v15, __ T4H, v16); // smaxv h15, v16.4H + __ smaxv(v2, __ T8H, v3); // smaxv h2, v3.8H + __ smaxv(v13, __ T4S, v14); // smaxv s13, v14.4S + __ fmaxv(v13, __ T4S, v14); // fmaxv s13, v14.4S + __ sminv(v24, __ T8B, v25); // sminv b24, v25.8B + __ sminv(v23, __ T16B, v24); // sminv b23, v24.16B + __ sminv(v4, __ T4H, v5); // sminv h4, v5.4H + __ sminv(v19, __ T8H, v20); // sminv h19, v20.8H + __ sminv(v15, __ T4S, v16); // sminv s15, v16.4S + __ fminv(v0, __ T4S, v1); // fminv s0, v1.4S + +// TwoRegNEONOp + __ absr(v4, __ T8B, v5); // abs v4.8B, v5.8B + __ absr(v20, __ T16B, v21); // abs v20.16B, v21.16B + __ absr(v11, __ T4H, v12); // abs v11.4H, v12.4H + __ absr(v29, __ T8H, v30); // abs v29.8H, v30.8H + __ absr(v15, __ T2S, v16); // abs v15.2S, v16.2S + __ absr(v21, __ T4S, v22); // abs v21.4S, v22.4S + __ absr(v4, __ T2D, v5); // abs v4.2D, v5.2D + __ fabs(v14, __ T2S, v15); // fabs v14.2S, v15.2S + __ fabs(v22, __ T4S, v23); // fabs v22.4S, v23.4S + __ fabs(v25, __ T2D, v26); // fabs v25.2D, v26.2D + __ fneg(v6, __ T2S, v7); // fneg v6.2S, v7.2S + __ fneg(v12, __ T4S, v13); // fneg v12.4S, v13.4S + __ fneg(v14, __ T2D, v15); // fneg v14.2D, v15.2D + __ fsqrt(v13, __ T2S, v14); // fsqrt v13.2S, v14.2S + __ fsqrt(v14, __ T4S, v15); // fsqrt v14.4S, v15.4S + __ fsqrt(v9, __ T2D, v10); // fsqrt v9.2D, v10.2D + __ notr(v25, __ T8B, v26); // not v25.8B, v26.8B + __ notr(v28, __ T16B, v29); // not v28.16B, v29.16B + +// ThreeRegNEONOp + __ andr(v10, __ T8B, v11, v12); // and v10.8B, v11.8B, v12.8B + __ andr(v19, __ T16B, v20, v21); // and v19.16B, v20.16B, v21.16B + __ orr(v11, __ T8B, v12, v13); // orr v11.8B, v12.8B, v13.8B + __ orr(v17, __ T16B, v18, v19); // orr v17.16B, v18.16B, v19.16B + __ eor(v21, __ T8B, v22, v23); // eor v21.8B, v22.8B, v23.8B + __ eor(v15, __ T16B, v16, v17); // eor v15.16B, v16.16B, v17.16B + __ addv(v20, __ T8B, v21, v22); // add v20.8B, v21.8B, v22.8B + __ addv(v23, __ T16B, v24, v25); // add v23.16B, v24.16B, v25.16B + __ addv(v26, __ T4H, v27, v28); // add v26.4H, v27.4H, v28.4H + __ addv(v5, __ T8H, v6, v7); // add v5.8H, v6.8H, v7.8H + __ addv(v6, __ T2S, v7, v8); // add v6.2S, v7.2S, v8.2S + __ addv(v15, __ T4S, v16, v17); // add v15.4S, v16.4S, v17.4S + __ addv(v15, __ T2D, v16, v17); // add v15.2D, v16.2D, v17.2D + __ fadd(v25, __ T2S, v26, v27); // fadd v25.2S, v26.2S, v27.2S + __ fadd(v16, __ T4S, v17, v18); // fadd v16.4S, v17.4S, v18.4S + __ fadd(v27, __ T2D, v28, v29); // fadd v27.2D, v28.2D, v29.2D + __ subv(v24, __ T8B, v25, v26); // sub v24.8B, v25.8B, v26.8B + __ subv(v15, __ T16B, v16, v17); // sub v15.16B, v16.16B, v17.16B + __ subv(v25, __ T4H, v26, v27); // sub v25.4H, v26.4H, v27.4H + __ subv(v14, __ T8H, v15, v16); // sub v14.8H, v15.8H, v16.8H + __ subv(v10, __ T2S, v11, v12); // sub v10.2S, v11.2S, v12.2S + __ subv(v13, 
__ T4S, v14, v15); // sub v13.4S, v14.4S, v15.4S + __ subv(v14, __ T2D, v15, v16); // sub v14.2D, v15.2D, v16.2D + __ fsub(v20, __ T2S, v21, v22); // fsub v20.2S, v21.2S, v22.2S + __ fsub(v1, __ T4S, v2, v3); // fsub v1.4S, v2.4S, v3.4S + __ fsub(v22, __ T2D, v23, v24); // fsub v22.2D, v23.2D, v24.2D + __ mulv(v30, __ T8B, v31, v0); // mul v30.8B, v31.8B, v0.8B + __ mulv(v14, __ T16B, v15, v16); // mul v14.16B, v15.16B, v16.16B + __ mulv(v2, __ T4H, v3, v4); // mul v2.4H, v3.4H, v4.4H + __ mulv(v6, __ T8H, v7, v8); // mul v6.8H, v7.8H, v8.8H + __ mulv(v3, __ T2S, v4, v5); // mul v3.2S, v4.2S, v5.2S + __ mulv(v7, __ T4S, v8, v9); // mul v7.4S, v8.4S, v9.4S + __ fmul(v24, __ T2S, v25, v26); // fmul v24.2S, v25.2S, v26.2S + __ fmul(v0, __ T4S, v1, v2); // fmul v0.4S, v1.4S, v2.4S + __ fmul(v27, __ T2D, v28, v29); // fmul v27.2D, v28.2D, v29.2D + __ mlav(v29, __ T4H, v30, v31); // mla v29.4H, v30.4H, v31.4H + __ mlav(v5, __ T8H, v6, v7); // mla v5.8H, v6.8H, v7.8H + __ mlav(v5, __ T2S, v6, v7); // mla v5.2S, v6.2S, v7.2S + __ mlav(v29, __ T4S, v30, v31); // mla v29.4S, v30.4S, v31.4S + __ fmla(v11, __ T2S, v12, v13); // fmla v11.2S, v12.2S, v13.2S + __ fmla(v25, __ T4S, v26, v27); // fmla v25.4S, v26.4S, v27.4S + __ fmla(v0, __ T2D, v1, v2); // fmla v0.2D, v1.2D, v2.2D + __ mlsv(v30, __ T4H, v31, v0); // mls v30.4H, v31.4H, v0.4H + __ mlsv(v0, __ T8H, v1, v2); // mls v0.8H, v1.8H, v2.8H + __ mlsv(v17, __ T2S, v18, v19); // mls v17.2S, v18.2S, v19.2S + __ mlsv(v28, __ T4S, v29, v30); // mls v28.4S, v29.4S, v30.4S + __ fmls(v25, __ T2S, v26, v27); // fmls v25.2S, v26.2S, v27.2S + __ fmls(v9, __ T4S, v10, v11); // fmls v9.4S, v10.4S, v11.4S + __ fmls(v25, __ T2D, v26, v27); // fmls v25.2D, v26.2D, v27.2D + __ fdiv(v12, __ T2S, v13, v14); // fdiv v12.2S, v13.2S, v14.2S + __ fdiv(v15, __ T4S, v16, v17); // fdiv v15.4S, v16.4S, v17.4S + __ fdiv(v11, __ T2D, v12, v13); // fdiv v11.2D, v12.2D, v13.2D + __ maxv(v10, __ T8B, v11, v12); // smax v10.8B, v11.8B, v12.8B + __ maxv(v17, __ T16B, v18, v19); // smax v17.16B, v18.16B, v19.16B + __ maxv(v24, __ T4H, v25, v26); // smax v24.4H, v25.4H, v26.4H + __ maxv(v21, __ T8H, v22, v23); // smax v21.8H, v22.8H, v23.8H + __ maxv(v23, __ T2S, v24, v25); // smax v23.2S, v24.2S, v25.2S + __ maxv(v0, __ T4S, v1, v2); // smax v0.4S, v1.4S, v2.4S + __ fmax(v16, __ T2S, v17, v18); // fmax v16.2S, v17.2S, v18.2S + __ fmax(v10, __ T4S, v11, v12); // fmax v10.4S, v11.4S, v12.4S + __ fmax(v6, __ T2D, v7, v8); // fmax v6.2D, v7.2D, v8.2D + __ minv(v28, __ T8B, v29, v30); // smin v28.8B, v29.8B, v30.8B + __ minv(v6, __ T16B, v7, v8); // smin v6.16B, v7.16B, v8.16B + __ minv(v5, __ T4H, v6, v7); // smin v5.4H, v6.4H, v7.4H + __ minv(v5, __ T8H, v6, v7); // smin v5.8H, v6.8H, v7.8H + __ minv(v20, __ T2S, v21, v22); // smin v20.2S, v21.2S, v22.2S + __ minv(v17, __ T4S, v18, v19); // smin v17.4S, v18.4S, v19.4S + __ fmin(v15, __ T2S, v16, v17); // fmin v15.2S, v16.2S, v17.2S + __ fmin(v17, __ T4S, v18, v19); // fmin v17.4S, v18.4S, v19.4S + __ fmin(v29, __ T2D, v30, v31); // fmin v29.2D, v30.2D, v31.2D + __ cmeq(v26, __ T8B, v27, v28); // cmeq v26.8B, v27.8B, v28.8B + __ cmeq(v28, __ T16B, v29, v30); // cmeq v28.16B, v29.16B, v30.16B + __ cmeq(v1, __ T4H, v2, v3); // cmeq v1.4H, v2.4H, v3.4H + __ cmeq(v27, __ T8H, v28, v29); // cmeq v27.8H, v28.8H, v29.8H + __ cmeq(v0, __ T2S, v1, v2); // cmeq v0.2S, v1.2S, v2.2S + __ cmeq(v20, __ T4S, v21, v22); // cmeq v20.4S, v21.4S, v22.4S + __ cmeq(v28, __ T2D, v29, v30); // cmeq v28.2D, v29.2D, v30.2D + __ fcmeq(v15, __ T2S, v16, v17); // 
fcmeq v15.2S, v16.2S, v17.2S + __ fcmeq(v12, __ T4S, v13, v14); // fcmeq v12.4S, v13.4S, v14.4S + __ fcmeq(v10, __ T2D, v11, v12); // fcmeq v10.2D, v11.2D, v12.2D + __ cmgt(v28, __ T8B, v29, v30); // cmgt v28.8B, v29.8B, v30.8B + __ cmgt(v28, __ T16B, v29, v30); // cmgt v28.16B, v29.16B, v30.16B + __ cmgt(v19, __ T4H, v20, v21); // cmgt v19.4H, v20.4H, v21.4H + __ cmgt(v22, __ T8H, v23, v24); // cmgt v22.8H, v23.8H, v24.8H + __ cmgt(v10, __ T2S, v11, v12); // cmgt v10.2S, v11.2S, v12.2S + __ cmgt(v4, __ T4S, v5, v6); // cmgt v4.4S, v5.4S, v6.4S + __ cmgt(v30, __ T2D, v31, v0); // cmgt v30.2D, v31.2D, v0.2D + __ fcmgt(v20, __ T2S, v21, v22); // fcmgt v20.2S, v21.2S, v22.2S + __ fcmgt(v8, __ T4S, v9, v10); // fcmgt v8.4S, v9.4S, v10.4S + __ fcmgt(v30, __ T2D, v31, v0); // fcmgt v30.2D, v31.2D, v0.2D + __ cmge(v17, __ T8B, v18, v19); // cmge v17.8B, v18.8B, v19.8B + __ cmge(v10, __ T16B, v11, v12); // cmge v10.16B, v11.16B, v12.16B + __ cmge(v27, __ T4H, v28, v29); // cmge v27.4H, v28.4H, v29.4H + __ cmge(v2, __ T8H, v3, v4); // cmge v2.8H, v3.8H, v4.8H + __ cmge(v24, __ T2S, v25, v26); // cmge v24.2S, v25.2S, v26.2S + __ cmge(v4, __ T4S, v5, v6); // cmge v4.4S, v5.4S, v6.4S + __ cmge(v3, __ T2D, v4, v5); // cmge v3.2D, v4.2D, v5.2D + __ fcmge(v8, __ T2S, v9, v10); // fcmge v8.2S, v9.2S, v10.2S + __ fcmge(v22, __ T4S, v23, v24); // fcmge v22.4S, v23.4S, v24.4S + __ fcmge(v17, __ T2D, v18, v19); // fcmge v17.2D, v18.2D, v19.2D // SpecialCases __ ccmn(zr, zr, 3u, Assembler::LE); // ccmn xzr, xzr, #3, LE @@ -705,774 +840,160 @@ void entry(CodeBuffer *cb) { __ fmovd(v0, -1.0625); // fmov d0, #-1.0625 // LSEOp - __ swp(Assembler::xword, r24, r24, r4); // swp x24, x24, [x4] - __ ldadd(Assembler::xword, r20, r16, r0); // ldadd x20, x16, [x0] - __ ldbic(Assembler::xword, r4, r21, r11); // ldclr x4, x21, [x11] - __ ldeor(Assembler::xword, r30, r16, r22); // ldeor x30, x16, [x22] - __ ldorr(Assembler::xword, r4, r15, r23); // ldset x4, x15, [x23] - __ ldsmin(Assembler::xword, r26, r6, r12); // ldsmin x26, x6, [x12] - __ ldsmax(Assembler::xword, r15, r14, r15); // ldsmax x15, x14, [x15] - __ ldumin(Assembler::xword, r9, r25, r29); // ldumin x9, x25, [x29] - __ ldumax(Assembler::xword, r11, r20, r12); // ldumax x11, x20, [x12] + __ swp(Assembler::xword, r13, r5, r29); // swp x13, x5, [x29] + __ ldadd(Assembler::xword, r24, r21, r26); // ldadd x24, x21, [x26] + __ ldbic(Assembler::xword, r24, r3, r24); // ldclr x24, x3, [x24] + __ ldeor(Assembler::xword, r26, r23, r15); // ldeor x26, x23, [x15] + __ ldorr(Assembler::xword, r21, r3, r24); // ldset x21, x3, [x24] + __ ldsmin(Assembler::xword, r8, r25, r20); // ldsmin x8, x25, [x20] + __ ldsmax(Assembler::xword, r16, r17, r2); // ldsmax x16, x17, [x2] + __ ldumin(Assembler::xword, r1, r0, r24); // ldumin x1, x0, [x24] + __ ldumax(Assembler::xword, r4, r3, r12); // ldumax x4, x3, [x12] // LSEOp - __ swpa(Assembler::xword, r16, r22, r16); // swpa x16, x22, [x16] - __ ldadda(Assembler::xword, r21, r24, r26); // ldadda x21, x24, [x26] - __ ldbica(Assembler::xword, r6, r6, r16); // ldclra x6, x6, [x16] - __ ldeora(Assembler::xword, r16, r25, r16); // ldeora x16, x25, [x16] - __ ldorra(Assembler::xword, r28, r24, r16); // ldseta x28, x24, [x16] - __ ldsmina(Assembler::xword, r26, r15, r10); // ldsmina x26, x15, [x10] - __ ldsmaxa(Assembler::xword, r13, r14, r20); // ldsmaxa x13, x14, [x20] - __ ldumina(Assembler::xword, r1, r23, r30); // ldumina x1, x23, [x30] - __ ldumaxa(Assembler::xword, r14, r2, r6); // ldumaxa x14, x2, [x6] + __ swpa(Assembler::xword, zr, 
r28, r10); // swpa xzr, x28, [x10] + __ ldadda(Assembler::xword, r26, r2, r12); // ldadda x26, x2, [x12] + __ ldbica(Assembler::xword, r16, zr, r1); // ldclra x16, xzr, [x1] + __ ldeora(Assembler::xword, r13, r29, r0); // ldeora x13, x29, [x0] + __ ldorra(Assembler::xword, r19, r12, r17); // ldseta x19, x12, [x17] + __ ldsmina(Assembler::xword, r22, r13, r28); // ldsmina x22, x13, [x28] + __ ldsmaxa(Assembler::xword, r30, zr, r1); // ldsmaxa x30, xzr, [x1] + __ ldumina(Assembler::xword, r26, r28, r4); // ldumina x26, x28, [x4] + __ ldumaxa(Assembler::xword, r30, r4, r6); // ldumaxa x30, x4, [x6] // LSEOp - __ swpal(Assembler::xword, r3, r8, r25); // swpal x3, x8, [x25] - __ ldaddal(Assembler::xword, r0, r27, r30); // ldaddal x0, x27, [x30] - __ ldbical(Assembler::xword, r5, r5, r30); // ldclral x5, x5, [x30] - __ ldeoral(Assembler::xword, r11, r25, r0); // ldeoral x11, x25, [x0] - __ ldorral(Assembler::xword, zr, r0, r19); // ldsetal xzr, x0, [x19] - __ ldsminal(Assembler::xword, r29, r26, r9); // ldsminal x29, x26, [x9] - __ ldsmaxal(Assembler::xword, r26, r12, r15); // ldsmaxal x26, x12, [x15] - __ lduminal(Assembler::xword, r11, r11, r15); // lduminal x11, x11, [x15] - __ ldumaxal(Assembler::xword, r25, r22, r24); // ldumaxal x25, x22, [x24] + __ swpal(Assembler::xword, r30, r26, r15); // swpal x30, x26, [x15] + __ ldaddal(Assembler::xword, r9, r8, r12); // ldaddal x9, x8, [x12] + __ ldbical(Assembler::xword, r0, r20, r1); // ldclral x0, x20, [x1] + __ ldeoral(Assembler::xword, r24, r2, r0); // ldeoral x24, x2, [x0] + __ ldorral(Assembler::xword, r9, r24, r26); // ldsetal x9, x24, [x26] + __ ldsminal(Assembler::xword, r16, r30, r3); // ldsminal x16, x30, [x3] + __ ldsmaxal(Assembler::xword, r10, r23, r10); // ldsmaxal x10, x23, [x10] + __ lduminal(Assembler::xword, r4, r16, r2); // lduminal x4, x16, [x2] + __ ldumaxal(Assembler::xword, r11, r8, r10); // ldumaxal x11, x8, [x10] // LSEOp - __ swpl(Assembler::xword, r0, r17, r11); // swpl x0, x17, [x11] - __ ldaddl(Assembler::xword, r6, r29, r6); // ldaddl x6, x29, [x6] - __ ldbicl(Assembler::xword, r5, r5, r21); // ldclrl x5, x5, [x21] - __ ldeorl(Assembler::xword, r19, r16, r15); // ldeorl x19, x16, [x15] - __ ldorrl(Assembler::xword, r30, r27, r28); // ldsetl x30, x27, [x28] - __ ldsminl(Assembler::xword, r1, r28, r1); // ldsminl x1, x28, [x1] - __ ldsmaxl(Assembler::xword, r20, r29, r16); // ldsmaxl x20, x29, [x16] - __ lduminl(Assembler::xword, r13, r10, r29); // lduminl x13, x10, [x29] - __ ldumaxl(Assembler::xword, r29, r19, r22); // ldumaxl x29, x19, [x22] + __ swpl(Assembler::xword, r15, r17, r2); // swpl x15, x17, [x2] + __ ldaddl(Assembler::xword, r10, r12, r12); // ldaddl x10, x12, [x12] + __ ldbicl(Assembler::xword, r15, r13, r2); // ldclrl x15, x13, [x2] + __ ldeorl(Assembler::xword, r7, r20, r26); // ldeorl x7, x20, [x26] + __ ldorrl(Assembler::xword, r16, r4, r2); // ldsetl x16, x4, [x2] + __ ldsminl(Assembler::xword, r4, r12, r15); // ldsminl x4, x12, [x15] + __ ldsmaxl(Assembler::xword, r21, r16, r15); // ldsmaxl x21, x16, [x15] + __ lduminl(Assembler::xword, r11, r21, r23); // lduminl x11, x21, [x23] + __ ldumaxl(Assembler::xword, r12, r26, r23); // ldumaxl x12, x26, [x23] // LSEOp - __ swp(Assembler::word, r10, r4, sp); // swp w10, w4, [sp] - __ ldadd(Assembler::word, r21, r8, sp); // ldadd w21, w8, [sp] - __ ldbic(Assembler::word, r19, r10, r28); // ldclr w19, w10, [x28] - __ ldeor(Assembler::word, r2, r25, r5); // ldeor w2, w25, [x5] - __ ldorr(Assembler::word, r3, r8, r22); // ldset w3, w8, [x22] - __ 
ldsmin(Assembler::word, r19, r13, r5); // ldsmin w19, w13, [x5] - __ ldsmax(Assembler::word, r29, r24, r21); // ldsmax w29, w24, [x21] - __ ldumin(Assembler::word, r26, r24, r3); // ldumin w26, w24, [x3] - __ ldumax(Assembler::word, r24, r26, r23); // ldumax w24, w26, [x23] + __ swp(Assembler::word, r28, r14, r11); // swp w28, w14, [x11] + __ ldadd(Assembler::word, r24, r1, r12); // ldadd w24, w1, [x12] + __ ldbic(Assembler::word, zr, r10, r16); // ldclr wzr, w10, [x16] + __ ldeor(Assembler::word, r7, r2, r3); // ldeor w7, w2, [x3] + __ ldorr(Assembler::word, r13, r19, r17); // ldset w13, w19, [x17] + __ ldsmin(Assembler::word, r16, r3, r1); // ldsmin w16, w3, [x1] + __ ldsmax(Assembler::word, r11, r30, r5); // ldsmax w11, w30, [x5] + __ ldumin(Assembler::word, r8, r15, r29); // ldumin w8, w15, [x29] + __ ldumax(Assembler::word, r30, r0, r20); // ldumax w30, w0, [x20] // LSEOp - __ swpa(Assembler::word, r15, r21, r3); // swpa w15, w21, [x3] - __ ldadda(Assembler::word, r24, r8, r25); // ldadda w24, w8, [x25] - __ ldbica(Assembler::word, r20, r16, r17); // ldclra w20, w16, [x17] - __ ldeora(Assembler::word, r2, r1, r0); // ldeora w2, w1, [x0] - __ ldorra(Assembler::word, r24, r4, r3); // ldseta w24, w4, [x3] - __ ldsmina(Assembler::word, r12, zr, r28); // ldsmina w12, wzr, [x28] - __ ldsmaxa(Assembler::word, r10, r26, r2); // ldsmaxa w10, w26, [x2] - __ ldumina(Assembler::word, r12, r16, sp); // ldumina w12, w16, [sp] - __ ldumaxa(Assembler::word, r1, r13, r29); // ldumaxa w1, w13, [x29] + __ swpa(Assembler::word, r7, r20, r23); // swpa w7, w20, [x23] + __ ldadda(Assembler::word, r28, r21, r27); // ldadda w28, w21, [x27] + __ ldbica(Assembler::word, r25, r5, r1); // ldclra w25, w5, [x1] + __ ldeora(Assembler::word, r23, r16, sp); // ldeora w23, w16, [sp] + __ ldorra(Assembler::word, r5, r12, r9); // ldseta w5, w12, [x9] + __ ldsmina(Assembler::word, r28, r15, r29); // ldsmina w28, w15, [x29] + __ ldsmaxa(Assembler::word, r22, zr, r19); // ldsmaxa w22, wzr, [x19] + __ ldumina(Assembler::word, zr, r5, r14); // ldumina wzr, w5, [x14] + __ ldumaxa(Assembler::word, r16, zr, r15); // ldumaxa w16, wzr, [x15] // LSEOp - __ swpal(Assembler::word, r0, r19, r12); // swpal w0, w19, [x12] - __ ldaddal(Assembler::word, r17, r22, r13); // ldaddal w17, w22, [x13] - __ ldbical(Assembler::word, r28, r30, sp); // ldclral w28, w30, [sp] - __ ldeoral(Assembler::word, r1, r26, r28); // ldeoral w1, w26, [x28] - __ ldorral(Assembler::word, r4, r30, r4); // ldsetal w4, w30, [x4] - __ ldsminal(Assembler::word, r6, r30, r26); // ldsminal w6, w30, [x26] - __ ldsmaxal(Assembler::word, r16, r9, r8); // ldsmaxal w16, w9, [x8] - __ lduminal(Assembler::word, r12, r0, r20); // lduminal w12, w0, [x20] - __ ldumaxal(Assembler::word, r1, r24, r2); // ldumaxal w1, w24, [x2] + __ swpal(Assembler::word, r27, r20, r16); // swpal w27, w20, [x16] + __ ldaddal(Assembler::word, r12, r11, r9); // ldaddal w12, w11, [x9] + __ ldbical(Assembler::word, r6, r30, r17); // ldclral w6, w30, [x17] + __ ldeoral(Assembler::word, r27, r28, r30); // ldeoral w27, w28, [x30] + __ ldorral(Assembler::word, r7, r10, r20); // ldsetal w7, w10, [x20] + __ ldsminal(Assembler::word, r10, r4, r24); // ldsminal w10, w4, [x24] + __ ldsmaxal(Assembler::word, r17, r17, r22); // ldsmaxal w17, w17, [x22] + __ lduminal(Assembler::word, r3, r29, r15); // lduminal w3, w29, [x15] + __ ldumaxal(Assembler::word, r22, r19, r19); // ldumaxal w22, w19, [x19] // LSEOp - __ swpl(Assembler::word, r0, r9, r24); // swpl w0, w9, [x24] - __ ldaddl(Assembler::word, r26, r16, r30); 
// ldaddl w26, w16, [x30] - __ ldbicl(Assembler::word, r3, r10, r23); // ldclrl w3, w10, [x23] - __ ldeorl(Assembler::word, r10, r4, r15); // ldeorl w10, w4, [x15] - __ ldorrl(Assembler::word, r2, r11, r8); // ldsetl w2, w11, [x8] - __ ldsminl(Assembler::word, r10, r15, r17); // ldsminl w10, w15, [x17] - __ ldsmaxl(Assembler::word, r2, r10, r12); // ldsmaxl w2, w10, [x12] - __ lduminl(Assembler::word, r12, r15, r13); // lduminl w12, w15, [x13] - __ ldumaxl(Assembler::word, r2, r7, r20); // ldumaxl w2, w7, [x20] + __ swpl(Assembler::word, r22, r2, r15); // swpl w22, w2, [x15] + __ ldaddl(Assembler::word, r6, r12, r16); // ldaddl w6, w12, [x16] + __ ldbicl(Assembler::word, r11, r13, r23); // ldclrl w11, w13, [x23] + __ ldeorl(Assembler::word, r1, r30, r19); // ldeorl w1, w30, [x19] + __ ldorrl(Assembler::word, r5, r17, r2); // ldsetl w5, w17, [x2] + __ ldsminl(Assembler::word, r16, r22, r13); // ldsminl w16, w22, [x13] + __ ldsmaxl(Assembler::word, r10, r21, r29); // ldsmaxl w10, w21, [x29] + __ lduminl(Assembler::word, r27, r12, r27); // lduminl w27, w12, [x27] + __ ldumaxl(Assembler::word, r3, r1, sp); // ldumaxl w3, w1, [sp] + +// SHA3SIMDOp + __ bcax(v23, __ T16B, v19, v17, v9); // bcax v23.16B, v19.16B, v17.16B, v9.16B + __ eor3(v27, __ T16B, v26, v14, v6); // eor3 v27.16B, v26.16B, v14.16B, v6.16B + __ rax1(v20, __ T2D, v22, v30); // rax1 v20.2D, v22.2D, v30.2D + __ xar(v24, __ T2D, v2, v30, 54); // xar v24.2D, v2.2D, v30.2D, #54 + +// SHA512SIMDOp + __ sha512h(v17, __ T2D, v10, v22); // sha512h q17, q10, v22.2D + __ sha512h2(v17, __ T2D, v2, v17); // sha512h2 q17, q2, v17.2D + __ sha512su0(v0, __ T2D, v24); // sha512su0 v0.2D, v24.2D + __ sha512su1(v25, __ T2D, v22, v2); // sha512su1 v25.2D, v22.2D, v2.2D // SVEVectorOp - __ sve_add(z25, __ B, z15, z4); // add z25.b, z15.b, z4.b - __ sve_sub(z4, __ S, z11, z17); // sub z4.s, z11.s, z17.s - __ sve_fadd(z16, __ D, z17, z10); // fadd z16.d, z17.d, z10.d - __ sve_fmul(z22, __ D, z12, z25); // fmul z22.d, z12.d, z25.d - __ sve_fsub(z28, __ D, z14, z10); // fsub z28.d, z14.d, z10.d - __ sve_abs(z1, __ H, p3, z30); // abs z1.h, p3/m, z30.h - __ sve_add(z15, __ B, p1, z2); // add z15.b, p1/m, z15.b, z2.b - __ sve_asr(z13, __ S, p4, z16); // asr z13.s, p4/m, z13.s, z16.s - __ sve_cnt(z3, __ D, p0, z11); // cnt z3.d, p0/m, z11.d - __ sve_lsl(z5, __ D, p2, z14); // lsl z5.d, p2/m, z5.d, z14.d - __ sve_lsr(z29, __ B, p0, z20); // lsr z29.b, p0/m, z29.b, z20.b - __ sve_mul(z20, __ S, p5, z27); // mul z20.s, p5/m, z20.s, z27.s - __ sve_neg(z26, __ B, p6, z4); // neg z26.b, p6/m, z4.b - __ sve_not(z22, __ B, p4, z30); // not z22.b, p4/m, z30.b - __ sve_smax(z11, __ H, p2, z27); // smax z11.h, p2/m, z11.h, z27.h - __ sve_smin(z28, __ S, p5, z30); // smin z28.s, p5/m, z28.s, z30.s - __ sve_sub(z30, __ S, p1, z13); // sub z30.s, p1/m, z30.s, z13.s - __ sve_fabs(z30, __ D, p4, z26); // fabs z30.d, p4/m, z26.d - __ sve_fadd(z15, __ S, p3, z11); // fadd z15.s, p3/m, z15.s, z11.s - __ sve_fdiv(z6, __ D, p7, z16); // fdiv z6.d, p7/m, z6.d, z16.d - __ sve_fmax(z27, __ S, p7, z7); // fmax z27.s, p7/m, z27.s, z7.s - __ sve_fmin(z19, __ D, p2, z4); // fmin z19.d, p2/m, z19.d, z4.d - __ sve_fmul(z17, __ S, p4, z22); // fmul z17.s, p4/m, z17.s, z22.s - __ sve_fneg(z28, __ D, p3, z21); // fneg z28.d, p3/m, z21.d - __ sve_frintm(z17, __ S, p5, z2); // frintm z17.s, p5/m, z2.s - __ sve_frintn(z6, __ S, p3, z15); // frintn z6.s, p3/m, z15.s - __ sve_frintp(z12, __ D, p5, z1); // frintp z12.d, p5/m, z1.d - __ sve_fsqrt(z17, __ S, p1, z17); // fsqrt z17.s, p1/m, z17.s 
- __ sve_fsub(z15, __ S, p5, z13); // fsub z15.s, p5/m, z15.s, z13.s - __ sve_fmla(z20, __ D, p7, z27, z11); // fmla z20.d, p7/m, z27.d, z11.d - __ sve_fmls(z3, __ D, p0, z30, z23); // fmls z3.d, p0/m, z30.d, z23.d - __ sve_fnmla(z17, __ S, p2, z27, z26); // fnmla z17.s, p2/m, z27.s, z26.s - __ sve_fnmls(z6, __ D, p5, z22, z30); // fnmls z6.d, p5/m, z22.d, z30.d - __ sve_mla(z2, __ H, p7, z26, z17); // mla z2.h, p7/m, z26.h, z17.h - __ sve_mls(z22, __ B, p4, z2, z17); // mls z22.b, p4/m, z2.b, z17.b - __ sve_and(z24, z25, z22); // and z24.d, z25.d, z22.d - __ sve_eor(z17, z12, z3); // eor z17.d, z12.d, z3.d - __ sve_orr(z29, z28, z16); // orr z29.d, z28.d, z16.d + __ sve_add(z17, __ D, z12, z3); // add z17.d, z12.d, z3.d + __ sve_sub(z29, __ D, z28, z16); // sub z29.d, z28.d, z16.d + __ sve_fadd(z6, __ D, z9, z28); // fadd z6.d, z9.d, z28.d + __ sve_fmul(z7, __ S, z4, z7); // fmul z7.s, z4.s, z7.s + __ sve_fsub(z9, __ S, z22, z8); // fsub z9.s, z22.s, z8.s + __ sve_abs(z27, __ B, p5, z30); // abs z27.b, p5/m, z30.b + __ sve_add(z26, __ H, p0, z16); // add z26.h, p0/m, z26.h, z16.h + __ sve_asr(z3, __ D, p6, z8); // asr z3.d, p6/m, z3.d, z8.d + __ sve_cnt(z21, __ D, p6, z26); // cnt z21.d, p6/m, z26.d + __ sve_lsl(z22, __ B, p0, z4); // lsl z22.b, p0/m, z22.b, z4.b + __ sve_lsr(z17, __ H, p0, z3); // lsr z17.h, p0/m, z17.h, z3.h + __ sve_mul(z1, __ B, p2, z6); // mul z1.b, p2/m, z1.b, z6.b + __ sve_neg(z9, __ S, p7, z7); // neg z9.s, p7/m, z7.s + __ sve_not(z22, __ H, p5, z5); // not z22.h, p5/m, z5.h + __ sve_smax(z8, __ B, p4, z30); // smax z8.b, p4/m, z8.b, z30.b + __ sve_smin(z17, __ D, p0, z11); // smin z17.d, p0/m, z17.d, z11.d + __ sve_sub(z28, __ S, p0, z26); // sub z28.s, p0/m, z28.s, z26.s + __ sve_fabs(z28, __ D, p3, z13); // fabs z28.d, p3/m, z13.d + __ sve_fadd(z16, __ S, p6, z5); // fadd z16.s, p6/m, z16.s, z5.s + __ sve_fdiv(z13, __ S, p2, z15); // fdiv z13.s, p2/m, z13.s, z15.s + __ sve_fmax(z26, __ S, p5, z11); // fmax z26.s, p5/m, z26.s, z11.s + __ sve_fmin(z22, __ S, p4, z4); // fmin z22.s, p4/m, z22.s, z4.s + __ sve_fmul(z19, __ S, p4, z17); // fmul z19.s, p4/m, z19.s, z17.s + __ sve_fneg(z14, __ D, p3, z2); // fneg z14.d, p3/m, z2.d + __ sve_frintm(z3, __ S, p5, z23); // frintm z3.s, p5/m, z23.s + __ sve_frintn(z6, __ S, p1, z17); // frintn z6.s, p1/m, z17.s + __ sve_frintp(z27, __ S, p4, z16); // frintp z27.s, p4/m, z16.s + __ sve_fsqrt(z2, __ S, p7, z3); // fsqrt z2.s, p7/m, z3.s + __ sve_fsub(z6, __ S, p4, z19); // fsub z6.s, p4/m, z6.s, z19.s + __ sve_fmla(z12, __ D, p5, z8, z24); // fmla z12.d, p5/m, z8.d, z24.d + __ sve_fmls(z17, __ S, p0, z10, z23); // fmls z17.s, p0/m, z10.s, z23.s + __ sve_fnmla(z19, __ S, p7, z13, z16); // fnmla z19.s, p7/m, z13.s, z16.s + __ sve_fnmls(z0, __ D, p1, z14, z17); // fnmls z0.d, p1/m, z14.d, z17.d + __ sve_mla(z8, __ S, p2, z22, z20); // mla z8.s, p2/m, z22.s, z20.s + __ sve_mls(z27, __ S, p0, z3, z15); // mls z27.s, p0/m, z3.s, z15.s + __ sve_and(z20, z7, z4); // and z20.d, z7.d, z4.d + __ sve_eor(z7, z0, z8); // eor z7.d, z0.d, z8.d + __ sve_orr(z19, z22, z4); // orr z19.d, z22.d, z4.d // SVEReductionOp - __ sve_andv(v6, __ S, p2, z28); // andv s6, p2, z28.s - __ sve_orv(v7, __ H, p1, z7); // orv h7, p1, z7.h - __ sve_eorv(v9, __ B, p5, z8); // eorv b9, p5, z8.b - __ sve_smaxv(v27, __ B, p5, z30); // smaxv b27, p5, z30.b - __ sve_sminv(v26, __ H, p0, z16); // sminv h26, p0, z16.h - __ sve_fminv(v3, __ D, p6, z8); // fminv d3, p6, z8.d - __ sve_fmaxv(v21, __ D, p6, z26); // fmaxv d21, p6, z26.d - __ sve_fadda(v22, __ S, p0, z4); 
// fadda s22, p0, s22, z4.s - __ sve_uaddv(v17, __ H, p0, z3); // uaddv d17, p0, z3.h + __ sve_andv(v9, __ D, p5, z11); // andv d9, p5, z11.d + __ sve_orv(v5, __ H, p7, z16); // orv h5, p7, z16.h + __ sve_eorv(v22, __ H, p3, z1); // eorv h22, p3, z1.h + __ sve_smaxv(v8, __ D, p5, z16); // smaxv d8, p5, z16.d + __ sve_sminv(v15, __ S, p1, z4); // sminv s15, p1, z4.s + __ sve_fminv(v8, __ S, p1, z29); // fminv s8, p1, z29.s + __ sve_fmaxv(v28, __ D, p4, z29); // fmaxv d28, p4, z29.d + __ sve_fadda(v9, __ S, p3, z2); // fadda s9, p3, s9, z2.s + __ sve_uaddv(v28, __ B, p0, z7); // uaddv d28, p0, z7.b __ bind(forth); /* -aarch64ops.o: file format elf64-littleaarch64 - - -Disassembly of section .text: - -0000000000000000 : - 0: 8b0d82fa add x26, x23, x13, lsl #32 - 4: cb49970c sub x12, x24, x9, lsr #37 - 8: ab889dfc adds x28, x15, x8, asr #39 - c: eb9ee787 subs x7, x28, x30, asr #57 - 10: 0b9b3ec9 add w9, w22, w27, asr #15 - 14: 4b9179a3 sub w3, w13, w17, asr #30 - 18: 2b88474e adds w14, w26, w8, asr #17 - 1c: 6b8c56c0 subs w0, w22, w12, asr #21 - 20: 8a1a51e0 and x0, x15, x26, lsl #20 - 24: aa11f4ba orr x26, x5, x17, lsl #61 - 28: ca0281b8 eor x24, x13, x2, lsl #32 - 2c: ea918c7c ands x28, x3, x17, asr #35 - 30: 0a5d4a19 and w25, w16, w29, lsr #18 - 34: 2a4b262d orr w13, w17, w11, lsr #9 - 38: 4a513ca5 eor w5, w5, w17, lsr #15 - 3c: 6a9b6ae2 ands w2, w23, w27, asr #26 - 40: 8a70b79b bic x27, x28, x16, lsr #45 - 44: aaba9728 orn x8, x25, x26, asr #37 - 48: ca6dfe3d eon x29, x17, x13, lsr #63 - 4c: ea627f1c bics x28, x24, x2, lsr #31 - 50: 0aa70f53 bic w19, w26, w7, asr #3 - 54: 2aaa0f06 orn w6, w24, w10, asr #3 - 58: 4a6176a4 eon w4, w21, w1, lsr #29 - 5c: 6a604eb0 bics w16, w21, w0, lsr #19 - 60: 1105ed91 add w17, w12, #0x17b - 64: 3100583e adds w30, w1, #0x16 - 68: 5101f8bd sub w29, w5, #0x7e - 6c: 710f0306 subs w6, w24, #0x3c0 - 70: 9101a1a0 add x0, x13, #0x68 - 74: b10a5cc8 adds x8, x6, #0x297 - 78: d10810aa sub x10, x5, #0x204 - 7c: f10fd061 subs x1, x3, #0x3f4 - 80: 120cb166 and w6, w11, #0xfff1fff1 - 84: 321764bc orr w28, w5, #0xfffffe07 - 88: 52174681 eor w1, w20, #0x7fffe00 - 8c: 720c0227 ands w7, w17, #0x100000 - 90: 9241018e and x14, x12, #0x8000000000000000 - 94: b25a2969 orr x9, x11, #0x1ffc000000000 - 98: d278b411 eor x17, x0, #0x3fffffffffff00 - 9c: f26aad01 ands x1, x8, #0xffffffffffc00003 - a0: 14000000 b a0 - a4: 17ffffd7 b 0 - a8: 14000242 b 9b0 - ac: 94000000 bl ac - b0: 97ffffd4 bl 0 - b4: 9400023f bl 9b0 - b8: 3400000a cbz w10, b8 - bc: 34fffa2a cbz w10, 0 - c0: 3400478a cbz w10, 9b0 - c4: 35000008 cbnz w8, c4 - c8: 35fff9c8 cbnz w8, 0 - cc: 35004728 cbnz w8, 9b0 - d0: b400000b cbz x11, d0 - d4: b4fff96b cbz x11, 0 - d8: b40046cb cbz x11, 9b0 - dc: b500001d cbnz x29, dc - e0: b5fff91d cbnz x29, 0 - e4: b500467d cbnz x29, 9b0 - e8: 10000013 adr x19, e8 - ec: 10fff8b3 adr x19, 0 - f0: 10004613 adr x19, 9b0 - f4: 90000013 adrp x19, 0 - f8: 36300016 tbz w22, #6, f8 - fc: 3637f836 tbz w22, #6, 0 - 100: 36304596 tbz w22, #6, 9b0 - 104: 3758000c tbnz w12, #11, 104 - 108: 375ff7cc tbnz w12, #11, 0 - 10c: 3758452c tbnz w12, #11, 9b0 - 110: 128313a0 mov w0, #0xffffe762 // #-6302 - 114: 528a32c7 mov w7, #0x5196 // #20886 - 118: 7289173b movk w27, #0x48b9 - 11c: 92ab3acc mov x12, #0xffffffffa629ffff // #-1507196929 - 120: d2a0bf94 mov x20, #0x5fc0000 // #100401152 - 124: f2c285e8 movk x8, #0x142f, lsl #32 - 128: 9358722f sbfx x15, x17, #24, #5 - 12c: 330e652f bfxil w15, w9, #14, #12 - 130: 53067f3b lsr w27, w25, #6 - 134: 93577c53 sbfx x19, x2, #23, #9 - 138: b34a1aac bfi x12, x21, 
#54, #7 - 13c: d35a4016 ubfiz x22, x0, #38, #17 - 140: 13946c63 extr w3, w3, w20, #27 - 144: 93c3dbc8 extr x8, x30, x3, #54 - 148: 54000000 b.eq 148 // b.none - 14c: 54fff5a0 b.eq 0 // b.none - 150: 54004300 b.eq 9b0 // b.none - 154: 54000001 b.ne 154 // b.any - 158: 54fff541 b.ne 0 // b.any - 15c: 540042a1 b.ne 9b0 // b.any - 160: 54000002 b.cs 160 // b.hs, b.nlast - 164: 54fff4e2 b.cs 0 // b.hs, b.nlast - 168: 54004242 b.cs 9b0 // b.hs, b.nlast - 16c: 54000002 b.cs 16c // b.hs, b.nlast - 170: 54fff482 b.cs 0 // b.hs, b.nlast - 174: 540041e2 b.cs 9b0 // b.hs, b.nlast - 178: 54000003 b.cc 178 // b.lo, b.ul, b.last - 17c: 54fff423 b.cc 0 // b.lo, b.ul, b.last - 180: 54004183 b.cc 9b0 // b.lo, b.ul, b.last - 184: 54000003 b.cc 184 // b.lo, b.ul, b.last - 188: 54fff3c3 b.cc 0 // b.lo, b.ul, b.last - 18c: 54004123 b.cc 9b0 // b.lo, b.ul, b.last - 190: 54000004 b.mi 190 // b.first - 194: 54fff364 b.mi 0 // b.first - 198: 540040c4 b.mi 9b0 // b.first - 19c: 54000005 b.pl 19c // b.nfrst - 1a0: 54fff305 b.pl 0 // b.nfrst - 1a4: 54004065 b.pl 9b0 // b.nfrst - 1a8: 54000006 b.vs 1a8 - 1ac: 54fff2a6 b.vs 0 - 1b0: 54004006 b.vs 9b0 - 1b4: 54000007 b.vc 1b4 - 1b8: 54fff247 b.vc 0 - 1bc: 54003fa7 b.vc 9b0 - 1c0: 54000008 b.hi 1c0 // b.pmore - 1c4: 54fff1e8 b.hi 0 // b.pmore - 1c8: 54003f48 b.hi 9b0 // b.pmore - 1cc: 54000009 b.ls 1cc // b.plast - 1d0: 54fff189 b.ls 0 // b.plast - 1d4: 54003ee9 b.ls 9b0 // b.plast - 1d8: 5400000a b.ge 1d8 // b.tcont - 1dc: 54fff12a b.ge 0 // b.tcont - 1e0: 54003e8a b.ge 9b0 // b.tcont - 1e4: 5400000b b.lt 1e4 // b.tstop - 1e8: 54fff0cb b.lt 0 // b.tstop - 1ec: 54003e2b b.lt 9b0 // b.tstop - 1f0: 5400000c b.gt 1f0 - 1f4: 54fff06c b.gt 0 - 1f8: 54003dcc b.gt 9b0 - 1fc: 5400000d b.le 1fc - 200: 54fff00d b.le 0 - 204: 54003d6d b.le 9b0 - 208: 5400000e b.al 208 - 20c: 54ffefae b.al 0 - 210: 54003d0e b.al 9b0 - 214: 5400000f b.nv 214 - 218: 54ffef4f b.nv 0 - 21c: 54003caf b.nv 9b0 - 220: d40658e1 svc #0x32c7 - 224: d4014d22 hvc #0xa69 - 228: d4046543 smc #0x232a - 22c: d4273f60 brk #0x39fb - 230: d44cad80 hlt #0x656c - 234: d503201f nop - 238: d69f03e0 eret - 23c: d6bf03e0 drps - 240: d5033fdf isb - 244: d5033e9f dsb st - 248: d50332bf dmb oshst - 24c: d61f0200 br x16 - 250: d63f0280 blr x20 - 254: c80a7d1b stxr w10, x27, [x8] - 258: c800fea1 stlxr w0, x1, [x21] - 25c: c85f7fb1 ldxr x17, [x29] - 260: c85fff9d ldaxr x29, [x28] - 264: c89ffee1 stlr x1, [x23] - 268: c8dffe95 ldar x21, [x20] - 26c: 88167e7b stxr w22, w27, [x19] - 270: 880bfcd0 stlxr w11, w16, [x6] - 274: 885f7c11 ldxr w17, [x0] - 278: 885ffd44 ldaxr w4, [x10] - 27c: 889ffed8 stlr w24, [x22] - 280: 88dffe6a ldar w10, [x19] - 284: 48017fc5 stxrh w1, w5, [x30] - 288: 4808fe2c stlxrh w8, w12, [x17] - 28c: 485f7dc9 ldxrh w9, [x14] - 290: 485ffc27 ldaxrh w7, [x1] - 294: 489ffe05 stlrh w5, [x16] - 298: 48dffd82 ldarh w2, [x12] - 29c: 080a7c6c stxrb w10, w12, [x3] - 2a0: 081cff4e stlxrb w28, w14, [x26] - 2a4: 085f7d5e ldxrb w30, [x10] - 2a8: 085ffeae ldaxrb w14, [x21] - 2ac: 089ffd2d stlrb w13, [x9] - 2b0: 08dfff76 ldarb w22, [x27] - 2b4: c87f4d7c ldxp x28, x19, [x11] - 2b8: c87fcc5e ldaxp x30, x19, [x2] - 2bc: c8220417 stxp w2, x23, x1, [x0] - 2c0: c82cb5f0 stlxp w12, x16, x13, [x15] - 2c4: 887f55b1 ldxp w17, w21, [x13] - 2c8: 887ff90b ldaxp w11, w30, [x8] - 2cc: 88382c2d stxp w24, w13, w11, [x1] - 2d0: 883aedb5 stlxp w26, w21, w27, [x13] - 2d4: f819928b stur x11, [x20, #-103] - 2d8: b803e21c stur w28, [x16, #62] - 2dc: 381f713b sturb w27, [x9, #-9] - 2e0: 781ce322 sturh w2, [x25, #-50] - 2e4: f850f044 ldur x4, [x2, 
#-241] - 2e8: b85e129e ldur w30, [x20, #-31] - 2ec: 385e92f1 ldurb w17, [x23, #-23] - 2f0: 785ff35d ldurh w29, [x26, #-1] - 2f4: 39801921 ldrsb x1, [x9, #6] - 2f8: 7881318b ldursh x11, [x12, #19] - 2fc: 78dce02b ldursh w11, [x1, #-50] - 300: b8829313 ldursw x19, [x24, #41] - 304: fc45f318 ldur d24, [x24, #95] - 308: bc5d50af ldur s15, [x5, #-43] - 30c: fc001375 stur d21, [x27, #1] - 310: bc1951b7 stur s23, [x13, #-107] - 314: f8008c0b str x11, [x0, #8]! - 318: b801dc03 str w3, [x0, #29]! - 31c: 38009dcb strb w11, [x14, #9]! - 320: 781fdf1d strh w29, [x24, #-3]! - 324: f8570e2d ldr x13, [x17, #-144]! - 328: b85faecc ldr w12, [x22, #-6]! - 32c: 385f6d8d ldrb w13, [x12, #-10]! - 330: 785ebea0 ldrh w0, [x21, #-21]! - 334: 38804cf7 ldrsb x23, [x7, #4]! - 338: 789cbce3 ldrsh x3, [x7, #-53]! - 33c: 78df9cbc ldrsh w28, [x5, #-7]! - 340: b89eed38 ldrsw x24, [x9, #-18]! - 344: fc40cd6e ldr d14, [x11, #12]! - 348: bc5bdd93 ldr s19, [x12, #-67]! - 34c: fc103c14 str d20, [x0, #-253]! - 350: bc040c08 str s8, [x0, #64]! - 354: f81a2784 str x4, [x28], #-94 - 358: b81ca4ec str w12, [x7], #-54 - 35c: 381e855b strb w27, [x10], #-24 - 360: 7801b506 strh w6, [x8], #27 - 364: f853654e ldr x14, [x10], #-202 - 368: b85d74b0 ldr w16, [x5], #-41 - 36c: 384095c2 ldrb w2, [x14], #9 - 370: 785ec5bc ldrh w28, [x13], #-20 - 374: 389e15a9 ldrsb x9, [x13], #-31 - 378: 789dc703 ldrsh x3, [x24], #-36 - 37c: 78c06474 ldrsh w20, [x3], #6 - 380: b89ff667 ldrsw x7, [x19], #-1 - 384: fc57e51e ldr d30, [x8], #-130 - 388: bc4155f9 ldr s25, [x15], #21 - 38c: fc05a6ee str d14, [x23], #90 - 390: bc1df408 str s8, [x0], #-33 - 394: f835da2a str x10, [x17, w21, sxtw #3] - 398: b836d9a4 str w4, [x13, w22, sxtw #2] - 39c: 3833580d strb w13, [x0, w19, uxtw #0] - 3a0: 7826cb6c strh w12, [x27, w6, sxtw] - 3a4: f8706900 ldr x0, [x8, x16] - 3a8: b87ae880 ldr w0, [x4, x26, sxtx] - 3ac: 3865db2e ldrb w14, [x25, w5, sxtw #0] - 3b0: 78714889 ldrh w9, [x4, w17, uxtw] - 3b4: 38a7789b ldrsb x27, [x4, x7, lsl #0] - 3b8: 78beca2f ldrsh x15, [x17, w30, sxtw] - 3bc: 78f6c810 ldrsh w16, [x0, w22, sxtw] - 3c0: b8bef956 ldrsw x22, [x10, x30, sxtx #2] - 3c4: fc6afabd ldr d29, [x21, x10, sxtx #3] - 3c8: bc734963 ldr s3, [x11, w19, uxtw] - 3cc: fc3d5b8d str d13, [x28, w29, uxtw #3] - 3d0: bc25fbb7 str s23, [x29, x5, sxtx #2] - 3d4: f9189d05 str x5, [x8, #12600] - 3d8: b91ecb1d str w29, [x24, #7880] - 3dc: 39187a33 strb w19, [x17, #1566] - 3e0: 791f226d strh w13, [x19, #3984] - 3e4: f95aa2f3 ldr x19, [x23, #13632] - 3e8: b9587bb7 ldr w23, [x29, #6264] - 3ec: 395f7176 ldrb w22, [x11, #2012] - 3f0: 795d9143 ldrh w3, [x10, #3784] - 3f4: 399e7e08 ldrsb x8, [x16, #1951] - 3f8: 799a2697 ldrsh x23, [x20, #3346] - 3fc: 79df3422 ldrsh w2, [x1, #3994] - 400: b99c2624 ldrsw x4, [x17, #7204] - 404: fd5c2374 ldr d20, [x27, #14400] - 408: bd5fa1d9 ldr s25, [x14, #8096] - 40c: fd1d595a str d26, [x10, #15024] - 410: bd1b1869 str s9, [x3, #6936] - 414: 58002cfb ldr x27, 9b0 - 418: 1800000b ldr w11, 418 - 41c: f8945060 prfum pldl1keep, [x3, #-187] - 420: d8000000 prfm pldl1keep, 420 - 424: f8ae6ba0 prfm pldl1keep, [x29, x14] - 428: f99a0080 prfm pldl1keep, [x4, #13312] - 42c: 1a070035 adc w21, w1, w7 - 430: 3a0700a8 adcs w8, w5, w7 - 434: 5a0e0367 sbc w7, w27, w14 - 438: 7a11009b sbcs w27, w4, w17 - 43c: 9a000380 adc x0, x28, x0 - 440: ba1e030c adcs x12, x24, x30 - 444: da0f0320 sbc x0, x25, x15 - 448: fa030301 sbcs x1, x24, x3 - 44c: 0b340b11 add w17, w24, w20, uxtb #2 - 450: 2b2a278d adds w13, w28, w10, uxth #1 - 454: cb22aa0f sub x15, x16, w2, sxth #2 - 458: 6b2d29bd subs 
w29, w13, w13, uxth #2 - 45c: 8b2cce8c add x12, x20, w12, sxtw #3 - 460: ab2b877e adds x30, x27, w11, sxtb #1 - 464: cb21c8ee sub x14, x7, w1, sxtw #2 - 468: eb3ba47d subs x29, x3, w27, sxth #1 - 46c: 3a4d400e ccmn w0, w13, #0xe, mi // mi = first - 470: 7a5132c6 ccmp w22, w17, #0x6, cc // cc = lo, ul, last - 474: ba5e622e ccmn x17, x30, #0xe, vs - 478: fa53814c ccmp x10, x19, #0xc, hi // hi = pmore - 47c: 3a52d8c2 ccmn w6, #0x12, #0x2, le - 480: 7a4d8924 ccmp w9, #0xd, #0x4, hi // hi = pmore - 484: ba4b3aab ccmn x21, #0xb, #0xb, cc // cc = lo, ul, last - 488: fa4d7882 ccmp x4, #0xd, #0x2, vc - 48c: 1a96804c csel w12, w2, w22, hi // hi = pmore - 490: 1a912618 csinc w24, w16, w17, cs // cs = hs, nlast - 494: 5a90b0e6 csinv w6, w7, w16, lt // lt = tstop - 498: 5a96976b csneg w11, w27, w22, ls // ls = plast - 49c: 9a9db06a csel x10, x3, x29, lt // lt = tstop - 4a0: 9a9b374c csinc x12, x26, x27, cc // cc = lo, ul, last - 4a4: da95c14f csinv x15, x10, x21, gt - 4a8: da89c6fe csneg x30, x23, x9, gt - 4ac: 5ac0015e rbit w30, w10 - 4b0: 5ac005fd rev16 w29, w15 - 4b4: 5ac00bdd rev w29, w30 - 4b8: 5ac012b9 clz w25, w21 - 4bc: 5ac01404 cls w4, w0 - 4c0: dac002b1 rbit x17, x21 - 4c4: dac0061d rev16 x29, x16 - 4c8: dac00a95 rev32 x21, x20 - 4cc: dac00e66 rev x6, x19 - 4d0: dac0107e clz x30, x3 - 4d4: dac01675 cls x21, x19 - 4d8: 1ac00b0b udiv w11, w24, w0 - 4dc: 1ace0f3b sdiv w27, w25, w14 - 4e0: 1ad121c3 lsl w3, w14, w17 - 4e4: 1ad825e7 lsr w7, w15, w24 - 4e8: 1ad92a3c asr w28, w17, w25 - 4ec: 1adc2f42 ror w2, w26, w28 - 4f0: 9ada0b25 udiv x5, x25, x26 - 4f4: 9ad10e1b sdiv x27, x16, x17 - 4f8: 9acc22a6 lsl x6, x21, x12 - 4fc: 9acc2480 lsr x0, x4, x12 - 500: 9adc2a3b asr x27, x17, x28 - 504: 9ad12c5c ror x28, x2, x17 - 508: 9bce7dea umulh x10, x15, x14 - 50c: 9b597c6e smulh x14, x3, x25 - 510: 1b0e166f madd w15, w19, w14, w5 - 514: 1b1ae490 msub w16, w4, w26, w25 - 518: 9b023044 madd x4, x2, x2, x12 - 51c: 9b089e3d msub x29, x17, x8, x7 - 520: 9b391083 smaddl x3, w4, w25, x4 - 524: 9b24c73a smsubl x26, w25, w4, x17 - 528: 9bb15f40 umaddl x0, w26, w17, x23 - 52c: 9bbcc6af umsubl x15, w21, w28, x17 - 530: 1e23095b fmul s27, s10, s3 - 534: 1e3918e0 fdiv s0, s7, s25 - 538: 1e2f28c9 fadd s9, s6, s15 - 53c: 1e2a39fd fsub s29, s15, s10 - 540: 1e270a22 fmul s2, s17, s7 - 544: 1e77096b fmul d11, d11, d23 - 548: 1e771ba7 fdiv d7, d29, d23 - 54c: 1e6b2b6e fadd d14, d27, d11 - 550: 1e78388b fsub d11, d4, d24 - 554: 1e6e09ec fmul d12, d15, d14 - 558: 1f1c3574 fmadd s20, s11, s28, s13 - 55c: 1f17f98b fmsub s11, s12, s23, s30 - 560: 1f2935da fnmadd s26, s14, s9, s13 - 564: 1f2574ea fnmadd s10, s7, s5, s29 - 568: 1f4b306f fmadd d15, d3, d11, d12 - 56c: 1f5ec7cf fmsub d15, d30, d30, d17 - 570: 1f6f3e93 fnmadd d19, d20, d15, d15 - 574: 1f6226a9 fnmadd d9, d21, d2, d9 - 578: 1e2040fb fmov s27, s7 - 57c: 1e20c3dd fabs s29, s30 - 580: 1e214031 fneg s17, s1 - 584: 1e21c0c2 fsqrt s2, s6 - 588: 1e22c06a fcvt d10, s3 - 58c: 1e604178 fmov d24, d11 - 590: 1e60c027 fabs d7, d1 - 594: 1e61400b fneg d11, d0 - 598: 1e61c223 fsqrt d3, d17 - 59c: 1e6240dc fcvt s28, d6 - 5a0: 1e3800d6 fcvtzs w22, s6 - 5a4: 9e380360 fcvtzs x0, s27 - 5a8: 1e78005a fcvtzs w26, d2 - 5ac: 9e7800e5 fcvtzs x5, d7 - 5b0: 1e22017c scvtf s28, w11 - 5b4: 9e2201b9 scvtf s25, x13 - 5b8: 1e6202eb scvtf d11, w23 - 5bc: 9e620113 scvtf d19, x8 - 5c0: 1e2602b1 fmov w17, s21 - 5c4: 9e660299 fmov x25, d20 - 5c8: 1e270233 fmov s19, w17 - 5cc: 9e6703a2 fmov d2, x29 - 5d0: 1e2822c0 fcmp s22, s8 - 5d4: 1e7322a0 fcmp d21, d19 - 5d8: 1e202288 fcmp s20, #0.0 - 5dc: 1e602168 fcmp 
d11, #0.0 - 5e0: 293c19f4 stp w20, w6, [x15, #-32] - 5e4: 2966387b ldp w27, w14, [x3, #-208] - 5e8: 69762971 ldpsw x17, x10, [x11, #-80] - 5ec: a9041dc7 stp x7, x7, [x14, #64] - 5f0: a9475c0c ldp x12, x23, [x0, #112] - 5f4: 29b61ccd stp w13, w7, [x6, #-80]! - 5f8: 29ee405e ldp w30, w16, [x2, #-144]! - 5fc: 69ee0744 ldpsw x4, x1, [x26, #-144]! - 600: a9843977 stp x23, x14, [x11, #64]! - 604: a9f46ebd ldp x29, x27, [x21, #-192]! - 608: 28ba16b6 stp w22, w5, [x21], #-48 - 60c: 28fc44db ldp w27, w17, [x6], #-32 - 610: 68f61831 ldpsw x17, x6, [x1], #-80 - 614: a8b352ad stp x13, x20, [x21], #-208 - 618: a8c56d5e ldp x30, x27, [x10], #80 - 61c: 28024565 stnp w5, w17, [x11, #16] - 620: 2874134e ldnp w14, w4, [x26, #-96] - 624: a8027597 stnp x23, x29, [x12, #32] - 628: a87b1aa0 ldnp x0, x6, [x21, #-80] - 62c: 0c40734f ld1 {v15.8b}, [x26] - 630: 4cdfa177 ld1 {v23.16b, v24.16b}, [x11], #32 - 634: 0cc76ee8 ld1 {v8.1d-v10.1d}, [x23], x7 - 638: 4cdf2733 ld1 {v19.8h-v22.8h}, [x25], #64 - 63c: 0d40c23d ld1r {v29.8b}, [x17] - 640: 4ddfcaf8 ld1r {v24.4s}, [x23], #4 - 644: 0dd9ccaa ld1r {v10.1d}, [x5], x25 - 648: 4c408d51 ld2 {v17.2d, v18.2d}, [x10] - 64c: 0cdf85ec ld2 {v12.4h, v13.4h}, [x15], #16 - 650: 4d60c239 ld2r {v25.16b, v26.16b}, [x17] - 654: 0dffcbc1 ld2r {v1.2s, v2.2s}, [x30], #8 - 658: 4de9ce30 ld2r {v16.2d, v17.2d}, [x17], x9 - 65c: 4cc24999 ld3 {v25.4s-v27.4s}, [x12], x2 - 660: 0c404a7a ld3 {v26.2s-v28.2s}, [x19] - 664: 4d40e6af ld3r {v15.8h-v17.8h}, [x21] - 668: 4ddfe9b9 ld3r {v25.4s-v27.4s}, [x13], #12 - 66c: 0dddef8e ld3r {v14.1d-v16.1d}, [x28], x29 - 670: 4cdf07b1 ld4 {v17.8h-v20.8h}, [x29], #64 - 674: 0cc000fb ld4 {v27.8b-v30.8b}, [x7], x0 - 678: 0d60e238 ld4r {v24.8b-v27.8b}, [x17] - 67c: 0dffe740 ld4r {v0.4h-v3.4h}, [x26], #8 - 680: 0de2eb2c ld4r {v12.2s-v15.2s}, [x25], x2 - 684: ce648376 sha512h q22, q27, v4.2d - 688: ce6184c7 sha512h2 q7, q6, v1.2d - 68c: cec081fa sha512su0 v26.2d, v15.2d - 690: ce6d89a2 sha512su1 v2.2d, v13.2d, v13.2d - 694: ba5fd3e3 ccmn xzr, xzr, #0x3, le - 698: 3a5f03e5 ccmn wzr, wzr, #0x5, eq // eq = none - 69c: fa411be4 ccmp xzr, #0x1, #0x4, ne // ne = any - 6a0: 7a42cbe2 ccmp wzr, #0x2, #0x2, gt - 6a4: 93df03ff ror xzr, xzr, #0 - 6a8: c820ffff stlxp w0, xzr, xzr, [sp] - 6ac: 8822fc7f stlxp w2, wzr, wzr, [x3] - 6b0: c8247cbf stxp w4, xzr, xzr, [x5] - 6b4: 88267fff stxp w6, wzr, wzr, [sp] - 6b8: 4e010fe0 dup v0.16b, wzr - 6bc: 4e081fe1 mov v1.d[0], xzr - 6c0: 4e0c1fe1 mov v1.s[1], wzr - 6c4: 4e0a1fe1 mov v1.h[2], wzr - 6c8: 4e071fe1 mov v1.b[3], wzr - 6cc: 4cc0ac3f ld1 {v31.2d, v0.2d}, [x1], x0 - 6d0: 05a08020 mov z0.s, p0/m, s1 - 6d4: 04b0e3e0 incw x0 - 6d8: 0470e7e1 dech x1 - 6dc: 042f9c20 lsl z0.b, z1.b, #7 - 6e0: 043f9c35 lsl z21.h, z1.h, #15 - 6e4: 047f9c20 lsl z0.s, z1.s, #31 - 6e8: 04ff9c20 lsl z0.d, z1.d, #63 - 6ec: 04299420 lsr z0.b, z1.b, #7 - 6f0: 04319160 asr z0.h, z11.h, #15 - 6f4: 0461943e lsr z30.s, z1.s, #31 - 6f8: 04a19020 asr z0.d, z1.d, #63 - 6fc: 042053ff addvl sp, x0, #31 - 700: 047f5401 addpl x1, sp, #-32 - 704: 25208028 cntp x8, p0, p1.b - 708: 2538cfe0 mov z0.b, #127 - 70c: 2578d001 mov z1.h, #-128 - 710: 25b8efe2 mov z2.s, #32512 - 714: 25f8f007 mov z7.d, #-32768 - 718: a400a3e0 ld1b {z0.b}, p0/z, [sp] - 71c: a4a8a7ea ld1h {z10.h}, p1/z, [sp, #-8, mul vl] - 720: a547a814 ld1w {z20.s}, p2/z, [x0, #7, mul vl] - 724: a4084ffe ld1b {z30.b}, p3/z, [sp, x8] - 728: a55c53e0 ld1w {z0.s}, p4/z, [sp, x28, lsl #2] - 72c: a5e1540b ld1d {z11.d}, p5/z, [x0, x1, lsl #3] - 730: e400fbf6 st1b {z22.b}, p6, [sp] - 734: e408ffff st1b {z31.b}, p7, [sp, #-8, mul 
vl] - 738: e547e400 st1w {z0.s}, p1, [x0, #7, mul vl] - 73c: e4014be0 st1b {z0.b}, p2, [sp, x1] - 740: e4a84fe0 st1h {z0.h}, p3, [sp, x8, lsl #1] - 744: e5f15000 st1d {z0.d}, p4, [x0, x17, lsl #3] - 748: 858043e0 ldr z0, [sp] - 74c: 85a043ff ldr z31, [sp, #-256, mul vl] - 750: e59f5d08 str z8, [x8, #255, mul vl] - 754: 1e601000 fmov d0, #2.000000000000000000e+00 - 758: 1e603000 fmov d0, #2.125000000000000000e+00 - 75c: 1e621000 fmov d0, #4.000000000000000000e+00 - 760: 1e623000 fmov d0, #4.250000000000000000e+00 - 764: 1e641000 fmov d0, #8.000000000000000000e+00 - 768: 1e643000 fmov d0, #8.500000000000000000e+00 - 76c: 1e661000 fmov d0, #1.600000000000000000e+01 - 770: 1e663000 fmov d0, #1.700000000000000000e+01 - 774: 1e681000 fmov d0, #1.250000000000000000e-01 - 778: 1e683000 fmov d0, #1.328125000000000000e-01 - 77c: 1e6a1000 fmov d0, #2.500000000000000000e-01 - 780: 1e6a3000 fmov d0, #2.656250000000000000e-01 - 784: 1e6c1000 fmov d0, #5.000000000000000000e-01 - 788: 1e6c3000 fmov d0, #5.312500000000000000e-01 - 78c: 1e6e1000 fmov d0, #1.000000000000000000e+00 - 790: 1e6e3000 fmov d0, #1.062500000000000000e+00 - 794: 1e701000 fmov d0, #-2.000000000000000000e+00 - 798: 1e703000 fmov d0, #-2.125000000000000000e+00 - 79c: 1e721000 fmov d0, #-4.000000000000000000e+00 - 7a0: 1e723000 fmov d0, #-4.250000000000000000e+00 - 7a4: 1e741000 fmov d0, #-8.000000000000000000e+00 - 7a8: 1e743000 fmov d0, #-8.500000000000000000e+00 - 7ac: 1e761000 fmov d0, #-1.600000000000000000e+01 - 7b0: 1e763000 fmov d0, #-1.700000000000000000e+01 - 7b4: 1e781000 fmov d0, #-1.250000000000000000e-01 - 7b8: 1e783000 fmov d0, #-1.328125000000000000e-01 - 7bc: 1e7a1000 fmov d0, #-2.500000000000000000e-01 - 7c0: 1e7a3000 fmov d0, #-2.656250000000000000e-01 - 7c4: 1e7c1000 fmov d0, #-5.000000000000000000e-01 - 7c8: 1e7c3000 fmov d0, #-5.312500000000000000e-01 - 7cc: 1e7e1000 fmov d0, #-1.000000000000000000e+00 - 7d0: 1e7e3000 fmov d0, #-1.062500000000000000e+00 - 7d4: f8388098 swp x24, x24, [x4] - 7d8: f8340010 ldadd x20, x16, [x0] - 7dc: f8241175 ldclr x4, x21, [x11] - 7e0: f83e22d0 ldeor x30, x16, [x22] - 7e4: f82432ef ldset x4, x15, [x23] - 7e8: f83a5186 ldsmin x26, x6, [x12] - 7ec: f82f41ee ldsmax x15, x14, [x15] - 7f0: f82973b9 ldumin x9, x25, [x29] - 7f4: f82b6194 ldumax x11, x20, [x12] - 7f8: f8b08216 swpa x16, x22, [x16] - 7fc: f8b50358 ldadda x21, x24, [x26] - 800: f8a61206 ldclra x6, x6, [x16] - 804: f8b02219 ldeora x16, x25, [x16] - 808: f8bc3218 ldseta x28, x24, [x16] - 80c: f8ba514f ldsmina x26, x15, [x10] - 810: f8ad428e ldsmaxa x13, x14, [x20] - 814: f8a173d7 ldumina x1, x23, [x30] - 818: f8ae60c2 ldumaxa x14, x2, [x6] - 81c: f8e38328 swpal x3, x8, [x25] - 820: f8e003db ldaddal x0, x27, [x30] - 824: f8e513c5 ldclral x5, x5, [x30] - 828: f8eb2019 ldeoral x11, x25, [x0] - 82c: f8ff3260 ldsetal xzr, x0, [x19] - 830: f8fd513a ldsminal x29, x26, [x9] - 834: f8fa41ec ldsmaxal x26, x12, [x15] - 838: f8eb71eb lduminal x11, x11, [x15] - 83c: f8f96316 ldumaxal x25, x22, [x24] - 840: f8608171 swpl x0, x17, [x11] - 844: f86600dd ldaddl x6, x29, [x6] - 848: f86512a5 ldclrl x5, x5, [x21] - 84c: f87321f0 ldeorl x19, x16, [x15] - 850: f87e339b ldsetl x30, x27, [x28] - 854: f861503c ldsminl x1, x28, [x1] - 858: f874421d ldsmaxl x20, x29, [x16] - 85c: f86d73aa lduminl x13, x10, [x29] - 860: f87d62d3 ldumaxl x29, x19, [x22] - 864: b82a83e4 swp w10, w4, [sp] - 868: b83503e8 ldadd w21, w8, [sp] - 86c: b833138a ldclr w19, w10, [x28] - 870: b82220b9 ldeor w2, w25, [x5] - 874: b82332c8 ldset w3, w8, [x22] - 878: b83350ad ldsmin 
w19, w13, [x5] - 87c: b83d42b8 ldsmax w29, w24, [x21] - 880: b83a7078 ldumin w26, w24, [x3] - 884: b83862fa ldumax w24, w26, [x23] - 888: b8af8075 swpa w15, w21, [x3] - 88c: b8b80328 ldadda w24, w8, [x25] - 890: b8b41230 ldclra w20, w16, [x17] - 894: b8a22001 ldeora w2, w1, [x0] - 898: b8b83064 ldseta w24, w4, [x3] - 89c: b8ac539f ldsmina w12, wzr, [x28] - 8a0: b8aa405a ldsmaxa w10, w26, [x2] - 8a4: b8ac73f0 ldumina w12, w16, [sp] - 8a8: b8a163ad ldumaxa w1, w13, [x29] - 8ac: b8e08193 swpal w0, w19, [x12] - 8b0: b8f101b6 ldaddal w17, w22, [x13] - 8b4: b8fc13fe ldclral w28, w30, [sp] - 8b8: b8e1239a ldeoral w1, w26, [x28] - 8bc: b8e4309e ldsetal w4, w30, [x4] - 8c0: b8e6535e ldsminal w6, w30, [x26] - 8c4: b8f04109 ldsmaxal w16, w9, [x8] - 8c8: b8ec7280 lduminal w12, w0, [x20] - 8cc: b8e16058 ldumaxal w1, w24, [x2] - 8d0: b8608309 swpl w0, w9, [x24] - 8d4: b87a03d0 ldaddl w26, w16, [x30] - 8d8: b86312ea ldclrl w3, w10, [x23] - 8dc: b86a21e4 ldeorl w10, w4, [x15] - 8e0: b862310b ldsetl w2, w11, [x8] - 8e4: b86a522f ldsminl w10, w15, [x17] - 8e8: b862418a ldsmaxl w2, w10, [x12] - 8ec: b86c71af lduminl w12, w15, [x13] - 8f0: b8626287 ldumaxl w2, w7, [x20] - 8f4: 042401f9 add z25.b, z15.b, z4.b - 8f8: 04b10564 sub z4.s, z11.s, z17.s - 8fc: 65ca0230 fadd z16.d, z17.d, z10.d - 900: 65d90996 fmul z22.d, z12.d, z25.d - 904: 65ca05dc fsub z28.d, z14.d, z10.d - 908: 0456afc1 abs z1.h, p3/m, z30.h - 90c: 0400044f add z15.b, p1/m, z15.b, z2.b - 910: 0490920d asr z13.s, p4/m, z13.s, z16.s - 914: 04daa163 cnt z3.d, p0/m, z11.d - 918: 04d389c5 lsl z5.d, p2/m, z5.d, z14.d - 91c: 0411829d lsr z29.b, p0/m, z29.b, z20.b - 920: 04901774 mul z20.s, p5/m, z20.s, z27.s - 924: 0417b89a neg z26.b, p6/m, z4.b - 928: 041eb3d6 not z22.b, p4/m, z30.b - 92c: 04480b6b smax z11.h, p2/m, z11.h, z27.h - 930: 048a17dc smin z28.s, p5/m, z28.s, z30.s - 934: 048105be sub z30.s, p1/m, z30.s, z13.s - 938: 04dcb35e fabs z30.d, p4/m, z26.d - 93c: 65808d6f fadd z15.s, p3/m, z15.s, z11.s - 940: 65cd9e06 fdiv z6.d, p7/m, z6.d, z16.d - 944: 65869cfb fmax z27.s, p7/m, z27.s, z7.s - 948: 65c78893 fmin z19.d, p2/m, z19.d, z4.d - 94c: 658292d1 fmul z17.s, p4/m, z17.s, z22.s - 950: 04ddaebc fneg z28.d, p3/m, z21.d - 954: 6582b451 frintm z17.s, p5/m, z2.s - 958: 6580ade6 frintn z6.s, p3/m, z15.s - 95c: 65c1b42c frintp z12.d, p5/m, z1.d - 960: 658da631 fsqrt z17.s, p1/m, z17.s - 964: 658195af fsub z15.s, p5/m, z15.s, z13.s - 968: 65eb1f74 fmla z20.d, p7/m, z27.d, z11.d - 96c: 65f723c3 fmls z3.d, p0/m, z30.d, z23.d - 970: 65ba4b71 fnmla z17.s, p2/m, z27.s, z26.s - 974: 65fe76c6 fnmls z6.d, p5/m, z22.d, z30.d - 978: 04515f42 mla z2.h, p7/m, z26.h, z17.h - 97c: 04117056 mls z22.b, p4/m, z2.b, z17.b - 980: 04363338 and z24.d, z25.d, z22.d - 984: 04a33191 eor z17.d, z12.d, z3.d - 988: 0470339d orr z29.d, z28.d, z16.d - 98c: 049a2b86 andv s6, p2, z28.s - 990: 045824e7 orv h7, p1, z7.h - 994: 04193509 eorv b9, p5, z8.b - 998: 040837db smaxv b27, p5, z30.b - 99c: 044a221a sminv h26, p0, z16.h - 9a0: 65c73903 fminv d3, p6, z8.d - 9a4: 65c63b55 fmaxv d21, p6, z26.d - 9a8: 65982096 fadda s22, p0, s22, z4.s - 9ac: 04412071 uaddv d17, p0, z3.h - */ +*/ static const unsigned int insns[] = { @@ -1486,30 +1007,30 @@ Disassembly of section .text: 0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061, 0x120cb166, 0x321764bc, 0x52174681, 0x720c0227, 0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01, - 0x14000000, 0x17ffffd7, 0x14000242, 0x94000000, - 0x97ffffd4, 0x9400023f, 0x3400000a, 0x34fffa2a, - 0x3400478a, 0x35000008, 0x35fff9c8, 0x35004728, - 0xb400000b, 0xb4fff96b, 
0xb40046cb, 0xb500001d, - 0xb5fff91d, 0xb500467d, 0x10000013, 0x10fff8b3, - 0x10004613, 0x90000013, 0x36300016, 0x3637f836, - 0x36304596, 0x3758000c, 0x375ff7cc, 0x3758452c, + 0x14000000, 0x17ffffd7, 0x140002cd, 0x94000000, + 0x97ffffd4, 0x940002ca, 0x3400000a, 0x34fffa2a, + 0x340058ea, 0x35000008, 0x35fff9c8, 0x35005888, + 0xb400000b, 0xb4fff96b, 0xb400582b, 0xb500001d, + 0xb5fff91d, 0xb50057dd, 0x10000013, 0x10fff8b3, + 0x10005773, 0x90000013, 0x36300016, 0x3637f836, + 0x363056f6, 0x3758000c, 0x375ff7cc, 0x3758568c, 0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc, 0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f, 0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016, 0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0, - 0x54004300, 0x54000001, 0x54fff541, 0x540042a1, - 0x54000002, 0x54fff4e2, 0x54004242, 0x54000002, - 0x54fff482, 0x540041e2, 0x54000003, 0x54fff423, - 0x54004183, 0x54000003, 0x54fff3c3, 0x54004123, - 0x54000004, 0x54fff364, 0x540040c4, 0x54000005, - 0x54fff305, 0x54004065, 0x54000006, 0x54fff2a6, - 0x54004006, 0x54000007, 0x54fff247, 0x54003fa7, - 0x54000008, 0x54fff1e8, 0x54003f48, 0x54000009, - 0x54fff189, 0x54003ee9, 0x5400000a, 0x54fff12a, - 0x54003e8a, 0x5400000b, 0x54fff0cb, 0x54003e2b, - 0x5400000c, 0x54fff06c, 0x54003dcc, 0x5400000d, - 0x54fff00d, 0x54003d6d, 0x5400000e, 0x54ffefae, - 0x54003d0e, 0x5400000f, 0x54ffef4f, 0x54003caf, + 0x54005460, 0x54000001, 0x54fff541, 0x54005401, + 0x54000002, 0x54fff4e2, 0x540053a2, 0x54000002, + 0x54fff482, 0x54005342, 0x54000003, 0x54fff423, + 0x540052e3, 0x54000003, 0x54fff3c3, 0x54005283, + 0x54000004, 0x54fff364, 0x54005224, 0x54000005, + 0x54fff305, 0x540051c5, 0x54000006, 0x54fff2a6, + 0x54005166, 0x54000007, 0x54fff247, 0x54005107, + 0x54000008, 0x54fff1e8, 0x540050a8, 0x54000009, + 0x54fff189, 0x54005049, 0x5400000a, 0x54fff12a, + 0x54004fea, 0x5400000b, 0x54fff0cb, 0x54004f8b, + 0x5400000c, 0x54fff06c, 0x54004f2c, 0x5400000d, + 0x54fff00d, 0x54004ecd, 0x5400000e, 0x54ffefae, + 0x54004e6e, 0x5400000f, 0x54ffef4f, 0x54004e0f, 0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60, 0xd44cad80, 0xd503201f, 0xd69f03e0, 0xd6bf03e0, 0xd5033fdf, 0xd5033e9f, 0xd50332bf, 0xd61f0200, @@ -1541,7 +1062,7 @@ Disassembly of section .text: 0x791f226d, 0xf95aa2f3, 0xb9587bb7, 0x395f7176, 0x795d9143, 0x399e7e08, 0x799a2697, 0x79df3422, 0xb99c2624, 0xfd5c2374, 0xbd5fa1d9, 0xfd1d595a, - 0xbd1b1869, 0x58002cfb, 0x1800000b, 0xf8945060, + 0xbd1b1869, 0x58003e5b, 0x1800000b, 0xf8945060, 0xd8000000, 0xf8ae6ba0, 0xf99a0080, 0x1a070035, 0x3a0700a8, 0x5a0e0367, 0x7a11009b, 0x9a000380, 0xba1e030c, 0xda0f0320, 0xfa030301, 0x0b340b11, @@ -1580,58 +1101,92 @@ Disassembly of section .text: 0x4d60c239, 0x0dffcbc1, 0x4de9ce30, 0x4cc24999, 0x0c404a7a, 0x4d40e6af, 0x4ddfe9b9, 0x0dddef8e, 0x4cdf07b1, 0x0cc000fb, 0x0d60e238, 0x0dffe740, - 0x0de2eb2c, 0xce648376, 0xce6184c7, 0xcec081fa, - 0xce6d89a2, 0xba5fd3e3, 0x3a5f03e5, 0xfa411be4, - 0x7a42cbe2, 0x93df03ff, 0xc820ffff, 0x8822fc7f, - 0xc8247cbf, 0x88267fff, 0x4e010fe0, 0x4e081fe1, - 0x4e0c1fe1, 0x4e0a1fe1, 0x4e071fe1, 0x4cc0ac3f, - 0x05a08020, 0x04b0e3e0, 0x0470e7e1, 0x042f9c20, - 0x043f9c35, 0x047f9c20, 0x04ff9c20, 0x04299420, - 0x04319160, 0x0461943e, 0x04a19020, 0x042053ff, - 0x047f5401, 0x25208028, 0x2538cfe0, 0x2578d001, - 0x25b8efe2, 0x25f8f007, 0xa400a3e0, 0xa4a8a7ea, - 0xa547a814, 0xa4084ffe, 0xa55c53e0, 0xa5e1540b, - 0xe400fbf6, 0xe408ffff, 0xe547e400, 0xe4014be0, - 0xe4a84fe0, 0xe5f15000, 0x858043e0, 0x85a043ff, - 0xe59f5d08, 0x1e601000, 0x1e603000, 0x1e621000, - 0x1e623000, 0x1e641000, 0x1e643000, 0x1e661000, - 
0x1e663000, 0x1e681000, 0x1e683000, 0x1e6a1000, - 0x1e6a3000, 0x1e6c1000, 0x1e6c3000, 0x1e6e1000, - 0x1e6e3000, 0x1e701000, 0x1e703000, 0x1e721000, - 0x1e723000, 0x1e741000, 0x1e743000, 0x1e761000, - 0x1e763000, 0x1e781000, 0x1e783000, 0x1e7a1000, - 0x1e7a3000, 0x1e7c1000, 0x1e7c3000, 0x1e7e1000, - 0x1e7e3000, 0xf8388098, 0xf8340010, 0xf8241175, - 0xf83e22d0, 0xf82432ef, 0xf83a5186, 0xf82f41ee, - 0xf82973b9, 0xf82b6194, 0xf8b08216, 0xf8b50358, - 0xf8a61206, 0xf8b02219, 0xf8bc3218, 0xf8ba514f, - 0xf8ad428e, 0xf8a173d7, 0xf8ae60c2, 0xf8e38328, - 0xf8e003db, 0xf8e513c5, 0xf8eb2019, 0xf8ff3260, - 0xf8fd513a, 0xf8fa41ec, 0xf8eb71eb, 0xf8f96316, - 0xf8608171, 0xf86600dd, 0xf86512a5, 0xf87321f0, - 0xf87e339b, 0xf861503c, 0xf874421d, 0xf86d73aa, - 0xf87d62d3, 0xb82a83e4, 0xb83503e8, 0xb833138a, - 0xb82220b9, 0xb82332c8, 0xb83350ad, 0xb83d42b8, - 0xb83a7078, 0xb83862fa, 0xb8af8075, 0xb8b80328, - 0xb8b41230, 0xb8a22001, 0xb8b83064, 0xb8ac539f, - 0xb8aa405a, 0xb8ac73f0, 0xb8a163ad, 0xb8e08193, - 0xb8f101b6, 0xb8fc13fe, 0xb8e1239a, 0xb8e4309e, - 0xb8e6535e, 0xb8f04109, 0xb8ec7280, 0xb8e16058, - 0xb8608309, 0xb87a03d0, 0xb86312ea, 0xb86a21e4, - 0xb862310b, 0xb86a522f, 0xb862418a, 0xb86c71af, - 0xb8626287, 0x042401f9, 0x04b10564, 0x65ca0230, - 0x65d90996, 0x65ca05dc, 0x0456afc1, 0x0400044f, - 0x0490920d, 0x04daa163, 0x04d389c5, 0x0411829d, - 0x04901774, 0x0417b89a, 0x041eb3d6, 0x04480b6b, - 0x048a17dc, 0x048105be, 0x04dcb35e, 0x65808d6f, - 0x65cd9e06, 0x65869cfb, 0x65c78893, 0x658292d1, - 0x04ddaebc, 0x6582b451, 0x6580ade6, 0x65c1b42c, - 0x658da631, 0x658195af, 0x65eb1f74, 0x65f723c3, - 0x65ba4b71, 0x65fe76c6, 0x04515f42, 0x04117056, - 0x04363338, 0x04a33191, 0x0470339d, 0x049a2b86, - 0x045824e7, 0x04193509, 0x040837db, 0x044a221a, - 0x65c73903, 0x65c63b55, 0x65982096, 0x04412071, - + 0x0de2eb2c, 0x0e31baf6, 0x4e31bb9b, 0x0e71b8a4, + 0x4e71b907, 0x4eb1b8e6, 0x0e30a841, 0x4e30ab7a, + 0x0e70aa0f, 0x4e70a862, 0x4eb0a9cd, 0x6e30f9cd, + 0x0e31ab38, 0x4e31ab17, 0x0e71a8a4, 0x4e71aa93, + 0x4eb1aa0f, 0x6eb0f820, 0x0e20b8a4, 0x4e20bab4, + 0x0e60b98b, 0x4e60bbdd, 0x0ea0ba0f, 0x4ea0bad5, + 0x4ee0b8a4, 0x0ea0f9ee, 0x4ea0faf6, 0x4ee0fb59, + 0x2ea0f8e6, 0x6ea0f9ac, 0x6ee0f9ee, 0x2ea1f9cd, + 0x6ea1f9ee, 0x6ee1f949, 0x2e205b59, 0x6e205bbc, + 0x0e2c1d6a, 0x4e351e93, 0x0ead1d8b, 0x4eb31e51, + 0x2e371ed5, 0x6e311e0f, 0x0e3686b4, 0x4e398717, + 0x0e7c877a, 0x4e6784c5, 0x0ea884e6, 0x4eb1860f, + 0x4ef1860f, 0x0e3bd759, 0x4e32d630, 0x4e7dd79b, + 0x2e3a8738, 0x6e31860f, 0x2e7b8759, 0x6e7085ee, + 0x2eac856a, 0x6eaf85cd, 0x6ef085ee, 0x0eb6d6b4, + 0x4ea3d441, 0x4ef8d6f6, 0x0e209ffe, 0x4e309dee, + 0x0e649c62, 0x4e689ce6, 0x0ea59c83, 0x4ea99d07, + 0x2e3adf38, 0x6e22dc20, 0x6e7ddf9b, 0x0e7f97dd, + 0x4e6794c5, 0x0ea794c5, 0x4ebf97dd, 0x0e2dcd8b, + 0x4e3bcf59, 0x4e62cc20, 0x2e6097fe, 0x6e629420, + 0x2eb39651, 0x6ebe97bc, 0x0ebbcf59, 0x4eabcd49, + 0x4efbcf59, 0x2e2efdac, 0x6e31fe0f, 0x6e6dfd8b, + 0x0e2c656a, 0x4e336651, 0x0e7a6738, 0x4e7766d5, + 0x0eb96717, 0x4ea26420, 0x0e32f630, 0x4e2cf56a, + 0x4e68f4e6, 0x0e3e6fbc, 0x4e286ce6, 0x0e676cc5, + 0x4e676cc5, 0x0eb66eb4, 0x4eb36e51, 0x0eb1f60f, + 0x4eb3f651, 0x4efff7dd, 0x2e3c8f7a, 0x6e3e8fbc, + 0x2e638c41, 0x6e7d8f9b, 0x2ea28c20, 0x6eb68eb4, + 0x6efe8fbc, 0x0e31e60f, 0x4e2ee5ac, 0x4e6ce56a, + 0x0e3e37bc, 0x4e3e37bc, 0x0e753693, 0x4e7836f6, + 0x0eac356a, 0x4ea634a4, 0x4ee037fe, 0x2eb6e6b4, + 0x6eaae528, 0x6ee0e7fe, 0x0e333e51, 0x4e2c3d6a, + 0x0e7d3f9b, 0x4e643c62, 0x0eba3f38, 0x4ea63ca4, + 0x4ee53c83, 0x2e2ae528, 0x6e38e6f6, 0x6e73e651, + 0xba5fd3e3, 0x3a5f03e5, 0xfa411be4, 0x7a42cbe2, + 
0x93df03ff, 0xc820ffff, 0x8822fc7f, 0xc8247cbf, + 0x88267fff, 0x4e010fe0, 0x4e081fe1, 0x4e0c1fe1, + 0x4e0a1fe1, 0x4e071fe1, 0x4cc0ac3f, 0x05a08020, + 0x04b0e3e0, 0x0470e7e1, 0x042f9c20, 0x043f9c35, + 0x047f9c20, 0x04ff9c20, 0x04299420, 0x04319160, + 0x0461943e, 0x04a19020, 0x042053ff, 0x047f5401, + 0x25208028, 0x2538cfe0, 0x2578d001, 0x25b8efe2, + 0x25f8f007, 0xa400a3e0, 0xa4a8a7ea, 0xa547a814, + 0xa4084ffe, 0xa55c53e0, 0xa5e1540b, 0xe400fbf6, + 0xe408ffff, 0xe547e400, 0xe4014be0, 0xe4a84fe0, + 0xe5f15000, 0x858043e0, 0x85a043ff, 0xe59f5d08, + 0x1e601000, 0x1e603000, 0x1e621000, 0x1e623000, + 0x1e641000, 0x1e643000, 0x1e661000, 0x1e663000, + 0x1e681000, 0x1e683000, 0x1e6a1000, 0x1e6a3000, + 0x1e6c1000, 0x1e6c3000, 0x1e6e1000, 0x1e6e3000, + 0x1e701000, 0x1e703000, 0x1e721000, 0x1e723000, + 0x1e741000, 0x1e743000, 0x1e761000, 0x1e763000, + 0x1e781000, 0x1e783000, 0x1e7a1000, 0x1e7a3000, + 0x1e7c1000, 0x1e7c3000, 0x1e7e1000, 0x1e7e3000, + 0xf82d83a5, 0xf8380355, 0xf8381303, 0xf83a21f7, + 0xf8353303, 0xf8285299, 0xf8304051, 0xf8217300, + 0xf8246183, 0xf8bf815c, 0xf8ba0182, 0xf8b0103f, + 0xf8ad201d, 0xf8b3322c, 0xf8b6538d, 0xf8be403f, + 0xf8ba709c, 0xf8be60c4, 0xf8fe81fa, 0xf8e90188, + 0xf8e01034, 0xf8f82002, 0xf8e93358, 0xf8f0507e, + 0xf8ea4157, 0xf8e47050, 0xf8eb6148, 0xf86f8051, + 0xf86a018c, 0xf86f104d, 0xf8672354, 0xf8703044, + 0xf86451ec, 0xf87541f0, 0xf86b72f5, 0xf86c62fa, + 0xb83c816e, 0xb8380181, 0xb83f120a, 0xb8272062, + 0xb82d3233, 0xb8305023, 0xb82b40be, 0xb82873af, + 0xb83e6280, 0xb8a782f4, 0xb8bc0375, 0xb8b91025, + 0xb8b723f0, 0xb8a5312c, 0xb8bc53af, 0xb8b6427f, + 0xb8bf71c5, 0xb8b061ff, 0xb8fb8214, 0xb8ec012b, + 0xb8e6123e, 0xb8fb23dc, 0xb8e7328a, 0xb8ea5304, + 0xb8f142d1, 0xb8e371fd, 0xb8f66273, 0xb87681e2, + 0xb866020c, 0xb86b12ed, 0xb861227e, 0xb8653051, + 0xb87051b6, 0xb86a43b5, 0xb87b736c, 0xb86363e1, + 0xce312677, 0xce0e1b5b, 0xce7e8ed4, 0xce9ed858, + 0xce768151, 0xce718451, 0xcec08300, 0xce628ad9, + 0x04e30191, 0x04f0079d, 0x65dc0126, 0x65870887, + 0x658806c9, 0x0416b7db, 0x0440021a, 0x04d09903, + 0x04dabb55, 0x04138096, 0x04518071, 0x041008c1, + 0x0497bce9, 0x045eb4b6, 0x040813c8, 0x04ca0171, + 0x0481035c, 0x04dcadbc, 0x658098b0, 0x658d89ed, + 0x6586957a, 0x65879096, 0x65829233, 0x04ddac4e, + 0x6582b6e3, 0x6580a626, 0x6581b21b, 0x658dbc62, + 0x65819266, 0x65f8150c, 0x65b72151, 0x65b05db3, + 0x65f165c0, 0x04944ac8, 0x048f607b, 0x042430f4, + 0x04a83007, 0x046432d3, 0x04da3569, 0x04583e05, + 0x04592c36, 0x04c83608, 0x048a248f, 0x658727a8, + 0x65c633bc, 0x65982c49, 0x040120fc, }; // END Generated code -- do not edit diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp index 711d9db07e5..7ff9c018bef 100644 --- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp @@ -217,7 +217,7 @@ class Instruction_aarch64 { static void patch(address a, int msb, int lsb, uint64_t val) { int nbits = msb - lsb + 1; - guarantee(val < (1U << nbits), "Field too big for insn"); + guarantee(val < (1ULL << nbits), "Field too big for insn"); assert_cond(msb >= lsb); unsigned mask = (1U << nbits) - 1; val <<= lsb; @@ -445,8 +445,8 @@ class Address { } Register base() const { - guarantee((_mode == base_plus_offset | _mode == base_plus_offset_reg - | _mode == post | _mode == post_reg), + guarantee((_mode == base_plus_offset || _mode == base_plus_offset_reg + || _mode == post || _mode == post_reg), "wrong mode"); return _base; } @@ -1371,6 +1371,21 @@ class Assembler : public AbstractAssembler { #undef 
INSN +#define INSN(NAME, size, opc) \ + void NAME(FloatRegister Rt, Register Rn) { \ + starti; \ + f(size, 31, 30), f(0b111100, 29, 24), f(opc, 23, 22), f(0, 21); \ + f(0, 20, 12), f(0b01, 11, 10); \ + rf(Rn, 5), rf((Register)Rt, 0); \ + } + + INSN(ldrs, 0b10, 0b01); + INSN(ldrd, 0b11, 0b01); + INSN(ldrq, 0b00, 0b11); + +#undef INSN + + #define INSN(NAME, opc, V) \ void NAME(address dest, prfop op = PLDL1KEEP) { \ int64_t offset = (dest - pc()) >> 2; \ @@ -1508,6 +1523,21 @@ class Assembler : public AbstractAssembler { #undef INSN +/* SIMD extensions + * + * We just use FloatRegister in the following. They are exactly the same + * as SIMD registers. + */ +public: + + enum SIMD_Arrangement { + T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D, T1Q + }; + + enum SIMD_RegVariant { + B, H, S, D, Q + }; + enum shift_kind { LSL, LSR, ASR, ROR }; void op_shifted_reg(unsigned decode, @@ -1887,6 +1917,30 @@ void mvnw(Register Rd, Register Rm, i_fmovs(Vd, Vn); } +private: + void _fcvt_narrow_extend(FloatRegister Vd, SIMD_Arrangement Ta, + FloatRegister Vn, SIMD_Arrangement Tb, bool do_extend) { + assert((do_extend && (Tb >> 1) + 1 == (Ta >> 1)) + || (!do_extend && (Ta >> 1) + 1 == (Tb >> 1)), "Incompatible arrangement"); + starti; + int op30 = (do_extend ? Tb : Ta) & 1; + int op22 = ((do_extend ? Ta : Tb) >> 1) & 1; + f(0, 31), f(op30, 30), f(0b0011100, 29, 23), f(op22, 22); + f(0b100001011, 21, 13), f(do_extend ? 1 : 0, 12), f(0b10, 11, 10); + rf(Vn, 5), rf(Vd, 0); + } + +public: + void fcvtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) { + assert(Tb == T4H || Tb == T8H|| Tb == T2S || Tb == T4S, "invalid arrangement"); + _fcvt_narrow_extend(Vd, Ta, Vn, Tb, true); + } + + void fcvtn(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) { + assert(Ta == T4H || Ta == T8H|| Ta == T2S || Ta == T4S, "invalid arrangement"); + _fcvt_narrow_extend(Vd, Ta, Vn, Tb, false); + } + #undef INSN // Floating-point data-processing (2 source) @@ -2023,6 +2077,43 @@ void mvnw(Register Rd, Register Rm, #undef INSN + enum sign_kind { SIGNED, UNSIGNED }; + +private: + void _xcvtf_scalar_integer(sign_kind sign, unsigned sz, + FloatRegister Rd, FloatRegister Rn) { + starti; + f(0b01, 31, 30), f(sign == SIGNED ? 0 : 1, 29); + f(0b111100, 27, 23), f((sz >> 1) & 1, 22), f(0b100001110110, 21, 10); + rf(Rn, 5), rf(Rd, 0); + } + +public: +#define INSN(NAME, sign, sz) \ + void NAME(FloatRegister Rd, FloatRegister Rn) { \ + _xcvtf_scalar_integer(sign, sz, Rd, Rn); \ + } + + INSN(scvtfs, SIGNED, 0); + INSN(scvtfd, SIGNED, 1); + +#undef INSN + +private: + void _xcvtf_vector_integer(sign_kind sign, SIMD_Arrangement T, + FloatRegister Rd, FloatRegister Rn) { + assert(T == T2S || T == T4S || T == T2D, "invalid arrangement"); + starti; + f(0, 31), f(T & 1, 30), f(sign == SIGNED ? 0 : 1, 29); + f(0b011100, 28, 23), f((T >> 1) & 1, 22), f(0b100001110110, 21, 10); + rf(Rn, 5), rf(Rd, 0); + } + +public: + void scvtfv(SIMD_Arrangement T, FloatRegister Rd, FloatRegister Rn) { + _xcvtf_vector_integer(SIGNED, T, Rd, Rn); + } + // Floating-point compare void float_compare(unsigned op31, unsigned type, unsigned op, unsigned op2, @@ -2152,21 +2243,6 @@ void mvnw(Register Rd, Register Rm, INSN(frintzd, 0b01, 0b011); #undef INSN -/* SIMD extensions - * - * We just use FloatRegister in the following. They are exactly the same - * as SIMD registers. 
- */ - public: - - enum SIMD_Arrangement { - T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D, T1Q - }; - - enum SIMD_RegVariant { - B, H, S, D, Q - }; - private: static short SIMD_Size_in_bytes[]; @@ -2324,6 +2400,11 @@ void mvnw(Register Rd, Register Rm, INSN(smullv, 0, 0b110000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S INSN(umullv, 1, 0b110000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S INSN(umlalv, 1, 0b100000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S + INSN(maxv, 0, 0b011001, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S + INSN(minv, 0, 0b011011, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S + INSN(cmeq, 1, 0b100011, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D + INSN(cmgt, 0, 0b001101, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D + INSN(cmge, 0, 0b001111, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D #undef INSN @@ -2343,6 +2424,8 @@ void mvnw(Register Rd, Register Rm, INSN(negr, 1, 0b100000101110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D INSN(notr, 1, 0b100000010110, 0); // accepted arrangements: T8B, T16B INSN(addv, 0, 0b110001101110, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S + INSN(smaxv, 0, 0b110000101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S + INSN(sminv, 0, 0b110001101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S INSN(cls, 0, 0b100000010010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S INSN(clz, 1, 0b100000010010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S INSN(cnt, 0, 0b100000010110, 0); // accepted arrangements: T8B, T16B @@ -2407,6 +2490,9 @@ void mvnw(Register Rd, Register Rm, INSN(fmls, 0, 1, 0b110011); INSN(fmax, 0, 0, 0b111101); INSN(fmin, 0, 1, 0b111101); + INSN(fcmeq, 0, 0, 0b111001); + INSN(fcmgt, 1, 1, 0b111001); + INSN(fcmge, 1, 0, 0b111001); #undef INSN @@ -2464,6 +2550,40 @@ void mvnw(Register Rd, Register Rm, #undef INSN +#define INSN(NAME, opc) \ + void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, FloatRegister Va) { \ + starti; \ + assert(T == T16B, "arrangement must be T16B"); \ + f(0b11001110, 31, 24), f(opc, 23, 21), rf(Vm, 16), f(0b0, 15, 15), rf(Va, 10), rf(Vn, 5), rf(Vd, 0); \ + } + + INSN(eor3, 0b000); + INSN(bcax, 0b001); + +#undef INSN + +#define INSN(NAME, opc) \ + void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, unsigned imm) { \ + starti; \ + assert(T == T2D, "arrangement must be T2D"); \ + f(0b11001110, 31, 24), f(opc, 23, 21), rf(Vm, 16), f(imm, 15, 10), rf(Vn, 5), rf(Vd, 0); \ + } + + INSN(xar, 0b100); + +#undef INSN + +#define INSN(NAME, opc) \ + void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \ + starti; \ + assert(T == T2D, "arrangement must be T2D"); \ + f(0b11001110, 31, 24), f(opc, 23, 21), rf(Vm, 16), f(0b100011, 15, 10), rf(Vn, 5), rf(Vd, 0); \ + } + + INSN(rax1, 0b011); + +#undef INSN + #define INSN(NAME, opc) \ void NAME(FloatRegister Vd, FloatRegister Vn) { \ starti; \ @@ -2506,10 +2626,20 @@ void mvnw(Register Rd, Register Rm, rf(Vn, 5), rf(Vd, 0); } - // (double) {a, b} -> (a + b) - void faddpd(FloatRegister Vd, FloatRegister Vn) { + // (long) {a, b} -> (a + b) + void addpd(FloatRegister Vd, FloatRegister Vn) { starti; - f(0b0111111001110000110110, 31, 10); + f(0b0101111011110001101110, 31, 10); + rf(Vn, 5), rf(Vd, 0); + } + + // (Floating-point) {a, b} -> (a + b) + void 
faddp(FloatRegister Vd, FloatRegister Vn, SIMD_RegVariant type) { + assert(type == D || type == S, "Wrong type for faddp"); + starti; + f(0b011111100, 31, 23); + f(type == D ? 1 : 0, 22); + f(0b110000110110, 21, 10); rf(Vn, 5), rf(Vd, 0); } @@ -2558,6 +2688,8 @@ void mvnw(Register Rd, Register Rm, INSN(shl, 0, 0b010101, /* isSHR = */ false); INSN(sshr, 0, 0b000001, /* isSHR = */ true); INSN(ushr, 1, 0b000001, /* isSHR = */ true); + INSN(usra, 1, 0b000101, /* isSHR = */ true); + INSN(ssra, 0, 0b000101, /* isSHAR =*/ true); #undef INSN @@ -2576,29 +2708,48 @@ void mvnw(Register Rd, Register Rm, #undef INSN private: - void _ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) { + void _xshll(sign_kind sign, FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) { starti; /* The encodings for the immh:immb fields (bits 22:16) are - * 0001 xxx 8H, 8B/16b shift = xxx + * 0001 xxx 8H, 8B/16B shift = xxx * 001x xxx 4S, 4H/8H shift = xxxx * 01xx xxx 2D, 2S/4S shift = xxxxx * 1xxx xxx RESERVED */ assert((Tb >> 1) + 1 == (Ta >> 1), "Incompatible arrangement"); assert((1 << ((Tb>>1)+3)) > shift, "Invalid shift value"); - f(0, 31), f(Tb & 1, 30), f(0b1011110, 29, 23), f((1 << ((Tb>>1)+3))|shift, 22, 16); + f(0, 31), f(Tb & 1, 30), f(sign == SIGNED ? 0 : 1, 29), f(0b011110, 28, 23); + f((1 << ((Tb>>1)+3))|shift, 22, 16); f(0b101001, 15, 10), rf(Vn, 5), rf(Vd, 0); } public: void ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) { assert(Tb == T8B || Tb == T4H || Tb == T2S, "invalid arrangement"); - _ushll(Vd, Ta, Vn, Tb, shift); + _xshll(UNSIGNED, Vd, Ta, Vn, Tb, shift); } void ushll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) { assert(Tb == T16B || Tb == T8H || Tb == T4S, "invalid arrangement"); - _ushll(Vd, Ta, Vn, Tb, shift); + _xshll(UNSIGNED, Vd, Ta, Vn, Tb, shift); + } + + void uxtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) { + ushll(Vd, Ta, Vn, Tb, 0); + } + + void sshll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) { + assert(Tb == T8B || Tb == T4H || Tb == T2S, "invalid arrangement"); + _xshll(SIGNED, Vd, Ta, Vn, Tb, shift); + } + + void sshll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) { + assert(Tb == T16B || Tb == T8H || Tb == T4S, "invalid arrangement"); + _xshll(SIGNED, Vd, Ta, Vn, Tb, shift); + } + + void sxtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) { + sshll(Vd, Ta, Vn, Tb, 0); } // Move from general purpose register @@ -2649,6 +2800,15 @@ void mvnw(Register Rd, Register Rm, f(0b100001010010, 21, 10), rf(Vn, 5), rf(Vd, 0); } + void xtn(FloatRegister Vd, SIMD_Arrangement Tb, FloatRegister Vn, SIMD_Arrangement Ta) { + starti; + int size_b = (int)Tb >> 1; + int size_a = (int)Ta >> 1; + assert(size_b < 3 && size_b == size_a - 1, "Invalid size specifier"); + f(0, 31), f(Tb & 1, 30), f(0b001110, 29, 24), f(size_b, 23, 22); + f(0b100001001010, 21, 10), rf(Vn, 5), rf(Vd, 0); + } + void dup(FloatRegister Vd, SIMD_Arrangement T, Register Xs) { starti; @@ -3062,13 +3222,6 @@ void mvnw(Register Rd, Register Rm, Assembler(CodeBuffer* code) : AbstractAssembler(code) { } - virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, - Register tmp, - int offset) { - ShouldNotCallThis(); - return RegisterOrConstant(); - } - // Stack overflow checking virtual void 
bang_stack_with_offset(int offset); diff --git a/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp index 99469bb04c0..119bc979e0a 100644 --- a/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_CodeStubs_aarch64.cpp @@ -38,6 +38,19 @@ #define __ ce->masm()-> +void C1SafepointPollStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + InternalAddress safepoint_pc(ce->masm()->pc() - ce->masm()->offset() + safepoint_offset()); + __ adr(rscratch1, safepoint_pc); + __ str(rscratch1, Address(rthread, JavaThread::saved_exception_pc_offset())); + + assert(SharedRuntime::polling_page_return_handler_blob() != NULL, + "polling page return stub not created yet"); + address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); + + __ far_jump(RuntimeAddress(stub)); +} + void CounterOverflowStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); Metadata *m = _method->as_constant_ptr()->as_metadata(); diff --git a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp index b5ab058d44c..8dac1d9ebe8 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp @@ -504,7 +504,7 @@ void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { } } -void LIR_Assembler::return_op(LIR_Opr result) { +void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == r0, "word returns are in r0,"); // Pop the stack before the safepoint code @@ -514,7 +514,9 @@ void LIR_Assembler::return_op(LIR_Opr result) { __ reserved_stack_check(); } - __ fetch_and_read_polling_page(rscratch1, relocInfo::poll_return_type); + code_stub->set_safepoint_offset(__ offset()); + __ relocate(relocInfo::poll_return_type); + __ safepoint_poll(*code_stub->entry(), true /* at_return */, false /* acquire */, true /* in_nmethod */); __ ret(lr); } diff --git a/src/hotspot/cpu/aarch64/c1_globals_aarch64.hpp b/src/hotspot/cpu/aarch64/c1_globals_aarch64.hpp index 4e4262d5d6d..d2520014ed1 100644 --- a/src/hotspot/cpu/aarch64/c1_globals_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/c1_globals_aarch64.hpp @@ -34,8 +34,6 @@ #ifndef TIERED define_pd_global(bool, BackgroundCompilation, true ); -define_pd_global(bool, UseTLAB, true ); -define_pd_global(bool, ResizeTLAB, true ); define_pd_global(bool, InlineIntrinsics, true ); define_pd_global(bool, PreferInterpreterNativeStubs, false); define_pd_global(bool, ProfileTraps, false); diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp index 24b32187b7c..032e9e80756 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp @@ -538,6 +538,70 @@ void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, BIND(DONE); } +void C2_MacroAssembler::stringL_indexof_char(Register str1, Register cnt1, + Register ch, Register result, + Register tmp1, Register tmp2, Register tmp3) +{ + Label CH1_LOOP, HAS_ZERO, DO1_SHORT, DO1_LOOP, MATCH, NOMATCH, DONE; + Register cnt1_neg = cnt1; + Register ch1 = rscratch1; + Register result_tmp = rscratch2; + + cbz(cnt1, NOMATCH); + + cmp(cnt1, (u1)8); + br(LT, DO1_SHORT); + + orr(ch, ch, ch, LSL, 8); + orr(ch, ch, ch, LSL, 16); + orr(ch, ch, ch, LSL, 32); + + sub(cnt1, cnt1, 8); + mov(result_tmp, cnt1); + 
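// A scalar illustration of the SWAR "zero byte" test performed by the sub/orr/bics sequence in
// CH1_LOOP below (a sketch with an assumed helper name, not part of the patch). The search
// character is replicated into every byte of `ch` and XOR-ed against 8 Latin-1 bytes at a time,
// so a matching byte becomes zero and can be detected like this:

#include <cstdint>

// Nonzero iff some byte of x is 0x00; the high bit of each such byte ends up set.
static inline uint64_t has_zero_byte(uint64_t x) {
  return (x - 0x0101010101010101ULL) & ~(x | 0x7f7f7f7f7f7f7f7fULL);
}

// The rev/clz pair at HAS_ZERO then converts the surviving bit into the byte index of the
// first match within the 8-byte word.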
lea(str1, Address(str1, cnt1)); + sub(cnt1_neg, zr, cnt1); + + mov(tmp3, 0x0101010101010101); + + BIND(CH1_LOOP); + ldr(ch1, Address(str1, cnt1_neg)); + eor(ch1, ch, ch1); + sub(tmp1, ch1, tmp3); + orr(tmp2, ch1, 0x7f7f7f7f7f7f7f7f); + bics(tmp1, tmp1, tmp2); + br(NE, HAS_ZERO); + adds(cnt1_neg, cnt1_neg, 8); + br(LT, CH1_LOOP); + + cmp(cnt1_neg, (u1)8); + mov(cnt1_neg, 0); + br(LT, CH1_LOOP); + b(NOMATCH); + + BIND(HAS_ZERO); + rev(tmp1, tmp1); + clz(tmp1, tmp1); + add(cnt1_neg, cnt1_neg, tmp1, LSR, 3); + b(MATCH); + + BIND(DO1_SHORT); + mov(result_tmp, cnt1); + lea(str1, Address(str1, cnt1)); + sub(cnt1_neg, zr, cnt1); + BIND(DO1_LOOP); + ldrb(ch1, Address(str1, cnt1_neg)); + cmp(ch, ch1); + br(EQ, MATCH); + adds(cnt1_neg, cnt1_neg, 1); + br(LT, DO1_LOOP); + BIND(NOMATCH); + mov(result, -1); + b(DONE); + BIND(MATCH); + add(result, result_tmp, cnt1_neg); + BIND(DONE); +} + // Compare strings. void C2_MacroAssembler::string_compare(Register str1, Register str2, Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, diff --git a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp index f359e35974a..b2f6226bf9e 100644 --- a/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp @@ -45,4 +45,8 @@ Register ch, Register result, Register tmp1, Register tmp2, Register tmp3); + void stringL_indexof_char(Register str1, Register cnt1, + Register ch, Register result, + Register tmp1, Register tmp2, Register tmp3); + #endif // CPU_AARCH64_C2_MACROASSEMBLER_AARCH64_HPP diff --git a/src/hotspot/cpu/aarch64/c2_globals_aarch64.hpp b/src/hotspot/cpu/aarch64/c2_globals_aarch64.hpp index 973cbe740bd..5a019eba6ae 100644 --- a/src/hotspot/cpu/aarch64/c2_globals_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/c2_globals_aarch64.hpp @@ -33,8 +33,6 @@ // (see c2_globals.hpp). Alpha-sorted. define_pd_global(bool, BackgroundCompilation, true); -define_pd_global(bool, UseTLAB, true); -define_pd_global(bool, ResizeTLAB, true); define_pd_global(bool, CICompileOSR, true); define_pd_global(bool, InlineIntrinsics, true); define_pd_global(bool, PreferInterpreterNativeStubs, false); diff --git a/src/hotspot/cpu/aarch64/c2_safepointPollStubTable_aarch64.cpp b/src/hotspot/cpu/aarch64/c2_safepointPollStubTable_aarch64.cpp new file mode 100644 index 00000000000..fb36406fbde --- /dev/null +++ b/src/hotspot/cpu/aarch64/c2_safepointPollStubTable_aarch64.cpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "opto/compile.hpp" +#include "opto/node.hpp" +#include "opto/output.hpp" +#include "runtime/sharedRuntime.hpp" + +#define __ masm. +void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointPollStub* entry) const { + assert(SharedRuntime::polling_page_return_handler_blob() != NULL, + "polling page return stub not created yet"); + address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); + + RuntimeAddress callback_addr(stub); + + __ bind(entry->_stub_label); + InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset); + __ adr(rscratch1, safepoint_pc); + __ str(rscratch1, Address(rthread, JavaThread::saved_exception_pc_offset())); + __ far_jump(callback_addr); +} +#undef __ diff --git a/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp b/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp index 75cc249cf08..2e89960778e 100644 --- a/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/compiledIC_aarch64.cpp @@ -36,6 +36,9 @@ #define __ _masm. address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { + precond(cbuf.stubs()->start() != badAddress); + precond(cbuf.stubs()->end() != badAddress); + // Stub is fixed up when the corresponding call is converted from // calling compiled code to calling interpreted code. // mov rmethod, 0 diff --git a/src/hotspot/cpu/aarch64/frame_aarch64.cpp b/src/hotspot/cpu/aarch64/frame_aarch64.cpp index 46261c70dbe..15c5e16f380 100644 --- a/src/hotspot/cpu/aarch64/frame_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/frame_aarch64.cpp @@ -37,6 +37,7 @@ #include "runtime/monitorChunk.hpp" #include "runtime/os.inline.hpp" #include "runtime/signature.hpp" +#include "runtime/stackWatermarkSet.hpp" #include "runtime/stubCodeGenerator.hpp" #include "runtime/stubRoutines.hpp" #include "vmreg_aarch64.inline.hpp" @@ -476,8 +477,8 @@ frame frame::sender_for_compiled_frame(RegisterMap* map) const { } //------------------------------------------------------------------------------ -// frame::sender -frame frame::sender(RegisterMap* map) const { +// frame::sender_raw +frame frame::sender_raw(RegisterMap* map) const { // Default is we done have to follow them. 
The sender_for_xxx will // update it accordingly map->set_include_argument_oops(false); @@ -499,6 +500,16 @@ frame frame::sender(RegisterMap* map) const { return frame(sender_sp(), link(), sender_pc()); } +frame frame::sender(RegisterMap* map) const { + frame result = sender_raw(map); + + if (map->process_frames()) { + StackWatermarkSet::on_iteration(map->thread(), result); + } + + return result; +} + bool frame::is_interpreted_frame_valid(JavaThread* thread) const { assert(is_interpreted_frame(), "Not an interpreted frame"); // These are reasonable sanity checks @@ -651,11 +662,12 @@ intptr_t* frame::real_fp() const { #undef DESCRIBE_FP_OFFSET -#define DESCRIBE_FP_OFFSET(name) \ - { \ - uintptr_t *p = (uintptr_t *)fp; \ - printf("0x%016lx 0x%016lx %s\n", (uintptr_t)(p + frame::name##_offset), \ - p[frame::name##_offset], #name); \ +#define DESCRIBE_FP_OFFSET(name) \ + { \ + uintptr_t *p = (uintptr_t *)fp; \ + printf(INTPTR_FORMAT " " INTPTR_FORMAT " %s\n", \ + (uintptr_t)(p + frame::name##_offset), \ + p[frame::name##_offset], #name); \ } static THREAD_LOCAL uintptr_t nextfp; diff --git a/src/hotspot/cpu/aarch64/frame_aarch64.hpp b/src/hotspot/cpu/aarch64/frame_aarch64.hpp index 6c639a05961..e2490d28611 100644 --- a/src/hotspot/cpu/aarch64/frame_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/frame_aarch64.hpp @@ -161,4 +161,7 @@ static jint interpreter_frame_expression_stack_direction() { return -1; } + // returns the sending frame, without applying any barriers + frame sender_raw(RegisterMap* map) const; + #endif // CPU_AARCH64_FRAME_AARCH64_HPP diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/c1/shenandoahBarrierSetC1_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shenandoah/c1/shenandoahBarrierSetC1_aarch64.cpp index b3530509b03..db9c7577e60 100644 --- a/src/hotspot/cpu/aarch64/gc/shenandoah/c1/shenandoahBarrierSetC1_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/c1/shenandoahBarrierSetC1_aarch64.cpp @@ -109,7 +109,7 @@ LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRIt __ xchg(access.resolved_addr(), value_opr, result, tmp); if (access.is_oop()) { - result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), false); + result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), ShenandoahBarrierSet::AccessKind::NORMAL); LIR_Opr tmp = gen->new_register(type); __ move(result, tmp); result = tmp; diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp index c2d53df4f67..840464b251f 100644 --- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.cpp @@ -43,8 +43,6 @@ #define __ masm-> -address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL; - void ShenandoahBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, Register src, Register dst, Register count, RegSet saved_regs) { if (is_oop) { @@ -227,18 +225,18 @@ void ShenandoahBarrierSetAssembler::resolve_forward_pointer_not_null(MacroAssemb } } -void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr) { +void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, ShenandoahBarrierSet::AccessKind kind) { assert(ShenandoahLoadRefBarrier, "Should be enabled"); assert(dst 
!= rscratch2, "need rscratch2"); assert_different_registers(load_addr.base(), load_addr.index(), rscratch1, rscratch2); - Label done; + Label heap_stable, not_cset; __ enter(); Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); __ ldrb(rscratch2, gc_state); // Check for heap stability - __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, done); + __ tbz(rscratch2, ShenandoahHeap::HAS_FORWARDED_BITPOS, heap_stable); // use r1 for load address Register result_dst = dst; @@ -253,51 +251,48 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembl __ lea(r1, load_addr); __ mov(r0, dst); - __ far_call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb()))); - - __ mov(result_dst, r0); - __ pop(to_save, sp); - - __ bind(done); - __ leave(); -} - -void ShenandoahBarrierSetAssembler::load_reference_barrier_native(MacroAssembler* masm, Register dst, Address load_addr) { - if (!ShenandoahLoadRefBarrier) { - return; + // Test for in-cset + if (kind == ShenandoahBarrierSet::AccessKind::NORMAL) { + __ mov(rscratch2, ShenandoahHeap::in_cset_fast_test_addr()); + __ lsr(rscratch1, r0, ShenandoahHeapRegion::region_size_bytes_shift_jint()); + __ ldrb(rscratch2, Address(rscratch2, rscratch1)); + __ tbz(rscratch2, 0, not_cset); } - assert(dst != rscratch2, "need rscratch2"); - - Label is_null; - Label done; - - __ block_comment("load_reference_barrier_native { "); - - __ cbz(dst, is_null); - - __ enter(); - - Address gc_state(rthread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); - __ ldrb(rscratch2, gc_state); - - // Check for heap in evacuation phase - __ tbz(rscratch2, ShenandoahHeap::EVACUATION_BITPOS, done); - - __ mov(rscratch2, dst); __ push_call_clobbered_registers(); - __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native)); - __ lea(r1, load_addr); - __ mov(r0, rscratch2); + switch (kind) { + case ShenandoahBarrierSet::AccessKind::NORMAL: + if (UseCompressedOops) { + __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); + } else { + __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); + } + break; + case ShenandoahBarrierSet::AccessKind::WEAK: + if (UseCompressedOops) { + __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow)); + } else { + __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak)); + } + break; + case ShenandoahBarrierSet::AccessKind::NATIVE: + __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak)); + break; + default: + ShouldNotReachHere(); + } __ blr(lr); - __ mov(rscratch2, r0); + __ mov(rscratch1, r0); __ pop_call_clobbered_registers(); - __ mov(dst, rscratch2); + __ mov(r0, rscratch1); - __ bind(done); + __ bind(not_cset); + + __ mov(result_dst, r0); + __ pop(to_save, sp); + + __ bind(heap_stable); __ leave(); - __ bind(is_null); - __ block_comment("} load_reference_barrier_native"); } void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Register dst, Register tmp) { @@ -308,15 +303,6 @@ void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Regis } } -void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr) { - if (ShenandoahLoadRefBarrier) { - Label is_null; - __ cbz(dst, is_null); - load_reference_barrier_not_null(masm, dst, load_addr); - __ bind(is_null); - } -} - // // Arguments: 
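// A minimal sketch of the in-cset fast test emitted above in load_reference_barrier (semantics
// inferred from the generated lsr/ldrb/tbz sequence; the map base and region shift are
// assumptions based on the calls to ShenandoahHeap::in_cset_fast_test_addr() and
// ShenandoahHeapRegion::region_size_bytes_shift_jint(), not part of the patch):

#include <cstdint>

// One byte per heap region; bit 0 set means the region is in the current collection set.
static inline bool in_collection_set(const uint8_t* cset_map, uintptr_t obj, unsigned region_shift) {
  return (cset_map[obj >> region_shift] & 1) != 0;
}

// Objects outside the collection set skip the runtime call entirely (the tbz to not_cset).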
// @@ -352,11 +338,8 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet d BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); - if (ShenandoahBarrierSet::use_load_reference_barrier_native(decorators, type)) { - load_reference_barrier_native(masm, dst, src); - } else { - load_reference_barrier(masm, dst, src); - } + ShenandoahBarrierSet::AccessKind kind = ShenandoahBarrierSet::access_kind(decorators, type); + load_reference_barrier(masm, dst, src, kind); if (dst != result_dst) { __ mov(result_dst, dst); @@ -477,7 +460,8 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, bool is_narrow = UseCompressedOops; Assembler::operand_size size = is_narrow ? Assembler::word : Assembler::xword; - assert_different_registers(addr, expected, new_val, tmp1, tmp2); + assert_different_registers(addr, expected, tmp1, tmp2); + assert_different_registers(addr, new_val, tmp1, tmp2); Label step4, done; @@ -669,10 +653,18 @@ void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assemble __ bind(slow_path); ce->store_parameter(res, 0); ce->store_parameter(addr, 1); - if (stub->is_native()) { - __ far_call(RuntimeAddress(bs->load_reference_barrier_native_rt_code_blob()->code_begin())); - } else { - __ far_call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin())); + switch (stub->kind()) { + case ShenandoahBarrierSet::AccessKind::NORMAL: + __ far_call(RuntimeAddress(bs->load_reference_barrier_normal_rt_code_blob()->code_begin())); + break; + case ShenandoahBarrierSet::AccessKind::WEAK: + __ far_call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin())); + break; + case ShenandoahBarrierSet::AccessKind::NATIVE: + __ far_call(RuntimeAddress(bs->load_reference_barrier_native_rt_code_blob()->code_begin())); + break; + default: + ShouldNotReachHere(); } __ b(*stub->continuation()); @@ -728,19 +720,33 @@ void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAss __ epilogue(); } -void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, bool is_native) { +void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, ShenandoahBarrierSet::AccessKind kind) { __ prologue("shenandoah_load_reference_barrier", false); // arg0 : object to be resolved __ push_call_clobbered_registers(); __ load_parameter(0, r0); __ load_parameter(1, r1); - if (is_native) { - __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native)); - } else if (UseCompressedOops) { - __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); - } else { - __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); + switch (kind) { + case ShenandoahBarrierSet::AccessKind::NORMAL: + if (UseCompressedOops) { + __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); + } else { + __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); + } + break; + case ShenandoahBarrierSet::AccessKind::WEAK: + if (UseCompressedOops) { + __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow)); + } else { + __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak)); + } + break; + case ShenandoahBarrierSet::AccessKind::NATIVE: + __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak)); + break; + 
default: + ShouldNotReachHere(); } __ blr(lr); __ mov(rscratch1, r0); @@ -753,67 +759,3 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s #undef __ #endif // COMPILER1 - -address ShenandoahBarrierSetAssembler::shenandoah_lrb() { - assert(_shenandoah_lrb != NULL, "need load reference barrier stub"); - return _shenandoah_lrb; -} - -#define __ cgen->assembler()-> - -// Shenandoah load reference barrier. -// -// Input: -// r0: OOP to evacuate. Not null. -// r1: load address -// -// Output: -// r0: Pointer to evacuated OOP. -// -// Trash rscratch1, rscratch2. Preserve everything else. -address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) { - - __ align(6); - StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb"); - address start = __ pc(); - - Label slow_path; - __ mov(rscratch2, ShenandoahHeap::in_cset_fast_test_addr()); - __ lsr(rscratch1, r0, ShenandoahHeapRegion::region_size_bytes_shift_jint()); - __ ldrb(rscratch2, Address(rscratch2, rscratch1)); - __ tbnz(rscratch2, 0, slow_path); - __ ret(lr); - - __ bind(slow_path); - __ enter(); // required for proper stackwalking of RuntimeStub frame - - __ push_call_clobbered_registers(); - - if (UseCompressedOops) { - __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)); - } else { - __ mov(lr, CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier)); - } - __ blr(lr); - __ mov(rscratch1, r0); - __ pop_call_clobbered_registers(); - __ mov(r0, rscratch1); - - __ leave(); // required for proper stackwalking of RuntimeStub frame - __ ret(lr); - - return start; -} - -#undef __ - -void ShenandoahBarrierSetAssembler::barrier_stubs_init() { - if (ShenandoahLoadRefBarrier) { - int stub_code_size = 2048; - ResourceMark rm; - BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size); - CodeBuffer buf(bb); - StubCodeGenerator cgen(&buf); - _shenandoah_lrb = generate_shenandoah_lrb(&cgen); - } -} diff --git a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp index 88aa9a2b95f..60303725fd8 100644 --- a/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/gc/shenandoah/shenandoahBarrierSetAssembler_aarch64.hpp @@ -27,6 +27,7 @@ #include "asm/macroAssembler.hpp" #include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shenandoah/shenandoahBarrierSet.hpp" #ifdef COMPILER1 class LIR_Assembler; class ShenandoahPreBarrierStub; @@ -38,8 +39,6 @@ class StubCodeGenerator; class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { private: - static address _shenandoah_lrb; - void satb_write_barrier_pre(MacroAssembler* masm, Register obj, Register pre_val, @@ -57,14 +56,9 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { void resolve_forward_pointer(MacroAssembler* masm, Register dst, Register tmp = noreg); void resolve_forward_pointer_not_null(MacroAssembler* masm, Register dst, Register tmp = noreg); - void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr); - void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address load_addr); - void load_reference_barrier_native(MacroAssembler* masm, Register dst, Address load_addr); - - address generate_shenandoah_lrb(StubCodeGenerator* cgen); + void load_reference_barrier(MacroAssembler* masm, Register dst, Address load_addr, 
ShenandoahBarrierSet::AccessKind kind); public: - static address shenandoah_lrb(); void storeval_barrier(MacroAssembler* masm, Register dst, Register tmp); @@ -72,7 +66,7 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); - void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, bool is_native); + void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, ShenandoahBarrierSet::AccessKind kind); #endif virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, @@ -85,8 +79,6 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { Register obj, Register tmp, Label& slowpath); void cmpxchg_oop(MacroAssembler* masm, Register addr, Register expected, Register new_val, bool acquire, bool release, bool is_cae, Register result); - - virtual void barrier_stubs_init(); }; #endif // CPU_AARCH64_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_AARCH64_HPP diff --git a/src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.hpp b/src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.hpp index 35e261fa7ae..3187808b65a 100644 --- a/src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/gc/z/zGlobals_aarch64.hpp @@ -24,10 +24,9 @@ #ifndef CPU_AARCH64_GC_Z_ZGLOBALS_AARCH64_HPP #define CPU_AARCH64_GC_Z_ZGLOBALS_AARCH64_HPP -const size_t ZPlatformGranuleSizeShift = 21; // 2MB -const size_t ZPlatformHeapViews = 3; -const size_t ZPlatformNMethodDisarmedOffset = 4; -const size_t ZPlatformCacheLineSize = 64; +const size_t ZPlatformGranuleSizeShift = 21; // 2MB +const size_t ZPlatformHeapViews = 3; +const size_t ZPlatformCacheLineSize = 64; size_t ZPlatformAddressOffsetBits(); size_t ZPlatformAddressMetadataShift(); diff --git a/src/hotspot/cpu/aarch64/globals_aarch64.hpp b/src/hotspot/cpu/aarch64/globals_aarch64.hpp index 294b6b13495..9ad1360fa91 100644 --- a/src/hotspot/cpu/aarch64/globals_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/globals_aarch64.hpp @@ -93,6 +93,8 @@ define_pd_global(intx, InlineSmallCode, 1000); "Use SIMD instructions in generated array equals code") \ product(bool, UseSimpleArrayEquals, false, \ "Use simpliest and shortest implementation for array equals") \ + product(bool, UseSIMDForBigIntegerShiftIntrinsics, true, \ + "Use SIMD instructions for left/right shift of BigInteger") \ product(bool, AvoidUnalignedAccesses, false, \ "Avoid generating unaligned memory accesses") \ product(bool, UseLSE, false, \ diff --git a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp index 1d635429336..09632154630 100644 --- a/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/interp_masm_aarch64.cpp @@ -473,7 +473,7 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, if (needs_thread_local_poll) { NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); - ldr(rscratch2, Address(rthread, Thread::polling_page_offset())); + ldr(rscratch2, Address(rthread, Thread::polling_word_offset())); tbnz(rscratch2, exact_log2(SafepointMechanism::poll_bit()), safepoint); } @@ -521,6 +521,7 @@ void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { // remove activation // +// Apply stack watermark barrier. // Unlock the receiver if this is a synchronized method. 
// Unlock any Java monitors from syncronized blocks. // Remove the activation from the stack. @@ -541,6 +542,21 @@ void InterpreterMacroAssembler::remove_activation( // result check if synchronized method Label unlocked, unlock, no_unlock; + // The below poll is for the stack watermark barrier. It allows fixing up frames lazily, + // that would normally not be safe to use. Such bad returns into unsafe territory of + // the stack, will call InterpreterRuntime::at_unwind. + Label slow_path; + Label fast_path; + safepoint_poll(slow_path, true /* at_return */, false /* acquire */, false /* in_nmethod */); + br(Assembler::AL, fast_path); + bind(slow_path); + push(state); + set_last_Java_frame(esp, rfp, (address)pc(), rscratch1); + super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), rthread); + reset_last_Java_frame(true); + pop(state); + bind(fast_path); + // get the value of _do_not_unlock_if_synchronized into r3 const Address do_not_unlock_if_synchronized(rthread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); diff --git a/src/hotspot/cpu/aarch64/jvmciCodeInstaller_aarch64.cpp b/src/hotspot/cpu/aarch64/jvmciCodeInstaller_aarch64.cpp index 3156b4b8e83..f41d79e1021 100644 --- a/src/hotspot/cpu/aarch64/jvmciCodeInstaller_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/jvmciCodeInstaller_aarch64.cpp @@ -21,8 +21,9 @@ * questions. */ - #include "jvmci/jvmci.hpp" - #include "jvmci/jvmciCodeInstaller.hpp" +#include "precompiled.hpp" +#include "jvmci/jvmci.hpp" +#include "jvmci/jvmciCodeInstaller.hpp" #include "jvmci/jvmciRuntime.hpp" #include "jvmci/jvmciCompilerToVM.hpp" #include "jvmci/jvmciJavaClasses.hpp" diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp index 81fd87614e5..005ad3f5930 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp @@ -288,27 +288,21 @@ address MacroAssembler::target_addr_for_insn(address insn_addr, unsigned insn) { return address(((uint64_t)insn_addr + (offset << 2))); } -void MacroAssembler::safepoint_poll(Label& slow_path) { - ldr(rscratch1, Address(rthread, Thread::polling_page_offset())); - tbnz(rscratch1, exact_log2(SafepointMechanism::poll_bit()), slow_path); -} - -// Just like safepoint_poll, but use an acquiring load for thread- -// local polling. -// -// We need an acquire here to ensure that any subsequent load of the -// global SafepointSynchronize::_state flag is ordered after this load -// of the local Thread::_polling page. We don't want this poll to -// return false (i.e. not safepointing) and a later poll of the global -// SafepointSynchronize::_state spuriously to return true. -// -// This is to avoid a race when we're in a native->Java transition -// racing the code which wakes up from a safepoint. -// -void MacroAssembler::safepoint_poll_acquire(Label& slow_path) { - lea(rscratch1, Address(rthread, Thread::polling_page_offset())); - ldar(rscratch1, rscratch1); - tbnz(rscratch1, exact_log2(SafepointMechanism::poll_bit()), slow_path); +void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) { + if (acquire) { + lea(rscratch1, Address(rthread, Thread::polling_word_offset())); + ldar(rscratch1, rscratch1); + } else { + ldr(rscratch1, Address(rthread, Thread::polling_word_offset())); + } + if (at_return) { + // Note that when in_nmethod is set, the stack pointer is incremented before the poll. 
Therefore, + // we may safely use the sp instead to perform the stack watermark check. + cmp(in_nmethod ? sp : rfp, rscratch1); + br(Assembler::HI, slow_path); + } else { + tbnz(rscratch1, exact_log2(SafepointMechanism::poll_bit()), slow_path); + } } void MacroAssembler::reset_last_Java_frame(bool clear_fp) { @@ -711,7 +705,7 @@ void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, in // Maybe emit a call via a trampoline. If the code cache is small // trampolines won't be emitted. -address MacroAssembler::trampoline_call(Address entry, CodeBuffer *cbuf) { +address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) { assert(JavaThread::current()->is_Compiler_thread(), "just checking"); assert(entry.rspec().type() == relocInfo::runtime_call_type || entry.rspec().type() == relocInfo::opt_virtual_call_type @@ -732,6 +726,7 @@ address MacroAssembler::trampoline_call(Address entry, CodeBuffer *cbuf) { if (!in_scratch_emit_size) { address stub = emit_trampoline_stub(offset(), entry.target()); if (stub == NULL) { + postcond(pc() == badAddress); return NULL; // CodeCache is full } } @@ -745,6 +740,7 @@ address MacroAssembler::trampoline_call(Address entry, CodeBuffer *cbuf) { bl(pc()); } // just need to return a non-null address + postcond(pc() != badAddress); return pc(); } @@ -938,23 +934,6 @@ void MacroAssembler::check_and_handle_earlyret(Register java_thread) { } void MacroAssembler::check_and_handle_popframe(Register java_thread) { } - -RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, - Register tmp, - int offset) { - intptr_t value = *delayed_value_addr; - if (value != 0) - return RegisterOrConstant(value + offset); - - // load indirectly to solve generation ordering problem - ldr(tmp, ExternalAddress((address) delayed_value_addr)); - - if (offset != 0) - add(tmp, tmp, offset); - - return RegisterOrConstant(tmp); -} - // Look up the method for a megamorphic invokeinterface call. // The target method is determined by . // The receiver klass is in recv_klass. @@ -1834,7 +1813,7 @@ bool MacroAssembler::try_merge_ldst(Register rt, const Address &adr, size_t size return true; } else { assert(size_in_bytes == 8 || size_in_bytes == 4, "only 8 bytes or 4 bytes load/store is supported."); - const unsigned mask = size_in_bytes - 1; + const uint64_t mask = size_in_bytes - 1; if (adr.getMode() == Address::base_plus_offset && (adr.offset() & mask) == 0) { // only supports base_plus_offset. code()->set_last_insn(pc()); @@ -2898,7 +2877,7 @@ void MacroAssembler::merge_ldst(Register rt, // Overwrite previous generated binary. code_section()->set_end(prev); - const int sz = prev_ldst->size_in_bytes(); + const size_t sz = prev_ldst->size_in_bytes(); assert(sz == 8 || sz == 4, "only supports 64/32bit merging."); if (!is_store) { BLOCK_COMMENT("merged ldr pair"); @@ -4405,13 +4384,6 @@ void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) ldr(dest, Address(rthread, Thread::polling_page_offset())); } -// Move the address of the polling page into r, then read the polling -// page. -address MacroAssembler::fetch_and_read_polling_page(Register r, relocInfo::relocType rtype) { - get_polling_page(r, rtype); - return read_polling_page(r, rtype); -} - // Read the polling page. The address of the polling page must // already be in r. 
address MacroAssembler::read_polling_page(Register r, relocInfo::relocType rtype) { @@ -4503,7 +4475,7 @@ void MacroAssembler::remove_frame(int framesize) { // This method checks if provided byte array contains byte with highest bit set. -void MacroAssembler::has_negatives(Register ary1, Register len, Register result) { +address MacroAssembler::has_negatives(Register ary1, Register len, Register result) { // Simple and most common case of aligned small array which is not at the // end of memory page is placed here. All other cases are in stub. Label LOOP, END, STUB, STUB_LONG, SET_RESULT, DONE; @@ -4540,27 +4512,38 @@ void MacroAssembler::has_negatives(Register ary1, Register len, Register result) b(SET_RESULT); BIND(STUB); - RuntimeAddress has_neg = RuntimeAddress(StubRoutines::aarch64::has_negatives()); + RuntimeAddress has_neg = RuntimeAddress(StubRoutines::aarch64::has_negatives()); assert(has_neg.target() != NULL, "has_negatives stub has not been generated"); - trampoline_call(has_neg); + address tpc1 = trampoline_call(has_neg); + if (tpc1 == NULL) { + DEBUG_ONLY(reset_labels(STUB_LONG, SET_RESULT, DONE)); + postcond(pc() == badAddress); + return NULL; + } b(DONE); BIND(STUB_LONG); - RuntimeAddress has_neg_long = RuntimeAddress( - StubRoutines::aarch64::has_negatives_long()); + RuntimeAddress has_neg_long = RuntimeAddress(StubRoutines::aarch64::has_negatives_long()); assert(has_neg_long.target() != NULL, "has_negatives stub has not been generated"); - trampoline_call(has_neg_long); + address tpc2 = trampoline_call(has_neg_long); + if (tpc2 == NULL) { + DEBUG_ONLY(reset_labels(SET_RESULT, DONE)); + postcond(pc() == badAddress); + return NULL; + } b(DONE); BIND(SET_RESULT); cset(result, NE); // set true or false BIND(DONE); + postcond(pc() != badAddress); + return pc(); } -void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, - Register tmp4, Register tmp5, Register result, - Register cnt1, int elem_size) { +address MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, + Register tmp4, Register tmp5, Register result, + Register cnt1, int elem_size) { Label DONE, SAME; Register tmp1 = rscratch1; Register tmp2 = rscratch2; @@ -4664,7 +4647,7 @@ void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, } } } else { - Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB, EARLY_OUT, + Label NEXT_DWORD, SHORT, TAIL, TAIL2, STUB, CSET_EQ, LAST_CHECK; mov(result, false); cbz(a1, DONE); @@ -4723,10 +4706,14 @@ void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, cbnz(tmp5, DONE); RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_array_equals()); assert(stub.target() != NULL, "array_equals_long stub has not been generated"); - trampoline_call(stub); + address tpc = trampoline_call(stub); + if (tpc == NULL) { + DEBUG_ONLY(reset_labels(SHORT, LAST_CHECK, CSET_EQ, SAME, DONE)); + postcond(pc() == badAddress); + return NULL; + } b(DONE); - bind(EARLY_OUT); // (a1 != null && a2 == null) || (a1 != null && a2 != null && a1 == a2) // so, if a2 == null => return false(0), else return true, so we can return a2 mov(result, a2); @@ -4753,6 +4740,8 @@ void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, bind(DONE); BLOCK_COMMENT("} array_equals"); + postcond(pc() != badAddress); + return pc(); } // Compare Strings @@ -4860,7 +4849,7 @@ const int MacroAssembler::zero_words_block_size = 8; // cnt: Count in HeapWords. // // ptr, cnt, rscratch1, and rscratch2 are clobbered. 
-void MacroAssembler::zero_words(Register ptr, Register cnt) +address MacroAssembler::zero_words(Register ptr, Register cnt) { assert(is_power_of_2(zero_words_block_size), "adjust this"); assert(ptr == r10 && cnt == r11, "mismatch in register usage"); @@ -4870,10 +4859,15 @@ void MacroAssembler::zero_words(Register ptr, Register cnt) Label around; br(LO, around); { - RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::aarch64::zero_blocks()); + RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::aarch64::zero_blocks()); assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated"); if (StubRoutines::aarch64::complete()) { - trampoline_call(zero_blocks); + address tpc = trampoline_call(zero_blocks); + if (tpc == NULL) { + DEBUG_ONLY(reset_labels(around)); + postcond(pc() == badAddress); + return NULL; + } } else { bl(zero_blocks); } @@ -4894,6 +4888,8 @@ void MacroAssembler::zero_words(Register ptr, Register cnt) bind(l); } BLOCK_COMMENT("} zero_words"); + postcond(pc() != badAddress); + return pc(); } // base: Address of a buffer to be zeroed, 8 bytes aligned. @@ -4906,14 +4902,15 @@ void MacroAssembler::zero_words(Register base, uint64_t cnt) if (i) str(zr, Address(base)); if (cnt <= SmallArraySize / BytesPerLong) { - for (; i < (int)cnt; i += 2) + for (; i < (int)cnt; i += 2) { stp(zr, zr, Address(base, i * wordSize)); + } } else { const int unroll = 4; // Number of stp(zr, zr) instructions we'll unroll int remainder = cnt % (2 * unroll); - for (; i < remainder; i += 2) + for (; i < remainder; i += 2) { stp(zr, zr, Address(base, i * wordSize)); - + } Label loop; Register cnt_reg = rscratch1; Register loop_base = rscratch2; @@ -4923,8 +4920,9 @@ void MacroAssembler::zero_words(Register base, uint64_t cnt) add(loop_base, base, (remainder - 2) * wordSize); bind(loop); sub(cnt_reg, cnt_reg, 2 * unroll); - for (i = 1; i < unroll; i++) + for (i = 1; i < unroll; i++) { stp(zr, zr, Address(loop_base, 2 * i * wordSize)); + } stp(zr, zr, Address(pre(loop_base, 2 * unroll * wordSize))); cbnz(cnt_reg, loop); } @@ -5140,9 +5138,9 @@ void MacroAssembler::encode_iso_array(Register src, Register dst, // Inflate byte[] array to char[]. -void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len, - FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3, - Register tmp4) { +address MacroAssembler::byte_array_inflate(Register src, Register dst, Register len, + FloatRegister vtmp1, FloatRegister vtmp2, + FloatRegister vtmp3, Register tmp4) { Label big, done, after_init, to_stub; assert_different_registers(src, dst, len, tmp4, rscratch1); @@ -5179,9 +5177,14 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len if (SoftwarePrefetchHintDistance >= 0) { bind(to_stub); - RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_byte_array_inflate()); + RuntimeAddress stub = RuntimeAddress(StubRoutines::aarch64::large_byte_array_inflate()); assert(stub.target() != NULL, "large_byte_array_inflate stub has not been generated"); - trampoline_call(stub); + address tpc = trampoline_call(stub); + if (tpc == NULL) { + DEBUG_ONLY(reset_labels(big, done)); + postcond(pc() == badAddress); + return NULL; + } b(after_init); } @@ -5235,6 +5238,8 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len strq(vtmp3, Address(dst, -16)); bind(done); + postcond(pc() != badAddress); + return pc(); } // Compress char[] array to byte[]. 
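// ----------------------------------------------------------------------------
// A minimal, self-contained sketch (not HotSpot code, and not part of this patch)
// of the two checks the new MacroAssembler::safepoint_poll() above emits. Names
// are illustrative; kPollBit is an assumed stand-in for SafepointMechanism::poll_bit().
#include <cstdint>

constexpr uintptr_t kPollBit = 1;  // assumed value of SafepointMechanism::poll_bit()

// at_return == true: the stack (or frame) pointer is compared against the
// per-thread polling word, which doubles as the stack watermark; a pointer
// above the watermark takes the slow path (cmp ...; br HI, slow_path).
bool slow_path_at_return(uintptr_t sp_or_fp, uintptr_t polling_word) {
  return sp_or_fp > polling_word;
}

// at_return == false: only the armed bit of the polling word is tested
// (tbnz rscratch1, exact_log2(poll_bit), slow_path).
bool slow_path_in_body(uintptr_t polling_word) {
  return (polling_word & kPollBit) != 0;
}
// ----------------------------------------------------------------------------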
diff --git a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp index 998f1afc1c7..1d597fb429c 100644 --- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp @@ -102,8 +102,7 @@ class MacroAssembler: public Assembler { virtual void check_and_handle_popframe(Register java_thread); virtual void check_and_handle_earlyret(Register java_thread); - void safepoint_poll(Label& slow_path); - void safepoint_poll_acquire(Label& slow_path); + void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod); // Biased locking support // lock_reg and obj_reg must be loaded up with the appropriate values. @@ -1014,10 +1013,6 @@ class MacroAssembler: public Assembler { // Check for reserved stack access in method being exited (for JIT) void reserved_stack_check(); - virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, - Register tmp, - int offset); - // Arithmetics void addptr(const Address &dst, int32_t src); @@ -1063,10 +1058,24 @@ class MacroAssembler: public Assembler { private: void compare_eq(Register rn, Register rm, enum operand_size size); +#ifdef ASSERT + // Template short-hand support to clean-up after a failed call to trampoline + // call generation (see trampoline_call() below), when a set of Labels must + // be reset (before returning). + template + void reset_labels(Label &lbl, More&... more) { + lbl.reset(); reset_labels(more...); + } + template + void reset_labels(Label &lbl) { + lbl.reset(); + } +#endif + public: // Calls - address trampoline_call(Address entry, CodeBuffer *cbuf = NULL); + address trampoline_call(Address entry, CodeBuffer* cbuf = NULL); static bool far_branches() { return ReservedCodeCacheSize > branch_range || UseAOT; @@ -1231,7 +1240,6 @@ class MacroAssembler: public Assembler { address read_polling_page(Register r, relocInfo::relocType rtype); void get_polling_page(Register dest, relocInfo::relocType rtype); - address fetch_and_read_polling_page(Register r, relocInfo::relocType rtype); // CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic. 
void update_byte_crc32(Register crc, Register val, Register table); @@ -1239,24 +1247,24 @@ class MacroAssembler: public Assembler { Register table0, Register table1, Register table2, Register table3, bool upper = false); - void has_negatives(Register ary1, Register len, Register result); + address has_negatives(Register ary1, Register len, Register result); - void arrays_equals(Register a1, Register a2, Register result, Register cnt1, - Register tmp1, Register tmp2, Register tmp3, int elem_size); + address arrays_equals(Register a1, Register a2, Register result, Register cnt1, + Register tmp1, Register tmp2, Register tmp3, int elem_size); void string_equals(Register a1, Register a2, Register result, Register cnt1, int elem_size); void fill_words(Register base, Register cnt, Register value); void zero_words(Register base, uint64_t cnt); - void zero_words(Register ptr, Register cnt); + address zero_words(Register ptr, Register cnt); void zero_dcache_blocks(Register base, Register cnt); static const int zero_words_block_size; - void byte_array_inflate(Register src, Register dst, Register len, - FloatRegister vtmp1, FloatRegister vtmp2, - FloatRegister vtmp3, Register tmp4); + address byte_array_inflate(Register src, Register dst, Register len, + FloatRegister vtmp1, FloatRegister vtmp2, + FloatRegister vtmp3, Register tmp4); void char_array_compress(Register src, Register dst, Register len, FloatRegister tmp1Reg, FloatRegister tmp2Reg, diff --git a/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp b/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp index d40c533a82c..dcf87913a88 100644 --- a/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp @@ -655,7 +655,7 @@ class NativeLdSt : public NativeInstruction { return 0; } } - size_t size_in_bytes() { return 1 << size(); } + size_t size_in_bytes() { return 1ULL << size(); } bool is_not_pre_post_index() { return (is_ldst_ur() || is_ldst_unsigned_offset()); } bool is_load() { assert(Instruction_aarch64::extract(uint_at(0), 23, 22) == 0b01 || diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp index d556d957e6b..92a07a84d2a 100644 --- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp @@ -38,6 +38,7 @@ #include "nativeInst_aarch64.hpp" #include "oops/compiledICHolder.hpp" #include "oops/klass.inline.hpp" +#include "prims/methodHandles.hpp" #include "runtime/safepointMechanism.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/vframeArray.hpp" @@ -1080,20 +1081,6 @@ static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMR } } - -// Check GCLocker::needs_gc and enter the runtime if it's true. This -// keeps a new JNI critical region from starting until a GC has been -// forced. Save down any oops in registers and describe them in an -// OopMap. -static void check_needs_gc_for_critical_native(MacroAssembler* masm, - int stack_slots, - int total_c_args, - int total_in_args, - int arg_save_area, - OopMapSet* oop_maps, - VMRegPair* in_regs, - BasicType* in_sig_bt) { Unimplemented(); } - // Unpack an array argument into a pointer to the body and the length // if the array is non-null, otherwise pass 0 for both. 
static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) { Unimplemented(); } @@ -1259,25 +1246,12 @@ static void gen_special_dispatch(MacroAssembler* masm, // Critical native functions are a shorthand for the use of // GetPrimtiveArrayCritical and disallow the use of any other JNI // functions. The wrapper is expected to unpack the arguments before -// passing them to the callee and perform checks before and after the -// native call to ensure that they GCLocker -// lock_critical/unlock_critical semantics are followed. Some other -// parts of JNI setup are skipped like the tear down of the JNI handle +// passing them to the callee. Critical native functions leave the state _in_Java, +// since they block out GC. +// Some other parts of JNI setup are skipped like the tear down of the JNI handle // block and the check for pending exceptions it's impossible for them // to be thrown. // -// They are roughly structured like this: -// if (GCLocker::needs_gc()) -// SharedRuntime::block_for_jni_critical(); -// tranistion to thread_in_native -// unpack arrray arguments and call native entry point -// check for safepoint in progress -// check if any thread suspend flags are set -// call into JVM and possible unlock the JNI critical -// if a GC was suppressed while in the critical native. -// transition back to thread_in_Java -// return to caller -// nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, const methodHandle& method, int compile_id, @@ -1524,7 +1498,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // Generate stack overflow check if (UseStackBanging) { - __ bang_stack_with_offset(StackOverflow::stack_shadow_zone_size()); + __ bang_stack_with_offset(checked_cast(StackOverflow::stack_shadow_zone_size())); } else { Unimplemented(); } @@ -1545,11 +1519,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, const Register oop_handle_reg = r20; - if (is_critical_native) { - check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, - oop_handle_offset, oop_maps, in_regs, in_sig_bt); - } - // // We immediately shuffle the arguments so that any vm call we have to // make from here on out (sync slow path, jvmti, etc.) we will have @@ -1822,12 +1791,12 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, // get JNIEnv* which is first argument to native if (!is_critical_native) { __ lea(c_rarg0, Address(rthread, in_bytes(JavaThread::jni_environment_offset()))); - } - // Now set thread in native - __ mov(rscratch1, _thread_in_native); - __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); - __ stlrw(rscratch1, rscratch2); + // Now set thread in native + __ mov(rscratch1, _thread_in_native); + __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); + __ stlrw(rscratch1, rscratch2); + } rt_call(masm, native_func); @@ -1855,6 +1824,21 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, default : ShouldNotReachHere(); } + Label safepoint_in_progress, safepoint_in_progress_done; + Label after_transition; + + // If this is a critical native, check for a safepoint or suspend request after the call. + // If a safepoint is needed, transition to native, then to native_trans to handle + // safepoints like the native methods that are not critical natives. 
+ if (is_critical_native) { + Label needs_safepoint; + __ safepoint_poll(needs_safepoint, false /* at_return */, true /* acquire */, false /* in_nmethod */); + __ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset())); + __ cbnzw(rscratch1, needs_safepoint); + __ b(after_transition); + __ bind(needs_safepoint); + } + // Switch thread to "native transition" state before reading the synchronization state. // This additional state is necessary because reading and testing the synchronization // state is not atomic w.r.t. GC, as this scenario demonstrates: @@ -1875,16 +1859,23 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, } // check for safepoint operation in progress and/or pending suspend requests - Label safepoint_in_progress, safepoint_in_progress_done; { - __ safepoint_poll_acquire(safepoint_in_progress); + // We need an acquire here to ensure that any subsequent load of the + // global SafepointSynchronize::_state flag is ordered after this load + // of the thread-local polling word. We don't want this poll to + // return false (i.e. not safepointing) and a later poll of the global + // SafepointSynchronize::_state spuriously to return true. + // + // This is to avoid a race when we're in a native->Java transition + // racing the code which wakes up from a safepoint. + + __ safepoint_poll(safepoint_in_progress, true /* at_return */, true /* acquire */, false /* in_nmethod */); __ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset())); __ cbnzw(rscratch1, safepoint_in_progress); __ bind(safepoint_in_progress_done); } // change thread state - Label after_transition; __ mov(rscratch1, _thread_in_Java); __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset())); __ stlrw(rscratch1, rscratch2); @@ -2089,22 +2080,12 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, #ifndef PRODUCT assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); #endif - if (!is_critical_native) { - __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans))); - } else { - __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition))); - } + __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans))); __ blr(rscratch1); __ maybe_isb(); // Restore any method result value restore_native_result(masm, ret_type, stack_slots); - if (is_critical_native) { - // The call above performed the transition to thread_in_Java so - // skip the transition logic above. 
- __ b(after_transition); - } - __ b(safepoint_in_progress_done); __ block_comment("} safepoint"); } @@ -2153,12 +2134,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), oop_maps); - if (is_critical_native) { - nm->set_lazy_critical_native(true); - } - return nm; - } // this function returns the adjust size (in number of words) to a c2i adapter @@ -2469,7 +2445,7 @@ void SharedRuntime::generate_deopt_blob() { __ sub(sp, sp, r19); // Push interpreter frames in a loop - __ mov(rscratch1, (address)0xDEADDEAD); // Make a recognizable pattern + __ mov(rscratch1, (uint64_t)0xDEADDEAD); // Make a recognizable pattern __ mov(rscratch2, rscratch1); Label loop; __ bind(loop); diff --git a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp index 412578eea5c..09ea5387165 100644 --- a/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp @@ -611,6 +611,16 @@ class StubGenerator: public StubCodeGenerator { void array_overlap_test(Label& L_no_overlap, Address::sxtw sf) { __ b(L_no_overlap); } + // Generate indices for iota vector. + address generate_iota_indices(const char *stub_name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", stub_name); + address start = __ pc(); + __ emit_data64(0x0706050403020100, relocInfo::none); + __ emit_data64(0x0F0E0D0C0B0A0908, relocInfo::none); + return start; + } + // The inner part of zero_words(). This is the bulk operation, // zeroing words in blocks, possibly using DC ZVA to do it. The // caller is responsible for zeroing the last few words. @@ -1295,14 +1305,14 @@ class StubGenerator: public StubCodeGenerator { // Scan over array at a for count oops, verifying each one. // Preserves a and count, clobbers rscratch1 and rscratch2. - void verify_oop_array (size_t size, Register a, Register count, Register temp) { + void verify_oop_array (int size, Register a, Register count, Register temp) { Label loop, end; __ mov(rscratch1, a); __ mov(rscratch2, zr); __ bind(loop); __ cmp(rscratch2, count); __ br(Assembler::HS, end); - if (size == (size_t)wordSize) { + if (size == wordSize) { __ ldr(temp, Address(a, rscratch2, Address::lsl(exact_log2(size)))); __ verify_oop(temp); } else { @@ -1333,7 +1343,7 @@ class StubGenerator: public StubCodeGenerator { // disjoint_int_copy_entry is set to the no-overlap entry point // used by generate_conjoint_int_oop_copy(). // - address generate_disjoint_copy(size_t size, bool aligned, bool is_oop, address *entry, + address generate_disjoint_copy(int size, bool aligned, bool is_oop, address *entry, const char *name, bool dest_uninitialized = false) { Register s = c_rarg0, d = c_rarg1, count = c_rarg2; RegSet saved_reg = RegSet::of(s, d, count); @@ -1399,7 +1409,7 @@ class StubGenerator: public StubCodeGenerator { // the hardware handle it. The two dwords within qwords that span // cache line boundaries will still be loaded and stored atomicly. 
// - address generate_conjoint_copy(size_t size, bool aligned, bool is_oop, address nooverlap_target, + address generate_conjoint_copy(int size, bool aligned, bool is_oop, address nooverlap_target, address *entry, const char *name, bool dest_uninitialized = false) { Register s = c_rarg0, d = c_rarg1, count = c_rarg2; @@ -1650,7 +1660,7 @@ class StubGenerator: public StubCodeGenerator { address generate_disjoint_oop_copy(bool aligned, address *entry, const char *name, bool dest_uninitialized) { const bool is_oop = true; - const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); + const int size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); return generate_disjoint_copy(size, aligned, is_oop, entry, name, dest_uninitialized); } @@ -1668,7 +1678,7 @@ class StubGenerator: public StubCodeGenerator { address nooverlap_target, address *entry, const char *name, bool dest_uninitialized) { const bool is_oop = true; - const size_t size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); + const int size = UseCompressedOops ? sizeof (jint) : sizeof (jlong); return generate_conjoint_copy(size, aligned, is_oop, nooverlap_target, entry, name, dest_uninitialized); } @@ -3299,6 +3309,225 @@ class StubGenerator: public StubCodeGenerator { return start; } + // Arguments: + // + // Inputs: + // c_rarg0 - byte[] source+offset + // c_rarg1 - byte[] SHA.state + // c_rarg2 - int digest_length + // c_rarg3 - int offset + // c_rarg4 - int limit + // + address generate_sha3_implCompress(bool multi_block, const char *name) { + static const uint64_t round_consts[24] = { + 0x0000000000000001L, 0x0000000000008082L, 0x800000000000808AL, + 0x8000000080008000L, 0x000000000000808BL, 0x0000000080000001L, + 0x8000000080008081L, 0x8000000000008009L, 0x000000000000008AL, + 0x0000000000000088L, 0x0000000080008009L, 0x000000008000000AL, + 0x000000008000808BL, 0x800000000000008BL, 0x8000000000008089L, + 0x8000000000008003L, 0x8000000000008002L, 0x8000000000000080L, + 0x000000000000800AL, 0x800000008000000AL, 0x8000000080008081L, + 0x8000000000008080L, 0x0000000080000001L, 0x8000000080008008L + }; + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + Register buf = c_rarg0; + Register state = c_rarg1; + Register digest_length = c_rarg2; + Register ofs = c_rarg3; + Register limit = c_rarg4; + + Label sha3_loop, rounds24_loop; + Label sha3_512, sha3_384_or_224, sha3_256; + + __ stpd(v8, v9, __ pre(sp, -64)); + __ stpd(v10, v11, Address(sp, 16)); + __ stpd(v12, v13, Address(sp, 32)); + __ stpd(v14, v15, Address(sp, 48)); + + // load state + __ add(rscratch1, state, 32); + __ ld1(v0, v1, v2, v3, __ T1D, state); + __ ld1(v4, v5, v6, v7, __ T1D, __ post(rscratch1, 32)); + __ ld1(v8, v9, v10, v11, __ T1D, __ post(rscratch1, 32)); + __ ld1(v12, v13, v14, v15, __ T1D, __ post(rscratch1, 32)); + __ ld1(v16, v17, v18, v19, __ T1D, __ post(rscratch1, 32)); + __ ld1(v20, v21, v22, v23, __ T1D, __ post(rscratch1, 32)); + __ ld1(v24, __ T1D, rscratch1); + + __ BIND(sha3_loop); + + // 24 keccak rounds + __ movw(rscratch2, 24); + + // load round_constants base + __ lea(rscratch1, ExternalAddress((address) round_consts)); + + // load input + __ ld1(v25, v26, v27, v28, __ T8B, __ post(buf, 32)); + __ ld1(v29, v30, v31, __ T8B, __ post(buf, 24)); + __ eor(v0, __ T8B, v0, v25); + __ eor(v1, __ T8B, v1, v26); + __ eor(v2, __ T8B, v2, v27); + __ eor(v3, __ T8B, v3, v28); + __ eor(v4, __ T8B, v4, v29); + __ eor(v5, __ T8B, v5, v30); + __ eor(v6, __ T8B, v6, v31); + + // 
digest_length == 64, SHA3-512 + __ tbnz(digest_length, 6, sha3_512); + + __ ld1(v25, v26, v27, v28, __ T8B, __ post(buf, 32)); + __ ld1(v29, v30, __ T8B, __ post(buf, 16)); + __ eor(v7, __ T8B, v7, v25); + __ eor(v8, __ T8B, v8, v26); + __ eor(v9, __ T8B, v9, v27); + __ eor(v10, __ T8B, v10, v28); + __ eor(v11, __ T8B, v11, v29); + __ eor(v12, __ T8B, v12, v30); + + // digest_length == 28, SHA3-224; digest_length == 48, SHA3-384 + __ tbnz(digest_length, 4, sha3_384_or_224); + + // SHA3-256 + __ ld1(v25, v26, v27, v28, __ T8B, __ post(buf, 32)); + __ eor(v13, __ T8B, v13, v25); + __ eor(v14, __ T8B, v14, v26); + __ eor(v15, __ T8B, v15, v27); + __ eor(v16, __ T8B, v16, v28); + __ b(rounds24_loop); + + __ BIND(sha3_384_or_224); + __ tbz(digest_length, 2, rounds24_loop); // bit 2 cleared? SHA-384 + + // SHA3-224 + __ ld1(v25, v26, v27, v28, __ T8B, __ post(buf, 32)); + __ ld1(v29, __ T8B, __ post(buf, 8)); + __ eor(v13, __ T8B, v13, v25); + __ eor(v14, __ T8B, v14, v26); + __ eor(v15, __ T8B, v15, v27); + __ eor(v16, __ T8B, v16, v28); + __ eor(v17, __ T8B, v17, v29); + __ b(rounds24_loop); + + __ BIND(sha3_512); + __ ld1(v25, v26, __ T8B, __ post(buf, 16)); + __ eor(v7, __ T8B, v7, v25); + __ eor(v8, __ T8B, v8, v26); + + __ BIND(rounds24_loop); + __ subw(rscratch2, rscratch2, 1); + + __ eor3(v29, __ T16B, v4, v9, v14); + __ eor3(v26, __ T16B, v1, v6, v11); + __ eor3(v28, __ T16B, v3, v8, v13); + __ eor3(v25, __ T16B, v0, v5, v10); + __ eor3(v27, __ T16B, v2, v7, v12); + __ eor3(v29, __ T16B, v29, v19, v24); + __ eor3(v26, __ T16B, v26, v16, v21); + __ eor3(v28, __ T16B, v28, v18, v23); + __ eor3(v25, __ T16B, v25, v15, v20); + __ eor3(v27, __ T16B, v27, v17, v22); + + __ rax1(v30, __ T2D, v29, v26); + __ rax1(v26, __ T2D, v26, v28); + __ rax1(v28, __ T2D, v28, v25); + __ rax1(v25, __ T2D, v25, v27); + __ rax1(v27, __ T2D, v27, v29); + + __ eor(v0, __ T16B, v0, v30); + __ xar(v29, __ T2D, v1, v25, (64 - 1)); + __ xar(v1, __ T2D, v6, v25, (64 - 44)); + __ xar(v6, __ T2D, v9, v28, (64 - 20)); + __ xar(v9, __ T2D, v22, v26, (64 - 61)); + __ xar(v22, __ T2D, v14, v28, (64 - 39)); + __ xar(v14, __ T2D, v20, v30, (64 - 18)); + __ xar(v31, __ T2D, v2, v26, (64 - 62)); + __ xar(v2, __ T2D, v12, v26, (64 - 43)); + __ xar(v12, __ T2D, v13, v27, (64 - 25)); + __ xar(v13, __ T2D, v19, v28, (64 - 8)); + __ xar(v19, __ T2D, v23, v27, (64 - 56)); + __ xar(v23, __ T2D, v15, v30, (64 - 41)); + __ xar(v15, __ T2D, v4, v28, (64 - 27)); + __ xar(v28, __ T2D, v24, v28, (64 - 14)); + __ xar(v24, __ T2D, v21, v25, (64 - 2)); + __ xar(v8, __ T2D, v8, v27, (64 - 55)); + __ xar(v4, __ T2D, v16, v25, (64 - 45)); + __ xar(v16, __ T2D, v5, v30, (64 - 36)); + __ xar(v5, __ T2D, v3, v27, (64 - 28)); + __ xar(v27, __ T2D, v18, v27, (64 - 21)); + __ xar(v3, __ T2D, v17, v26, (64 - 15)); + __ xar(v25, __ T2D, v11, v25, (64 - 10)); + __ xar(v26, __ T2D, v7, v26, (64 - 6)); + __ xar(v30, __ T2D, v10, v30, (64 - 3)); + + __ bcax(v20, __ T16B, v31, v22, v8); + __ bcax(v21, __ T16B, v8, v23, v22); + __ bcax(v22, __ T16B, v22, v24, v23); + __ bcax(v23, __ T16B, v23, v31, v24); + __ bcax(v24, __ T16B, v24, v8, v31); + + __ ld1r(v31, __ T2D, __ post(rscratch1, 8)); + + __ bcax(v17, __ T16B, v25, v19, v3); + __ bcax(v18, __ T16B, v3, v15, v19); + __ bcax(v19, __ T16B, v19, v16, v15); + __ bcax(v15, __ T16B, v15, v25, v16); + __ bcax(v16, __ T16B, v16, v3, v25); + + __ bcax(v10, __ T16B, v29, v12, v26); + __ bcax(v11, __ T16B, v26, v13, v12); + __ bcax(v12, __ T16B, v12, v14, v13); + __ bcax(v13, __ T16B, v13, v29, v14); + __ 
bcax(v14, __ T16B, v14, v26, v29); + + __ bcax(v7, __ T16B, v30, v9, v4); + __ bcax(v8, __ T16B, v4, v5, v9); + __ bcax(v9, __ T16B, v9, v6, v5); + __ bcax(v5, __ T16B, v5, v30, v6); + __ bcax(v6, __ T16B, v6, v4, v30); + + __ bcax(v3, __ T16B, v27, v0, v28); + __ bcax(v4, __ T16B, v28, v1, v0); + __ bcax(v0, __ T16B, v0, v2, v1); + __ bcax(v1, __ T16B, v1, v27, v2); + __ bcax(v2, __ T16B, v2, v28, v27); + + __ eor(v0, __ T16B, v0, v31); + + __ cbnzw(rscratch2, rounds24_loop); + + if (multi_block) { + // block_size = 200 - 2 * digest_length, ofs += block_size + __ add(ofs, ofs, 200); + __ sub(ofs, ofs, digest_length, Assembler::LSL, 1); + + __ cmp(ofs, limit); + __ br(Assembler::LE, sha3_loop); + __ mov(c_rarg0, ofs); // return ofs + } + + __ st1(v0, v1, v2, v3, __ T1D, __ post(state, 32)); + __ st1(v4, v5, v6, v7, __ T1D, __ post(state, 32)); + __ st1(v8, v9, v10, v11, __ T1D, __ post(state, 32)); + __ st1(v12, v13, v14, v15, __ T1D, __ post(state, 32)); + __ st1(v16, v17, v18, v19, __ T1D, __ post(state, 32)); + __ st1(v20, v21, v22, v23, __ T1D, __ post(state, 32)); + __ st1(v24, __ T1D, state); + + __ ldpd(v14, v15, Address(sp, 48)); + __ ldpd(v12, v13, Address(sp, 32)); + __ ldpd(v10, v11, Address(sp, 16)); + __ ldpd(v8, v9, __ post(sp, 64)); + + __ ret(lr); + + return start; + } + // Safefetch stubs. void generate_safefetch(const char* name, int size, address* entry, address* fault_pc, address* continuation_pc) { @@ -3739,6 +3968,238 @@ class StubGenerator: public StubCodeGenerator { return start; } + // Arguments: + // + // Input: + // c_rarg0 - newArr address + // c_rarg1 - oldArr address + // c_rarg2 - newIdx + // c_rarg3 - shiftCount + // c_rarg4 - numIter + // + address generate_bigIntegerRightShift() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker"); + address start = __ pc(); + + Label ShiftSIMDLoop, ShiftTwoLoop, ShiftThree, ShiftTwo, ShiftOne, Exit; + + Register newArr = c_rarg0; + Register oldArr = c_rarg1; + Register newIdx = c_rarg2; + Register shiftCount = c_rarg3; + Register numIter = c_rarg4; + Register idx = numIter; + + Register newArrCur = rscratch1; + Register shiftRevCount = rscratch2; + Register oldArrCur = r13; + Register oldArrNext = r14; + + FloatRegister oldElem0 = v0; + FloatRegister oldElem1 = v1; + FloatRegister newElem = v2; + FloatRegister shiftVCount = v3; + FloatRegister shiftVRevCount = v4; + + __ cbz(idx, Exit); + + __ add(newArr, newArr, newIdx, Assembler::LSL, 2); + + // left shift count + __ movw(shiftRevCount, 32); + __ subw(shiftRevCount, shiftRevCount, shiftCount); + + // numIter too small to allow a 4-words SIMD loop, rolling back + __ cmp(numIter, (u1)4); + __ br(Assembler::LT, ShiftThree); + + __ dup(shiftVCount, __ T4S, shiftCount); + __ dup(shiftVRevCount, __ T4S, shiftRevCount); + __ negr(shiftVCount, __ T4S, shiftVCount); + + __ BIND(ShiftSIMDLoop); + + // Calculate the load addresses + __ sub(idx, idx, 4); + __ add(oldArrNext, oldArr, idx, Assembler::LSL, 2); + __ add(newArrCur, newArr, idx, Assembler::LSL, 2); + __ add(oldArrCur, oldArrNext, 4); + + // Load 4 words and process + __ ld1(oldElem0, __ T4S, Address(oldArrCur)); + __ ld1(oldElem1, __ T4S, Address(oldArrNext)); + __ ushl(oldElem0, __ T4S, oldElem0, shiftVCount); + __ ushl(oldElem1, __ T4S, oldElem1, shiftVRevCount); + __ orr(newElem, __ T16B, oldElem0, oldElem1); + __ st1(newElem, __ T4S, Address(newArrCur)); + + __ cmp(idx, (u1)4); + __ br(Assembler::LT, ShiftTwoLoop); + __ b(ShiftSIMDLoop); + + __ BIND(ShiftTwoLoop); 
+ __ cbz(idx, Exit); + __ cmp(idx, (u1)1); + __ br(Assembler::EQ, ShiftOne); + + // Calculate the load addresses + __ sub(idx, idx, 2); + __ add(oldArrNext, oldArr, idx, Assembler::LSL, 2); + __ add(newArrCur, newArr, idx, Assembler::LSL, 2); + __ add(oldArrCur, oldArrNext, 4); + + // Load 2 words and process + __ ld1(oldElem0, __ T2S, Address(oldArrCur)); + __ ld1(oldElem1, __ T2S, Address(oldArrNext)); + __ ushl(oldElem0, __ T2S, oldElem0, shiftVCount); + __ ushl(oldElem1, __ T2S, oldElem1, shiftVRevCount); + __ orr(newElem, __ T8B, oldElem0, oldElem1); + __ st1(newElem, __ T2S, Address(newArrCur)); + __ b(ShiftTwoLoop); + + __ BIND(ShiftThree); + __ tbz(idx, 1, ShiftOne); + __ tbz(idx, 0, ShiftTwo); + __ ldrw(r10, Address(oldArr, 12)); + __ ldrw(r11, Address(oldArr, 8)); + __ lsrvw(r10, r10, shiftCount); + __ lslvw(r11, r11, shiftRevCount); + __ orrw(r12, r10, r11); + __ strw(r12, Address(newArr, 8)); + + __ BIND(ShiftTwo); + __ ldrw(r10, Address(oldArr, 8)); + __ ldrw(r11, Address(oldArr, 4)); + __ lsrvw(r10, r10, shiftCount); + __ lslvw(r11, r11, shiftRevCount); + __ orrw(r12, r10, r11); + __ strw(r12, Address(newArr, 4)); + + __ BIND(ShiftOne); + __ ldrw(r10, Address(oldArr, 4)); + __ ldrw(r11, Address(oldArr)); + __ lsrvw(r10, r10, shiftCount); + __ lslvw(r11, r11, shiftRevCount); + __ orrw(r12, r10, r11); + __ strw(r12, Address(newArr)); + + __ BIND(Exit); + __ ret(lr); + + return start; + } + + // Arguments: + // + // Input: + // c_rarg0 - newArr address + // c_rarg1 - oldArr address + // c_rarg2 - newIdx + // c_rarg3 - shiftCount + // c_rarg4 - numIter + // + address generate_bigIntegerLeftShift() { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "bigIntegerLeftShiftWorker"); + address start = __ pc(); + + Label ShiftSIMDLoop, ShiftTwoLoop, ShiftThree, ShiftTwo, ShiftOne, Exit; + + Register newArr = c_rarg0; + Register oldArr = c_rarg1; + Register newIdx = c_rarg2; + Register shiftCount = c_rarg3; + Register numIter = c_rarg4; + + Register shiftRevCount = rscratch1; + Register oldArrNext = rscratch2; + + FloatRegister oldElem0 = v0; + FloatRegister oldElem1 = v1; + FloatRegister newElem = v2; + FloatRegister shiftVCount = v3; + FloatRegister shiftVRevCount = v4; + + __ cbz(numIter, Exit); + + __ add(oldArrNext, oldArr, 4); + __ add(newArr, newArr, newIdx, Assembler::LSL, 2); + + // right shift count + __ movw(shiftRevCount, 32); + __ subw(shiftRevCount, shiftRevCount, shiftCount); + + // numIter too small to allow a 4-words SIMD loop, rolling back + __ cmp(numIter, (u1)4); + __ br(Assembler::LT, ShiftThree); + + __ dup(shiftVCount, __ T4S, shiftCount); + __ dup(shiftVRevCount, __ T4S, shiftRevCount); + __ negr(shiftVRevCount, __ T4S, shiftVRevCount); + + __ BIND(ShiftSIMDLoop); + + // load 4 words and process + __ ld1(oldElem0, __ T4S, __ post(oldArr, 16)); + __ ld1(oldElem1, __ T4S, __ post(oldArrNext, 16)); + __ ushl(oldElem0, __ T4S, oldElem0, shiftVCount); + __ ushl(oldElem1, __ T4S, oldElem1, shiftVRevCount); + __ orr(newElem, __ T16B, oldElem0, oldElem1); + __ st1(newElem, __ T4S, __ post(newArr, 16)); + __ sub(numIter, numIter, 4); + + __ cmp(numIter, (u1)4); + __ br(Assembler::LT, ShiftTwoLoop); + __ b(ShiftSIMDLoop); + + __ BIND(ShiftTwoLoop); + __ cbz(numIter, Exit); + __ cmp(numIter, (u1)1); + __ br(Assembler::EQ, ShiftOne); + + // load 2 words and process + __ ld1(oldElem0, __ T2S, __ post(oldArr, 8)); + __ ld1(oldElem1, __ T2S, __ post(oldArrNext, 8)); + __ ushl(oldElem0, __ T2S, oldElem0, shiftVCount); + __ ushl(oldElem1, __ T2S, oldElem1, 
shiftVRevCount); + __ orr(newElem, __ T8B, oldElem0, oldElem1); + __ st1(newElem, __ T2S, __ post(newArr, 8)); + __ sub(numIter, numIter, 2); + __ b(ShiftTwoLoop); + + __ BIND(ShiftThree); + __ ldrw(r10, __ post(oldArr, 4)); + __ ldrw(r11, __ post(oldArrNext, 4)); + __ lslvw(r10, r10, shiftCount); + __ lsrvw(r11, r11, shiftRevCount); + __ orrw(r12, r10, r11); + __ strw(r12, __ post(newArr, 4)); + __ tbz(numIter, 1, Exit); + __ tbz(numIter, 0, ShiftOne); + + __ BIND(ShiftTwo); + __ ldrw(r10, __ post(oldArr, 4)); + __ ldrw(r11, __ post(oldArrNext, 4)); + __ lslvw(r10, r10, shiftCount); + __ lsrvw(r11, r11, shiftRevCount); + __ orrw(r12, r10, r11); + __ strw(r12, __ post(newArr, 4)); + + __ BIND(ShiftOne); + __ ldrw(r10, Address(oldArr)); + __ ldrw(r11, Address(oldArrNext)); + __ lslvw(r10, r10, shiftCount); + __ lsrvw(r11, r11, shiftRevCount); + __ orrw(r12, r10, r11); + __ strw(r12, Address(newArr)); + + __ BIND(Exit); + __ ret(lr); + + return start; + } + void ghash_multiply(FloatRegister result_lo, FloatRegister result_hi, FloatRegister a, FloatRegister b, FloatRegister a1_xor_a0, FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3, FloatRegister tmp4) { @@ -4942,6 +5403,150 @@ class StubGenerator: public StubCodeGenerator { return start; } + void generate_base64_encode_simdround(Register src, Register dst, + FloatRegister codec, u8 size) { + + FloatRegister in0 = v4, in1 = v5, in2 = v6; + FloatRegister out0 = v16, out1 = v17, out2 = v18, out3 = v19; + FloatRegister ind0 = v20, ind1 = v21, ind2 = v22, ind3 = v23; + + Assembler::SIMD_Arrangement arrangement = size == 16 ? __ T16B : __ T8B; + + __ ld3(in0, in1, in2, arrangement, __ post(src, 3 * size)); + + __ ushr(ind0, arrangement, in0, 2); + + __ ushr(ind1, arrangement, in1, 2); + __ shl(in0, arrangement, in0, 6); + __ orr(ind1, arrangement, ind1, in0); + __ ushr(ind1, arrangement, ind1, 2); + + __ ushr(ind2, arrangement, in2, 4); + __ shl(in1, arrangement, in1, 4); + __ orr(ind2, arrangement, in1, ind2); + __ ushr(ind2, arrangement, ind2, 2); + + __ shl(ind3, arrangement, in2, 2); + __ ushr(ind3, arrangement, ind3, 2); + + __ tbl(out0, arrangement, codec, 4, ind0); + __ tbl(out1, arrangement, codec, 4, ind1); + __ tbl(out2, arrangement, codec, 4, ind2); + __ tbl(out3, arrangement, codec, 4, ind3); + + __ st4(out0, out1, out2, out3, arrangement, __ post(dst, 4 * size)); + } + + /** + * Arguments: + * + * Input: + * c_rarg0 - src_start + * c_rarg1 - src_offset + * c_rarg2 - src_length + * c_rarg3 - dest_start + * c_rarg4 - dest_offset + * c_rarg5 - isURL + * + */ + address generate_base64_encodeBlock() { + + static const char toBase64[64] = { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/' + }; + + static const char toBase64URL[64] = { + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_' + }; + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "encodeBlock"); + address start = __ pc(); + + Register src = c_rarg0; // source array + Register soff = c_rarg1; // 
source start offset + Register send = c_rarg2; // source end offset + Register dst = c_rarg3; // dest array + Register doff = c_rarg4; // position for writing to dest array + Register isURL = c_rarg5; // Base64 or URL character set + + // c_rarg6 and c_rarg7 are free to use as temps + Register codec = c_rarg6; + Register length = c_rarg7; + + Label ProcessData, Process48B, Process24B, Process3B, SIMDExit, Exit; + + __ add(src, src, soff); + __ add(dst, dst, doff); + __ sub(length, send, soff); + + // load the codec base address + __ lea(codec, ExternalAddress((address) toBase64)); + __ cbz(isURL, ProcessData); + __ lea(codec, ExternalAddress((address) toBase64URL)); + + __ BIND(ProcessData); + + // too short to form up a SIMD loop; fall back to the 3-byte scalar loop + __ cmp(length, (u1)24); + __ br(Assembler::LT, Process3B); + + __ ld1(v0, v1, v2, v3, __ T16B, Address(codec)); + + __ BIND(Process48B); + __ cmp(length, (u1)48); + __ br(Assembler::LT, Process24B); + generate_base64_encode_simdround(src, dst, v0, 16); + __ sub(length, length, 48); + __ b(Process48B); + + __ BIND(Process24B); + __ cmp(length, (u1)24); + __ br(Assembler::LT, SIMDExit); + generate_base64_encode_simdround(src, dst, v0, 8); + __ sub(length, length, 24); + + __ BIND(SIMDExit); + __ cbz(length, Exit); + + __ BIND(Process3B); + // 3 src bytes, 24 bits + __ ldrb(r10, __ post(src, 1)); + __ ldrb(r11, __ post(src, 1)); + __ ldrb(r12, __ post(src, 1)); + __ orrw(r11, r11, r10, Assembler::LSL, 8); + __ orrw(r12, r12, r11, Assembler::LSL, 8); + // codec index + __ ubfmw(r15, r12, 18, 23); + __ ubfmw(r14, r12, 12, 17); + __ ubfmw(r13, r12, 6, 11); + __ andw(r12, r12, 63); + // get the code based on the codec + __ ldrb(r15, Address(codec, r15, Address::uxtw(0))); + __ ldrb(r14, Address(codec, r14, Address::uxtw(0))); + __ ldrb(r13, Address(codec, r13, Address::uxtw(0))); + __ ldrb(r12, Address(codec, r12, Address::uxtw(0))); + __ strb(r15, __ post(dst, 1)); + __ strb(r14, __ post(dst, 1)); + __ strb(r13, __ post(dst, 1)); + __ strb(r12, __ post(dst, 1)); + __ sub(length, length, 3); + __ cbnz(length, Process3B); + + __ BIND(Exit); + __ ret(lr); + + return start; + } + // Continuation point for throwing of implicit exceptions that are // not handled in the current activation.
Fabricates an exception // oop and initiates normal exception dispatching in this @@ -5958,6 +6563,8 @@ class StubGenerator: public StubCodeGenerator { SharedRuntime:: throw_NullPointerException_at_call)); + StubRoutines::aarch64::_vector_iota_indices = generate_iota_indices("iota_indices"); + // arraycopy stubs used by compilers generate_arraycopy_stubs(); @@ -5993,6 +6600,11 @@ class StubGenerator: public StubCodeGenerator { StubRoutines::_mulAdd = generate_mulAdd(); } + if (UseSIMDForBigIntegerShiftIntrinsics) { + StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift(); + StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift(); + } + if (UseMontgomeryMultiplyIntrinsic) { StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); MontgomeryMultiplyGenerator g(_masm, /*squaring*/false); @@ -6013,6 +6625,10 @@ class StubGenerator: public StubCodeGenerator { StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks(); } + if (UseBASE64Intrinsics) { + StubRoutines::_base64_encodeBlock = generate_base64_encodeBlock(); + } + // data cache line writeback StubRoutines::_data_cache_writeback = generate_data_cache_writeback(); StubRoutines::_data_cache_writeback_sync = generate_data_cache_writeback_sync(); @@ -6036,6 +6652,10 @@ class StubGenerator: public StubCodeGenerator { StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress"); StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB"); } + if (UseSHA3Intrinsics) { + StubRoutines::_sha3_implCompress = generate_sha3_implCompress(false, "sha3_implCompress"); + StubRoutines::_sha3_implCompressMB = generate_sha3_implCompress(true, "sha3_implCompressMB"); + } // generate Adler32 intrinsics code if (UseAdler32Intrinsics) { diff --git a/src/hotspot/cpu/aarch64/stubRoutines_aarch64.cpp b/src/hotspot/cpu/aarch64/stubRoutines_aarch64.cpp index b2d0d5dbff8..f471209a4c0 100644 --- a/src/hotspot/cpu/aarch64/stubRoutines_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/stubRoutines_aarch64.cpp @@ -40,6 +40,7 @@ address StubRoutines::aarch64::_f2i_fixup = NULL; address StubRoutines::aarch64::_f2l_fixup = NULL; address StubRoutines::aarch64::_d2i_fixup = NULL; address StubRoutines::aarch64::_d2l_fixup = NULL; +address StubRoutines::aarch64::_vector_iota_indices = NULL; address StubRoutines::aarch64::_float_sign_mask = NULL; address StubRoutines::aarch64::_float_sign_flip = NULL; address StubRoutines::aarch64::_double_sign_mask = NULL; diff --git a/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp b/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp index 4ace7b5c808..6960a19b3f5 100644 --- a/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/stubRoutines_aarch64.hpp @@ -51,6 +51,7 @@ class aarch64 { static address _d2i_fixup; static address _d2l_fixup; + static address _vector_iota_indices; static address _float_sign_mask; static address _float_sign_flip; static address _double_sign_mask; @@ -106,6 +107,10 @@ class aarch64 { return _d2l_fixup; } + static address vector_iota_indices() { + return _vector_iota_indices; + } + static address float_sign_mask() { return _float_sign_mask; diff --git a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp index 21566592a9f..874d8ce2766 100644 --- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp @@ -980,7 
+980,7 @@ address TemplateInterpreterGenerator::generate_CRC32_update_entry() { Label slow_path; // If we need a safepoint check, generate full interpreter entry. - __ safepoint_poll(slow_path); + __ safepoint_poll(slow_path, false /* at_return */, false /* acquire */, false /* in_nmethod */); // We don't generate local frame and don't align stack because // we call stub code and there is no safepoint on this path. @@ -1029,7 +1029,7 @@ address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractI Label slow_path; // If we need a safepoint check, generate full interpreter entry. - __ safepoint_poll(slow_path); + __ safepoint_poll(slow_path, false /* at_return */, false /* acquire */, false /* in_nmethod */); // We don't generate local frame and don't align stack because // we call stub code and there is no safepoint on this path. @@ -1120,7 +1120,7 @@ void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { // an interpreter frame with greater than a page of locals, so each page // needs to be checked. Only true for non-native. if (UseStackBanging) { - const int n_shadow_pages = StackOverflow::stack_shadow_zone_size() / os::vm_page_size(); + const int n_shadow_pages = (int)(StackOverflow::stack_shadow_zone_size() / os::vm_page_size()); const int start_page = native_call ? n_shadow_pages : 1; const int page_size = os::vm_page_size(); for (int pages = start_page; pages <= n_shadow_pages ; pages++) { @@ -1388,7 +1388,16 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { // check for safepoint operation in progress and/or pending suspend requests { Label L, Continue; - __ safepoint_poll_acquire(L); + + // We need an acquire here to ensure that any subsequent load of the + // global SafepointSynchronize::_state flag is ordered after this load + // of the thread-local polling word. We don't want this poll to + // return false (i.e. not safepointing) and a later poll of the global + // SafepointSynchronize::_state spuriously to return true. + // + // This is to avoid a race when we're in a native->Java transition + // racing the code which wakes up from a safepoint. + __ safepoint_poll(L, true /* at_return */, true /* acquire */, false /* in_nmethod */); __ ldrw(rscratch2, Address(rthread, JavaThread::suspend_flags_offset())); __ cbz(rscratch2, Continue); __ bind(L); diff --git a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp index 842f07ae9a0..811783fcb7d 100644 --- a/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/templateTable_aarch64.cpp @@ -1906,7 +1906,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) __ dispatch_only(vtos, /*generate_poll*/true); if (UseLoopCounter) { - if (ProfileInterpreter) { + if (ProfileInterpreter && !TieredCompilation) { // Out-of-line code to allocate method data oop. 
__ bind(profile_method); __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp index 343a2bbd50f..2a6553d9c21 100644 --- a/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.cpp @@ -181,10 +181,6 @@ void VM_Version::initialize() { } if (_cpu == CPU_ARM && (_model == 0xd07 || _model2 == 0xd07)) _features |= CPU_STXR_PREFETCH; - // If an olde style /proc/cpuinfo (cores == 1) then if _model is an A57 (0xd07) - // we assume the worst and assume we could be on a big little system and have - // undisclosed A53 cores which we could be swapped to at any stage - if (_cpu == CPU_ARM && os::processor_count() == 1 && _model == 0xd07) _features |= CPU_A53MAC; char buf[512]; sprintf(buf, "0x%02x:0x%x:0x%03x:%d", _cpu, _variant, _model, _revision); @@ -194,6 +190,7 @@ void VM_Version::initialize() { if (_features & CPU_AES) strcat(buf, ", aes"); if (_features & CPU_SHA1) strcat(buf, ", sha1"); if (_features & CPU_SHA2) strcat(buf, ", sha256"); + if (_features & CPU_SHA3) strcat(buf, ", sha3"); if (_features & CPU_SHA512) strcat(buf, ", sha512"); if (_features & CPU_LSE) strcat(buf, ", lse"); if (_features & CPU_SVE) strcat(buf, ", sve"); @@ -275,7 +272,7 @@ void VM_Version::initialize() { FLAG_SET_DEFAULT(UseMD5Intrinsics, false); } - if (_features & (CPU_SHA1 | CPU_SHA2)) { + if (_features & (CPU_SHA1 | CPU_SHA2 | CPU_SHA3 | CPU_SHA512)) { if (FLAG_IS_DEFAULT(UseSHA)) { FLAG_SET_DEFAULT(UseSHA, true); } @@ -302,6 +299,16 @@ void VM_Version::initialize() { FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); } + if (UseSHA && (_features & CPU_SHA3)) { + // Do not auto-enable UseSHA3Intrinsics until it has been fully tested on hardware + // if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) { + // FLAG_SET_DEFAULT(UseSHA3Intrinsics, true); + // } + } else if (UseSHA3Intrinsics) { + warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); + } + if (UseSHA && (_features & CPU_SHA512)) { // Do not auto-enable UseSHA512Intrinsics until it has been fully tested on hardware // if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) { @@ -312,7 +319,7 @@ void VM_Version::initialize() { FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); } - if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) { + if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA3Intrinsics || UseSHA512Intrinsics)) { FLAG_SET_DEFAULT(UseSHA, false); } @@ -325,6 +332,10 @@ void VM_Version::initialize() { FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); } + if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) { + UseBASE64Intrinsics = true; + } + if (is_zva_enabled()) { if (FLAG_IS_DEFAULT(UseBlockZeroing)) { FLAG_SET_DEFAULT(UseBlockZeroing, true); @@ -390,7 +401,7 @@ void VM_Version::initialize() { warning("SVE does not support vector length less than 16 bytes. Disabling SVE."); UseSVE = 0; } else if ((MaxVectorSize % 16) == 0 && is_power_of_2(MaxVectorSize)) { - int new_vl = set_and_get_current_sve_vector_lenght(MaxVectorSize); + int new_vl = set_and_get_current_sve_vector_length(MaxVectorSize); _initial_sve_vector_length = new_vl; // Update MaxVectorSize to the largest supported value. 
if (new_vl < 0) { diff --git a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp index 292550529b4..45838f87072 100644 --- a/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/vm_version_aarch64.hpp @@ -51,7 +51,7 @@ class VM_Version : public Abstract_VM_Version { // Sets the SVE length and returns a new actual value or negative on error. // If the len is larger than the system largest supported SVE vector length, // the function sets the largest supported value. - static int set_and_get_current_sve_vector_lenght(int len); + static int set_and_get_current_sve_vector_length(int len); static int get_current_sve_vector_length(); public: @@ -103,6 +103,7 @@ class VM_Version : public Abstract_VM_Version { CPU_CRC32 = (1<<7), CPU_LSE = (1<<8), CPU_DCPOP = (1<<16), + CPU_SHA3 = (1<<17), CPU_SHA512 = (1<<21), CPU_SVE = (1<<22), // flags above must follow Linux HWCAP @@ -128,6 +129,7 @@ class VM_Version : public Abstract_VM_Version { static int get_initial_sve_vector_length() { return _initial_sve_vector_length; }; static bool supports_fast_class_init_checks() { return true; } + constexpr static bool supports_stack_watermark_barrier() { return true; } }; #endif // CPU_AARCH64_VM_VERSION_AARCH64_HPP diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad index 4c237673181..b7c6ec48896 100644 --- a/src/hotspot/cpu/arm/arm.ad +++ b/src/hotspot/cpu/arm/arm.ad @@ -993,6 +993,10 @@ const bool Matcher::has_predicated_vectors(void) { return false; } +bool Matcher::supports_vector_variable_shifts(void) { + return VM_Version::has_simd(); +} + const int Matcher::float_pressure(int default_pressure_threshold) { return default_pressure_threshold; } @@ -1155,10 +1159,6 @@ const bool Matcher::rematerialize_float_constants = false; // Java calling convention forces doubles to be aligned. const bool Matcher::misaligned_doubles_ok = false; -// No-op on ARM. -void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { -} - // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. const bool Matcher::strict_fp_requires_explicit_rounding = false; @@ -1663,7 +1663,6 @@ frame %{ // These two registers define part of the calling convention // between compiled code and the interpreter. 
inline_cache_reg(R_Ricklass); // Inline Cache Register or Method* for I2C - interpreter_method_reg(R_Rmethod); // Method Register when calling interpreter // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] cisc_spilling_operand_name(indOffset); @@ -2523,14 +2522,6 @@ operand inline_cache_regP(iRegP reg) %{ interface(REG_INTER); %} -operand interpreter_method_regP(iRegP reg) %{ - constraint(ALLOC_IN_RC(Rmethod_regP)); - match(reg); - format %{ %} - interface(REG_INTER); -%} - - //----------Complex Operands--------------------------------------------------- // Indirect Memory Reference operand indirect(sp_ptr_RegP reg) %{ diff --git a/src/hotspot/cpu/arm/arm_32.ad b/src/hotspot/cpu/arm/arm_32.ad index 177c1a7cae0..09fce8c4c4f 100644 --- a/src/hotspot/cpu/arm/arm_32.ad +++ b/src/hotspot/cpu/arm/arm_32.ad @@ -182,11 +182,11 @@ alloc_class chunk0( alloc_class chunk1( R_S16, R_S17, R_S18, R_S19, R_S20, R_S21, R_S22, R_S23, R_S24, R_S25, R_S26, R_S27, R_S28, R_S29, R_S30, R_S31, - R_S0, R_S1, R_S2, R_S3, R_S4, R_S5, R_S6, R_S7, + R_S0, R_S1, R_S2, R_S3, R_S4, R_S5, R_S6, R_S7, R_S8, R_S9, R_S10, R_S11, R_S12, R_S13, R_S14, R_S15, - R_D16, R_D16x,R_D17, R_D17x,R_D18, R_D18x,R_D19, R_D19x, - R_D20, R_D20x,R_D21, R_D21x,R_D22, R_D22x,R_D23, R_D23x, - R_D24, R_D24x,R_D25, R_D25x,R_D26, R_D26x,R_D27, R_D27x, + R_D16, R_D16x,R_D17, R_D17x,R_D18, R_D18x,R_D19, R_D19x, + R_D20, R_D20x,R_D21, R_D21x,R_D22, R_D22x,R_D23, R_D23x, + R_D24, R_D24x,R_D25, R_D25x,R_D26, R_D26x,R_D27, R_D27x, R_D28, R_D28x,R_D29, R_D29x,R_D30, R_D30x,R_D31, R_D31x ); @@ -196,8 +196,7 @@ alloc_class chunk2(APSR, FPSCR); // Several register classes are automatically defined based upon information in // this architecture description. // 1) reg_class inline_cache_reg ( as defined in frame section ) -// 2) reg_class interpreter_method_reg ( as defined in frame section ) -// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) +// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) // // ---------------------------- @@ -223,7 +222,6 @@ reg_class ptr_reg(R_R0, R_R1, R_R2, R_R3, R_R4, R_R5, R_R6, R_R7, R_R8, R_R9, R_ reg_class sp_ptr_reg(R_R0, R_R1, R_R2, R_R3, R_R4, R_R5, R_R6, R_R7, R_R8, R_R9, R_R11, R_R12, R_R14, R_R10 /* TLS*/, R_R13 /* SP*/); #define R_Ricklass R_R8 -#define R_Rmethod R_R9 #define R_Rthread R_R10 #define R_Rexception_obj R_R4 @@ -237,7 +235,6 @@ reg_class R9_regP(R_R9); reg_class R12_regP(R_R12); reg_class Rexception_regP(R_Rexception_obj); reg_class Ricklass_regP(R_Ricklass); -reg_class Rmethod_regP(R_Rmethod); reg_class Rthread_regP(R_Rthread); reg_class IP_regP(R_R12); reg_class SP_regP(R_R13); @@ -442,7 +439,7 @@ int MachCallStaticJavaNode::ret_addr_offset() { int MachCallDynamicJavaNode::ret_addr_offset() { bool far = !cache_reachable(); // mov_oop is always 2 words - return (2 + (far ? 3 : 1)) * NativeInstruction::instruction_size; + return (2 + (far ? 
3 : 1)) * NativeInstruction::instruction_size; } int MachCallRuntimeNode::ret_addr_offset() { diff --git a/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp b/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp index 915eb73730c..6b390c1cda3 100644 --- a/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp +++ b/src/hotspot/cpu/arm/c1_CodeStubs_arm.cpp @@ -38,6 +38,10 @@ #define __ ce->masm()-> +void C1SafepointPollStub::emit_code(LIR_Assembler* ce) { + ShouldNotReachHere(); +} + void CounterOverflowStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); ce->store_parameter(_bci, 0); diff --git a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp index 7b0794afc9f..f9b5fc69a89 100644 --- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp +++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp @@ -283,7 +283,7 @@ int LIR_Assembler::emit_deopt_handler() { } -void LIR_Assembler::return_op(LIR_Opr result) { +void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { // Pop the frame before safepoint polling __ remove_frame(initial_frame_size_in_bytes()); __ read_polling_page(Rtemp, relocInfo::poll_return_type); diff --git a/src/hotspot/cpu/arm/c1_globals_arm.hpp b/src/hotspot/cpu/arm/c1_globals_arm.hpp index 8141870536b..7077a87092c 100644 --- a/src/hotspot/cpu/arm/c1_globals_arm.hpp +++ b/src/hotspot/cpu/arm/c1_globals_arm.hpp @@ -35,8 +35,6 @@ #ifndef COMPILER2 // avoid duplicated definitions, favoring C2 version define_pd_global(bool, BackgroundCompilation, true ); -define_pd_global(bool, UseTLAB, true ); -define_pd_global(bool, ResizeTLAB, true ); define_pd_global(bool, InlineIntrinsics, false); // TODO: ARM define_pd_global(bool, PreferInterpreterNativeStubs, false); define_pd_global(bool, ProfileTraps, false); diff --git a/src/hotspot/cpu/arm/c2_globals_arm.hpp b/src/hotspot/cpu/arm/c2_globals_arm.hpp index 3708e38da2e..525af8b1edc 100644 --- a/src/hotspot/cpu/arm/c2_globals_arm.hpp +++ b/src/hotspot/cpu/arm/c2_globals_arm.hpp @@ -54,8 +54,6 @@ define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K)); // (For _228_jack 16/16 is 2% better than 4/4, 16/4, 32/32, 32/16, or 16/32.) //define_pd_global(intx, OptoLoopAlignment, 16); // = 4*wordSize define_pd_global(intx, RegisterCostAreaRatio, 16000); -define_pd_global(bool, UseTLAB, true); -define_pd_global(bool, ResizeTLAB, true); define_pd_global(intx, LoopUnrollLimit, 60); // Design center runs on 1.3.1 define_pd_global(intx, LoopPercentProfileLimit, 10); define_pd_global(intx, MinJumpTableSize, 16); diff --git a/src/hotspot/cpu/arm/interp_masm_arm.cpp b/src/hotspot/cpu/arm/interp_masm_arm.cpp index 116d2d40b2e..01ff3a5d39c 100644 --- a/src/hotspot/cpu/arm/interp_masm_arm.cpp +++ b/src/hotspot/cpu/arm/interp_masm_arm.cpp @@ -580,7 +580,7 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, if (needs_thread_local_poll) { NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); - ldr(Rtemp, Address(Rthread, Thread::polling_page_offset())); + ldr(Rtemp, Address(Rthread, Thread::polling_word_offset())); tbnz(Rtemp, exact_log2(SafepointMechanism::poll_bit()), safepoint); } @@ -983,7 +983,7 @@ void InterpreterMacroAssembler::lock_object(Register Rlock) { // Unlocks an object. Used in monitorexit bytecode and remove_activation. // -// Argument: R1: Points to BasicObjectLock structure for lock +// Argument: R0: Points to BasicObjectLock structure for lock // Throw an IllegalMonitorException if object is not locked by current thread // Blows volatile registers R0-R3, Rtemp, LR. Calls VM. 
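Note on the interp_masm_arm.cpp change above (and the matching ppc/s390 changes later in this patch): the interpreter's safepoint check now loads Thread::polling_word_offset() and tests SafepointMechanism::poll_bit() in that per-thread word. A rough sketch of the idea, a per-thread word whose low bit is armed when the thread must stop, written with invented names rather than the actual HotSpot types:

    #include <atomic>
    #include <cstdint>

    // Hypothetical stand-in for the per-thread polling word.
    struct ThreadLike {
      std::atomic<uintptr_t> polling_word{0};
    };

    constexpr uintptr_t kPollBit = 1;   // analogous to SafepointMechanism::poll_bit()

    // What the safepoint machinery would do to arm/disarm one thread.
    void arm(ThreadLike& t)    { t.polling_word.fetch_or(kPollBit, std::memory_order_release); }
    void disarm(ThreadLike& t) { t.polling_word.fetch_and(~kPollBit, std::memory_order_release); }

    // The fast path emitted at dispatch/return sites: load the word, test the
    // bit, and branch to the slow path only when it is set.
    bool needs_safepoint(const ThreadLike& t) {
      return (t.polling_word.load(std::memory_order_acquire) & kPollBit) != 0;
    }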
void InterpreterMacroAssembler::unlock_object(Register Rlock) { @@ -996,8 +996,7 @@ void InterpreterMacroAssembler::unlock_object(Register Rlock) { const Register Robj = R2; const Register Rmark = R3; - const Register Rresult = R0; - assert_different_registers(Robj, Rmark, Rlock, R0, Rtemp); + assert_different_registers(Robj, Rmark, Rlock, Rtemp); const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); diff --git a/src/hotspot/cpu/arm/macroAssembler_arm.cpp b/src/hotspot/cpu/arm/macroAssembler_arm.cpp index 14ac1163da0..067ec704376 100644 --- a/src/hotspot/cpu/arm/macroAssembler_arm.cpp +++ b/src/hotspot/cpu/arm/macroAssembler_arm.cpp @@ -85,20 +85,6 @@ void AddressLiteral::set_rspec(relocInfo::relocType rtype) { } } -// Initially added to the Assembler interface as a pure virtual: -// RegisterConstant delayed_value(..) -// for: -// 6812678 macro assembler needs delayed binding of a few constants (for 6655638) -// this was subsequently modified to its present name and return type -RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, - Register tmp, - int offset) { - ShouldNotReachHere(); - return RegisterOrConstant(-1); -} - - - // virtual method calling void MacroAssembler::lookup_virtual_method(Register recv_klass, @@ -1914,7 +1900,7 @@ void MacroAssembler::resolve(DecoratorSet decorators, Register obj) { } void MacroAssembler::safepoint_poll(Register tmp1, Label& slow_path) { - ldr_u32(tmp1, Address(Rthread, Thread::polling_page_offset())); + ldr_u32(tmp1, Address(Rthread, Thread::polling_word_offset())); tst(tmp1, exact_log2(SafepointMechanism::poll_bit())); b(slow_path, eq); } diff --git a/src/hotspot/cpu/arm/macroAssembler_arm.hpp b/src/hotspot/cpu/arm/macroAssembler_arm.hpp index de40c5741a7..a07ca65d99e 100644 --- a/src/hotspot/cpu/arm/macroAssembler_arm.hpp +++ b/src/hotspot/cpu/arm/macroAssembler_arm.hpp @@ -222,14 +222,6 @@ class MacroAssembler: public Assembler { // returning false to preserve all relocation information. inline bool ignore_non_patchable_relocations() { return true; } - // Initially added to the Assembler interface as a pure virtual: - // RegisterConstant delayed_value(..) 
- // for: - // 6812678 macro assembler needs delayed binding of a few constants (for 6655638) - // this was subsequently modified to its present name and return type - virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, Register tmp, int offset); - - void align(int modulus); // Support for VM calls diff --git a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp index 7dd1f21a244..a4216785e4e 100644 --- a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp +++ b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp @@ -33,6 +33,7 @@ #include "memory/resourceArea.hpp" #include "oops/compiledICHolder.hpp" #include "oops/klass.inline.hpp" +#include "prims/methodHandles.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/safepointMechanism.hpp" #include "runtime/vframeArray.hpp" diff --git a/src/hotspot/cpu/arm/templateTable_arm.cpp b/src/hotspot/cpu/arm/templateTable_arm.cpp index a27bd25557c..d0bcfccbb8d 100644 --- a/src/hotspot/cpu/arm/templateTable_arm.cpp +++ b/src/hotspot/cpu/arm/templateTable_arm.cpp @@ -2101,7 +2101,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) { const Address mask(Rcounters, in_bytes(MethodCounters::backedge_mask_offset())); __ increment_mask_and_jump(Address(Rcounters, be_offset), increment, mask, Rcnt, R4_tmp, eq, &backedge_counter_overflow); - } else { + } else { // not TieredCompilation // Increment backedge counter in MethodCounters* __ get_method_counters(Rmethod, Rcounters, dispatch, true /*saveRegs*/, Rdisp, R3_bytecode, @@ -2166,7 +2166,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) { __ dispatch_only(vtos, true); if (UseLoopCounter) { - if (ProfileInterpreter) { + if (ProfileInterpreter && !TieredCompilation) { // Out-of-line code to allocate method data oop. __ bind(profile_method); diff --git a/src/hotspot/cpu/arm/vm_version_arm_32.cpp b/src/hotspot/cpu/arm/vm_version_arm_32.cpp index 5331a20f2fe..e6fd8b98668 100644 --- a/src/hotspot/cpu/arm/vm_version_arm_32.cpp +++ b/src/hotspot/cpu/arm/vm_version_arm_32.cpp @@ -236,6 +236,11 @@ void VM_Version::initialize() { FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); } + if (UseSHA3Intrinsics) { + warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); + } + if (UseCRC32Intrinsics) { if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) warning("CRC32 intrinsics are not available on this CPU"); diff --git a/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp b/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp index b13e18efc12..6902c47d71b 100644 --- a/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_CodeStubs_ppc.cpp @@ -38,6 +38,9 @@ #define __ ce->masm()-> +void C1SafepointPollStub::emit_code(LIR_Assembler* ce) { + ShouldNotReachHere(); +} RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array) : _index(index), _array(array), _throw_index_out_of_bounds_exception(false) { diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp index 54e79f9d4bd..72adb74f4cc 100644 --- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp @@ -1324,7 +1324,7 @@ void LIR_Assembler::reg2mem(LIR_Opr from_reg, LIR_Opr dest, BasicType type, } -void LIR_Assembler::return_op(LIR_Opr result) { +void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { const Register return_pc = R31; // Must survive C-call to enable_stack_reserved_zone(). 
const Register polling_page = R12; diff --git a/src/hotspot/cpu/ppc/c1_globals_ppc.hpp b/src/hotspot/cpu/ppc/c1_globals_ppc.hpp index 60b0005e034..f90c1e8b1d2 100644 --- a/src/hotspot/cpu/ppc/c1_globals_ppc.hpp +++ b/src/hotspot/cpu/ppc/c1_globals_ppc.hpp @@ -43,9 +43,7 @@ define_pd_global(bool, TieredCompilation, false); define_pd_global(intx, CompileThreshold, 1000); define_pd_global(intx, OnStackReplacePercentage, 1400); -define_pd_global(bool, UseTLAB, true); define_pd_global(bool, ProfileInterpreter, false); -define_pd_global(bool, ResizeTLAB, true); define_pd_global(uintx, ReservedCodeCacheSize, 32*M); define_pd_global(uintx, NonProfiledCodeHeapSize, 13*M ); define_pd_global(uintx, ProfiledCodeHeapSize, 14*M ); diff --git a/src/hotspot/cpu/ppc/c2_globals_ppc.hpp b/src/hotspot/cpu/ppc/c2_globals_ppc.hpp index 7a0c311e719..c576ddc95c4 100644 --- a/src/hotspot/cpu/ppc/c2_globals_ppc.hpp +++ b/src/hotspot/cpu/ppc/c2_globals_ppc.hpp @@ -51,8 +51,6 @@ define_pd_global(intx, INTPRESSURE, 26); define_pd_global(intx, InteriorEntryAlignment, 16); define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K)); define_pd_global(intx, RegisterCostAreaRatio, 16000); -define_pd_global(bool, UseTLAB, true); -define_pd_global(bool, ResizeTLAB, true); define_pd_global(intx, LoopUnrollLimit, 60); define_pd_global(intx, LoopPercentProfileLimit, 10); diff --git a/src/hotspot/cpu/ppc/interp_masm_ppc.hpp b/src/hotspot/cpu/ppc/interp_masm_ppc.hpp index d58740d5a74..67b18dc0e31 100644 --- a/src/hotspot/cpu/ppc/interp_masm_ppc.hpp +++ b/src/hotspot/cpu/ppc/interp_masm_ppc.hpp @@ -211,7 +211,7 @@ class InterpreterMacroAssembler: public MacroAssembler { // Object locking void lock_object (Register lock_reg, Register obj_reg); - void unlock_object(Register lock_reg, bool check_for_exceptions = true); + void unlock_object(Register lock_reg); // Interpreter profiling operations void set_method_data_pointer_for_bcp(); diff --git a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp index 555cfd41418..292accb7852 100644 --- a/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp +++ b/src/hotspot/cpu/ppc/interp_masm_ppc_64.cpp @@ -223,7 +223,7 @@ void InterpreterMacroAssembler::dispatch_Lbyte_code(TosState state, Register byt address *sfpt_tbl = Interpreter::safept_table(state); if (table != sfpt_tbl) { Label dispatch; - ld(R0, in_bytes(Thread::polling_page_offset()), R16_thread); + ld(R0, in_bytes(Thread::polling_word_offset()), R16_thread); // Armed page has poll_bit set, if poll bit is cleared just continue. andi_(R0, R0, SafepointMechanism::poll_bit()); beq(CCR0, dispatch); @@ -878,8 +878,7 @@ void InterpreterMacroAssembler::remove_activation(TosState state, // void InterpreterMacroAssembler::lock_object(Register monitor, Register object) { if (UseHeavyMonitors) { - call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), - monitor, /*check_for_exceptions=*/true); + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), monitor); } else { // template code: // @@ -980,8 +979,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) { // None of the above fast optimizations worked so we have to get into the // slow case of monitor enter. 
bind(slow_case); - call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), - monitor, /*check_for_exceptions=*/true); + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), monitor); // } align(32, 12); bind(done); @@ -995,7 +993,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) { // which must be initialized with the object to lock. // // Throw IllegalMonitorException if object is not locked by current thread. -void InterpreterMacroAssembler::unlock_object(Register monitor, bool check_for_exceptions) { +void InterpreterMacroAssembler::unlock_object(Register monitor) { if (UseHeavyMonitors) { call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), monitor); } else { @@ -2401,8 +2399,7 @@ void InterpreterMacroAssembler::notify_method_entry() { lwz(R0, in_bytes(JavaThread::interp_only_mode_offset()), R16_thread); cmpwi(CCR0, R0, 0); beq(CCR0, jvmti_post_done); - call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_entry), - /*check_exceptions=*/true); + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_entry)); bind(jvmti_post_done); } @@ -2437,8 +2434,7 @@ void InterpreterMacroAssembler::notify_method_exit(bool is_native_method, TosSta cmpwi(CCR0, R0, 0); beq(CCR0, jvmti_post_done); if (!is_native_method) { push(state); } // Expose tos to GC. - call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit), - /*check_exceptions=*/check_exceptions); + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit), check_exceptions); if (!is_native_method) { pop(state); } align(32, 12); diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp index 3d3c39cf5d5..ca1c0c24987 100644 --- a/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp +++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.cpp @@ -382,25 +382,6 @@ AddressLiteral MacroAssembler::constant_oop_address(jobject obj) { return AddressLiteral(address(obj), oop_Relocation::spec(oop_index)); } -RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, - Register tmp, int offset) { - intptr_t value = *delayed_value_addr; - if (value != 0) { - return RegisterOrConstant(value + offset); - } - - // Load indirectly to solve generation ordering problem. - // static address, no relocation - int simm16_offset = load_const_optimized(tmp, delayed_value_addr, noreg, true); - ld(tmp, simm16_offset, tmp); // must be aligned ((xa & 3) == 0) - - if (offset != 0) { - addi(tmp, tmp, offset); - } - - return RegisterOrConstant(tmp); -} - #ifndef PRODUCT void MacroAssembler::pd_print_patched_instruction(address branch) { Unimplemented(); // TODO: PPC port @@ -3044,7 +3025,7 @@ void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Registe } void MacroAssembler::safepoint_poll(Label& slow_path, Register temp_reg) { - ld(temp_reg, in_bytes(Thread::polling_page_offset()), R16_thread); + ld(temp_reg, in_bytes(Thread::polling_word_offset()), R16_thread); // Armed page has poll_bit set. andi_(temp_reg, temp_reg, SafepointMechanism::poll_bit()); bne(CCR0, slow_path); diff --git a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp index a8e43cabdc4..1859483c470 100644 --- a/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp +++ b/src/hotspot/cpu/ppc/macroAssembler_ppc.hpp @@ -152,12 +152,6 @@ class MacroAssembler: public Assembler { // Same as load_address. 
inline void set_oop (AddressLiteral obj_addr, Register d); - // Read runtime constant: Issue load if constant not yet established, - // else use real constant. - virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, - Register tmp, - int offset); - // // branch, jump // diff --git a/src/hotspot/cpu/ppc/nativeInst_ppc.cpp b/src/hotspot/cpu/ppc/nativeInst_ppc.cpp index fbe956322a6..1134ed0366b 100644 --- a/src/hotspot/cpu/ppc/nativeInst_ppc.cpp +++ b/src/hotspot/cpu/ppc/nativeInst_ppc.cpp @@ -197,7 +197,11 @@ intptr_t NativeMovConstReg::data() const { CodeBlob* cb = CodeCache::find_blob_unsafe(addr); if (MacroAssembler::is_set_narrow_oop(addr, cb->content_begin())) { narrowOop no = MacroAssembler::get_narrow_oop(addr, cb->content_begin()); - return cast_from_oop(CompressedOops::decode(no)); + // We can reach here during GC with 'no' pointing to new object location + // while 'heap()->is_in' still reports false (e.g. with SerialGC). + // Therefore we use raw decoding. + if (CompressedOops::is_null(no)) return 0; + return cast_from_oop(CompressedOops::decode_raw(no)); } else { assert(MacroAssembler::is_load_const_from_method_toc_at(addr), "must be load_const_from_pool"); diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad index d9c7c350e8e..b8f4f26995f 100644 --- a/src/hotspot/cpu/ppc/ppc.ad +++ b/src/hotspot/cpu/ppc/ppc.ad @@ -535,9 +535,7 @@ alloc_class chunk4 ( // information in this architecture description. // 1) reg_class inline_cache_reg ( as defined in frame section ) -// 2) reg_class compiler_method_reg ( as defined in frame section ) -// 2) reg_class interpreter_method_reg ( as defined in frame section ) -// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) +// 2) reg_class stack_slots( /* one chunk of stack-based "registers" */ ) // // ---------------------------- @@ -2064,103 +2062,88 @@ static int frame_slots_bias(int reg_enc, PhaseRegAlloc* ra_) { } const bool Matcher::match_rule_supported(int opcode) { - if (!has_match_rule(opcode)) - return false; + if (!has_match_rule(opcode)) { + return false; // no match rule present + } - bool ret_value = true; switch (opcode) { - case Op_SqrtD: - return VM_Version::has_fsqrt(); - case Op_CountLeadingZerosI: - case Op_CountLeadingZerosL: - if (!UseCountLeadingZerosInstructionsPPC64) - return false; - break; - case Op_CountTrailingZerosI: - case Op_CountTrailingZerosL: - if (!UseCountLeadingZerosInstructionsPPC64 && - !UseCountTrailingZerosInstructionsPPC64) - return false; - break; - - case Op_PopCountI: - case Op_PopCountL: - return (UsePopCountInstruction && VM_Version::has_popcntw()); - - case Op_StrComp: - return SpecialStringCompareTo; - case Op_StrEquals: - return SpecialStringEquals; - case Op_StrIndexOf: - case Op_StrIndexOfChar: - return SpecialStringIndexOf; - case Op_AddVB: - case Op_AddVS: - case Op_AddVI: - case Op_AddVF: - case Op_AddVD: - case Op_SubVB: - case Op_SubVS: - case Op_SubVI: - case Op_SubVF: - case Op_SubVD: - case Op_MulVS: - case Op_MulVF: - case Op_MulVD: - case Op_DivVF: - case Op_DivVD: - case Op_AbsVF: - case Op_AbsVD: - case Op_NegVF: - case Op_NegVD: - case Op_SqrtVF: - case Op_SqrtVD: - case Op_AddVL: - case Op_SubVL: - case Op_MulVI: - case Op_RoundDoubleModeV: - return SuperwordUseVSX; - case Op_PopCountVI: - return (SuperwordUseVSX && UsePopCountInstruction); - case Op_FmaVF: - case Op_FmaVD: - return (SuperwordUseVSX && UseFMA); - case Op_Digit: - return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isDigit); - case Op_LowerCase: - 
return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isLowerCase); - case Op_UpperCase: - return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isUpperCase); - case Op_Whitespace: - return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isWhitespace); - - case Op_CacheWB: - case Op_CacheWBPreSync: - case Op_CacheWBPostSync: - if (!VM_Version::supports_data_cache_line_flush()) { - ret_value = false; - } - break; + case Op_SqrtD: + return VM_Version::has_fsqrt(); + case Op_CountLeadingZerosI: + case Op_CountLeadingZerosL: + return UseCountLeadingZerosInstructionsPPC64; + case Op_CountTrailingZerosI: + case Op_CountTrailingZerosL: + return (UseCountLeadingZerosInstructionsPPC64 || UseCountTrailingZerosInstructionsPPC64); + case Op_PopCountI: + case Op_PopCountL: + return (UsePopCountInstruction && VM_Version::has_popcntw()); + + case Op_AddVB: + case Op_AddVS: + case Op_AddVI: + case Op_AddVF: + case Op_AddVD: + case Op_SubVB: + case Op_SubVS: + case Op_SubVI: + case Op_SubVF: + case Op_SubVD: + case Op_MulVS: + case Op_MulVF: + case Op_MulVD: + case Op_DivVF: + case Op_DivVD: + case Op_AbsVF: + case Op_AbsVD: + case Op_NegVF: + case Op_NegVD: + case Op_SqrtVF: + case Op_SqrtVD: + case Op_AddVL: + case Op_SubVL: + case Op_MulVI: + case Op_RoundDoubleModeV: + return SuperwordUseVSX; + case Op_PopCountVI: + return (SuperwordUseVSX && UsePopCountInstruction); + case Op_FmaVF: + case Op_FmaVD: + return (SuperwordUseVSX && UseFMA); + + case Op_Digit: + return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isDigit); + case Op_LowerCase: + return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isLowerCase); + case Op_UpperCase: + return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isUpperCase); + case Op_Whitespace: + return vmIntrinsics::is_intrinsic_available(vmIntrinsics::_isWhitespace); + + case Op_CacheWB: + case Op_CacheWBPreSync: + case Op_CacheWBPostSync: + return VM_Version::supports_data_cache_line_flush(); } - return ret_value; // Per default match rules are supported. + return true; // Per default match rules are supported. } const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - - // TODO - // identify extra cases that we might want to provide match rules for - // e.g. Op_ vector nodes and other intrinsics while guarding with vlen - bool ret_value = match_rule_supported(opcode); - // Add rules here. - - return ret_value; // Per default match rules are supported. + if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { + return false; + } + return true; // Per default match rules are supported. } const bool Matcher::has_predicated_vectors(void) { return false; } +bool Matcher::supports_vector_variable_shifts(void) { + return false; // not supported +} + const int Matcher::float_pressure(int default_pressure_threshold) { return default_pressure_threshold; } @@ -2340,10 +2323,6 @@ const bool Matcher::rematerialize_float_constants = false; // Java calling convention forces doubles to be aligned. const bool Matcher::misaligned_doubles_ok = true; -void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) { - Unimplemented(); -} - // Advertise here if the CPU requires explicit rounding operations to implement strictfp mode. const bool Matcher::strict_fp_requires_explicit_rounding = false; @@ -3855,9 +3834,6 @@ frame %{ // Inline Cache Register or method for I2C. inline_cache_reg(R19); // R19_method - // Method Register when calling interpreter. 
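Note on the nativeInst_ppc.cpp hunk earlier in this patch: it swaps CompressedOops::decode for decode_raw because, during GC, the narrow value may already point at an object's new location while the heap-membership check in the checked decode still reports false. The decode arithmetic itself is simple; a sketch with assumed base/shift values rather than the real CompressedOops state:

    #include <cstdint>
    #include <cassert>

    using narrowOop = uint32_t;

    // Assumed encoding parameters; in HotSpot these come from the heap layout.
    static uint8_t* g_base  = reinterpret_cast<uint8_t*>(0x100000000ull);
    static int      g_shift = 3;

    // "Raw" decode: pure arithmetic, no heap-membership check.
    void* decode_raw(narrowOop v) {
      return g_base + (static_cast<uint64_t>(v) << g_shift);
    }

    // Checked decode: additionally verifies the result lies inside the heap,
    // which is the step that can fail while objects are being moved.
    void* decode_checked(narrowOop v, bool (*is_in_heap)(void*)) {
      if (v == 0) return nullptr;
      void* p = decode_raw(v);
      assert(is_in_heap(p) && "decoded oop must be in the heap");
      return p;
    }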
- interpreter_method_reg(R19); // R19_method - // Optional: name the operand used by cisc-spilling to access // [stack_pointer + offset]. cisc_spilling_operand_name(indOffset); @@ -3912,7 +3888,7 @@ frame %{ // The `sig' array is to be updated. sig[j] represents the location // of the j-th argument, either a register or a stack slot. - // Comment taken from i486.ad: + // Comment taken from x86_32.ad: // Body of function which returns an integer array locating // arguments either in registers or in stack slots. Passed an array // of ideal registers called "sig" and a "length" count. Stack-slot @@ -3924,7 +3900,7 @@ frame %{ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); %} - // Comment taken from i486.ad: + // Comment taken from x86_32.ad: // Body of function which returns an integer array locating // arguments either in registers or in stack slots. Passed an array // of ideal registers called "sig" and a "length" count. Stack-slot @@ -4765,20 +4741,6 @@ operand inline_cache_regP(iRegPdst reg) %{ interface(REG_INTER); %} -operand compiler_method_regP(iRegPdst reg) %{ - constraint(ALLOC_IN_RC(rscratch1_bits64_reg)); // compiler_method_reg - match(reg); - format %{ %} - interface(REG_INTER); -%} - -operand interpreter_method_regP(iRegPdst reg) %{ - constraint(ALLOC_IN_RC(r19_bits64_reg)); // interpreter_method_reg - match(reg); - format %{ %} - interface(REG_INTER); -%} - // Operands to remove register moves in unscaled mode. // Match read/write registers with an EncodeP node if neither shift nor add are required. operand iRegP2N(iRegPsrc reg) %{ @@ -6588,6 +6550,23 @@ instruct storeV16(indirect mem, vecX src) %{ ins_pipe(pipe_class_default); %} +// Reinterpret: only one vector size used: either L or X +instruct reinterpretL(iRegLdst dst) %{ + match(Set dst (VectorReinterpret dst)); + ins_cost(0); + format %{ "reinterpret $dst" %} + ins_encode( /*empty*/ ); + ins_pipe(pipe_class_empty); +%} + +instruct reinterpretX(vecX dst) %{ + match(Set dst (VectorReinterpret dst)); + ins_cost(0); + format %{ "reinterpret $dst" %} + ins_encode( /*empty*/ ); + ins_pipe(pipe_class_empty); +%} + // Store Compressed Oop instruct storeN(memory dst, iRegN_P2N src) %{ match(Set dst (StoreN dst src)); @@ -12618,9 +12597,10 @@ instruct indexOfChar_U(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt, flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{ match(Set result (StrIndexOfChar (Binary haystack haycnt) ch)); effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr); + predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U); ins_cost(180); - format %{ "String IndexOfChar $haystack[0..$haycnt], $ch" + format %{ "StringUTF16 IndexOfChar $haystack[0..$haycnt], $ch" " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %} ins_encode %{ __ string_indexof_char($result$$Register, @@ -12631,6 +12611,25 @@ instruct indexOfChar_U(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt, ins_pipe(pipe_class_compare); %} +instruct indexOfChar_L(iRegIdst result, iRegPsrc haystack, iRegIsrc haycnt, + iRegIsrc ch, iRegIdst tmp1, iRegIdst tmp2, + flagsRegCR0 cr0, flagsRegCR1 cr1, regCTR ctr) %{ + match(Set result (StrIndexOfChar (Binary haystack haycnt) ch)); + effect(TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1, KILL ctr); + predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); + ins_cost(180); + + format %{ "StringLatin1 IndexOfChar $haystack[0..$haycnt], $ch" + " -> $result \t// KILL $haycnt, $tmp1, $tmp2, $cr0, $cr1" %} + ins_encode %{ + __ 
string_indexof_char($result$$Register, + $haystack$$Register, $haycnt$$Register, + $ch$$Register, 0 /* this is not used if the character is already in a register */, + $tmp1$$Register, $tmp2$$Register, true /*is_byte*/); + %} + ins_pipe(pipe_class_compare); +%} + instruct indexOf_imm_U(iRegIdst result, iRegPsrc haystack, rscratch1RegI haycnt, iRegPsrc needle, uimmI15 needlecntImm, iRegIdst tmp1, iRegIdst tmp2, iRegIdst tmp3, iRegIdst tmp4, iRegIdst tmp5, diff --git a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp index add61ad738c..e8498ba0ed3 100644 --- a/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp +++ b/src/hotspot/cpu/ppc/sharedRuntime_ppc.cpp @@ -35,6 +35,7 @@ #include "memory/resourceArea.hpp" #include "oops/compiledICHolder.hpp" #include "oops/klass.inline.hpp" +#include "prims/methodHandles.hpp" #include "runtime/safepointMechanism.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/vframeArray.hpp" @@ -1529,156 +1530,6 @@ void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_ty } } -static void save_or_restore_arguments(MacroAssembler* masm, - const int stack_slots, - const int total_in_args, - const int arg_save_area, - OopMap* map, - VMRegPair* in_regs, - BasicType* in_sig_bt) { - // If map is non-NULL then the code should store the values, - // otherwise it should load them. - int slot = arg_save_area; - // Save down double word first. - for (int i = 0; i < total_in_args; i++) { - if (in_regs[i].first()->is_FloatRegister() && in_sig_bt[i] == T_DOUBLE) { - int offset = slot * VMRegImpl::stack_slot_size; - slot += VMRegImpl::slots_per_word; - assert(slot <= stack_slots, "overflow (after DOUBLE stack slot)"); - if (map != NULL) { - __ stfd(in_regs[i].first()->as_FloatRegister(), offset, R1_SP); - } else { - __ lfd(in_regs[i].first()->as_FloatRegister(), offset, R1_SP); - } - } else if (in_regs[i].first()->is_Register() && - (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) { - int offset = slot * VMRegImpl::stack_slot_size; - if (map != NULL) { - __ std(in_regs[i].first()->as_Register(), offset, R1_SP); - if (in_sig_bt[i] == T_ARRAY) { - map->set_oop(VMRegImpl::stack2reg(slot)); - } - } else { - __ ld(in_regs[i].first()->as_Register(), offset, R1_SP); - } - slot += VMRegImpl::slots_per_word; - assert(slot <= stack_slots, "overflow (after LONG/ARRAY stack slot)"); - } - } - // Save or restore single word registers. - for (int i = 0; i < total_in_args; i++) { - if (in_regs[i].first()->is_Register()) { - int offset = slot * VMRegImpl::stack_slot_size; - // Value lives in an input register. Save it on stack. 
- switch (in_sig_bt[i]) { - case T_BOOLEAN: - case T_CHAR: - case T_BYTE: - case T_SHORT: - case T_INT: - if (map != NULL) { - __ stw(in_regs[i].first()->as_Register(), offset, R1_SP); - } else { - __ lwa(in_regs[i].first()->as_Register(), offset, R1_SP); - } - slot++; - assert(slot <= stack_slots, "overflow (after INT or smaller stack slot)"); - break; - case T_ARRAY: - case T_LONG: - // handled above - break; - case T_OBJECT: - default: ShouldNotReachHere(); - } - } else if (in_regs[i].first()->is_FloatRegister()) { - if (in_sig_bt[i] == T_FLOAT) { - int offset = slot * VMRegImpl::stack_slot_size; - slot++; - assert(slot <= stack_slots, "overflow (after FLOAT stack slot)"); - if (map != NULL) { - __ stfs(in_regs[i].first()->as_FloatRegister(), offset, R1_SP); - } else { - __ lfs(in_regs[i].first()->as_FloatRegister(), offset, R1_SP); - } - } - } else if (in_regs[i].first()->is_stack()) { - if (in_sig_bt[i] == T_ARRAY && map != NULL) { - int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); - map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots)); - } - } - } -} - -// Check GCLocker::needs_gc and enter the runtime if it's true. This -// keeps a new JNI critical region from starting until a GC has been -// forced. Save down any oops in registers and describe them in an -// OopMap. -static void check_needs_gc_for_critical_native(MacroAssembler* masm, - const int stack_slots, - const int total_in_args, - const int arg_save_area, - OopMapSet* oop_maps, - VMRegPair* in_regs, - BasicType* in_sig_bt, - Register tmp_reg ) { - __ block_comment("check GCLocker::needs_gc"); - Label cont; - __ lbz(tmp_reg, (RegisterOrConstant)(intptr_t)GCLocker::needs_gc_address()); - __ cmplwi(CCR0, tmp_reg, 0); - __ beq(CCR0, cont); - - // Save down any values that are live in registers and call into the - // runtime to halt for a GC. - OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); - save_or_restore_arguments(masm, stack_slots, total_in_args, - arg_save_area, map, in_regs, in_sig_bt); - - __ mr(R3_ARG1, R16_thread); - __ set_last_Java_frame(R1_SP, noreg); - - __ block_comment("block_for_jni_critical"); - address entry_point = CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical); -#if defined(ABI_ELFv2) - __ call_c(entry_point, relocInfo::runtime_call_type); -#else - __ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, entry_point), relocInfo::runtime_call_type); -#endif - address start = __ pc() - __ offset(), - calls_return_pc = __ last_calls_return_pc(); - oop_maps->add_gc_map(calls_return_pc - start, map); - - __ reset_last_Java_frame(); - - // Reload all the register arguments. - save_or_restore_arguments(masm, stack_slots, total_in_args, - arg_save_area, NULL, in_regs, in_sig_bt); - - __ BIND(cont); - -#ifdef ASSERT - if (StressCriticalJNINatives) { - // Stress register saving. - OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); - save_or_restore_arguments(masm, stack_slots, total_in_args, - arg_save_area, map, in_regs, in_sig_bt); - // Destroy argument registers. 
- for (int i = 0; i < total_in_args; i++) { - if (in_regs[i].first()->is_Register()) { - const Register reg = in_regs[i].first()->as_Register(); - __ neg(reg, reg); - } else if (in_regs[i].first()->is_FloatRegister()) { - __ fneg(in_regs[i].first()->as_FloatRegister(), in_regs[i].first()->as_FloatRegister()); - } - } - - save_or_restore_arguments(masm, stack_slots, total_in_args, - arg_save_area, NULL, in_regs, in_sig_bt); - } -#endif -} - static void move_ptr(MacroAssembler* masm, VMRegPair src, VMRegPair dst, Register r_caller_sp, Register r_temp) { if (src.first()->is_stack()) { if (dst.first()->is_stack()) { @@ -1820,25 +1671,12 @@ static void gen_special_dispatch(MacroAssembler* masm, // Critical native functions are a shorthand for the use of // GetPrimtiveArrayCritical and disallow the use of any other JNI // functions. The wrapper is expected to unpack the arguments before -// passing them to the callee and perform checks before and after the -// native call to ensure that they GCLocker -// lock_critical/unlock_critical semantics are followed. Some other -// parts of JNI setup are skipped like the tear down of the JNI handle +// passing them to the callee. Critical native functions leave the state _in_Java, +// since they cannot stop for GC. +// Some other parts of JNI setup are skipped like the tear down of the JNI handle // block and the check for pending exceptions it's impossible for them // to be thrown. // -// They are roughly structured like this: -// if (GCLocker::needs_gc()) -// SharedRuntime::block_for_jni_critical(); -// tranistion to thread_in_native -// unpack arrray arguments and call native entry point -// check for safepoint in progress -// check if any thread suspend flags are set -// call into JVM and possible unlock the JNI critical -// if a GC was suppressed while in the critical native. -// transition back to thread_in_Java -// return to caller -// nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, const methodHandle& method, int compile_id, @@ -2145,11 +1983,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, OopMapSet *oop_maps = new OopMapSet(); OopMap *oop_map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); - if (is_critical_native) { - check_needs_gc_for_critical_native(masm, stack_slots, total_in_args, oop_handle_slot_offset, - oop_maps, in_regs, in_sig_bt, r_temp_1); - } - // Move arguments from register/stack to register/stack. // -------------------------------------------------------------------------- // @@ -2350,18 +2183,19 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, __ bind(locked); } - - // Publish thread state - // -------------------------------------------------------------------------- - // Use that pc we placed in r_return_pc a while back as the current frame anchor. __ set_last_Java_frame(R1_SP, r_return_pc); - // Transition from _thread_in_Java to _thread_in_native. - __ li(R0, _thread_in_native); - __ release(); - // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size"); - __ stw(R0, thread_(thread_state)); + if (!is_critical_native) { + // Publish thread state + // -------------------------------------------------------------------------- + + // Transition from _thread_in_Java to _thread_in_native. 
+ __ li(R0, _thread_in_native); + __ release(); + // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size"); + __ stw(R0, thread_(thread_state)); + } // The JNI call @@ -2421,6 +2255,22 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, break; } + Label after_transition; + + // If this is a critical native, check for a safepoint or suspend request after the call. + // If a safepoint is needed, transition to native, then to native_trans to handle + // safepoints like the native methods that are not critical natives. + if (is_critical_native) { + Label needs_safepoint; + Register sync_state = r_temp_5; + __ safepoint_poll(needs_safepoint, sync_state); + + Register suspend_flags = r_temp_6; + __ lwz(suspend_flags, thread_(suspend_flags)); + __ cmpwi(CCR1, suspend_flags, 0); + __ beq(CCR1, after_transition); + __ bind(needs_safepoint); + } // Publish thread state // -------------------------------------------------------------------------- @@ -2448,7 +2298,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, // Block, if necessary, before resuming in _thread_in_Java state. // In order for GC to work, don't clear the last_Java_sp until after blocking. - Label after_transition; { Label no_block, sync; @@ -2476,31 +2325,27 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, __ bind(sync); __ isync(); - address entry_point = is_critical_native - ? CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition) - : CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans); + address entry_point = + CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans); save_native_result(masm, ret_type, workspace_slot_offset); __ call_VM_leaf(entry_point, R16_thread); restore_native_result(masm, ret_type, workspace_slot_offset); - if (is_critical_native) { - __ b(after_transition); // No thread state transition here. - } __ bind(no_block); - } - // Publish thread state. - // -------------------------------------------------------------------------- + // Publish thread state. + // -------------------------------------------------------------------------- - // Thread state is thread_in_native_trans. Any safepoint blocking has - // already happened so we can now change state to _thread_in_Java. + // Thread state is thread_in_native_trans. Any safepoint blocking has + // already happened so we can now change state to _thread_in_Java. - // Transition from _thread_in_native_trans to _thread_in_Java. - __ li(R0, _thread_in_Java); - __ lwsync(); // Acquire safepoint and suspend state, release thread state. - // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size"); - __ stw(R0, thread_(thread_state)); - __ bind(after_transition); + // Transition from _thread_in_native_trans to _thread_in_Java. + __ li(R0, _thread_in_Java); + __ lwsync(); // Acquire safepoint and suspend state, release thread state. + // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size"); + __ stw(R0, thread_(thread_state)); + __ bind(after_transition); + } // Reguard any pages if necessary. 
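Note on the sharedRuntime_ppc.cpp wrapper changes above: ordinary (non-critical) natives keep the usual protocol of publishing _thread_in_native with a release barrier before the JNI call and only returning to _thread_in_Java after any required safepoint blocking, while critical natives now skip the pre-call transition entirely. A compressed sketch of that ordering using std::atomic, with made-up state constants rather than HotSpot's JavaThreadState values:

    #include <atomic>

    enum State : int { in_Java = 1, in_native = 2, in_native_trans = 3 };  // assumed values

    struct ThreadStateWord {
      std::atomic<int> state{in_Java};
    };

    void before_native_call(ThreadStateWord& t) {
      // Release: work done in Java (frame anchor, arguments) must be visible
      // before other threads can observe the native state.
      t.state.store(in_native, std::memory_order_release);
    }

    void after_native_call(ThreadStateWord& t, bool safepoint_pending) {
      // The VM uses a stronger fence (lwsync) here so the safepoint/suspend
      // re-check is ordered against this store; seq_cst is the conservative
      // portable analogue in this sketch.
      t.state.store(in_native_trans, std::memory_order_seq_cst);
      if (safepoint_pending) {
        // block until the safepoint is over ...
      }
      t.state.store(in_Java, std::memory_order_release);
    }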
// -------------------------------------------------------------------------- @@ -2657,10 +2502,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, in_ByteSize(lock_offset), oop_maps); - if (is_critical_native) { - nm->set_lazy_critical_native(true); - } - return nm; } diff --git a/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp b/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp index 994f0a93827..525e4f05255 100644 --- a/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp +++ b/src/hotspot/cpu/ppc/templateInterpreterGenerator_ppc.cpp @@ -1549,9 +1549,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { // Handle exceptions if (synchronized) { - // Don't check for exceptions since we're still in the i2n frame. Do that - // manually afterwards. - __ unlock_object(R26_monitor, false); // Can also unlock methods. + __ unlock_object(R26_monitor); // Can also unlock methods. } // Reset active handles after returning from native. @@ -1592,9 +1590,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { BIND(exception_return_sync_check); if (synchronized) { - // Don't check for exceptions since we're still in the i2n frame. Do that - // manually afterwards. - __ unlock_object(R26_monitor, false); // Can also unlock methods. + __ unlock_object(R26_monitor); // Can also unlock methods. } BIND(exception_return_sync_check_already_unlocked); @@ -2105,7 +2101,7 @@ void TemplateInterpreterGenerator::generate_throw_exception() { // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. __ ld(R4_ARG2, 0, R18_locals); __ call_VM(R4_ARG2, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), R4_ARG2, R19_method, R14_bcp); - __ restore_interpreter_state(R11_scratch1, /*bcp_and_mdx_only*/ true); + __ cmpdi(CCR0, R4_ARG2, 0); __ beq(CCR0, L_done); __ std(R4_ARG2, wordSize, R15_esp); diff --git a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp index e9ccfc7c481..cc341d83072 100644 --- a/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp +++ b/src/hotspot/cpu/ppc/templateTable_ppc_64.cpp @@ -2173,7 +2173,7 @@ void TemplateTable::_return(TosState state) { if (_desc->bytecode() != Bytecodes::_return_register_finalizer) { Label no_safepoint; - __ ld(R11_scratch1, in_bytes(Thread::polling_page_offset()), R16_thread); + __ ld(R11_scratch1, in_bytes(Thread::polling_word_offset()), R16_thread); __ andi_(R11_scratch1, R11_scratch1, SafepointMechanism::poll_bit()); __ beq(CCR0, no_safepoint); __ push(state); diff --git a/src/hotspot/cpu/ppc/vm_version_ppc.cpp b/src/hotspot/cpu/ppc/vm_version_ppc.cpp index fd62cb5813a..f64999d108a 100644 --- a/src/hotspot/cpu/ppc/vm_version_ppc.cpp +++ b/src/hotspot/cpu/ppc/vm_version_ppc.cpp @@ -331,6 +331,11 @@ void VM_Version::initialize() { FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); } + if (UseSHA3Intrinsics) { + warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); + } + if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) { FLAG_SET_DEFAULT(UseSHA, false); } diff --git a/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp b/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp index 367d2a43af5..329c163f313 100644 --- a/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp +++ b/src/hotspot/cpu/s390/c1_CodeStubs_s390.cpp @@ -41,6 +41,10 @@ #undef CHECK_BAILOUT #define 
CHECK_BAILOUT() { if (ce->compilation()->bailed_out()) return; } +void C1SafepointPollStub::emit_code(LIR_Assembler* ce) { + ShouldNotReachHere(); +} + RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array) : _index(index), _array(array), _throw_index_out_of_bounds_exception(false) { assert(info != NULL, "must have info"); diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp index 24c8178f1dc..4c7dc79e5e7 100644 --- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp @@ -1207,7 +1207,7 @@ void LIR_Assembler::reg2mem(LIR_Opr from, LIR_Opr dest_opr, BasicType type, } -void LIR_Assembler::return_op(LIR_Opr result) { +void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { assert(result->is_illegal() || (result->is_single_cpu() && result->as_register() == Z_R2) || (result->is_double_cpu() && result->as_register_lo() == Z_R2) || diff --git a/src/hotspot/cpu/s390/c1_globals_s390.hpp b/src/hotspot/cpu/s390/c1_globals_s390.hpp index 99e26e5e3f8..7fcb1ee0617 100644 --- a/src/hotspot/cpu/s390/c1_globals_s390.hpp +++ b/src/hotspot/cpu/s390/c1_globals_s390.hpp @@ -43,9 +43,7 @@ define_pd_global(bool, TieredCompilation, false); define_pd_global(intx, CompileThreshold, 1000); define_pd_global(intx, OnStackReplacePercentage, 1400); -define_pd_global(bool, UseTLAB, true); define_pd_global(bool, ProfileInterpreter, false); -define_pd_global(bool, ResizeTLAB, true); define_pd_global(uintx, ReservedCodeCacheSize, 32*M); define_pd_global(uintx, NonProfiledCodeHeapSize, 13*M); define_pd_global(uintx, ProfiledCodeHeapSize, 14*M); diff --git a/src/hotspot/cpu/s390/c2_globals_s390.hpp b/src/hotspot/cpu/s390/c2_globals_s390.hpp index 2f44fa73a2e..64d5585d616 100644 --- a/src/hotspot/cpu/s390/c2_globals_s390.hpp +++ b/src/hotspot/cpu/s390/c2_globals_s390.hpp @@ -51,8 +51,6 @@ define_pd_global(intx, INTPRESSURE, 10); // Medium size registe define_pd_global(intx, InteriorEntryAlignment, 2); define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K)); define_pd_global(intx, RegisterCostAreaRatio, 12000); -define_pd_global(bool, UseTLAB, true); -define_pd_global(bool, ResizeTLAB, true); define_pd_global(intx, LoopUnrollLimit, 60); define_pd_global(intx, LoopPercentProfileLimit, 10); define_pd_global(intx, MinJumpTableSize, 18); diff --git a/src/hotspot/cpu/s390/interp_masm_s390.cpp b/src/hotspot/cpu/s390/interp_masm_s390.cpp index d612d528c51..4f44359b04d 100644 --- a/src/hotspot/cpu/s390/interp_masm_s390.cpp +++ b/src/hotspot/cpu/s390/interp_masm_s390.cpp @@ -121,7 +121,7 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, address* table, bo address *sfpt_tbl = Interpreter::safept_table(state); if (table != sfpt_tbl) { Label dispatch; - const Address poll_byte_addr(Z_thread, in_bytes(Thread::polling_page_offset()) + 7 /* Big Endian */); + const Address poll_byte_addr(Z_thread, in_bytes(Thread::polling_word_offset()) + 7 /* Big Endian */); // Armed page has poll_bit set, if poll bit is cleared just continue. 
z_tm(poll_byte_addr, SafepointMechanism::poll_bit()); z_braz(dispatch); @@ -969,8 +969,7 @@ void InterpreterMacroAssembler::remove_activation(TosState state, void InterpreterMacroAssembler::lock_object(Register monitor, Register object) { if (UseHeavyMonitors) { - call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), - monitor, /*check_for_exceptions=*/false); + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), monitor); return; } @@ -1061,9 +1060,7 @@ void InterpreterMacroAssembler::lock_object(Register monitor, Register object) { // None of the above fast optimizations worked so we have to get into the // slow case of monitor enter. bind(slow_case); - - call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), - monitor, /*check_for_exceptions=*/false); + call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), monitor); // } diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.cpp b/src/hotspot/cpu/s390/macroAssembler_s390.cpp index c71a15daa7c..d7c95ee96ee 100644 --- a/src/hotspot/cpu/s390/macroAssembler_s390.cpp +++ b/src/hotspot/cpu/s390/macroAssembler_s390.cpp @@ -1810,34 +1810,6 @@ void MacroAssembler::c2bool(Register r, Register t) { z_srl(r, 31); // Yields 0 if r was 0, 1 otherwise. } -RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, - Register tmp, - int offset) { - intptr_t value = *delayed_value_addr; - if (value != 0) { - return RegisterOrConstant(value + offset); - } - - BLOCK_COMMENT("delayed_value {"); - // Load indirectly to solve generation ordering problem. - load_absolute_address(tmp, (address) delayed_value_addr); // tmp = a; - z_lg(tmp, 0, tmp); // tmp = *tmp; - -#ifdef ASSERT - NearLabel L; - compare64_and_branch(tmp, (intptr_t)0L, Assembler::bcondNotEqual, L); - z_illtrap(); - bind(L); -#endif - - if (offset != 0) { - z_agfi(tmp, offset); // tmp = tmp + offset; - } - - BLOCK_COMMENT("} delayed_value"); - return RegisterOrConstant(tmp); -} - // Patch instruction `inst' at offset `inst_pos' to refer to `dest_pos' // and return the resulting instruction. // Dest_pos and inst_pos are 32 bit only. These parms can only designate @@ -2680,7 +2652,7 @@ uint MacroAssembler::get_poll_register(address instr_loc) { } void MacroAssembler::safepoint_poll(Label& slow_path, Register temp_reg) { - const Address poll_byte_addr(Z_thread, in_bytes(Thread::polling_page_offset()) + 7 /* Big Endian */); + const Address poll_byte_addr(Z_thread, in_bytes(Thread::polling_word_offset()) + 7 /* Big Endian */); // Armed page has poll_bit set. z_tm(poll_byte_addr, SafepointMechanism::poll_bit()); z_brnaz(slow_path); diff --git a/src/hotspot/cpu/s390/macroAssembler_s390.hpp b/src/hotspot/cpu/s390/macroAssembler_s390.hpp index 41294b0fe87..113a1a3db2a 100644 --- a/src/hotspot/cpu/s390/macroAssembler_s390.hpp +++ b/src/hotspot/cpu/s390/macroAssembler_s390.hpp @@ -350,9 +350,6 @@ class MacroAssembler: public Assembler { // Uses constant_metadata_address. inline bool set_metadata_constant(Metadata* md, Register d); - virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, - Register tmp, - int offset); // // branch, jump // diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad index bb98182d781..de1565194ed 100644 --- a/src/hotspot/cpu/s390/s390.ad +++ b/src/hotspot/cpu/s390/s390.ad @@ -278,9 +278,7 @@ alloc_class chunk2( // information in this architecture description. 
// 1) reg_class inline_cache_reg (as defined in frame section) -// 2) reg_class compiler_method_reg (as defined in frame section) -// 2) reg_class interpreter_method_reg (as defined in frame section) -// 3) reg_class stack_slots(/* one chunk of stack-based "registers" */) +// 2) reg_class stack_slots(/* one chunk of stack-based "registers" */) // Integer Register Classes reg_class z_int_reg( @@ -1513,66 +1511,38 @@ static Register reg_to_register_object(int register_encoding) { } const bool Matcher::match_rule_supported(int opcode) { - if (!has_match_rule(opcode)) return false; + if (!has_match_rule(opcode)) { + return false; // no match rule present + } switch (opcode) { - case Op_CountLeadingZerosI: - case Op_CountLeadingZerosL: - case Op_CountTrailingZerosI: - case Op_CountTrailingZerosL: - // Implementation requires FLOGR instruction, which is available since z9. - return true; - case Op_ReverseBytesI: case Op_ReverseBytesL: return UseByteReverseInstruction; - - // PopCount supported by H/W from z/Architecture G5 (z196) on. case Op_PopCountI: case Op_PopCountL: - return UsePopCountInstruction && VM_Version::has_PopCount(); - - case Op_StrComp: - return SpecialStringCompareTo; - case Op_StrEquals: - return SpecialStringEquals; - case Op_StrIndexOf: - case Op_StrIndexOfChar: - return SpecialStringIndexOf; - - case Op_GetAndAddI: - case Op_GetAndAddL: - return true; - // return VM_Version::has_AtomicMemWithImmALUOps(); - case Op_GetAndSetI: - case Op_GetAndSetL: - case Op_GetAndSetP: - case Op_GetAndSetN: - return true; // General CAS implementation, always available. - - default: - return true; // Per default match rules are supported. - // BUT: make sure match rule is not disabled by a false predicate! + // PopCount supported by H/W from z/Architecture G5 (z196) on. + return (UsePopCountInstruction && VM_Version::has_PopCount()); } - return true; // Per default match rules are supported. - // BUT: make sure match rule is not disabled by a false predicate! + return true; // Per default match rules are supported. } const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { - // TODO - // Identify extra cases that we might want to provide match rules for - // e.g. Op_ vector nodes and other intrinsics while guarding with vlen. - bool ret_value = match_rule_supported(opcode); - // Add rules here. - - return ret_value; // Per default match rules are supported. + if (!match_rule_supported(opcode) || !vector_size_supported(bt, vlen)) { + return false; + } + return true; // Per default match rules are supported. } const bool Matcher::has_predicated_vectors(void) { return false; } +bool Matcher::supports_vector_variable_shifts(void) { + return false; // not supported +} + const int Matcher::float_pressure(int default_pressure_threshold) { return default_pressure_threshold; } @@ -2462,12 +2432,6 @@ frame %{ // Tos is loaded in run_compiled_code to Z_ARG5=Z_R6. // interpreter_arg_ptr_reg(Z_R6); - // Temporary in compiled entry-points - // compiler_method_reg(Z_R1);//Z_R1_scratch - - // Method Register when calling interpreter - interpreter_method_reg(Z_R9);//Z_method - // Optional: name the operand used by cisc-spilling to access // [stack_pointer + offset]. 
cisc_spilling_operand_name(indOffset12); @@ -3531,20 +3495,6 @@ operand inline_cache_regP(iRegP reg) %{ interface(REG_INTER); %} -operand compiler_method_regP(iRegP reg) %{ - constraint(ALLOC_IN_RC(z_r1_RegP)); // compiler_method_reg - match(reg); - format %{ %} - interface(REG_INTER); -%} - -operand interpreter_method_regP(iRegP reg) %{ - constraint(ALLOC_IN_RC(z_r9_regP)); // interpreter_method_reg - match(reg); - format %{ %} - interface(REG_INTER); -%} - // Operands to remove register moves in unscaled mode. // Match read/write registers with an EncodeP node if neither shift nor add are required. operand iRegP2N(iRegP reg) %{ @@ -10172,8 +10122,9 @@ instruct string_compareUL(iRegP str1, iRegP str2, rarg2RegI cnt1, rarg5RegI cnt2 instruct indexOfChar_U(iRegP haystack, iRegI haycnt, iRegI ch, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{ match(Set result (StrIndexOfChar (Binary haystack haycnt) ch)); effect(TEMP_DEF result, TEMP evenReg, TEMP oddReg, KILL cr); // R0, R1 are killed, too. + predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::U); ins_cost(200); - format %{ "String IndexOfChar [0..$haycnt]($haystack), $ch -> $result" %} + format %{ "StringUTF16 IndexOfChar [0..$haycnt]($haystack), $ch -> $result" %} ins_encode %{ __ string_indexof_char($result$$Register, $haystack$$Register, $haycnt$$Register, @@ -10183,6 +10134,21 @@ instruct indexOfChar_U(iRegP haystack, iRegI haycnt, iRegI ch, iRegI result, rod ins_pipe(pipe_class_dummy); %} +instruct indexOfChar_L(iRegP haystack, iRegI haycnt, iRegI ch, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{ + match(Set result (StrIndexOfChar (Binary haystack haycnt) ch)); + effect(TEMP_DEF result, TEMP evenReg, TEMP oddReg, KILL cr); // R0, R1 are killed, too. + predicate(((StrIndexOfCharNode*)n)->encoding() == StrIntrinsicNode::L); + ins_cost(200); + format %{ "StringLatin1 IndexOfChar [0..$haycnt]($haystack), $ch -> $result" %} + ins_encode %{ + __ string_indexof_char($result$$Register, + $haystack$$Register, $haycnt$$Register, + $ch$$Register, 0 /* unused, ch is in register */, + $oddReg$$Register, $evenReg$$Register, true /*is_byte*/); + %} + ins_pipe(pipe_class_dummy); +%} + instruct indexOf_imm1_U(iRegP haystack, iRegI haycnt, immP needle, immI_1 needlecnt, iRegI result, roddRegL oddReg, revenRegL evenReg, flagsReg cr) %{ match(Set result (StrIndexOf (Binary haystack haycnt) (Binary needle needlecnt))); effect(TEMP_DEF result, TEMP evenReg, TEMP oddReg, KILL cr); // R0, R1 are killed, too. @@ -10809,7 +10775,7 @@ instruct Repl2F_imm0(iRegL dst, immFp0 src) %{ ins_pipe(pipe_class_dummy); %} -// Store +// Load/Store vector // Store Aligned Packed Byte register to memory (8 Bytes). 
instruct storeA8B(memory mem, iRegL src) %{ @@ -10823,8 +10789,6 @@ instruct storeA8B(memory mem, iRegL src) %{ ins_pipe(pipe_class_dummy); %} -// Load - instruct loadV8(iRegL dst, memory mem) %{ match(Set dst (LoadVector mem)); predicate(n->as_LoadVector()->memory_size() == 8); @@ -10836,6 +10800,15 @@ instruct loadV8(iRegL dst, memory mem) %{ ins_pipe(pipe_class_dummy); %} +// Reinterpret: only one vector size used +instruct reinterpret(iRegL dst) %{ + match(Set dst (VectorReinterpret dst)); + ins_cost(0); + format %{ "reinterpret $dst" %} + ins_encode( /*empty*/ ); + ins_pipe(pipe_class_dummy); +%} + //----------POPULATION COUNT RULES-------------------------------------------- // Byte reverse diff --git a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp index 48ac8ae443c..a0c46b182ff 100644 --- a/src/hotspot/cpu/s390/sharedRuntime_s390.cpp +++ b/src/hotspot/cpu/s390/sharedRuntime_s390.cpp @@ -35,6 +35,7 @@ #include "nativeInst_s390.hpp" #include "oops/compiledICHolder.hpp" #include "oops/klass.inline.hpp" +#include "prims/methodHandles.hpp" #include "registerSaver_s390.hpp" #include "runtime/safepointMechanism.hpp" #include "runtime/sharedRuntime.hpp" @@ -1284,163 +1285,6 @@ static void move32_64(MacroAssembler *masm, } } -static void save_or_restore_arguments(MacroAssembler *masm, - const int stack_slots, - const int total_in_args, - const int arg_save_area, - OopMap *map, - VMRegPair *in_regs, - BasicType *in_sig_bt) { - - // If map is non-NULL then the code should store the values, - // otherwise it should load them. - int slot = arg_save_area; - // Handle double words first. - for (int i = 0; i < total_in_args; i++) { - if (in_regs[i].first()->is_FloatRegister() && in_sig_bt[i] == T_DOUBLE) { - int offset = slot * VMRegImpl::stack_slot_size; - slot += VMRegImpl::slots_per_word; - assert(slot <= stack_slots, "overflow (after DOUBLE stack slot)"); - const FloatRegister freg = in_regs[i].first()->as_FloatRegister(); - Address stackaddr(Z_SP, offset); - if (map != NULL) { - __ freg2mem_opt(freg, stackaddr); - } else { - __ mem2freg_opt(freg, stackaddr); - } - } else if (in_regs[i].first()->is_Register() && - (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) { - int offset = slot * VMRegImpl::stack_slot_size; - const Register reg = in_regs[i].first()->as_Register(); - if (map != NULL) { - __ z_stg(reg, offset, Z_SP); - if (in_sig_bt[i] == T_ARRAY) { - map->set_oop(VMRegImpl::stack2reg(slot)); - } - } else { - __ z_lg(reg, offset, Z_SP); - } - slot += VMRegImpl::slots_per_word; - assert(slot <= stack_slots, "overflow (after LONG/ARRAY stack slot)"); - } - } - - // Save or restore single word registers. - for (int i = 0; i < total_in_args; i++) { - if (in_regs[i].first()->is_Register()) { - int offset = slot * VMRegImpl::stack_slot_size; - // Value lives in an input register. Save it on stack. 
- switch (in_sig_bt[i]) { - case T_BOOLEAN: - case T_CHAR: - case T_BYTE: - case T_SHORT: - case T_INT: { - const Register reg = in_regs[i].first()->as_Register(); - Address stackaddr(Z_SP, offset); - if (map != NULL) { - __ z_st(reg, stackaddr); - } else { - __ z_lgf(reg, stackaddr); - } - slot++; - assert(slot <= stack_slots, "overflow (after INT or smaller stack slot)"); - break; - } - case T_ARRAY: - case T_LONG: - // handled above - break; - case T_OBJECT: - default: ShouldNotReachHere(); - } - } else if (in_regs[i].first()->is_FloatRegister()) { - if (in_sig_bt[i] == T_FLOAT) { - int offset = slot * VMRegImpl::stack_slot_size; - slot++; - assert(slot <= stack_slots, "overflow (after FLOAT stack slot)"); - const FloatRegister freg = in_regs[i].first()->as_FloatRegister(); - Address stackaddr(Z_SP, offset); - if (map != NULL) { - __ freg2mem_opt(freg, stackaddr, false); - } else { - __ mem2freg_opt(freg, stackaddr, false); - } - } - } else if (in_regs[i].first()->is_stack() && - in_sig_bt[i] == T_ARRAY && map != NULL) { - int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); - map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots)); - } - } -} - -// Check GCLocker::needs_gc and enter the runtime if it's true. This -// keeps a new JNI critical region from starting until a GC has been -// forced. Save down any oops in registers and describe them in an OopMap. -static void check_needs_gc_for_critical_native(MacroAssembler *masm, - const int stack_slots, - const int total_in_args, - const int arg_save_area, - OopMapSet *oop_maps, - VMRegPair *in_regs, - BasicType *in_sig_bt) { - __ block_comment("check GCLocker::needs_gc"); - Label cont; - - // Check GCLocker::_needs_gc flag. - __ load_const_optimized(Z_R1_scratch, (long) GCLocker::needs_gc_address()); - __ z_cli(0, Z_R1_scratch, 0); - __ z_bre(cont); - - // Save down any values that are live in registers and call into the - // runtime to halt for a GC. - OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); - - save_or_restore_arguments(masm, stack_slots, total_in_args, - arg_save_area, map, in_regs, in_sig_bt); - address the_pc = __ pc(); - __ set_last_Java_frame(Z_SP, noreg); - - __ block_comment("block_for_jni_critical"); - __ z_lgr(Z_ARG1, Z_thread); - - address entry_point = CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical); - __ call_c(entry_point); - oop_maps->add_gc_map(__ offset(), map); - - __ reset_last_Java_frame(); - - // Reload all the register arguments. - save_or_restore_arguments(masm, stack_slots, total_in_args, - arg_save_area, NULL, in_regs, in_sig_bt); - - __ bind(cont); - - if (StressCriticalJNINatives) { - // Stress register saving - OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); - save_or_restore_arguments(masm, stack_slots, total_in_args, - arg_save_area, map, in_regs, in_sig_bt); - - // Destroy argument registers. - for (int i = 0; i < total_in_args; i++) { - if (in_regs[i].first()->is_Register()) { - // Don't set CC. 
- __ clear_reg(in_regs[i].first()->as_Register(), true, false); - } else { - if (in_regs[i].first()->is_FloatRegister()) { - FloatRegister fr = in_regs[i].first()->as_FloatRegister(); - __ z_lcdbr(fr, fr); - } - } - } - - save_or_restore_arguments(masm, stack_slots, total_in_args, - arg_save_area, NULL, in_regs, in_sig_bt); - } -} - static void move_ptr(MacroAssembler *masm, VMRegPair src, VMRegPair dst, @@ -1857,12 +1701,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, OopMapSet *oop_maps = new OopMapSet(); OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); - if (is_critical_native) { - check_needs_gc_for_critical_native(masm, stack_slots, total_in_args, - oop_handle_slot_offset, oop_maps, in_regs, in_sig_bt); - } - - ////////////////////////////////////////////////////////////////////// // // The Grand Shuffle @@ -2091,9 +1929,10 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, // Use that pc we placed in Z_R10 a while back as the current frame anchor. __ set_last_Java_frame(Z_SP, Z_R10); - // Transition from _thread_in_Java to _thread_in_native. - __ set_thread_state(_thread_in_native); - + if (!is_critical_native) { + // Transition from _thread_in_Java to _thread_in_native. + __ set_thread_state(_thread_in_native); + } ////////////////////////////////////////////////////////////////////// // This is the JNI call. @@ -2139,6 +1978,19 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, break; } + Label after_transition; + + // If this is a critical native, check for a safepoint or suspend request after the call. + // If a safepoint is needed, transition to native, then to native_trans to handle + // safepoints like the native methods that are not critical natives. + if (is_critical_native) { + Label needs_safepoint; + // Does this need to save_native_result and fences? + __ safepoint_poll(needs_safepoint, Z_R1); + __ load_and_test_int(Z_R0, Address(Z_thread, JavaThread::suspend_flags_offset())); + __ z_bre(after_transition); + __ bind(needs_safepoint); + } // Switch thread to "native transition" state before reading the synchronization state. // This additional state is necessary because reading and testing the synchronization @@ -2158,7 +2010,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, // Block, if necessary, before resuming in _thread_in_Java state. // In order for GC to work, don't clear the last_Java_sp until after blocking. //-------------------------------------------------------------------- - Label after_transition; { Label no_block, sync; @@ -2180,15 +2031,10 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, __ bind(sync); __ z_acquire(); - address entry_point = is_critical_native ? CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition) - : CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans); + address entry_point = CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans); __ call_VM_leaf(entry_point, Z_thread); - if (is_critical_native) { - restore_native_result(masm, ret_type, workspace_slot_offset); - __ z_bru(after_transition); // No thread state transition here. 
- } __ bind(no_block); restore_native_result(masm, ret_type, workspace_slot_offset); } @@ -2201,7 +2047,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, __ set_thread_state(_thread_in_Java); __ bind(after_transition); - //-------------------------------------------------------------------- // Reguard any pages if necessary. // Protect native result from being destroyed. @@ -2384,10 +2229,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, in_ByteSize(lock_offset), oop_maps); - if (is_critical_native) { - nm->set_lazy_critical_native(true); - } - return nm; } diff --git a/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp b/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp index 5d8b11332d8..e1862f11c49 100644 --- a/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp +++ b/src/hotspot/cpu/s390/templateInterpreterGenerator_s390.cpp @@ -856,7 +856,7 @@ void TemplateInterpreterGenerator::generate_stack_overflow_check(Register frame_ // Compute the beginning of the protected zone minus the requested frame size. __ z_sgr(tmp1, tmp2); - __ add2reg(tmp1, JavaThread::stack_guard_zone_size()); + __ add2reg(tmp1, StackOverflow::stack_guard_zone_size()); // Add in the size of the frame (which is the same as subtracting it from the // SP, which would take another register. diff --git a/src/hotspot/cpu/s390/templateTable_s390.cpp b/src/hotspot/cpu/s390/templateTable_s390.cpp index 9c372db9e78..7a4cf869c30 100644 --- a/src/hotspot/cpu/s390/templateTable_s390.cpp +++ b/src/hotspot/cpu/s390/templateTable_s390.cpp @@ -2007,7 +2007,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) { // Out-of-line code runtime calls. if (UseLoopCounter) { - if (ProfileInterpreter) { + if (ProfileInterpreter && !TieredCompilation) { // Out-of-line code to allocate method data oop. __ bind(profile_method); @@ -2377,7 +2377,7 @@ void TemplateTable::_return(TosState state) { if (_desc->bytecode() != Bytecodes::_return_register_finalizer) { Label no_safepoint; - const Address poll_byte_addr(Z_thread, in_bytes(Thread::polling_page_offset()) + 7 /* Big Endian */); + const Address poll_byte_addr(Z_thread, in_bytes(Thread::polling_word_offset()) + 7 /* Big Endian */); __ z_tm(poll_byte_addr, SafepointMechanism::poll_bit()); __ z_braz(no_safepoint); __ push(state); diff --git a/src/hotspot/cpu/s390/vm_version_s390.cpp b/src/hotspot/cpu/s390/vm_version_s390.cpp index 3460a767fac..0a769c9401f 100644 --- a/src/hotspot/cpu/s390/vm_version_s390.cpp +++ b/src/hotspot/cpu/s390/vm_version_s390.cpp @@ -221,6 +221,11 @@ void VM_Version::initialize() { FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); } + if (UseSHA3Intrinsics) { + warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); + FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); + } + if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) { FLAG_SET_DEFAULT(UseSHA, false); } @@ -831,7 +836,7 @@ void VM_Version::determine_features() { code_end-code, cbuf_size, cbuf_size-(code_end-code)); // Use existing decode function. This enables the [MachCode] format which is needed to DecodeErrorFile. - Disassembler::decode(&cbuf, code, code_end, tty); + Disassembler::decode(code, code_end, tty); } // Prepare for detection code execution and clear work buffer. 
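The s390 shared-runtime change above removes the GCLocker-based pre-call blocking for critical natives and instead keeps such methods in _thread_in_Java across the call, polling for a pending safepoint or suspend request only after the native call returns; if neither is pending, the wrapper skips the thread-state transition entirely. Below is a minimal, self-contained C++ model of that return path, illustrative only: the flags and the return_from_native helper are stand-ins, not HotSpot APIs, and the real logic is emitted as MacroAssembler code in SharedRuntime::generate_native_wrapper.

    // Illustrative model: stand-in flags instead of the real per-thread
    // safepoint poll word and JavaThread::suspend_flags.
    #include <iostream>

    enum ThreadState { in_Java, in_native_trans };

    bool safepoint_pending = false;   // stand-in for the safepoint poll
    bool suspend_requested = false;   // stand-in for suspend_flags != 0
    ThreadState state = in_Java;

    void return_from_native(bool is_critical_native) {
      if (is_critical_native && !safepoint_pending && !suspend_requested) {
        return;  // fast path: no transition, jump straight to after_transition
      }
      // Slow path, shared with ordinary native methods.
      state = in_native_trans;
      if (safepoint_pending || suspend_requested) {
        // the real code calls check_special_condition_for_native_trans() here
      }
      state = in_Java;
    }

    int main() {
      return_from_native(/*is_critical_native=*/true);
      std::cout << (state == in_Java) << '\n';  // prints 1
    }
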
diff --git a/src/hotspot/cpu/x86/assembler_x86.cpp b/src/hotspot/cpu/x86/assembler_x86.cpp index ef04d33c7f4..3933bac000f 100644 --- a/src/hotspot/cpu/x86/assembler_x86.cpp +++ b/src/hotspot/cpu/x86/assembler_x86.cpp @@ -984,6 +984,8 @@ address Assembler::locate_operand(address inst, WhichOperand which) { case 0x61: // pcmpestri r, r/a, #8 case 0x70: // pshufd r, r/a, #8 case 0x73: // psrldq r, #8 + case 0x1f: // evpcmpd/evpcmpq + case 0x3f: // evpcmpb/evpcmpw tail_size = 1; // the imm8 break; default: @@ -1209,6 +1211,11 @@ void Assembler::addb(Address dst, int imm8) { emit_int8(imm8); } +void Assembler::addw(Register dst, Register src) { + (void)prefix_and_encode(dst->encoding(), src->encoding()); + emit_arith(0x03, 0xC0, dst, src); +} + void Assembler::addw(Address dst, int imm16) { InstructionMark im(this); emit_int8(0x66); @@ -1415,6 +1422,11 @@ void Assembler::vaesenclast(XMMRegister dst, XMMRegister nds, XMMRegister src, i emit_int16((unsigned char)0xDD, (0xC0 | encode)); } +void Assembler::andw(Register dst, Register src) { + (void)prefix_and_encode(dst->encoding(), src->encoding()); + emit_arith(0x23, 0xC0, dst, src); +} + void Assembler::andl(Address dst, int32_t imm32) { InstructionMark im(this); prefix(dst); @@ -1783,6 +1795,13 @@ void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) { emit_int16((unsigned char)0xE6, (0xC0 | encode)); } +void Assembler::vcvtdq2pd(XMMRegister dst, XMMRegister src, int vector_len) { + assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xE6, (0xC0 | encode)); +} + void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); @@ -1790,6 +1809,13 @@ void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) { emit_int16(0x5B, (0xC0 | encode)); } +void Assembler::vcvtdq2ps(XMMRegister dst, XMMRegister src, int vector_len) { + assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int16(0x5B, (0xC0 | encode)); +} + void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); @@ -1912,18 +1938,18 @@ void Assembler::pabsd(XMMRegister dst, XMMRegister src) { } void Assembler::vpabsb(XMMRegister dst, XMMRegister src, int vector_len) { - assert(vector_len == AVX_128bit? VM_Version::supports_avx() : - vector_len == AVX_256bit? VM_Version::supports_avx2() : - vector_len == AVX_512bit? VM_Version::supports_avx512bw() : 0, ""); + assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : + vector_len == AVX_256bit ? VM_Version::supports_avx2() : + vector_len == AVX_512bit ? 
VM_Version::supports_avx512bw() : false, "not supported"); InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int16(0x1C, (0xC0 | encode)); } void Assembler::vpabsw(XMMRegister dst, XMMRegister src, int vector_len) { - assert(vector_len == AVX_128bit? VM_Version::supports_avx() : - vector_len == AVX_256bit? VM_Version::supports_avx2() : - vector_len == AVX_512bit? VM_Version::supports_avx512bw() : 0, ""); + assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : + vector_len == AVX_256bit ? VM_Version::supports_avx2() : + vector_len == AVX_512bit ? VM_Version::supports_avx512bw() : false, ""); InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int16(0x1D, (0xC0 | encode)); @@ -1946,6 +1972,85 @@ void Assembler::evpabsq(XMMRegister dst, XMMRegister src, int vector_len) { emit_int16(0x1F, (0xC0 | encode)); } +void Assembler::vcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len) { + assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int16(0x5A, (0xC0 | encode)); +} + +void Assembler::vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len) { + assert(vector_len <= AVX_256bit ? 
VM_Version::supports_avx() : VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + attributes.set_rex_vex_w_reverted(); + emit_int16(0x5A, (0xC0 | encode)); +} + +void Assembler::evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len) { + assert(UseAVX > 2 && VM_Version::supports_avx512dq(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int16(0x5B, (0xC0 | encode)); +} + +void Assembler::evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len) { + assert(UseAVX > 2 && VM_Version::supports_avx512dq(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xE6, (0xC0 | encode)); +} + +void Assembler::evpmovwb(XMMRegister dst, XMMRegister src, int vector_len) { + assert(UseAVX > 2 && VM_Version::supports_avx512bw(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x30, (0xC0 | encode)); +} + +void Assembler::evpmovdw(XMMRegister dst, XMMRegister src, int vector_len) { + assert(UseAVX > 2, ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x33, (0xC0 | encode)); +} + +void Assembler::evpmovdb(XMMRegister dst, XMMRegister src, int vector_len) { + assert(UseAVX > 2, ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x31, (0xC0 | encode)); +} + +void Assembler::evpmovqd(XMMRegister dst, XMMRegister src, int vector_len) { + assert(UseAVX > 2, ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x35, (0xC0 | encode)); +} + +void Assembler::evpmovqb(XMMRegister dst, XMMRegister src, int vector_len) { + assert(UseAVX > 2, ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x32, 
(0xC0 | encode)); +} + +void Assembler::evpmovqw(XMMRegister dst, XMMRegister src, int vector_len) { + assert(UseAVX > 2, ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x34, (0xC0 | encode)); +} + void Assembler::decl(Address dst) { // Don't use it directly. Use MacroAssembler::decrement() instead. InstructionMark im(this); @@ -2543,28 +2648,34 @@ void Assembler::vmovdqu(Address dst, XMMRegister src) { } // Move Unaligned EVEX enabled Vector (programmable : 8,16,32,64) -void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, int vector_len) { +void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len) { assert(VM_Version::supports_evex(), ""); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_is_evex_instruction(); + if (merge) { + attributes.reset_is_clear_context(); + } int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3; int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes); emit_int16(0x6F, (0xC0 | encode)); } -void Assembler::evmovdqub(XMMRegister dst, Address src, int vector_len) { +void Assembler::evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len) { assert(VM_Version::supports_evex(), ""); InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3; attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); attributes.set_is_evex_instruction(); + if (merge) { + attributes.reset_is_clear_context(); + } vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes); emit_int8(0x6F); emit_operand(dst, src); } -void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) { +void Assembler::evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len) { assert(VM_Version::supports_evex(), ""); assert(src != xnoreg, "sanity"); InstructionMark im(this); @@ -2572,132 +2683,234 @@ void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) { int prefix = (_legacy_mode_bw) ? 
VEX_SIMD_F2 : VEX_SIMD_F3; attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); attributes.set_is_evex_instruction(); + if (merge) { + attributes.reset_is_clear_context(); + } vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes); emit_int8(0x7F); emit_operand(src, dst); } -void Assembler::evmovdqub(XMMRegister dst, KRegister mask, Address src, int vector_len) { +void Assembler::evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { assert(VM_Version::supports_avx512vlbw(), ""); InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); attributes.set_embedded_opmask_register_specifier(mask); attributes.set_is_evex_instruction(); + if (merge) { + attributes.reset_is_clear_context(); + } vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); emit_int8(0x6F); emit_operand(dst, src); } -void Assembler::evmovdquw(XMMRegister dst, Address src, int vector_len) { +void Assembler::evmovdqu(XMMRegister dst, KRegister mask, Address src, int vector_len, int type) { + assert(VM_Version::supports_avx512vlbw(), ""); + assert(type == T_BYTE || type == T_SHORT || type == T_CHAR || type == T_INT || type == T_LONG, ""); + InstructionMark im(this); + bool wide = type == T_SHORT || type == T_CHAR || type == T_LONG; + int prefix = (type == T_BYTE || type == T_SHORT || type == T_CHAR) ? VEX_SIMD_F2 : VEX_SIMD_F3; + InstructionAttr attributes(vector_len, /* vex_w */ wide, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + attributes.set_embedded_opmask_register_specifier(mask); + attributes.set_is_evex_instruction(); + vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes); + emit_int8(0x6F); + emit_operand(dst, src); +} + +void Assembler::evmovdqu(Address dst, KRegister mask, XMMRegister src, int vector_len, int type) { + assert(VM_Version::supports_avx512vlbw(), ""); + assert(src != xnoreg, "sanity"); + assert(type == T_BYTE || type == T_SHORT || type == T_CHAR || type == T_INT || type == T_LONG, ""); + InstructionMark im(this); + bool wide = type == T_SHORT || type == T_CHAR || type == T_LONG; + int prefix = (type == T_BYTE || type == T_SHORT || type == T_CHAR) ? 
VEX_SIMD_F2 : VEX_SIMD_F3; + InstructionAttr attributes(vector_len, /* vex_w */ wide, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + attributes.reset_is_clear_context(); + attributes.set_embedded_opmask_register_specifier(mask); + attributes.set_is_evex_instruction(); + vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes); + emit_int8(0x7F); + emit_operand(src, dst); +} + +void Assembler::evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len) { assert(VM_Version::supports_evex(), ""); InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); attributes.set_is_evex_instruction(); + if (merge) { + attributes.reset_is_clear_context(); + } int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3; vex_prefix(src, 0, dst->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes); emit_int8(0x6F); emit_operand(dst, src); } -void Assembler::evmovdquw(XMMRegister dst, KRegister mask, Address src, int vector_len) { +void Assembler::evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { assert(VM_Version::supports_avx512vlbw(), ""); InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); attributes.set_embedded_opmask_register_specifier(mask); attributes.set_is_evex_instruction(); + if (merge) { + attributes.reset_is_clear_context(); + } vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); emit_int8(0x6F); emit_operand(dst, src); } -void Assembler::evmovdquw(Address dst, XMMRegister src, int vector_len) { +void Assembler::evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len) { assert(VM_Version::supports_evex(), ""); assert(src != xnoreg, "sanity"); InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); attributes.set_is_evex_instruction(); + if (merge) { + attributes.reset_is_clear_context(); + } int prefix = (_legacy_mode_bw) ? 
VEX_SIMD_F2 : VEX_SIMD_F3; vex_prefix(dst, 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes); emit_int8(0x7F); emit_operand(src, dst); } -void Assembler::evmovdquw(Address dst, KRegister mask, XMMRegister src, int vector_len) { +void Assembler::evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { assert(VM_Version::supports_avx512vlbw(), ""); assert(src != xnoreg, "sanity"); InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); - attributes.reset_is_clear_context(); attributes.set_embedded_opmask_register_specifier(mask); attributes.set_is_evex_instruction(); + if (merge) { + attributes.reset_is_clear_context(); + } vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes); emit_int8(0x7F); emit_operand(src, dst); } void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) { + // Unmasked instruction + evmovdqul(dst, k0, src, /*merge*/ false, vector_len); +} + +void Assembler::evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { assert(VM_Version::supports_evex(), ""); - InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_embedded_opmask_register_specifier(mask); attributes.set_is_evex_instruction(); + if (merge) { + attributes.reset_is_clear_context(); + } int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int16(0x6F, (0xC0 | encode)); } void Assembler::evmovdqul(XMMRegister dst, Address src, int vector_len) { + // Unmasked instruction + evmovdqul(dst, k0, src, /*merge*/ false, vector_len); +} + +void Assembler::evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { assert(VM_Version::supports_evex(), ""); InstructionMark im(this); - InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true , /* uses_vl */ true); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false , /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + attributes.set_embedded_opmask_register_specifier(mask); attributes.set_is_evex_instruction(); + if (merge) { + attributes.reset_is_clear_context(); + } vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x6F); emit_operand(dst, src); } void Assembler::evmovdqul(Address dst, XMMRegister src, int vector_len) { + // Unmasked isntruction + evmovdqul(dst, k0, src, /*merge*/ true, vector_len); +} + +void Assembler::evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { assert(VM_Version::supports_evex(), ""); assert(src != xnoreg, "sanity"); InstructionMark im(this); - InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* 
input_size_in_bits */ EVEX_NObit); - attributes.reset_is_clear_context(); + attributes.set_embedded_opmask_register_specifier(mask); attributes.set_is_evex_instruction(); + if (merge) { + attributes.reset_is_clear_context(); + } vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x7F); emit_operand(src, dst); } void Assembler::evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) { + // Unmasked instruction + if (dst->encoding() == src->encoding()) return; + evmovdquq(dst, k0, src, /*merge*/ false, vector_len); +} + +void Assembler::evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { assert(VM_Version::supports_evex(), ""); - InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_embedded_opmask_register_specifier(mask); attributes.set_is_evex_instruction(); + if (merge) { + attributes.reset_is_clear_context(); + } int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int16(0x6F, (0xC0 | encode)); } void Assembler::evmovdquq(XMMRegister dst, Address src, int vector_len) { + // Unmasked instruction + evmovdquq(dst, k0, src, /*merge*/ false, vector_len); +} + +void Assembler::evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { assert(VM_Version::supports_evex(), ""); InstructionMark im(this); - InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + attributes.set_embedded_opmask_register_specifier(mask); attributes.set_is_evex_instruction(); + if (merge) { + attributes.reset_is_clear_context(); + } vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x6F); emit_operand(dst, src); } void Assembler::evmovdquq(Address dst, XMMRegister src, int vector_len) { + // Unmasked instruction + evmovdquq(dst, k0, src, /*merge*/ true, vector_len); +} + +void Assembler::evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { assert(VM_Version::supports_evex(), ""); assert(src != xnoreg, "sanity"); InstructionMark im(this); - InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); - attributes.reset_is_clear_context(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } attributes.set_is_evex_instruction(); vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); emit_int8(0x7F); @@ -2775,6 +2988,29 @@ void Assembler::movq(Address dst, XMMRegister src) { emit_operand(src, dst); } +void Assembler::movq(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* 
legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_rex_vex_w_reverted(); + int encode = simd_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xD6, (0xC0 | encode)); +} + +void Assembler::movq(Register dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + // swap src/dst to get correct prefix + int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(0x7E, (0xC0 | encode)); +} + +void Assembler::movq(XMMRegister dst, Register src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(0x6E, (0xC0 | encode)); +} + void Assembler::movsbl(Register dst, Address src) { // movsxb InstructionMark im(this); prefix(src, dst); @@ -3274,6 +3510,11 @@ void Assembler::notl(Register dst) { emit_int16((unsigned char)0xF7, (0xD0 | encode)); } +void Assembler::orw(Register dst, Register src) { + (void)prefix_and_encode(dst->encoding(), src->encoding()); + emit_arith(0x0B, 0xC0, dst, src); +} + void Assembler::orl(Address dst, int32_t imm32) { InstructionMark im(this); prefix(dst); @@ -3312,6 +3553,34 @@ void Assembler::orb(Address dst, int imm8) { emit_int8(imm8); } +void Assembler::packsswb(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(0x63, (0xC0 | encode)); +} + +void Assembler::vpacksswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(UseAVX > 0, "some form of AVX must be enabled"); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(0x63, (0xC0 | encode)); +} + +void Assembler::packssdw(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sse2(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(0x6B, (0xC0 | encode)); +} + +void Assembler::vpackssdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(UseAVX > 0, "some form of AVX must be enabled"); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(0x6B, (0xC0 | encode)); +} + void Assembler::packuswb(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); @@ 
-3337,21 +3606,74 @@ void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int emit_int16(0x67, (0xC0 | encode)); } +void Assembler::packusdw(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sse4_1(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x2B, (0xC0 | encode)); +} + +void Assembler::vpackusdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(UseAVX > 0, "some form of AVX must be enabled"); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x2B, (0xC0 | encode)); +} + void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) { assert(VM_Version::supports_avx2(), ""); + assert(vector_len != AVX_128bit, ""); + // VEX.256.66.0F3A.W1 00 /r ib InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int24(0x00, (0xC0 | encode), imm8); } void Assembler::vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - assert(UseAVX > 2, "requires AVX512F"); + assert(vector_len == AVX_256bit ? VM_Version::supports_avx512vl() : + vector_len == AVX_512bit ? VM_Version::supports_evex() : false, "not supported"); InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_is_evex_instruction(); int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int16(0x36, (0xC0 | encode)); } +void Assembler::vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(VM_Version::supports_avx512_vbmi(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16((unsigned char)0x8D, (0xC0 | encode)); +} + +void Assembler::vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(vector_len == AVX_128bit ? VM_Version::supports_avx512vlbw() : + vector_len == AVX_256bit ? VM_Version::supports_avx512vlbw() : + vector_len == AVX_512bit ? VM_Version::supports_avx512bw() : false, "not supported"); + InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16((unsigned char)0x8D, (0xC0 | encode)); +} + +void Assembler::vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(vector_len <= AVX_256bit ? 
VM_Version::supports_avx2() : VM_Version::supports_evex(), ""); + // VEX.NDS.256.66.0F38.W0 36 /r + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x36, (0xC0 | encode)); +} + +void Assembler::vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { + assert(vector_len <= AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_evex(), ""); + // VEX.NDS.256.66.0F38.W0 36 /r + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8(0x36); + emit_operand(dst, src); +} + void Assembler::vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) { assert(VM_Version::supports_avx2(), ""); InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); @@ -3366,6 +3688,28 @@ void Assembler::vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, in emit_int24(0x06, (0xC0 | encode), imm8); } +void Assembler::vpermilps(XMMRegister dst, XMMRegister src, int imm8, int vector_len) { + assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x04, (0xC0 | encode), imm8); +} + +void Assembler::vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len) { + assert(vector_len <= AVX_256bit ? VM_Version::supports_avx() : VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(),/* legacy_mode */ false,/* no_mask_reg */ true, /* uses_vl */ false); + attributes.set_rex_vex_w_reverted(); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x05, (0xC0 | encode), imm8); +} + +void Assembler::vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len) { + assert(vector_len <= AVX_256bit ? 
VM_Version::supports_avx2() : VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x01, (0xC0 | encode), imm8); +} + void Assembler::evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_evex(), ""); InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); @@ -3374,7 +3718,6 @@ void Assembler::evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int emit_int16(0x76, (0xC0 | encode)); } - void Assembler::pause() { emit_int16((unsigned char)0xF3, (unsigned char)0x90); } @@ -3408,9 +3751,18 @@ void Assembler::pcmpeqb(XMMRegister dst, XMMRegister src) { emit_int16(0x74, (0xC0 | encode)); } +void Assembler::vpcmpCCbwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len) { + assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), ""); + assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest"); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(cond_encoding, (0xC0 | encode)); +} + // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst void Assembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - assert(VM_Version::supports_avx(), ""); + assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), ""); + assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest"); InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16(0x74, (0xC0 | encode)); @@ -3497,7 +3849,7 @@ void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vect void Assembler::evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) { assert(VM_Version::supports_avx512vlbw(), ""); InstructionMark im(this); - InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_reg_mask */ false, /* uses_vl */ true); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); attributes.reset_is_clear_context(); attributes.set_embedded_opmask_register_specifier(mask); @@ -3517,7 +3869,8 @@ void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) { // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst void Assembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - assert(VM_Version::supports_avx(), ""); + assert(vector_len == AVX_128bit ? 
VM_Version::supports_avx() : VM_Version::supports_avx2(), ""); + assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest"); InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16(0x75, (0xC0 | encode)); @@ -3554,29 +3907,32 @@ void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) { // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst void Assembler::vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - assert(VM_Version::supports_avx(), ""); - InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), ""); + assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest"); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16(0x76, (0xC0 | encode)); } // In this context, kdst is written the mask used to process the equal components -void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) { +void Assembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_evex(), ""); - InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_is_evex_instruction(); attributes.reset_is_clear_context(); + attributes.set_embedded_opmask_register_specifier(mask); int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int16(0x76, (0xC0 | encode)); } -void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, Address src, int vector_len) { +void Assembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) { assert(VM_Version::supports_evex(), ""); InstructionMark im(this); - InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit); - attributes.reset_is_clear_context(); attributes.set_is_evex_instruction(); + attributes.reset_is_clear_context(); + attributes.set_embedded_opmask_register_specifier(mask); int dst_enc = kdst->encoding(); vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8(0x76); @@ -3591,6 +3947,13 @@ void Assembler::pcmpeqq(XMMRegister dst, XMMRegister src) { emit_int16(0x29, (0xC0 | encode)); } +void Assembler::vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len) { + assert(VM_Version::supports_avx(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* 
legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(cond_encoding, (0xC0 | encode)); +} + // In this context, the dst vector contains the components that are equal, non equal components are zeroed in dst void Assembler::vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx(), ""); @@ -3623,11 +3986,36 @@ void Assembler::evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vect emit_operand(as_Register(dst_enc), src); } -void Assembler::pmovmskb(Register dst, XMMRegister src) { - assert(VM_Version::supports_sse2(), ""); - InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); - int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); - emit_int16((unsigned char)0xD7, (0xC0 | encode)); +void Assembler::evpmovd2m(KRegister kdst, XMMRegister src, int vector_len) { + assert(UseAVX > 2 && VM_Version::supports_avx512dq(), ""); + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(kdst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x39, (0xC0 | encode)); +} + +void Assembler::evpmovq2m(KRegister kdst, XMMRegister src, int vector_len) { + assert(UseAVX > 2 && VM_Version::supports_avx512dq(), ""); + assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(kdst->encoding(), 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x39, (0xC0 | encode)); +} + +void Assembler::pcmpgtq(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sse4_1(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x37, (0xC0 | encode)); +} + +void Assembler::pmovmskb(Register dst, XMMRegister src) { + assert(VM_Version::supports_sse2(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xD7, (0xC0 | encode)); } void Assembler::vpmovmskb(Register dst, XMMRegister src) { @@ -3639,14 +4027,14 @@ void Assembler::vpmovmskb(Register dst, XMMRegister src) { void Assembler::pextrd(Register dst, XMMRegister src, int imm8) { assert(VM_Version::supports_sse4_1(), ""); - InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), 
VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int24(0x16, (0xC0 | encode), imm8); } void Assembler::pextrd(Address dst, XMMRegister src, int imm8) { assert(VM_Version::supports_sse4_1(), ""); - InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x16); @@ -3656,14 +4044,14 @@ void Assembler::pextrd(Address dst, XMMRegister src, int imm8) { void Assembler::pextrq(Register dst, XMMRegister src, int imm8) { assert(VM_Version::supports_sse4_1(), ""); - InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int24(0x16, (0xC0 | encode), imm8); } void Assembler::pextrq(Address dst, XMMRegister src, int imm8) { assert(VM_Version::supports_sse4_1(), ""); - InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x16); @@ -3673,14 +4061,14 @@ void Assembler::pextrq(Address dst, XMMRegister src, int imm8) { void Assembler::pextrw(Register dst, XMMRegister src, int imm8) { assert(VM_Version::supports_sse2(), ""); - InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int24((unsigned char)0xC5, (0xC0 | encode), imm8); } void Assembler::pextrw(Address dst, XMMRegister src, int imm8) { assert(VM_Version::supports_sse4_1(), ""); - InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit); simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x15); @@ -3688,9 +4076,16 @@ void Assembler::pextrw(Address dst, XMMRegister src, int imm8) { emit_int8(imm8); } +void Assembler::pextrb(Register dst, XMMRegister src, int imm8) { + assert(VM_Version::supports_sse4_1(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(src, xnoreg, 
as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x14, (0xC0 | encode), imm8); +} + void Assembler::pextrb(Address dst, XMMRegister src, int imm8) { assert(VM_Version::supports_sse4_1(), ""); - InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit); simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x14); @@ -3700,14 +4095,14 @@ void Assembler::pextrb(Address dst, XMMRegister src, int imm8) { void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) { assert(VM_Version::supports_sse4_1(), ""); - InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int24(0x22, (0xC0 | encode), imm8); } void Assembler::pinsrd(XMMRegister dst, Address src, int imm8) { assert(VM_Version::supports_sse4_1(), ""); - InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x22); @@ -3715,16 +4110,23 @@ void Assembler::pinsrd(XMMRegister dst, Address src, int imm8) { emit_int8(imm8); } +void Assembler::vpinsrd(XMMRegister dst, XMMRegister nds, Register src, int imm8) { + assert(VM_Version::supports_avx(), ""); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x22, (0xC0 | encode), imm8); +} + void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) { assert(VM_Version::supports_sse4_1(), ""); - InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int24(0x22, (0xC0 | encode), imm8); } void Assembler::pinsrq(XMMRegister dst, Address src, int imm8) { assert(VM_Version::supports_sse4_1(), ""); - InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); simd_prefix(dst, dst, src, 
VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x22); @@ -3732,16 +4134,23 @@ void Assembler::pinsrq(XMMRegister dst, Address src, int imm8) { emit_int8(imm8); } +void Assembler::vpinsrq(XMMRegister dst, XMMRegister nds, Register src, int imm8) { + assert(VM_Version::supports_avx(), ""); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x22, (0xC0 | encode), imm8); +} + void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) { assert(VM_Version::supports_sse2(), ""); - InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false); int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int24((unsigned char)0xC4, (0xC0 | encode), imm8); } void Assembler::pinsrw(XMMRegister dst, Address src, int imm8) { assert(VM_Version::supports_sse2(), ""); - InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit); simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); emit_int8((unsigned char)0xC4); @@ -3749,9 +4158,16 @@ void Assembler::pinsrw(XMMRegister dst, Address src, int imm8) { emit_int8(imm8); } +void Assembler::vpinsrw(XMMRegister dst, XMMRegister nds, Register src, int imm8) { + assert(VM_Version::supports_avx(), ""); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int24((unsigned char)0xC4, (0xC0 | encode), imm8); +} + void Assembler::pinsrb(XMMRegister dst, Address src, int imm8) { assert(VM_Version::supports_sse4_1(), ""); - InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false); attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit); simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); emit_int8(0x20); @@ -3759,6 +4175,34 @@ void Assembler::pinsrb(XMMRegister dst, Address src, int imm8) { emit_int8(imm8); } +void Assembler::pinsrb(XMMRegister dst, Register src, int imm8) { + assert(VM_Version::supports_sse4_1(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x20, (0xC0 | encode), imm8); +} + +void Assembler::vpinsrb(XMMRegister dst, XMMRegister nds, Register src, int imm8) { + 
assert(VM_Version::supports_avx(), ""); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x20, (0xC0 | encode), imm8); +} + +void Assembler::insertps(XMMRegister dst, XMMRegister src, int imm8) { + assert(VM_Version::supports_sse4_1(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x21, (0xC0 | encode), imm8); +} + +void Assembler::vinsertps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) { + assert(VM_Version::supports_avx(), ""); + InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x21, (0xC0 | encode), imm8); +} + void Assembler::pmovzxbw(XMMRegister dst, Address src) { assert(VM_Version::supports_sse4_1(), ""); InstructionMark im(this); @@ -3783,6 +4227,41 @@ void Assembler::pmovsxbw(XMMRegister dst, XMMRegister src) { emit_int16(0x20, (0xC0 | encode)); } +void Assembler::pmovzxdq(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sse4_1(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x35, (0xC0 | encode)); +} + +void Assembler::pmovsxbd(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sse4_1(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x21, (0xC0 | encode)); +} + +void Assembler::pmovzxbd(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sse4_1(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x31, (0xC0 | encode)); +} + +void Assembler::pmovsxbq(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sse4_1(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x22, (0xC0 | encode)); +} + +void Assembler::pmovsxwd(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sse4_1(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x23, (0xC0 | encode)); +} + void Assembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) { assert(VM_Version::supports_avx(), ""); InstructionMark im(this); @@ -3816,7 +4295,7 @@ void Assembler::evpmovzxbw(XMMRegister dst, 
KRegister mask, Address src, int vec assert(VM_Version::supports_avx512vlbw(), ""); assert(dst != xnoreg, "sanity"); InstructionMark im(this); - InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit); attributes.set_embedded_opmask_register_specifier(mask); attributes.set_is_evex_instruction(); @@ -3824,6 +4303,86 @@ void Assembler::evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vec emit_int8(0x30); emit_operand(dst, src); } + +void Assembler::evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + assert(VM_Version::supports_evex(), ""); + // Encoding: EVEX.NDS.XXX.66.0F.W0 DB /r + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xDB, (0xC0 | encode)); +} + +void Assembler::vpmovzxdq(XMMRegister dst, XMMRegister src, int vector_len) { + assert(vector_len > AVX_128bit ? VM_Version::supports_avx2() : VM_Version::supports_avx(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x35, (0xC0 | encode)); +} + +void Assembler::vpmovzxbd(XMMRegister dst, XMMRegister src, int vector_len) { + assert(vector_len > AVX_128bit ? VM_Version::supports_avx2() : VM_Version::supports_avx(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x31, (0xC0 | encode)); +} + +void Assembler::vpmovzxbq(XMMRegister dst, XMMRegister src, int vector_len) { + assert(vector_len > AVX_128bit ? VM_Version::supports_avx2() : VM_Version::supports_avx(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x32, (0xC0 | encode)); +} + +void Assembler::vpmovsxbd(XMMRegister dst, XMMRegister src, int vector_len) { + assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : + vector_len == AVX_256bit ? VM_Version::supports_avx2() : + VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x21, (0xC0 | encode)); +} + +void Assembler::vpmovsxbq(XMMRegister dst, XMMRegister src, int vector_len) { + assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : + vector_len == AVX_256bit ? 
VM_Version::supports_avx2() : + VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x22, (0xC0 | encode)); +} + +void Assembler::vpmovsxwd(XMMRegister dst, XMMRegister src, int vector_len) { + assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : + vector_len == AVX_256bit ? VM_Version::supports_avx2() : + VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x23, (0xC0 | encode)); +} + +void Assembler::vpmovsxwq(XMMRegister dst, XMMRegister src, int vector_len) { + assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : + vector_len == AVX_256bit ? VM_Version::supports_avx2() : + VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x24, (0xC0 | encode)); +} + +void Assembler::vpmovsxdq(XMMRegister dst, XMMRegister src, int vector_len) { + assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : + vector_len == AVX_256bit ? VM_Version::supports_avx2() : + VM_Version::supports_evex(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x25, (0xC0 | encode)); +} + void Assembler::evpmovwb(Address dst, XMMRegister src, int vector_len) { assert(VM_Version::supports_avx512vlbw(), ""); assert(src != xnoreg, "sanity"); @@ -4050,6 +4609,14 @@ void Assembler::pshufd(XMMRegister dst, Address src, int mode) { emit_int8(mode & 0xFF); } +void Assembler::pshufhw(XMMRegister dst, XMMRegister src, int mode) { + assert(isByte(mode), "invalid value"); + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, VEX_OPCODE_0F, &attributes); + emit_int24(0x70, (0xC0 | encode), mode & 0xFF); +} + void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) { assert(isByte(mode), "invalid value"); NOT_LP64(assert(VM_Version::supports_sse2(), "")); @@ -4080,6 +4647,35 @@ void Assembler::evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, i emit_int24(0x43, (0xC0 | encode), imm8 & 0xFF); } +void Assembler::pshufpd(XMMRegister dst, XMMRegister src, int imm8) { + assert(isByte(imm8), "invalid value"); + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int24((unsigned char)0xC6, (0xC0 | encode), imm8 & 0xFF); +} + +void Assembler::vpshufpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) { + InstructionAttr attributes(vector_len, /* vex_w */ 
VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_rex_vex_w_reverted(); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int24((unsigned char)0xC6, (0xC0 | encode), imm8 & 0xFF); +} + +void Assembler::pshufps(XMMRegister dst, XMMRegister src, int imm8) { + assert(isByte(imm8), "invalid value"); + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int24((unsigned char)0xC6, (0xC0 | encode), imm8 & 0xFF); +} + +void Assembler::vpshufps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) { + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int24((unsigned char)0xC6, (0xC0 | encode), imm8 & 0xFF); +} + void Assembler::psrldq(XMMRegister dst, int shift) { // Shift left 128 bit value in dst XMMRegister by shift number of bytes. NOT_LP64(assert(VM_Version::supports_sse2(), "")); @@ -4151,6 +4747,13 @@ void Assembler::vptest(XMMRegister dst, XMMRegister src) { emit_int16(0x17, (0xC0 | encode)); } +void Assembler::vptest(XMMRegister dst, XMMRegister src, int vector_len) { + assert(VM_Version::supports_avx(), ""); + InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x17, (0xC0 | encode)); +} + void Assembler::punpcklbw(XMMRegister dst, Address src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes"); @@ -4881,6 +5484,11 @@ void Assembler::xorb(Register dst, Address src) { emit_operand(dst, src); } +void Assembler::xorw(Register dst, Register src) { + (void)prefix_and_encode(dst->encoding(), src->encoding()); + emit_arith(0x33, 0xC0, dst, src); +} + // AVX 3-operands scalar float-point arithmetic instructions void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) { @@ -5794,6 +6402,13 @@ void Assembler::pmulld(XMMRegister dst, XMMRegister src) { emit_int16(0x40, (0xC0 | encode)); } +void Assembler::pmuludq(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sse2(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xF4, (0xC0 | encode)); +} + void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); @@ -5816,6 +6431,13 @@ void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int v emit_int16(0x40, (0xC0 | encode)); } +void Assembler::vpmuludq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(UseAVX > 0, 
"requires some form of AVX"); + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xF4, (0xC0 | encode)); +} + void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { assert(UseAVX > 0, "requires some form of AVX"); InstructionMark im(this); @@ -5847,66 +6469,227 @@ void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vecto emit_operand(dst, src); } -// Shift packed integers left by specified number of bits. -void Assembler::psllw(XMMRegister dst, int shift) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); +// Min, max +void Assembler::pminsb(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sse4_1(), ""); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); - // XMM6 is for /6 encoding: 66 0F 71 /6 ib - int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); - emit_int24(0x71, (0xC0 | encode), shift & 0xFF); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x38, (0xC0 | encode)); } -void Assembler::pslld(XMMRegister dst, int shift) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); - // XMM6 is for /6 encoding: 66 0F 72 /6 ib - int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); - emit_int24(0x72, (0xC0 | encode), shift & 0xFF); +void Assembler::vpminsb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : + (vector_len == AVX_256bit ? 
VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x38, (0xC0 | encode)); } -void Assembler::psllq(XMMRegister dst, int shift) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); - // XMM6 is for /6 encoding: 66 0F 73 /6 ib - int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); - emit_int24(0x73, (0xC0 | encode), shift & 0xFF); +void Assembler::pminsw(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sse2(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xEA, (0xC0 | encode)); } -void Assembler::psllw(XMMRegister dst, XMMRegister shift) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); - int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); - emit_int16((unsigned char)0xF1, (0xC0 | encode)); +void Assembler::vpminsw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : + (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xEA, (0xC0 | encode)); } -void Assembler::pslld(XMMRegister dst, XMMRegister shift) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); +void Assembler::pminsd(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sse4_1(), ""); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); - int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); - emit_int16((unsigned char)0xF2, (0xC0 | encode)); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x39, (0xC0 | encode)); } -void Assembler::psllq(XMMRegister dst, XMMRegister shift) { - NOT_LP64(assert(VM_Version::supports_sse2(), "")); - InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); - attributes.set_rex_vex_w_reverted(); - int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); - emit_int16((unsigned char)0xF3, (0xC0 | encode)); +void Assembler::vpminsd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : + (vector_len == AVX_256bit ? 
VM_Version::supports_avx2() : VM_Version::supports_evex()), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x39, (0xC0 | encode)); } -void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) { - assert(UseAVX > 0, "requires some form of AVX"); - InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); - // XMM6 is for /6 encoding: 66 0F 71 /6 ib - int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); - emit_int24(0x71, (0xC0 | encode), shift & 0xFF); +void Assembler::vpminsq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(UseAVX > 2, "requires AVX512F"); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x39, (0xC0 | encode)); } -void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) { - assert(UseAVX > 0, "requires some form of AVX"); - NOT_LP64(assert(VM_Version::supports_sse2(), "")); +void Assembler::minps(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int16(0x5D, (0xC0 | encode)); +} +void Assembler::vminps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(vector_len >= AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), ""); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); - // XMM6 is for /6 encoding: 66 0F 72 /6 ib + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int16(0x5D, (0xC0 | encode)); +} + +void Assembler::minpd(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(0x5D, (0xC0 | encode)); +} +void Assembler::vminpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(vector_len >= AVX_512bit ? 
VM_Version::supports_evex() : VM_Version::supports_avx(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(0x5D, (0xC0 | encode)); +} + +void Assembler::pmaxsb(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sse4_1(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x3C, (0xC0 | encode)); +} + +void Assembler::vpmaxsb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : + (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x3C, (0xC0 | encode)); +} + +void Assembler::pmaxsw(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sse2(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xEE, (0xC0 | encode)); +} + +void Assembler::vpmaxsw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : + (vector_len == AVX_256bit ? VM_Version::supports_avx2() : VM_Version::supports_avx512bw()), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xEE, (0xC0 | encode)); +} + +void Assembler::pmaxsd(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sse4_1(), ""); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x3D, (0xC0 | encode)); +} + +void Assembler::vpmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : + (vector_len == AVX_256bit ? 
VM_Version::supports_avx2() : VM_Version::supports_evex()), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x3D, (0xC0 | encode)); +} + +void Assembler::vpmaxsq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(UseAVX > 2, "requires AVX512F"); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x3D, (0xC0 | encode)); +} + +void Assembler::maxps(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int16(0x5F, (0xC0 | encode)); +} + +void Assembler::vmaxps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(vector_len >= AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int16(0x5F, (0xC0 | encode)); +} + +void Assembler::maxpd(XMMRegister dst, XMMRegister src) { + NOT_LP64(assert(VM_Version::supports_sse(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(0x5F, (0xC0 | encode)); +} + +void Assembler::vmaxpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(vector_len >= AVX_512bit ? VM_Version::supports_evex() : VM_Version::supports_avx(), ""); + InstructionAttr attributes(vector_len, /* vex_w */true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(0x5F, (0xC0 | encode)); +} + +// Shift packed integers left by specified number of bits. 
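+// Note on the shift-by-immediate forms that follow: these encodings have no XMM source in
+// ModRM.reg; that field holds an opcode extension instead (/6 selects the left shifts, /2 the
+// logical right shifts, /4 the arithmetic right shifts). The assembler reuses the normal
+// register plumbing by passing the XMM register whose number matches the extension
+// (xmm6 encodes /6), so only dst and the imm8 count are real operands.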
+void Assembler::psllw(XMMRegister dst, int shift) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + // XMM6 is for /6 encoding: 66 0F 71 /6 ib + int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int24(0x71, (0xC0 | encode), shift & 0xFF); +} + +void Assembler::pslld(XMMRegister dst, int shift) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + // XMM6 is for /6 encoding: 66 0F 72 /6 ib + int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int24(0x72, (0xC0 | encode), shift & 0xFF); +} + +void Assembler::psllq(XMMRegister dst, int shift) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + // XMM6 is for /6 encoding: 66 0F 73 /6 ib + int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int24(0x73, (0xC0 | encode), shift & 0xFF); +} + +void Assembler::psllw(XMMRegister dst, XMMRegister shift) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xF1, (0xC0 | encode)); +} + +void Assembler::pslld(XMMRegister dst, XMMRegister shift) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xF2, (0xC0 | encode)); +} + +void Assembler::psllq(XMMRegister dst, XMMRegister shift) { + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_rex_vex_w_reverted(); + int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xF3, (0xC0 | encode)); +} + +void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true); + // XMM6 is for /6 encoding: 66 0F 71 /6 ib + int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int24(0x71, (0xC0 | encode), shift & 0xFF); +} + +void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) { + assert(UseAVX > 0, "requires some form of AVX"); + NOT_LP64(assert(VM_Version::supports_sse2(), "")); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + // XMM6 is for /6 encoding: 66 0F 72 /6 ib int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, 
VEX_OPCODE_0F, &attributes); emit_int24(0x72, (0xC0 | encode), shift & 0xFF); } @@ -6168,13 +6951,67 @@ void Assembler::vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve emit_int16((unsigned char)0xDB, (0xC0 | encode)); } +// Variable shift packed integers logically left. +void Assembler::vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { + assert(UseAVX > 1, "requires AVX2"); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x47, (0xC0 | encode)); +} + +void Assembler::vpsllvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { + assert(UseAVX > 1, "requires AVX2"); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x47, (0xC0 | encode)); +} + +// Variable shift packed integers logically right. +void Assembler::vpsrlvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { + assert(UseAVX > 1, "requires AVX2"); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x45, (0xC0 | encode)); +} + +void Assembler::vpsrlvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { + assert(UseAVX > 1, "requires AVX2"); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x45, (0xC0 | encode)); +} + +// Variable shift packed integers arithmetically right.
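+// Unlike the 32-bit form (VPSRAVD, AVX2), variable arithmetic right shifts of 16-bit and
+// 64-bit lanes exist only as EVEX instructions: VPSRAVW requires AVX512BW and VPSRAVQ
+// requires AVX512F, plus AVX512VL for 128/256-bit vectors, hence the stricter asserts below.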
+void Assembler::vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { + assert(UseAVX > 1, "requires AVX2"); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x46, (0xC0 | encode)); +} + +void Assembler::evpsravw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(VM_Version::supports_avx512bw(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x11, (0xC0 | encode)); +} + +void Assembler::evpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { + assert(UseAVX > 2, "requires AVX512"); + assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires AVX512VL"); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x46, (0xC0 | encode)); +} + void Assembler::vpshldvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { assert(VM_Version::supports_avx512_vbmi2(), "requires vbmi2"); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_is_evex_instruction(); int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); - emit_int8(0x71); - emit_int8((0xC0 | encode)); + emit_int16(0x71, (0xC0 | encode)); } void Assembler::vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) { @@ -6200,7 +7037,6 @@ void Assembler::vpandn(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve emit_int16((unsigned char)0xDF, (0xC0 | encode)); } - void Assembler::por(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); @@ -6233,6 +7069,35 @@ void Assembler::vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vec } +void Assembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + assert(VM_Version::supports_evex(), ""); + // Encoding: EVEX.NDS.XXX.66.0F.W0 EB /r + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xEB, (0xC0 | encode)); +} + +void Assembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) { + assert(VM_Version::supports_evex(), ""); + // Encoding: EVEX.NDS.XXX.66.0F.W0 EB /r + 
InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int8((unsigned char)0xEB); + emit_operand(dst, src); +} + void Assembler::pxor(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); @@ -6257,13 +7122,33 @@ void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_ emit_operand(dst, src); } +void Assembler::vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(UseAVX > 2, "requires some form of EVEX"); + InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); + attributes.set_rex_vex_w_reverted(); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xEF, (0xC0 | encode)); +} + +void Assembler::evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + assert(VM_Version::supports_evex(), ""); + // Encoding: EVEX.NDS.XXX.66.0F.W0 EF /r + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16((unsigned char)0xEF, (0xC0 | encode)); +} + void Assembler::evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { assert(VM_Version::supports_evex(), "requires EVEX support"); InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true); attributes.set_is_evex_instruction(); int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); - emit_int8((unsigned char)0xEF); - emit_int8((0xC0 | encode)); + emit_int16((unsigned char)0xEF, (0xC0 | encode)); } void Assembler::evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { @@ -6960,12 +7845,67 @@ void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) { int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); emit_int16(0x7C, (0xC0 | encode)); } + +void Assembler::vpgatherdd(XMMRegister dst, Address src, XMMRegister mask, int vector_len) { + assert(VM_Version::supports_avx2(), ""); + assert(vector_len == Assembler::AVX_128bit || vector_len == Assembler::AVX_256bit, ""); + assert(dst != xnoreg, "sanity"); + assert(src.isxmmindex(),"expected to be xmm index"); + assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same"); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* 
legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true); + vex_prefix(src, mask->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0x90); + emit_operand(dst, src); +} + +void Assembler::vpgatherdq(XMMRegister dst, Address src, XMMRegister mask, int vector_len) { + assert(VM_Version::supports_avx2(), ""); + assert(vector_len == Assembler::AVX_128bit || vector_len == Assembler::AVX_256bit, ""); + assert(dst != xnoreg, "sanity"); + assert(src.isxmmindex(),"expected to be xmm index"); + assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same"); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true); + vex_prefix(src, mask->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0x90); + emit_operand(dst, src); +} + +void Assembler::vgatherdpd(XMMRegister dst, Address src, XMMRegister mask, int vector_len) { + assert(VM_Version::supports_avx2(), ""); + assert(vector_len == Assembler::AVX_128bit || vector_len == Assembler::AVX_256bit, ""); + assert(dst != xnoreg, "sanity"); + assert(src.isxmmindex(),"expected to be xmm index"); + assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same"); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true); + vex_prefix(src, mask->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0x92); + emit_operand(dst, src); +} + +void Assembler::vgatherdps(XMMRegister dst, Address src, XMMRegister mask, int vector_len) { + assert(VM_Version::supports_avx2(), ""); + assert(vector_len == Assembler::AVX_128bit || vector_len == Assembler::AVX_256bit, ""); + assert(dst != xnoreg, "sanity"); + assert(src.isxmmindex(),"expected to be xmm index"); + assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same"); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true); + vex_prefix(src, mask->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0x92); + emit_operand(dst, src); +} void Assembler::evpgatherdd(XMMRegister dst, KRegister mask, Address src, int vector_len) { assert(VM_Version::supports_evex(), ""); assert(dst != xnoreg, "sanity"); + assert(src.isxmmindex(),"expected to be xmm index"); + assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same"); + assert(mask != k0, "instruction will #UD if mask is in k0"); InstructionMark im(this); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); - attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); attributes.reset_is_clear_context(); attributes.set_embedded_opmask_register_specifier(mask); attributes.set_is_evex_instruction(); @@ -6974,6 +7914,116 @@ void Assembler::evpgatherdd(XMMRegister dst, KRegister mask, Address src, int ve emit_int8((unsigned char)0x90); emit_operand(dst, src); } + +void Assembler::evpgatherdq(XMMRegister dst, KRegister mask, Address src, int vector_len) { + assert(VM_Version::supports_evex(), 
""); + assert(dst != xnoreg, "sanity"); + assert(src.isxmmindex(),"expected to be xmm index"); + assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same"); + assert(mask != k0, "instruction will #UD if mask is in k0"); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + attributes.reset_is_clear_context(); + attributes.set_embedded_opmask_register_specifier(mask); + attributes.set_is_evex_instruction(); + // swap src<->dst for encoding + vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0x90); + emit_operand(dst, src); +} + +void Assembler::evgatherdpd(XMMRegister dst, KRegister mask, Address src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + assert(dst != xnoreg, "sanity"); + assert(src.isxmmindex(),"expected to be xmm index"); + assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same"); + assert(mask != k0, "instruction will #UD if mask is in k0"); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + attributes.reset_is_clear_context(); + attributes.set_embedded_opmask_register_specifier(mask); + attributes.set_is_evex_instruction(); + // swap src<->dst for encoding + vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0x92); + emit_operand(dst, src); +} + +void Assembler::evgatherdps(XMMRegister dst, KRegister mask, Address src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + assert(dst != xnoreg, "sanity"); + assert(src.isxmmindex(),"expected to be xmm index"); + assert(dst != src.xmmindex(), "instruction will #UD if dst and index are the same"); + assert(mask != k0, "instruction will #UD if mask is in k0"); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + attributes.reset_is_clear_context(); + attributes.set_embedded_opmask_register_specifier(mask); + attributes.set_is_evex_instruction(); + // swap src<->dst for encoding + vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0x92); + emit_operand(dst, src); +} + +void Assembler::evpscatterdd(Address dst, KRegister mask, XMMRegister src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + assert(mask != k0, "instruction will #UD if mask is in k0"); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + attributes.reset_is_clear_context(); + attributes.set_embedded_opmask_register_specifier(mask); + attributes.set_is_evex_instruction(); + vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0xA0); + emit_operand(src, dst); +} + +void Assembler::evpscatterdq(Address dst, KRegister mask, XMMRegister src, 
int vector_len) { + assert(VM_Version::supports_evex(), ""); + assert(mask != k0, "instruction will #UD if mask is in k0"); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + attributes.reset_is_clear_context(); + attributes.set_embedded_opmask_register_specifier(mask); + attributes.set_is_evex_instruction(); + vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0xA0); + emit_operand(src, dst); +} + +void Assembler::evscatterdps(Address dst, KRegister mask, XMMRegister src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + assert(mask != k0, "instruction will #UD if mask is in k0"); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + attributes.reset_is_clear_context(); + attributes.set_embedded_opmask_register_specifier(mask); + attributes.set_is_evex_instruction(); + vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0xA2); + emit_operand(src, dst); +} + +void Assembler::evscatterdpd(Address dst, KRegister mask, XMMRegister src, int vector_len) { + assert(VM_Version::supports_evex(), ""); + assert(mask != k0, "instruction will #UD if mask is in k0"); + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit); + attributes.reset_is_clear_context(); + attributes.set_embedded_opmask_register_specifier(mask); + attributes.set_is_evex_instruction(); + vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int8((unsigned char)0xA2); + emit_operand(src, dst); +} // Carry-Less Multiplication Quadword void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) { assert(VM_Version::supports_clmul(), ""); @@ -7571,7 +8621,8 @@ void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_r, boo // fourth EVEX.L'L for vector length : 0 is 128, 1 is 256, 2 is 512, currently we do not support 1024 byte4 |= ((_attributes->get_vector_len())& 0x3) << 5; // last is EVEX.z for zero/merge actions - if (_attributes->is_no_reg_mask() == false) { + if (_attributes->is_no_reg_mask() == false && + _attributes->get_embedded_opmask_register_specifier() != 0) { byte4 |= (_attributes->is_clear_context() ? 
EVEX_Z : 0); } @@ -7739,7 +8790,7 @@ void Assembler::vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { emit_int16(0x5D, (0xC0 | encode)); } -void Assembler::cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) { +void Assembler::vcmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) { assert(VM_Version::supports_avx(), ""); assert(vector_len <= AVX_256bit, ""); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true); @@ -7756,8 +8807,8 @@ void Assembler::blendvpb(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMM emit_int24(0x4C, (0xC0 | encode), (0xF0 & src2_enc << 4)); } -void Assembler::blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) { - assert(VM_Version::supports_avx(), ""); +void Assembler::vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) { + assert(UseAVX > 0 && (vector_len == AVX_128bit || vector_len == AVX_256bit), ""); assert(vector_len <= AVX_256bit, ""); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true); int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); @@ -7765,28 +8816,330 @@ void Assembler::blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMM emit_int24(0x4B, (0xC0 | encode), (0xF0 & src2_enc << 4)); } -void Assembler::cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) { - assert(VM_Version::supports_avx(), ""); +void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) { + assert(VM_Version::supports_avx2(), ""); assert(vector_len <= AVX_256bit, ""); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true); - int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); - emit_int24((unsigned char)0xC2, (0xC0 | encode), (0xF & cop)); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x02, (0xC0 | encode), (unsigned char)imm8); } -void Assembler::blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) { +void Assembler::vcmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int comparison, int vector_len) { assert(VM_Version::supports_avx(), ""); assert(vector_len <= AVX_256bit, ""); InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int24((unsigned char)0xC2, (0xC0 | encode), (unsigned char)comparison); +} + +void Assembler::evcmpps(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, + ComparisonPredicateFP comparison, int vector_len) { + assert(VM_Version::supports_evex(), ""); + // Encoding: EVEX.NDS.XXX.0F.W0 C2 /r ib + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + attributes.reset_is_clear_context(); + int encode = vex_prefix_and_encode(kdst->encoding(), 
nds->encoding(), src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes); + emit_int24((unsigned char)0xC2, (0xC0 | encode), comparison); +} + +void Assembler::evcmppd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, + ComparisonPredicateFP comparison, int vector_len) { + assert(VM_Version::supports_evex(), ""); + // Encoding: EVEX.NDS.XXX.66.0F.W1 C2 /r ib + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + attributes.reset_is_clear_context(); + int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int24((unsigned char)0xC2, (0xC0 | encode), comparison); +} + +void Assembler::blendvps(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sse4_1(), ""); + assert(UseAVX <= 0, "sse encoding is inconsistent with avx encoding"); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x14, (0xC0 | encode)); +} + +void Assembler::blendvpd(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sse4_1(), ""); + assert(UseAVX <= 0, "sse encoding is inconsistent with avx encoding"); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x15, (0xC0 | encode)); +} + +void Assembler::pblendvb(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sse4_1(), ""); + assert(UseAVX <= 0, "sse encoding is inconsistent with avx encoding"); + InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x10, (0xC0 | encode)); +} + +void Assembler::vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) { + assert(UseAVX > 0 && (vector_len == AVX_128bit || vector_len == AVX_256bit), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); int src2_enc = src2->encoding(); emit_int24(0x4A, (0xC0 | encode), (0xF0 & src2_enc << 4)); } -void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) { - assert(VM_Version::supports_avx2(), ""); - InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true); +void Assembler::vblendps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) { + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); - emit_int24(0x02, (0xC0 | encode), (unsigned char)imm8); + emit_int24(0x0C, (0xC0 | encode), imm8); +} + +void Assembler::vpcmpgtb(XMMRegister dst, 
XMMRegister nds, XMMRegister src, int vector_len) { + assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), ""); + assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest"); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(0x64, (0xC0 | encode)); +} + +void Assembler::vpcmpgtw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), ""); + assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest"); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(0x65, (0xC0 | encode)); +} + +void Assembler::vpcmpgtd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(vector_len == AVX_128bit ? VM_Version::supports_avx() : VM_Version::supports_avx2(), ""); + assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest"); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes); + emit_int16(0x66, (0xC0 | encode)); +} + +void Assembler::vpcmpgtq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { + assert(vector_len == AVX_128bit ? 
VM_Version::supports_avx() : VM_Version::supports_avx2(), ""); + assert(vector_len <= AVX_256bit, "evex encoding is different - has k register as dest"); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x37, (0xC0 | encode)); +} + +void Assembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, + int comparison, int vector_len) { + assert(VM_Version::supports_evex(), ""); + assert(comparison >= Assembler::eq && comparison <= Assembler::_true, ""); + // Encoding: EVEX.NDS.XXX.66.0F3A.W0 1F /r ib + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + attributes.reset_is_clear_context(); + int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x1F, (0xC0 | encode), comparison); +} + +void Assembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, + int comparison, int vector_len) { + assert(VM_Version::supports_evex(), ""); + assert(comparison >= Assembler::eq && comparison <= Assembler::_true, ""); + // Encoding: EVEX.NDS.XXX.66.0F3A.W0 1F /r ib + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + attributes.reset_is_clear_context(); + int dst_enc = kdst->encoding(); + vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int8((unsigned char)0x1F); + emit_operand(as_Register(dst_enc), src); + emit_int8((unsigned char)comparison); +} + +void Assembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, + int comparison, int vector_len) { + assert(VM_Version::supports_evex(), ""); + assert(comparison >= Assembler::eq && comparison <= Assembler::_true, ""); + // Encoding: EVEX.NDS.XXX.66.0F3A.W1 1F /r ib + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + attributes.reset_is_clear_context(); + int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x1F, (0xC0 | encode), comparison); +} + +void Assembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address src, + int comparison, int vector_len) { + assert(VM_Version::supports_evex(), ""); + assert(comparison >= Assembler::eq && comparison <= Assembler::_true, ""); + // Encoding: EVEX.NDS.XXX.66.0F3A.W1 1F /r ib + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_NObit); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + 
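+  // The embedded opmask (EVEX.aaa) predicates the compare; reset_is_clear_context() below keeps EVEX.z clear (merging semantics), matching the other masked-compare emitters above.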
attributes.reset_is_clear_context(); + int dst_enc = kdst->encoding(); + vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int8((unsigned char)0x1F); + emit_operand(as_Register(dst_enc), src); + emit_int8((unsigned char)comparison); +} + +void Assembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, + int comparison, int vector_len) { + assert(VM_Version::supports_evex(), ""); + assert(VM_Version::supports_avx512bw(), ""); + assert(comparison >= Assembler::eq && comparison <= Assembler::_true, ""); + // Encoding: EVEX.NDS.XXX.66.0F3A.W0 3F /r ib + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + attributes.reset_is_clear_context(); + int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x3F, (0xC0 | encode), comparison); +} + +void Assembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, + int comparison, int vector_len) { + assert(VM_Version::supports_evex(), ""); + assert(VM_Version::supports_avx512bw(), ""); + assert(comparison >= Assembler::eq && comparison <= Assembler::_true, ""); + // Encoding: EVEX.NDS.XXX.66.0F3A.W0 3F /r ib + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + attributes.reset_is_clear_context(); + int dst_enc = kdst->encoding(); + vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int8((unsigned char)0x3F); + emit_operand(as_Register(dst_enc), src); + emit_int8((unsigned char)comparison); +} + +void Assembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, + int comparison, int vector_len) { + assert(VM_Version::supports_evex(), ""); + assert(VM_Version::supports_avx512bw(), ""); + assert(comparison >= Assembler::eq && comparison <= Assembler::_true, ""); + // Encoding: EVEX.NDS.XXX.66.0F3A.W1 3F /r ib + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + attributes.reset_is_clear_context(); + int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int24(0x3F, (0xC0 | encode), comparison); +} + +void Assembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address src, + int comparison, int vector_len) { + assert(VM_Version::supports_evex(), ""); + assert(VM_Version::supports_avx512bw(), ""); + assert(comparison >= Assembler::eq && comparison <= Assembler::_true, ""); + // Encoding: EVEX.NDS.XXX.66.0F3A.W1 3F /r ib + InstructionMark im(this); + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit); + attributes.set_is_evex_instruction(); + 
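+  // Same layout as the byte form above: EVEX-only encoding, opmask-predicated, merging (no zeroing).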
attributes.set_embedded_opmask_register_specifier(mask); + attributes.reset_is_clear_context(); + int dst_enc = kdst->encoding(); + vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + emit_int8((unsigned char)0x3F); + emit_operand(as_Register(dst_enc), src); + emit_int8((unsigned char)comparison); +} + +void Assembler::vpblendvb(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len) { + assert(VM_Version::supports_avx(), ""); + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false); + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); + int mask_enc = mask->encoding(); + emit_int24(0x4C, (0xC0 | encode), 0xF0 & mask_enc << 4); +} + +void Assembler::evblendmpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + assert(VM_Version::supports_evex(), ""); + // Encoding: EVEX.NDS.XXX.66.0F38.W1 65 /r + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x65, (0xC0 | encode)); +} + +void Assembler::evblendmps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + assert(VM_Version::supports_evex(), ""); + // Encoding: EVEX.NDS.XXX.66.0F38.W0 65 /r + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x65, (0xC0 | encode)); +} + +void Assembler::evpblendmb (XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + assert(VM_Version::supports_evex(), ""); + assert(VM_Version::supports_avx512bw(), ""); + // Encoding: EVEX.NDS.512.66.0F38.W0 66 /r + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x66, (0xC0 | encode)); +} + +void Assembler::evpblendmw (XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + assert(VM_Version::supports_evex(), ""); + assert(VM_Version::supports_avx512bw(), ""); + // Encoding: EVEX.NDS.512.66.0F38.W1 66 /r + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = 
vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x66, (0xC0 | encode)); +} + +void Assembler::evpblendmd (XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + assert(VM_Version::supports_evex(), ""); + //Encoding: EVEX.NDS.512.66.0F38.W0 64 /r + InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x64, (0xC0 | encode)); +} + +void Assembler::evpblendmq (XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) { + assert(VM_Version::supports_evex(), ""); + //Encoding: EVEX.NDS.512.66.0F38.W1 64 /r + InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); + attributes.set_is_evex_instruction(); + attributes.set_embedded_opmask_register_specifier(mask); + if (merge) { + attributes.reset_is_clear_context(); + } + int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes); + emit_int16(0x64, (0xC0 | encode)); } void Assembler::shlxl(Register dst, Register src1, Register src2) { @@ -7803,6 +9156,13 @@ void Assembler::shlxq(Register dst, Register src1, Register src2) { emit_int16((unsigned char)0xF7, (0xC0 | encode)); } +void Assembler::shrxq(Register dst, Register src1, Register src2) { + assert(VM_Version::supports_bmi2(), ""); + InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true); + int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, &attributes); + emit_int16((unsigned char)0xF7, (0xC0 | encode)); +} + #ifndef _LP64 void Assembler::incl(Register dst) { @@ -8443,7 +9803,7 @@ void Assembler::cmpq(Register dst, int32_t imm32) { void Assembler::cmpq(Address dst, Register src) { InstructionMark im(this); - emit_int16(get_prefixq(dst, src), 0x3B); + emit_int16(get_prefixq(dst, src), 0x39); emit_operand(src, dst); } diff --git a/src/hotspot/cpu/x86/assembler_x86.hpp b/src/hotspot/cpu/x86/assembler_x86.hpp index 283285dc347..1d6eb41bd05 100644 --- a/src/hotspot/cpu/x86/assembler_x86.hpp +++ b/src/hotspot/cpu/x86/assembler_x86.hpp @@ -588,6 +588,7 @@ class Assembler : public AbstractAssembler { #endif }; + // Comparison predicates for integral types & FP types when using SSE enum ComparisonPredicate { eq = 0, lt = 1, @@ -599,6 +600,51 @@ class Assembler : public AbstractAssembler { _true = 7 }; + // Comparison predicates for FP types when using AVX + // O means ordered. U is unordered. When using ordered, any NaN comparison is false. Otherwise, it is true. + // S means signaling. Q means non-signaling. When signaling is true, instruction signals #IA on NaN. 
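+  // The predicate is passed through as the trailing immediate byte of the compare (see vcmpps/vcmppd and
+  // evcmpps/evcmppd below). For example, EQ_OQ compares for equality, treats any NaN lane as false and does
+  // not signal; EQ_OS is the signaling variant of the same test.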
+ enum ComparisonPredicateFP { + EQ_OQ = 0, + LT_OS = 1, + LE_OS = 2, + UNORD_Q = 3, + NEQ_UQ = 4, + NLT_US = 5, + NLE_US = 6, + ORD_Q = 7, + EQ_UQ = 8, + NGE_US = 9, + NGT_US = 0xA, + FALSE_OQ = 0XB, + NEQ_OQ = 0xC, + GE_OS = 0xD, + GT_OS = 0xE, + TRUE_UQ = 0xF, + EQ_OS = 0x10, + LT_OQ = 0x11, + LE_OQ = 0x12, + UNORD_S = 0x13, + NEQ_US = 0x14, + NLT_UQ = 0x15, + NLE_UQ = 0x16, + ORD_S = 0x17, + EQ_US = 0x18, + NGE_UQ = 0x19, + NGT_UQ = 0x1A, + FALSE_OS = 0x1B, + NEQ_OS = 0x1C, + GE_OQ = 0x1D, + GT_OQ = 0x1E, + TRUE_US =0x1F + }; + + enum Width { + B = 0, + W = 1, + D = 2, + Q = 3 + }; + //---< calculate length of instruction >--- // As instruction size can't be found out easily on x86/x64, // we just use '4' for len and maxlen. @@ -794,7 +840,6 @@ class Assembler : public AbstractAssembler { void decl(Register dst); void decl(Address dst); - void decq(Register dst); void decq(Address dst); void incl(Register dst); @@ -879,6 +924,7 @@ class Assembler : public AbstractAssembler { void popa_uncached(); #endif void vzeroupper_uncached(); + void decq(Register dst); void pusha(); void popa(); @@ -918,6 +964,7 @@ class Assembler : public AbstractAssembler { void adcq(Register dst, Register src); void addb(Address dst, int imm8); + void addw(Register dst, Register src); void addw(Address dst, int imm16); void addl(Address dst, int32_t imm32); @@ -968,6 +1015,8 @@ class Assembler : public AbstractAssembler { void vaesdec(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vaesdeclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void andw(Register dst, Register src); + void andl(Address dst, int32_t imm32); void andl(Register dst, int32_t imm32); void andl(Register dst, Address src); @@ -1093,9 +1142,11 @@ class Assembler : public AbstractAssembler { // Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value void cvtdq2pd(XMMRegister dst, XMMRegister src); + void vcvtdq2pd(XMMRegister dst, XMMRegister src, int vector_len); // Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value void cvtdq2ps(XMMRegister dst, XMMRegister src); + void vcvtdq2ps(XMMRegister dst, XMMRegister src, int vector_len); // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value void cvtss2sd(XMMRegister dst, XMMRegister src); @@ -1111,8 +1162,25 @@ class Assembler : public AbstractAssembler { void cvttss2sil(Register dst, XMMRegister src); void cvttss2siq(Register dst, XMMRegister src); + // Convert vector double to int void cvttpd2dq(XMMRegister dst, XMMRegister src); + // Convert vector float and double + void vcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len); + void vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len); + + // Convert vector long to vector FP + void evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len); + void evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len); + + // Evex casts with truncation + void evpmovwb(XMMRegister dst, XMMRegister src, int vector_len); + void evpmovdw(XMMRegister dst, XMMRegister src, int vector_len); + void evpmovdb(XMMRegister dst, XMMRegister src, int vector_len); + void evpmovqd(XMMRegister dst, XMMRegister src, int vector_len); + void evpmovqb(XMMRegister dst, XMMRegister src, int vector_len); + void evpmovqw(XMMRegister dst, XMMRegister src, int vector_len); + //Abs of packed Integer values void pabsb(XMMRegister dst, XMMRegister src); void pabsw(XMMRegister dst, XMMRegister src); @@ 
-1472,20 +1540,30 @@ class Assembler : public AbstractAssembler { void vmovdqu(XMMRegister dst, XMMRegister src); // Move Unaligned 512bit Vector - void evmovdqub(Address dst, XMMRegister src, int vector_len); - void evmovdqub(XMMRegister dst, Address src, int vector_len); - void evmovdqub(XMMRegister dst, XMMRegister src, int vector_len); - void evmovdqub(XMMRegister dst, KRegister mask, Address src, int vector_len); - void evmovdquw(Address dst, XMMRegister src, int vector_len); - void evmovdquw(Address dst, KRegister mask, XMMRegister src, int vector_len); - void evmovdquw(XMMRegister dst, Address src, int vector_len); - void evmovdquw(XMMRegister dst, KRegister mask, Address src, int vector_len); + void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len); + void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len); + void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len); + void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len); + void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len); + void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len); + void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len); + void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len); void evmovdqul(Address dst, XMMRegister src, int vector_len); void evmovdqul(XMMRegister dst, Address src, int vector_len); void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len); + void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len); + void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len); + void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len); void evmovdquq(Address dst, XMMRegister src, int vector_len); void evmovdquq(XMMRegister dst, Address src, int vector_len); void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len); + void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len); + void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len); + void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len); + + // Generic move instructions. 
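+  // Masked load/store parameterized by element type ('type'), complementing the per-width evmovdqu[b/w/l/q] forms above.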
+ void evmovdqu(Address dst, KRegister mask, XMMRegister src, int vector_len, int type); + void evmovdqu(XMMRegister dst, KRegister mask, Address src, int vector_len, int type); // Move lower 64bit to high 64bit in 128bit register void movlhps(XMMRegister dst, XMMRegister src); @@ -1517,6 +1595,9 @@ class Assembler : public AbstractAssembler { // Move Quadword void movq(Address dst, XMMRegister src); void movq(XMMRegister dst, Address src); + void movq(XMMRegister dst, XMMRegister src); + void movq(Register dst, XMMRegister src); + void movq(XMMRegister dst, Register src); void movsbl(Register dst, Address src); void movsbl(Register dst, Register src); @@ -1597,6 +1678,8 @@ class Assembler : public AbstractAssembler { void btrq(Address dst, int imm8); #endif + void orw(Register dst, Register src); + void orl(Address dst, int32_t imm32); void orl(Register dst, int32_t imm32); void orl(Register dst, Address src); @@ -1610,17 +1693,32 @@ class Assembler : public AbstractAssembler { void orq(Register dst, Address src); void orq(Register dst, Register src); + // Pack with signed saturation + void packsswb(XMMRegister dst, XMMRegister src); + void vpacksswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void packssdw(XMMRegister dst, XMMRegister src); + void vpackssdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + // Pack with unsigned saturation void packuswb(XMMRegister dst, XMMRegister src); void packuswb(XMMRegister dst, Address src); + void packusdw(XMMRegister dst, XMMRegister src); void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vpackusdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); - // Pemutation of 64bit words + // Permutations void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len); void vpermq(XMMRegister dst, XMMRegister src, int imm8); void vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); + void vpermilps(XMMRegister dst, XMMRegister src, int imm8, int vector_len); + void vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len); + void vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len); void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void pause(); @@ -1633,11 +1731,14 @@ class Assembler : public AbstractAssembler { void pcmpestri(XMMRegister xmm1, Address src, int imm8); void pcmpeqb(XMMRegister dst, XMMRegister src); + void vpcmpCCbwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len); + void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len); void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len); void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len); + void vpcmpgtb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, 
int vector_len); void evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len); @@ -1650,16 +1751,22 @@ class Assembler : public AbstractAssembler { void evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len); void evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len); + void vpcmpgtw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void pcmpeqd(XMMRegister dst, XMMRegister src); void vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); - void evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len); - void evpcmpeqd(KRegister kdst, XMMRegister nds, Address src, int vector_len); + void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len); + void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len); void pcmpeqq(XMMRegister dst, XMMRegister src); + void vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len); void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len); void evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len); + void pcmpgtq(XMMRegister dst, XMMRegister src); + void vpcmpgtq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void pmovmskb(Register dst, XMMRegister src); void vpmovmskb(Register dst, XMMRegister src); @@ -1668,6 +1775,7 @@ class Assembler : public AbstractAssembler { void pextrq(Register dst, XMMRegister src, int imm8); void pextrd(Address dst, XMMRegister src, int imm8); void pextrq(Address dst, XMMRegister src, int imm8); + void pextrb(Register dst, XMMRegister src, int imm8); void pextrb(Address dst, XMMRegister src, int imm8); // SSE 2 extract void pextrw(Register dst, XMMRegister src, int imm8); @@ -1676,21 +1784,46 @@ class Assembler : public AbstractAssembler { // SSE 4.1 insert void pinsrd(XMMRegister dst, Register src, int imm8); void pinsrq(XMMRegister dst, Register src, int imm8); + void pinsrb(XMMRegister dst, Register src, int imm8); void pinsrd(XMMRegister dst, Address src, int imm8); void pinsrq(XMMRegister dst, Address src, int imm8); void pinsrb(XMMRegister dst, Address src, int imm8); + void insertps(XMMRegister dst, XMMRegister src, int imm8); // SSE 2 insert void pinsrw(XMMRegister dst, Register src, int imm8); void pinsrw(XMMRegister dst, Address src, int imm8); - // SSE4.1 packed move + // AVX insert + void vpinsrd(XMMRegister dst, XMMRegister nds, Register src, int imm8); + void vpinsrb(XMMRegister dst, XMMRegister nds, Register src, int imm8); + void vpinsrq(XMMRegister dst, XMMRegister nds, Register src, int imm8); + void vpinsrw(XMMRegister dst, XMMRegister nds, Register src, int imm8); + void vinsertps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8); + + // Zero extend moves void pmovzxbw(XMMRegister dst, XMMRegister src); void pmovzxbw(XMMRegister dst, Address src); - + void pmovzxbd(XMMRegister dst, XMMRegister src); void vpmovzxbw( XMMRegister dst, Address src, int vector_len); + void pmovzxdq(XMMRegister dst, XMMRegister src); void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len); + void vpmovzxdq(XMMRegister dst, XMMRegister src, int vector_len); + void vpmovzxbd(XMMRegister dst, XMMRegister src, int vector_len); + void vpmovzxbq(XMMRegister dst, XMMRegister src, int vector_len); void evpmovzxbw(XMMRegister dst, KRegister 
mask, Address src, int vector_len); + // Sign extend moves + void pmovsxbd(XMMRegister dst, XMMRegister src); + void pmovsxbq(XMMRegister dst, XMMRegister src); + void pmovsxbw(XMMRegister dst, XMMRegister src); + void pmovsxwd(XMMRegister dst, XMMRegister src); + void vpmovsxbd(XMMRegister dst, XMMRegister src, int vector_len); + void vpmovsxbq(XMMRegister dst, XMMRegister src, int vector_len); + void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len); + void vpmovsxwd(XMMRegister dst, XMMRegister src, int vector_len); + void vpmovsxwq(XMMRegister dst, XMMRegister src, int vector_len); + void vpmovsxdq(XMMRegister dst, XMMRegister src, int vector_len); + void evpmovwb(Address dst, XMMRegister src, int vector_len); void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len); @@ -1698,10 +1831,6 @@ class Assembler : public AbstractAssembler { void evpmovdb(Address dst, XMMRegister src, int vector_len); - // Sign extend moves - void pmovsxbw(XMMRegister dst, XMMRegister src); - void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len); - // Multiply add void pmaddwd(XMMRegister dst, XMMRegister src); void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); @@ -1745,10 +1874,17 @@ class Assembler : public AbstractAssembler { void pshufd(XMMRegister dst, Address src, int mode); void vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len); - // Shuffle Packed Low Words + // Shuffle Packed High/Low Words + void pshufhw(XMMRegister dst, XMMRegister src, int mode); void pshuflw(XMMRegister dst, XMMRegister src, int mode); void pshuflw(XMMRegister dst, Address src, int mode); + //shuffle floats and doubles + void pshufps(XMMRegister, XMMRegister, int); + void pshufpd(XMMRegister, XMMRegister, int); + void vpshufps(XMMRegister, XMMRegister, XMMRegister, int, int); + void vpshufpd(XMMRegister, XMMRegister, XMMRegister, int, int); + // Shuffle packed values at 128 bit granularity void evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len); @@ -1764,6 +1900,9 @@ class Assembler : public AbstractAssembler { void vptest(XMMRegister dst, XMMRegister src); void vptest(XMMRegister dst, Address src); + // Vector compare + void vptest(XMMRegister dst, XMMRegister src, int vector_len); + // Interleave Low Bytes void punpcklbw(XMMRegister dst, XMMRegister src); void punpcklbw(XMMRegister dst, Address src); @@ -1837,6 +1976,7 @@ class Assembler : public AbstractAssembler { void evalignq(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8); void pblendw(XMMRegister dst, XMMRegister src, int imm8); + void vblendps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len); void sha1rnds4(XMMRegister dst, XMMRegister src, int imm8); void sha1nexte(XMMRegister dst, XMMRegister src); @@ -1955,6 +2095,7 @@ class Assembler : public AbstractAssembler { void xorl(Register dst, Register src); void xorb(Register dst, Address src); + void xorw(Register dst, Register src); void xorq(Register dst, Address src); void xorq(Register dst, Register src); @@ -1989,8 +2130,12 @@ class Assembler : public AbstractAssembler { void shlxl(Register dst, Register src1, Register src2); void shlxq(Register dst, Register src1, Register src2); + void shrxq(Register dst, Register src1, Register src2); + //====================VECTOR ARITHMETIC===================================== + void evpmovd2m(KRegister kdst, XMMRegister src, int vector_len); + void evpmovq2m(KRegister kdst, XMMRegister src, int vector_len); 
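+  // evpmovd2m/evpmovq2m copy each lane's sign bit into a mask register. Combined with the masked
+  // compares and blends declared further down, they are the predication building blocks used by
+  // C2_MacroAssembler (see evminmax_fp). Illustrative sketch only, not part of this change (k2 picked arbitrarily):
+  //   evpcmpd(k2, k0, a, b, Assembler::nle, Assembler::AVX_512bit);     // k2[i] = (a[i] > b[i]), signed compare
+  //   evpblendmd(dst, k2, b, a, /*merge*/ true, Assembler::AVX_512bit); // dst[i] = k2[i] ? a[i] : b[i]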
// Add Packed Floating-Point Values void addpd(XMMRegister dst, XMMRegister src); @@ -2100,13 +2245,41 @@ class Assembler : public AbstractAssembler { // Multiply packed integers (only shorts and ints) void pmullw(XMMRegister dst, XMMRegister src); void pmulld(XMMRegister dst, XMMRegister src); + void pmuludq(XMMRegister dst, XMMRegister src); void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vpmuludq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len); void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len); void vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + // Minimum of packed integers + void pminsb(XMMRegister dst, XMMRegister src); + void vpminsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); + void pminsw(XMMRegister dst, XMMRegister src); + void vpminsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); + void pminsd(XMMRegister dst, XMMRegister src); + void vpminsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); + void vpminsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); + void minps(XMMRegister dst, XMMRegister src); + void vminps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); + void minpd(XMMRegister dst, XMMRegister src); + void vminpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); + + // Maximum of packed integers + void pmaxsb(XMMRegister dst, XMMRegister src); + void vpmaxsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); + void pmaxsw(XMMRegister dst, XMMRegister src); + void vpmaxsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); + void pmaxsd(XMMRegister dst, XMMRegister src); + void vpmaxsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); + void vpmaxsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); + void maxps(XMMRegister dst, XMMRegister src); + void vmaxps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); + void maxpd(XMMRegister dst, XMMRegister src); + void vmaxpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len); + // Shift left packed integers void psllw(XMMRegister dst, int shift); void pslld(XMMRegister dst, int shift); @@ -2148,9 +2321,22 @@ class Assembler : public AbstractAssembler { void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len); void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); + void evpsravw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len); void evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); + // Variable shift left packed integers + void vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); + void vpsllvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); + + // Variable shift right packed integers + void vpsrlvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); + void vpsrlvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int 
vector_len); + + // Variable shift right arithmetic packed integers + void vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); + void evpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); + void vpshldvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); void vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); @@ -2158,6 +2344,7 @@ class Assembler : public AbstractAssembler { void pand(XMMRegister dst, XMMRegister src); void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + void evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); void vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); // Andn packed integers @@ -2170,10 +2357,15 @@ class Assembler : public AbstractAssembler { void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len); void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len); + // Xor packed integers void pxor(XMMRegister dst, XMMRegister src); void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len); + void vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len); @@ -2251,7 +2443,21 @@ class Assembler : public AbstractAssembler { void evpbroadcastd(XMMRegister dst, Register src, int vector_len); void evpbroadcastq(XMMRegister dst, Register src, int vector_len); - void evpgatherdd(XMMRegister dst, KRegister k1, Address src, int vector_len); + // Gather AVX2 and AVX3 + void vpgatherdd(XMMRegister dst, Address src, XMMRegister mask, int vector_len); + void vpgatherdq(XMMRegister dst, Address src, XMMRegister mask, int vector_len); + void vgatherdpd(XMMRegister dst, Address src, XMMRegister mask, int vector_len); + void vgatherdps(XMMRegister dst, Address src, XMMRegister mask, int vector_len); + void evpgatherdd(XMMRegister dst, KRegister mask, Address src, int vector_len); + void evpgatherdq(XMMRegister dst, KRegister mask, Address src, int vector_len); + void evgatherdpd(XMMRegister dst, KRegister mask, Address src, int vector_len); + void evgatherdps(XMMRegister dst, KRegister mask, Address src, int vector_len); + + //Scatter AVX3 only + void evpscatterdd(Address dst, KRegister mask, XMMRegister src, int vector_len); + void evpscatterdq(Address dst, KRegister mask, XMMRegister src, int vector_len); + void evscatterdps(Address dst, KRegister mask, XMMRegister src, int vector_len); + void evscatterdpd(Address dst, KRegister mask, XMMRegister src, int vector_len); // Carry-Less Multiplication Quadword void pclmulqdq(XMMRegister dst, XMMRegister src, int mask); @@ -2264,14 +2470,56 @@ class Assembler : public AbstractAssembler { // runtime code and native libraries. void vzeroupper(); - // AVX support for vectorized conditional move (float/double). 
The following two instructions used only coupled. + // Vector double compares + void vcmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len); + void evcmppd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, + ComparisonPredicateFP comparison, int vector_len); + + // Vector float compares + void vcmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int comparison, int vector_len); + void evcmpps(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, + ComparisonPredicateFP comparison, int vector_len); + + // Vector integer compares + void vpcmpgtd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, + int comparison, int vector_len); + void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, + int comparison, int vector_len); + + // Vector long compares + void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, + int comparison, int vector_len); + void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address src, + int comparison, int vector_len); + + // Vector byte compares + void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, + int comparison, int vector_len); + void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, + int comparison, int vector_len); + + // Vector short compares + void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, + int comparison, int vector_len); + void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address src, + int comparison, int vector_len); + + // Vector blends + void blendvps(XMMRegister dst, XMMRegister src); + void blendvpd(XMMRegister dst, XMMRegister src); + void pblendvb(XMMRegister dst, XMMRegister src); void blendvpb(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len); - void cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len); - void blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len); - void cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len); - void blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len); + void vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len); + void vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len); + void vpblendvb(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len); void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len); - + void evblendmpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evblendmps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpblendmb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpblendmw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpblendmd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); + void evpblendmq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len); protected: // Next instructions require address alignment 16 bytes SSE mode. 
// They should be called only from corresponding MacroAssembler instructions. @@ -2367,7 +2615,8 @@ class InstructionAttr { // Internal encoding data used in compressed immediate offset programming void set_evex_encoding(int value) { _evex_encoding = value; } - // Set the Evex.Z field to be used to clear all non directed XMM/YMM/ZMM components + // When the Evex.Z field is set (true), it is used to clear all non directed XMM/YMM/ZMM components. + // This method unsets it so that merge semantics are used instead. void reset_is_clear_context(void) { _is_clear_context = false; } // Map back to current asembler so that we can manage object level assocation diff --git a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp index 526fe5af2fc..6853953f0eb 100644 --- a/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp +++ b/src/hotspot/cpu/x86/c1_CodeStubs_x86.cpp @@ -79,6 +79,32 @@ void ConversionStub::emit_code(LIR_Assembler* ce) { } #endif // !_LP64 +void C1SafepointPollStub::emit_code(LIR_Assembler* ce) { + __ bind(_entry); + InternalAddress safepoint_pc(ce->masm()->pc() - ce->masm()->offset() + safepoint_offset()); +#ifdef _LP64 + __ lea(rscratch1, safepoint_pc); + __ movptr(Address(r15_thread, JavaThread::saved_exception_pc_offset()), rscratch1); +#else + const Register tmp1 = rcx; + const Register tmp2 = rdx; + __ push(tmp1); + __ push(tmp2); + + __ lea(tmp1, safepoint_pc); + __ get_thread(tmp2); + __ movptr(Address(tmp2, JavaThread::saved_exception_pc_offset()), tmp1); + + __ pop(tmp2); + __ pop(tmp1); +#endif /* _LP64 */ + assert(SharedRuntime::polling_page_return_handler_blob() != NULL, + "polling page return stub not created yet"); + + address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); + __ jump(RuntimeAddress(stub)); +} + void CounterOverflowStub::emit_code(LIR_Assembler* ce) { __ bind(_entry); Metadata *m = _method->as_constant_ptr()->as_metadata(); diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp index 8a0200a18dc..bba946ec4ad 100644 --- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp @@ -25,6 +25,7 @@ #include "precompiled.hpp" #include "asm/macroAssembler.hpp" #include "asm/macroAssembler.inline.hpp" +#include "c1/c1_CodeStubs.hpp" #include "c1/c1_Compilation.hpp" #include "c1/c1_LIRAssembler.hpp" #include "c1/c1_MacroAssembler.hpp" @@ -517,8 +518,7 @@ int LIR_Assembler::emit_deopt_handler() { return offset; } - -void LIR_Assembler::return_op(LIR_Opr result) { +void LIR_Assembler::return_op(LIR_Opr result, C1SafepointPollStub* code_stub) { assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == rax, "word returns are in rax,"); if (!result->is_illegal() && result->is_float_kind() && !result->is_xmm_register()) { assert(result->fpu() == 0, "result must already be on TOS"); @@ -531,22 +531,18 @@ void LIR_Assembler::return_op(LIR_Opr result) { __ reserved_stack_check(); } - bool result_is_oop = result->is_valid() ? 
result->is_oop() : false; - // Note: we do not need to round double result; float result has the right precision // the poll sets the condition code, but no data registers #ifdef _LP64 - const Register poll_addr = rscratch1; - __ movptr(poll_addr, Address(r15_thread, Thread::polling_page_offset())); + const Register thread = r15_thread; #else - const Register poll_addr = rbx; - assert(FrameMap::is_caller_save_register(poll_addr), "will overwrite"); - __ get_thread(poll_addr); - __ movptr(poll_addr, Address(poll_addr, Thread::polling_page_offset())); + const Register thread = rbx; + __ get_thread(thread); #endif + code_stub->set_safepoint_offset(__ offset()); __ relocate(relocInfo::poll_return_type); - __ testl(rax, Address(poll_addr, 0)); + __ safepoint_poll(*code_stub->entry(), thread, true /* at_return */, true /* in_nmethod */); __ ret(0); } diff --git a/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp b/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp index 53935539a36..60347c41163 100644 --- a/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp +++ b/src/hotspot/cpu/x86/c1_Runtime1_x86.cpp @@ -69,7 +69,7 @@ int StubAssembler::call_RT(Register oop_result1, Register metadata_result, addre push(thread); #endif // _LP64 - int call_offset; + int call_offset = -1; if (!align_stack) { set_last_Java_frame(thread, noreg, rbp, NULL); } else { @@ -133,6 +133,8 @@ int StubAssembler::call_RT(Register oop_result1, Register metadata_result, addre if (metadata_result->is_valid()) { get_vm_result_2(metadata_result, thread); } + + assert(call_offset >= 0, "Should be set"); return call_offset; } diff --git a/src/hotspot/cpu/x86/c1_globals_x86.hpp b/src/hotspot/cpu/x86/c1_globals_x86.hpp index fbf538c2cec..afd2a65cb89 100644 --- a/src/hotspot/cpu/x86/c1_globals_x86.hpp +++ b/src/hotspot/cpu/x86/c1_globals_x86.hpp @@ -33,8 +33,6 @@ #ifndef TIERED define_pd_global(bool, BackgroundCompilation, true ); -define_pd_global(bool, UseTLAB, true ); -define_pd_global(bool, ResizeTLAB, true ); define_pd_global(bool, InlineIntrinsics, true ); define_pd_global(bool, PreferInterpreterNativeStubs, false); define_pd_global(bool, ProfileTraps, false); diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp index 8940b0c3c44..3aef6446f78 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp @@ -33,6 +33,21 @@ #include "runtime/objectMonitor.hpp" #include "runtime/stubRoutines.hpp" +inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vlen_in_bytes) { + switch (vlen_in_bytes) { + case 4: // fall-through + case 8: // fall-through + case 16: return Assembler::AVX_128bit; + case 32: return Assembler::AVX_256bit; + case 64: return Assembler::AVX_512bit; + + default: { + ShouldNotReachHere(); + return Assembler::AVX_NoVec; + } + } +} + void C2_MacroAssembler::setvectmask(Register dst, Register src) { guarantee(PostLoopMultiversioning, "must be"); Assembler::movl(dst, 1); @@ -861,6 +876,174 @@ void C2_MacroAssembler::vabsnegf(int opcode, XMMRegister dst, XMMRegister src, i } } +void C2_MacroAssembler::pminmax(int opcode, BasicType elem_bt, XMMRegister dst, XMMRegister src, XMMRegister tmp) { + assert(opcode == Op_MinV || opcode == Op_MaxV, "sanity"); + assert(tmp == xnoreg || elem_bt == T_LONG, "unused"); + + if (opcode == Op_MinV) { + if (elem_bt == T_BYTE) { + pminsb(dst, src); + } else if (elem_bt == T_SHORT) { + pminsw(dst, src); + } else if (elem_bt == T_INT) { + pminsd(dst, src); + } else { + assert(elem_bt == T_LONG, 
"required"); + assert(tmp == xmm0, "required"); + assert_different_registers(dst, src, tmp); + movdqu(xmm0, dst); + pcmpgtq(xmm0, src); + blendvpd(dst, src); // xmm0 as mask + } + } else { // opcode == Op_MaxV + if (elem_bt == T_BYTE) { + pmaxsb(dst, src); + } else if (elem_bt == T_SHORT) { + pmaxsw(dst, src); + } else if (elem_bt == T_INT) { + pmaxsd(dst, src); + } else { + assert(elem_bt == T_LONG, "required"); + assert(tmp == xmm0, "required"); + assert_different_registers(dst, src, tmp); + movdqu(xmm0, src); + pcmpgtq(xmm0, dst); + blendvpd(dst, src); // xmm0 as mask + } + } +} + +void C2_MacroAssembler::vpminmax(int opcode, BasicType elem_bt, + XMMRegister dst, XMMRegister src1, XMMRegister src2, + int vlen_enc) { + assert(opcode == Op_MinV || opcode == Op_MaxV, "sanity"); + + if (opcode == Op_MinV) { + if (elem_bt == T_BYTE) { + vpminsb(dst, src1, src2, vlen_enc); + } else if (elem_bt == T_SHORT) { + vpminsw(dst, src1, src2, vlen_enc); + } else if (elem_bt == T_INT) { + vpminsd(dst, src1, src2, vlen_enc); + } else { + assert(elem_bt == T_LONG, "required"); + if (UseAVX > 2 && (vlen_enc == Assembler::AVX_512bit || VM_Version::supports_avx512vl())) { + vpminsq(dst, src1, src2, vlen_enc); + } else { + assert_different_registers(dst, src1, src2); + vpcmpgtq(dst, src1, src2, vlen_enc); + vblendvpd(dst, src1, src2, dst, vlen_enc); + } + } + } else { // opcode == Op_MaxV + if (elem_bt == T_BYTE) { + vpmaxsb(dst, src1, src2, vlen_enc); + } else if (elem_bt == T_SHORT) { + vpmaxsw(dst, src1, src2, vlen_enc); + } else if (elem_bt == T_INT) { + vpmaxsd(dst, src1, src2, vlen_enc); + } else { + assert(elem_bt == T_LONG, "required"); + if (UseAVX > 2 && (vlen_enc == Assembler::AVX_512bit || VM_Version::supports_avx512vl())) { + vpmaxsq(dst, src1, src2, vlen_enc); + } else { + assert_different_registers(dst, src1, src2); + vpcmpgtq(dst, src1, src2, vlen_enc); + vblendvpd(dst, src2, src1, dst, vlen_enc); + } + } + } +} + +// Float/Double min max + +void C2_MacroAssembler::vminmax_fp(int opcode, BasicType elem_bt, + XMMRegister dst, XMMRegister a, XMMRegister b, + XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, + int vlen_enc) { + assert(UseAVX > 0, "required"); + assert(opcode == Op_MinV || opcode == Op_MinReductionV || + opcode == Op_MaxV || opcode == Op_MaxReductionV, "sanity"); + assert(elem_bt == T_FLOAT || elem_bt == T_DOUBLE, "sanity"); + assert_different_registers(a, b, tmp, atmp, btmp); + + bool is_min = (opcode == Op_MinV || opcode == Op_MinReductionV); + bool is_double_word = is_double_word_type(elem_bt); + + if (!is_double_word && is_min) { + vblendvps(atmp, a, b, a, vlen_enc); + vblendvps(btmp, b, a, a, vlen_enc); + vminps(tmp, atmp, btmp, vlen_enc); + vcmpps(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc); + vblendvps(dst, tmp, atmp, btmp, vlen_enc); + } else if (!is_double_word && !is_min) { + vblendvps(btmp, b, a, b, vlen_enc); + vblendvps(atmp, a, b, b, vlen_enc); + vmaxps(tmp, atmp, btmp, vlen_enc); + vcmpps(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc); + vblendvps(dst, tmp, atmp, btmp, vlen_enc); + } else if (is_double_word && is_min) { + vblendvpd(atmp, a, b, a, vlen_enc); + vblendvpd(btmp, b, a, a, vlen_enc); + vminpd(tmp, atmp, btmp, vlen_enc); + vcmppd(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc); + vblendvpd(dst, tmp, atmp, btmp, vlen_enc); + } else { + assert(is_double_word && !is_min, "sanity"); + vblendvpd(btmp, b, a, b, vlen_enc); + vblendvpd(atmp, a, b, b, vlen_enc); + vmaxpd(tmp, atmp, btmp, vlen_enc); + vcmppd(btmp, atmp, atmp, Assembler::UNORD_Q, vlen_enc); 
+ vblendvpd(dst, tmp, atmp, btmp, vlen_enc); + } +} + +void C2_MacroAssembler::evminmax_fp(int opcode, BasicType elem_bt, + XMMRegister dst, XMMRegister a, XMMRegister b, + KRegister ktmp, XMMRegister atmp, XMMRegister btmp, + int vlen_enc) { + assert(UseAVX > 2, "required"); + assert(opcode == Op_MinV || opcode == Op_MinReductionV || + opcode == Op_MaxV || opcode == Op_MaxReductionV, "sanity"); + assert(elem_bt == T_FLOAT || elem_bt == T_DOUBLE, "sanity"); + assert_different_registers(dst, a, b, atmp, btmp); + + bool is_min = (opcode == Op_MinV || opcode == Op_MinReductionV); + bool is_double_word = is_double_word_type(elem_bt); + bool merge = true; + + if (!is_double_word && is_min) { + evpmovd2m(ktmp, a, vlen_enc); + evblendmps(atmp, ktmp, a, b, merge, vlen_enc); + evblendmps(btmp, ktmp, b, a, merge, vlen_enc); + vminps(dst, atmp, btmp, vlen_enc); + evcmpps(ktmp, k0, atmp, atmp, Assembler::UNORD_Q, vlen_enc); + evmovdqul(dst, ktmp, atmp, merge, vlen_enc); + } else if (!is_double_word && !is_min) { + evpmovd2m(ktmp, b, vlen_enc); + evblendmps(atmp, ktmp, a, b, merge, vlen_enc); + evblendmps(btmp, ktmp, b, a, merge, vlen_enc); + vmaxps(dst, atmp, btmp, vlen_enc); + evcmpps(ktmp, k0, atmp, atmp, Assembler::UNORD_Q, vlen_enc); + evmovdqul(dst, ktmp, atmp, merge, vlen_enc); + } else if (is_double_word && is_min) { + evpmovq2m(ktmp, a, vlen_enc); + evblendmpd(atmp, ktmp, a, b, merge, vlen_enc); + evblendmpd(btmp, ktmp, b, a, merge, vlen_enc); + vminpd(dst, atmp, btmp, vlen_enc); + evcmppd(ktmp, k0, atmp, atmp, Assembler::UNORD_Q, vlen_enc); + evmovdquq(dst, ktmp, atmp, merge, vlen_enc); + } else { + assert(is_double_word && !is_min, "sanity"); + evpmovq2m(ktmp, b, vlen_enc); + evblendmpd(atmp, ktmp, a, b, merge, vlen_enc); + evblendmpd(btmp, ktmp, b, a, merge, vlen_enc); + vmaxpd(dst, atmp, btmp, vlen_enc); + evcmppd(ktmp, k0, atmp, atmp, Assembler::UNORD_Q, vlen_enc); + evmovdquq(dst, ktmp, atmp, merge, vlen_enc); + } +} + void C2_MacroAssembler::vextendbw(bool sign, XMMRegister dst, XMMRegister src) { if (sign) { pmovsxbw(dst, src); @@ -877,6 +1060,22 @@ void C2_MacroAssembler::vextendbw(bool sign, XMMRegister dst, XMMRegister src, i } } +void C2_MacroAssembler::vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len) { + if (sign) { + vpmovsxbd(dst, src, vector_len); + } else { + vpmovzxbd(dst, src, vector_len); + } +} + +void C2_MacroAssembler::vextendwd(bool sign, XMMRegister dst, XMMRegister src, int vector_len) { + if (sign) { + vpmovsxwd(dst, src, vector_len); + } else { + vpmovzxwd(dst, src, vector_len); + } +} + void C2_MacroAssembler::vprotate_imm(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, int shift, int vector_len) { if (opcode == Op_RotateLeftV) { @@ -928,14 +1127,13 @@ void C2_MacroAssembler::vshiftd_imm(int opcode, XMMRegister dst, int shift) { } } -void C2_MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister src) { - if (opcode == Op_RShiftVI) { - psrad(dst, src); - } else if (opcode == Op_LShiftVI) { - pslld(dst, src); - } else { - assert((opcode == Op_URShiftVI),"opcode should be Op_URShiftVI"); - psrld(dst, src); +void C2_MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister shift) { + switch (opcode) { + case Op_RShiftVI: psrad(dst, shift); break; + case Op_LShiftVI: pslld(dst, shift); break; + case Op_URShiftVI: psrld(dst, shift); break; + + default: assert(false, "%s", NodeClassNames[opcode]); } } @@ -950,47 +1148,53 @@ void C2_MacroAssembler::vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds } } -void 
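// The vminmax_fp/evminmax_fp sequences above order the operands by sign bit
// before the packed min/max and then patch unordered (NaN) lanes with a
// compare, so each lane ends up with Java-style min/max behaviour: NaN
// propagates and -0.0 sorts below +0.0.  A scalar sketch of that per-lane
// behaviour, using only <cmath>; illustrative only, not part of this patch:
#include <cmath>

static float java_style_min(float a, float b) {
  if (std::isnan(a) || std::isnan(b)) {
    return std::nanf("");            // UNORD_Q compare routes NaN lanes to the result
  }
  if (a == 0.0f && b == 0.0f) {
    return std::signbit(a) ? a : b;  // sign-based blend makes min(-0.0f, +0.0f) == -0.0f
  }
  return a < b ? a : b;              // vminps/vminpd on the reordered operands
}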
C2_MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - if (opcode == Op_RShiftVI) { - vpsrad(dst, nds, src, vector_len); - } else if (opcode == Op_LShiftVI) { - vpslld(dst, nds, src, vector_len); - } else { - assert((opcode == Op_URShiftVI),"opcode should be Op_URShiftVI"); - vpsrld(dst, nds, src, vector_len); +void C2_MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) { + switch (opcode) { + case Op_RShiftVI: vpsrad(dst, src, shift, vlen_enc); break; + case Op_LShiftVI: vpslld(dst, src, shift, vlen_enc); break; + case Op_URShiftVI: vpsrld(dst, src, shift, vlen_enc); break; + + default: assert(false, "%s", NodeClassNames[opcode]); } } -void C2_MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister src) { - if ((opcode == Op_RShiftVS) || (opcode == Op_RShiftVB)) { - psraw(dst, src); - } else if ((opcode == Op_LShiftVS) || (opcode == Op_LShiftVB)) { - psllw(dst, src); - } else { - assert(((opcode == Op_URShiftVS) || (opcode == Op_URShiftVB)),"opcode should be one of Op_URShiftVS or Op_URShiftVB"); - psrlw(dst, src); +void C2_MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister shift) { + switch (opcode) { + case Op_RShiftVB: // fall-through + case Op_RShiftVS: psraw(dst, shift); break; + + case Op_LShiftVB: // fall-through + case Op_LShiftVS: psllw(dst, shift); break; + + case Op_URShiftVS: // fall-through + case Op_URShiftVB: psrlw(dst, shift); break; + + default: assert(false, "%s", NodeClassNames[opcode]); } } -void C2_MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - if ((opcode == Op_RShiftVS) || (opcode == Op_RShiftVB)) { - vpsraw(dst, nds, src, vector_len); - } else if ((opcode == Op_LShiftVS) || (opcode == Op_LShiftVB)) { - vpsllw(dst, nds, src, vector_len); - } else { - assert(((opcode == Op_URShiftVS) || (opcode == Op_URShiftVB)),"opcode should be one of Op_URShiftVS or Op_URShiftVB"); - vpsrlw(dst, nds, src, vector_len); +void C2_MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) { + switch (opcode) { + case Op_RShiftVB: // fall-through + case Op_RShiftVS: vpsraw(dst, src, shift, vlen_enc); break; + + case Op_LShiftVB: // fall-through + case Op_LShiftVS: vpsllw(dst, src, shift, vlen_enc); break; + + case Op_URShiftVS: // fall-through + case Op_URShiftVB: vpsrlw(dst, src, shift, vlen_enc); break; + + default: assert(false, "%s", NodeClassNames[opcode]); } } -void C2_MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister src) { - if (opcode == Op_RShiftVL) { - psrlq(dst, src); // using srl to implement sra on pre-avs512 systems - } else if (opcode == Op_LShiftVL) { - psllq(dst, src); - } else { - assert((opcode == Op_URShiftVL),"opcode should be Op_URShiftVL"); - psrlq(dst, src); +void C2_MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister shift) { + switch (opcode) { + case Op_RShiftVL: psrlq(dst, shift); break; // using srl to implement sra on pre-avs512 systems + case Op_LShiftVL: psllq(dst, shift); break; + case Op_URShiftVL: psrlq(dst, shift); break; + + default: assert(false, "%s", NodeClassNames[opcode]); } } @@ -1005,14 +1209,13 @@ void C2_MacroAssembler::vshiftq_imm(int opcode, XMMRegister dst, int shift) { } } -void C2_MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { - if (opcode == Op_RShiftVL) { - evpsraq(dst, nds, src, vector_len); - } else if (opcode == 
Op_LShiftVL) { - vpsllq(dst, nds, src, vector_len); - } else { - assert((opcode == Op_URShiftVL),"opcode should be Op_URShiftVL"); - vpsrlq(dst, nds, src, vector_len); +void C2_MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) { + switch (opcode) { + case Op_RShiftVL: evpsraq(dst, src, shift, vlen_enc); break; + case Op_LShiftVL: vpsllq(dst, src, shift, vlen_enc); break; + case Op_URShiftVL: vpsrlq(dst, src, shift, vlen_enc); break; + + default: assert(false, "%s", NodeClassNames[opcode]); } } @@ -1027,45 +1230,351 @@ void C2_MacroAssembler::vshiftq_imm(int opcode, XMMRegister dst, XMMRegister nds } } -// Reductions for vectors of ints, longs, floats, and doubles. +void C2_MacroAssembler::varshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) { + switch (opcode) { + case Op_RShiftVB: // fall-through + case Op_RShiftVS: // fall-through + case Op_RShiftVI: vpsravd(dst, src, shift, vlen_enc); break; + + case Op_LShiftVB: // fall-through + case Op_LShiftVS: // fall-through + case Op_LShiftVI: vpsllvd(dst, src, shift, vlen_enc); break; + + case Op_URShiftVB: // fall-through + case Op_URShiftVS: // fall-through + case Op_URShiftVI: vpsrlvd(dst, src, shift, vlen_enc); break; + + default: assert(false, "%s", NodeClassNames[opcode]); + } +} + +void C2_MacroAssembler::varshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc) { + switch (opcode) { + case Op_RShiftVB: // fall-through + case Op_RShiftVS: evpsravw(dst, src, shift, vlen_enc); break; -void C2_MacroAssembler::reduce_operation_128(int opcode, XMMRegister dst, XMMRegister src) { + case Op_LShiftVB: // fall-through + case Op_LShiftVS: evpsllvw(dst, src, shift, vlen_enc); break; + + case Op_URShiftVB: // fall-through + case Op_URShiftVS: evpsrlvw(dst, src, shift, vlen_enc); break; + + default: assert(false, "%s", NodeClassNames[opcode]); + } +} + +void C2_MacroAssembler::varshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc, XMMRegister tmp) { + assert(UseAVX >= 2, "required"); + switch (opcode) { + case Op_RShiftVL: { + if (UseAVX > 2) { + assert(tmp == xnoreg, "not used"); + if (!VM_Version::supports_avx512vl()) { + vlen_enc = Assembler::AVX_512bit; + } + evpsravq(dst, src, shift, vlen_enc); + } else { + vmovdqu(tmp, ExternalAddress(StubRoutines::x86::vector_long_sign_mask())); + vpsrlvq(dst, src, shift, vlen_enc); + vpsrlvq(tmp, tmp, shift, vlen_enc); + vpxor(dst, dst, tmp, vlen_enc); + vpsubq(dst, dst, tmp, vlen_enc); + } + break; + } + case Op_LShiftVL: { + assert(tmp == xnoreg, "not used"); + vpsllvq(dst, src, shift, vlen_enc); + break; + } + case Op_URShiftVL: { + assert(tmp == xnoreg, "not used"); + vpsrlvq(dst, src, shift, vlen_enc); + break; + } + default: assert(false, "%s", NodeClassNames[opcode]); + } +} + +// Variable shift src by shift using vtmp and scratch as TEMPs giving word result in dst +void C2_MacroAssembler::varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch) { + assert(opcode == Op_LShiftVB || + opcode == Op_RShiftVB || + opcode == Op_URShiftVB, "%s", NodeClassNames[opcode]); + bool sign = (opcode != Op_URShiftVB); + assert(vector_len == 0, "required"); + vextendbd(sign, dst, src, 1); + vpmovzxbd(vtmp, shift, 1); + varshiftd(opcode, dst, dst, vtmp, 1); + vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_int_to_byte_mask()), 1, scratch); + vextracti128_high(vtmp, dst); + vpackusdw(dst, 
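// Without evpsravq (UseAVX == 2), varshiftq above emulates a 64-bit arithmetic
// right shift with logical shifts only: shift both the value and the sign-bit
// constant by the same per-lane amount, then xor and subtract to smear the
// sign bits back in.  Scalar form of that identity, assuming the per-lane
// shift count is in [0, 63]; illustrative only:
#include <cstdint>

static int64_t sra64_via_logical_shifts(int64_t value, unsigned shift) {
  uint64_t logical = static_cast<uint64_t>(value) >> shift;   // vpsrlvq dst, src, shift
  uint64_t sign    = (uint64_t{1} << 63) >> shift;            // vpsrlvq tmp, vector_long_sign_mask, shift
  return static_cast<int64_t>((logical ^ sign) - sign);       // vpxor + vpsubq
}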
dst, vtmp, 0); +} + +// Variable shift src by shift using vtmp and scratch as TEMPs giving byte result in dst +void C2_MacroAssembler::evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch) { + assert(opcode == Op_LShiftVB || + opcode == Op_RShiftVB || + opcode == Op_URShiftVB, "%s", NodeClassNames[opcode]); + bool sign = (opcode != Op_URShiftVB); + int ext_vector_len = vector_len + 1; + vextendbw(sign, dst, src, ext_vector_len); + vpmovzxbw(vtmp, shift, ext_vector_len); + varshiftw(opcode, dst, dst, vtmp, ext_vector_len); + vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_short_to_byte_mask()), ext_vector_len, scratch); + if (vector_len == 0) { + vextracti128_high(vtmp, dst); + vpackuswb(dst, dst, vtmp, vector_len); + } else { + vextracti64x4_high(vtmp, dst); + vpackuswb(dst, dst, vtmp, vector_len); + vpermq(dst, dst, 0xD8, vector_len); + } +} + +void C2_MacroAssembler::insert(BasicType typ, XMMRegister dst, Register val, int idx) { + switch(typ) { + case T_BYTE: + pinsrb(dst, val, idx); + break; + case T_SHORT: + pinsrw(dst, val, idx); + break; + case T_INT: + pinsrd(dst, val, idx); + break; + case T_LONG: + pinsrq(dst, val, idx); + break; + default: + assert(false,"Should not reach here."); + break; + } +} + +void C2_MacroAssembler::vinsert(BasicType typ, XMMRegister dst, XMMRegister src, Register val, int idx) { + switch(typ) { + case T_BYTE: + vpinsrb(dst, src, val, idx); + break; + case T_SHORT: + vpinsrw(dst, src, val, idx); + break; + case T_INT: + vpinsrd(dst, src, val, idx); + break; + case T_LONG: + vpinsrq(dst, src, val, idx); + break; + default: + assert(false,"Should not reach here."); + break; + } +} + +void C2_MacroAssembler::vgather(BasicType typ, XMMRegister dst, Register base, XMMRegister idx, XMMRegister mask, int vector_len) { + switch(typ) { + case T_INT: + vpgatherdd(dst, Address(base, idx, Address::times_4), mask, vector_len); + break; + case T_FLOAT: + vgatherdps(dst, Address(base, idx, Address::times_4), mask, vector_len); + break; + case T_LONG: + vpgatherdq(dst, Address(base, idx, Address::times_8), mask, vector_len); + break; + case T_DOUBLE: + vgatherdpd(dst, Address(base, idx, Address::times_8), mask, vector_len); + break; + default: + assert(false,"Should not reach here."); + break; + } +} + +void C2_MacroAssembler::evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len) { + switch(typ) { + case T_INT: + evpgatherdd(dst, mask, Address(base, idx, Address::times_4), vector_len); + break; + case T_FLOAT: + evgatherdps(dst, mask, Address(base, idx, Address::times_4), vector_len); + break; + case T_LONG: + evpgatherdq(dst, mask, Address(base, idx, Address::times_8), vector_len); + break; + case T_DOUBLE: + evgatherdpd(dst, mask, Address(base, idx, Address::times_8), vector_len); + break; + default: + assert(false,"Should not reach here."); + break; + } +} + +void C2_MacroAssembler::evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len) { + switch(typ) { + case T_INT: + evpscatterdd(Address(base, idx, Address::times_4), mask, src, vector_len); + break; + case T_FLOAT: + evscatterdps(Address(base, idx, Address::times_4), mask, src, vector_len); + break; + case T_LONG: + evpscatterdq(Address(base, idx, Address::times_8), mask, src, vector_len); + break; + case T_DOUBLE: + evscatterdpd(Address(base, idx, Address::times_8), mask, src, vector_len); + break; + default: + assert(false,"Should 
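// vgather/evgather above turn a vector of int indices into loads of the form
// base + idx[i] * element_size, with a per-lane mask gating each load.  A
// scalar model of the AVX2 dword form (vpgatherdd), where the sign bit of the
// mask lane enables the load and unselected destination lanes are not written;
// illustrative only:
#include <cstddef>
#include <cstdint>

static void gather_dwords(int32_t* dst, const int32_t* base,
                          const int32_t* idx, const int32_t* mask, size_t lanes) {
  for (size_t i = 0; i < lanes; i++) {
    if (mask[i] < 0) {           // most significant bit of the mask lane is set
      dst[i] = base[idx[i]];     // Address(base, idx, Address::times_4)
    }
  }
}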
not reach here."); + break; + } +} + +void C2_MacroAssembler::load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt) { + if (vlen_in_bytes <= 16) { + pxor (dst, dst); + psubb(dst, src); + switch (elem_bt) { + case T_BYTE: /* nothing to do */ break; + case T_SHORT: pmovsxbw(dst, dst); break; + case T_INT: pmovsxbd(dst, dst); break; + case T_FLOAT: pmovsxbd(dst, dst); break; + case T_LONG: pmovsxbq(dst, dst); break; + case T_DOUBLE: pmovsxbq(dst, dst); break; + + default: assert(false, "%s", type2name(elem_bt)); + } + } else { + int vlen_enc = vector_length_encoding(vlen_in_bytes); + + vpxor (dst, dst, dst, vlen_enc); + vpsubb(dst, dst, src, vlen_enc); + switch (elem_bt) { + case T_BYTE: /* nothing to do */ break; + case T_SHORT: vpmovsxbw(dst, dst, vlen_enc); break; + case T_INT: vpmovsxbd(dst, dst, vlen_enc); break; + case T_FLOAT: vpmovsxbd(dst, dst, vlen_enc); break; + case T_LONG: vpmovsxbq(dst, dst, vlen_enc); break; + case T_DOUBLE: vpmovsxbq(dst, dst, vlen_enc); break; + + default: assert(false, "%s", type2name(elem_bt)); + } + } +} + +void C2_MacroAssembler::load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes) { + ExternalAddress addr(StubRoutines::x86::vector_iota_indices()); + if (vlen_in_bytes <= 16) { + movdqu(dst, addr, scratch); + } else if (vlen_in_bytes == 32) { + vmovdqu(dst, addr, scratch); + } else { + assert(vlen_in_bytes == 64, "%d", vlen_in_bytes); + evmovdqub(dst, k0, addr, false /*merge*/, Assembler::AVX_512bit, scratch); + } +} +// Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles. + +void C2_MacroAssembler::reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src) { int vector_len = Assembler::AVX_128bit; switch (opcode) { case Op_AndReductionV: pand(dst, src); break; case Op_OrReductionV: por (dst, src); break; case Op_XorReductionV: pxor(dst, src); break; - + case Op_MinReductionV: + switch (typ) { + case T_BYTE: pminsb(dst, src); break; + case T_SHORT: pminsw(dst, src); break; + case T_INT: pminsd(dst, src); break; + case T_LONG: assert(UseAVX > 2, "required"); + vpminsq(dst, dst, src, Assembler::AVX_128bit); break; + default: assert(false, "wrong type"); + } + break; + case Op_MaxReductionV: + switch (typ) { + case T_BYTE: pmaxsb(dst, src); break; + case T_SHORT: pmaxsw(dst, src); break; + case T_INT: pmaxsd(dst, src); break; + case T_LONG: assert(UseAVX > 2, "required"); + vpmaxsq(dst, dst, src, Assembler::AVX_128bit); break; + default: assert(false, "wrong type"); + } + break; case Op_AddReductionVF: addss(dst, src); break; case Op_AddReductionVD: addsd(dst, src); break; - case Op_AddReductionVI: paddd(dst, src); break; + case Op_AddReductionVI: + switch (typ) { + case T_BYTE: paddb(dst, src); break; + case T_SHORT: paddw(dst, src); break; + case T_INT: paddd(dst, src); break; + default: assert(false, "wrong type"); + } + break; case Op_AddReductionVL: paddq(dst, src); break; - case Op_MulReductionVF: mulss(dst, src); break; case Op_MulReductionVD: mulsd(dst, src); break; - case Op_MulReductionVI: pmulld(dst, src); break; - case Op_MulReductionVL: vpmullq(dst, dst, src, vector_len); break; - - default: assert(false, "wrong opcode"); + case Op_MulReductionVI: + switch (typ) { + case T_SHORT: pmullw(dst, src); break; + case T_INT: pmulld(dst, src); break; + default: assert(false, "wrong type"); + } + break; + case Op_MulReductionVL: assert(UseAVX > 2, "required"); + vpmullq(dst, dst, src, vector_len); break; + default: assert(false, "wrong opcode"); } } -void 
C2_MacroAssembler::reduce_operation_256(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2) { +void C2_MacroAssembler::reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2) { int vector_len = Assembler::AVX_256bit; switch (opcode) { case Op_AndReductionV: vpand(dst, src1, src2, vector_len); break; case Op_OrReductionV: vpor (dst, src1, src2, vector_len); break; case Op_XorReductionV: vpxor(dst, src1, src2, vector_len); break; - - case Op_AddReductionVI: vpaddd(dst, src1, src2, vector_len); break; + case Op_MinReductionV: + switch (typ) { + case T_BYTE: vpminsb(dst, src1, src2, vector_len); break; + case T_SHORT: vpminsw(dst, src1, src2, vector_len); break; + case T_INT: vpminsd(dst, src1, src2, vector_len); break; + case T_LONG: assert(UseAVX > 2, "required"); + vpminsq(dst, src1, src2, vector_len); break; + default: assert(false, "wrong type"); + } + break; + case Op_MaxReductionV: + switch (typ) { + case T_BYTE: vpmaxsb(dst, src1, src2, vector_len); break; + case T_SHORT: vpmaxsw(dst, src1, src2, vector_len); break; + case T_INT: vpmaxsd(dst, src1, src2, vector_len); break; + case T_LONG: assert(UseAVX > 2, "required"); + vpmaxsq(dst, src1, src2, vector_len); break; + default: assert(false, "wrong type"); + } + break; + case Op_AddReductionVI: + switch (typ) { + case T_BYTE: vpaddb(dst, src1, src2, vector_len); break; + case T_SHORT: vpaddw(dst, src1, src2, vector_len); break; + case T_INT: vpaddd(dst, src1, src2, vector_len); break; + default: assert(false, "wrong type"); + } + break; case Op_AddReductionVL: vpaddq(dst, src1, src2, vector_len); break; - - case Op_MulReductionVI: vpmulld(dst, src1, src2, vector_len); break; + case Op_MulReductionVI: + switch (typ) { + case T_SHORT: vpmullw(dst, src1, src2, vector_len); break; + case T_INT: vpmulld(dst, src1, src2, vector_len); break; + default: assert(false, "wrong type"); + } + break; case Op_MulReductionVL: vpmullq(dst, src1, src2, vector_len); break; - - default: assert(false, "wrong opcode"); + default: assert(false, "wrong opcode"); } } @@ -1087,9 +1596,48 @@ void C2_MacroAssembler::reduce_fp(int opcode, int vlen, } } +void C2_MacroAssembler::reduceB(int opcode, int vlen, + Register dst, Register src1, XMMRegister src2, + XMMRegister vtmp1, XMMRegister vtmp2) { + switch (vlen) { + case 8: reduce8B (opcode, dst, src1, src2, vtmp1, vtmp2); break; + case 16: reduce16B(opcode, dst, src1, src2, vtmp1, vtmp2); break; + case 32: reduce32B(opcode, dst, src1, src2, vtmp1, vtmp2); break; + case 64: reduce64B(opcode, dst, src1, src2, vtmp1, vtmp2); break; + + default: assert(false, "wrong vector length"); + } +} + +void C2_MacroAssembler::mulreduceB(int opcode, int vlen, + Register dst, Register src1, XMMRegister src2, + XMMRegister vtmp1, XMMRegister vtmp2) { + switch (vlen) { + case 8: mulreduce8B (opcode, dst, src1, src2, vtmp1, vtmp2); break; + case 16: mulreduce16B(opcode, dst, src1, src2, vtmp1, vtmp2); break; + case 32: mulreduce32B(opcode, dst, src1, src2, vtmp1, vtmp2); break; + case 64: mulreduce64B(opcode, dst, src1, src2, vtmp1, vtmp2); break; + + default: assert(false, "wrong vector length"); + } +} + +void C2_MacroAssembler::reduceS(int opcode, int vlen, + Register dst, Register src1, XMMRegister src2, + XMMRegister vtmp1, XMMRegister vtmp2) { + switch (vlen) { + case 4: reduce4S (opcode, dst, src1, src2, vtmp1, vtmp2); break; + case 8: reduce8S (opcode, dst, src1, src2, vtmp1, vtmp2); break; + case 16: reduce16S(opcode, dst, src1, src2, vtmp1, vtmp2); break; + case 32: 
reduce32S(opcode, dst, src1, src2, vtmp1, vtmp2); break; + + default: assert(false, "wrong vector length"); + } +} + void C2_MacroAssembler::reduceI(int opcode, int vlen, - Register dst, Register src1, XMMRegister src2, - XMMRegister vtmp1, XMMRegister vtmp2) { + Register dst, Register src1, XMMRegister src2, + XMMRegister vtmp1, XMMRegister vtmp2) { switch (vlen) { case 2: reduce2I (opcode, dst, src1, src2, vtmp1, vtmp2); break; case 4: reduce4I (opcode, dst, src1, src2, vtmp1, vtmp2); break; @@ -1102,8 +1650,8 @@ void C2_MacroAssembler::reduceI(int opcode, int vlen, #ifdef _LP64 void C2_MacroAssembler::reduceL(int opcode, int vlen, - Register dst, Register src1, XMMRegister src2, - XMMRegister vtmp1, XMMRegister vtmp2) { + Register dst, Register src1, XMMRegister src2, + XMMRegister vtmp1, XMMRegister vtmp2) { switch (vlen) { case 2: reduce2L(opcode, dst, src1, src2, vtmp1, vtmp2); break; case 4: reduce4L(opcode, dst, src1, src2, vtmp1, vtmp2); break; @@ -1158,10 +1706,10 @@ void C2_MacroAssembler::reduce2I(int opcode, Register dst, Register src1, XMMReg phaddd(vtmp1, vtmp1); } else { pshufd(vtmp1, src2, 0x1); - reduce_operation_128(opcode, vtmp1, src2); + reduce_operation_128(T_INT, opcode, vtmp1, src2); } movdl(vtmp2, src1); - reduce_operation_128(opcode, vtmp1, vtmp2); + reduce_operation_128(T_INT, opcode, vtmp1, vtmp2); movdl(dst, vtmp1); } @@ -1174,7 +1722,7 @@ void C2_MacroAssembler::reduce4I(int opcode, Register dst, Register src1, XMMReg reduce2I(opcode, dst, src1, vtmp1, vtmp1, vtmp2); } else { pshufd(vtmp2, src2, 0xE); - reduce_operation_128(opcode, vtmp2, src2); + reduce_operation_128(T_INT, opcode, vtmp2, src2); reduce2I(opcode, dst, src1, vtmp2, vtmp1, vtmp2); } } @@ -1187,51 +1735,176 @@ void C2_MacroAssembler::reduce8I(int opcode, Register dst, Register src1, XMMReg reduce2I(opcode, dst, src1, vtmp1, vtmp1, vtmp2); } else { vextracti128_high(vtmp1, src2); - reduce_operation_128(opcode, vtmp1, src2); + reduce_operation_128(T_INT, opcode, vtmp1, src2); reduce4I(opcode, dst, src1, vtmp1, vtmp1, vtmp2); } } void C2_MacroAssembler::reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { vextracti64x4_high(vtmp2, src2); - reduce_operation_256(opcode, vtmp2, vtmp2, src2); + reduce_operation_256(T_INT, opcode, vtmp2, vtmp2, src2); reduce8I(opcode, dst, src1, vtmp2, vtmp1, vtmp2); } +void C2_MacroAssembler::reduce8B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { + pshufd(vtmp2, src2, 0x1); + reduce_operation_128(T_BYTE, opcode, vtmp2, src2); + movdqu(vtmp1, vtmp2); + psrldq(vtmp1, 2); + reduce_operation_128(T_BYTE, opcode, vtmp1, vtmp2); + movdqu(vtmp2, vtmp1); + psrldq(vtmp2, 1); + reduce_operation_128(T_BYTE, opcode, vtmp1, vtmp2); + movdl(vtmp2, src1); + pmovsxbd(vtmp1, vtmp1); + reduce_operation_128(T_INT, opcode, vtmp1, vtmp2); + pextrb(dst, vtmp1, 0x0); + movsbl(dst, dst); +} + +void C2_MacroAssembler::reduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { + pshufd(vtmp1, src2, 0xE); + reduce_operation_128(T_BYTE, opcode, vtmp1, src2); + reduce8B(opcode, dst, src1, vtmp1, vtmp1, vtmp2); +} + +void C2_MacroAssembler::reduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { + vextracti128_high(vtmp2, src2); + reduce_operation_128(T_BYTE, opcode, vtmp2, src2); + reduce16B(opcode, dst, src1, vtmp2, vtmp1, vtmp2); +} + +void C2_MacroAssembler::reduce64B(int opcode, Register 
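// The reduce*B/reduce*S/reduce*I/reduce*L helpers largely follow one pattern:
// extract the upper half of the vector, fold it onto the lower half with
// reduce_operation_*, and recurse until one lane remains, which is then
// combined with the scalar input src1.  Scalar model of that halving scheme
// for an add reduction, assuming a power-of-two lane count; illustrative only:
#include <cstddef>

static int tree_reduce_add(int src1, int* lanes, size_t lane_count) {
  for (size_t half = lane_count / 2; half > 0; half /= 2) {
    for (size_t i = 0; i < half; i++) {
      lanes[i] += lanes[i + half];   // vextracti*_high + reduce_operation_*
    }
  }
  return src1 + lanes[0];            // movdl(vtmp2, src1) + final reduce_operation_128
}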
dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { + vextracti64x4_high(vtmp1, src2); + reduce_operation_256(T_BYTE, opcode, vtmp1, vtmp1, src2); + reduce32B(opcode, dst, src1, vtmp1, vtmp1, vtmp2); +} + +void C2_MacroAssembler::mulreduce8B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { + pmovsxbw(vtmp2, src2); + reduce8S(opcode, dst, src1, vtmp2, vtmp1, vtmp2); +} + +void C2_MacroAssembler::mulreduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { + if (UseAVX > 1) { + int vector_len = Assembler::AVX_256bit; + vpmovsxbw(vtmp1, src2, vector_len); + reduce16S(opcode, dst, src1, vtmp1, vtmp1, vtmp2); + } else { + pmovsxbw(vtmp2, src2); + reduce8S(opcode, dst, src1, vtmp2, vtmp1, vtmp2); + pshufd(vtmp2, src2, 0x1); + pmovsxbw(vtmp2, src2); + reduce8S(opcode, dst, dst, vtmp2, vtmp1, vtmp2); + } +} + +void C2_MacroAssembler::mulreduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { + if (UseAVX > 2 && VM_Version::supports_avx512bw()) { + int vector_len = Assembler::AVX_512bit; + vpmovsxbw(vtmp1, src2, vector_len); + reduce32S(opcode, dst, src1, vtmp1, vtmp1, vtmp2); + } else { + assert(UseAVX >= 2,"Should not reach here."); + mulreduce16B(opcode, dst, src1, src2, vtmp1, vtmp2); + vextracti128_high(vtmp2, src2); + mulreduce16B(opcode, dst, dst, vtmp2, vtmp1, vtmp2); + } +} + +void C2_MacroAssembler::mulreduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { + mulreduce32B(opcode, dst, src1, src2, vtmp1, vtmp2); + vextracti64x4_high(vtmp2, src2); + mulreduce32B(opcode, dst, dst, vtmp2, vtmp1, vtmp2); +} + +void C2_MacroAssembler::reduce4S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { + if (opcode == Op_AddReductionVI) { + if (vtmp1 != src2) { + movdqu(vtmp1, src2); + } + phaddw(vtmp1, vtmp1); + phaddw(vtmp1, vtmp1); + } else { + pshufd(vtmp2, src2, 0x1); + reduce_operation_128(T_SHORT, opcode, vtmp2, src2); + movdqu(vtmp1, vtmp2); + psrldq(vtmp1, 2); + reduce_operation_128(T_SHORT, opcode, vtmp1, vtmp2); + } + movdl(vtmp2, src1); + pmovsxwd(vtmp1, vtmp1); + reduce_operation_128(T_INT, opcode, vtmp1, vtmp2); + pextrw(dst, vtmp1, 0x0); + movswl(dst, dst); +} + +void C2_MacroAssembler::reduce8S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { + if (opcode == Op_AddReductionVI) { + if (vtmp1 != src2) { + movdqu(vtmp1, src2); + } + phaddw(vtmp1, src2); + } else { + pshufd(vtmp1, src2, 0xE); + reduce_operation_128(T_SHORT, opcode, vtmp1, src2); + } + reduce4S(opcode, dst, src1, vtmp1, vtmp1, vtmp2); +} + +void C2_MacroAssembler::reduce16S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { + if (opcode == Op_AddReductionVI) { + int vector_len = Assembler::AVX_256bit; + vphaddw(vtmp2, src2, src2, vector_len); + vpermq(vtmp2, vtmp2, 0xD8, vector_len); + } else { + vextracti128_high(vtmp2, src2); + reduce_operation_128(T_SHORT, opcode, vtmp2, src2); + } + reduce8S(opcode, dst, src1, vtmp2, vtmp1, vtmp2); +} + +void C2_MacroAssembler::reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { + int vector_len = Assembler::AVX_256bit; + vextracti64x4_high(vtmp1, src2); + reduce_operation_256(T_SHORT, opcode, vtmp1, vtmp1, src2); + reduce16S(opcode, dst, src1, vtmp1, vtmp1, 
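// For AddReductionVI on shorts, reduce4S/reduce8S/reduce16S take the phaddw
// route instead of shuffle-and-add: each horizontal add sums adjacent pairs,
// so two steps collapse four shorts into one.  Scalar model of one such step
// for the lanes the reduction goes on to consume; illustrative only:
#include <cstddef>
#include <cstdint>

static void pairwise_add_step(int16_t* lanes, size_t live_lanes) {
  for (size_t i = 0; i < live_lanes / 2; i++) {
    lanes[i] = static_cast<int16_t>(lanes[2 * i] + lanes[2 * i + 1]);   // phaddw on the low half
  }
}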
vtmp2); +} + #ifdef _LP64 void C2_MacroAssembler::reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { pshufd(vtmp2, src2, 0xE); - reduce_operation_128(opcode, vtmp2, src2); + reduce_operation_128(T_LONG, opcode, vtmp2, src2); movdq(vtmp1, src1); - reduce_operation_128(opcode, vtmp1, vtmp2); + reduce_operation_128(T_LONG, opcode, vtmp1, vtmp2); movdq(dst, vtmp1); } void C2_MacroAssembler::reduce4L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { vextracti128_high(vtmp1, src2); - reduce_operation_128(opcode, vtmp1, src2); + reduce_operation_128(T_LONG, opcode, vtmp1, src2); reduce2L(opcode, dst, src1, vtmp1, vtmp1, vtmp2); } void C2_MacroAssembler::reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) { vextracti64x4_high(vtmp2, src2); - reduce_operation_256(opcode, vtmp2, vtmp2, src2); + reduce_operation_256(T_LONG, opcode, vtmp2, vtmp2, src2); reduce4L(opcode, dst, src1, vtmp2, vtmp1, vtmp2); } #endif // _LP64 void C2_MacroAssembler::reduce2F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp) { - reduce_operation_128(opcode, dst, src); + reduce_operation_128(T_FLOAT, opcode, dst, src); pshufd(vtmp, src, 0x1); - reduce_operation_128(opcode, dst, vtmp); + reduce_operation_128(T_FLOAT, opcode, dst, vtmp); } void C2_MacroAssembler::reduce4F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp) { reduce2F(opcode, dst, src, vtmp); pshufd(vtmp, src, 0x2); - reduce_operation_128(opcode, dst, vtmp); + reduce_operation_128(T_FLOAT, opcode, dst, vtmp); pshufd(vtmp, src, 0x3); - reduce_operation_128(opcode, dst, vtmp); + reduce_operation_128(T_FLOAT, opcode, dst, vtmp); } void C2_MacroAssembler::reduce8F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) { @@ -1247,9 +1920,9 @@ void C2_MacroAssembler::reduce16F(int opcode, XMMRegister dst, XMMRegister src, } void C2_MacroAssembler::reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp) { - reduce_operation_128(opcode, dst, src); + reduce_operation_128(T_DOUBLE, opcode, dst, src); pshufd(vtmp, src, 0xE); - reduce_operation_128(opcode, dst, vtmp); + reduce_operation_128(T_DOUBLE, opcode, dst, vtmp); } void C2_MacroAssembler::reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2) { @@ -1264,6 +1937,207 @@ void C2_MacroAssembler::reduce8D(int opcode, XMMRegister dst, XMMRegister src, X reduce4D(opcode, dst, vtmp1, vtmp1, vtmp2); } +void C2_MacroAssembler::reduceFloatMinMax(int opcode, int vlen, bool is_dst_valid, + XMMRegister dst, XMMRegister src, + XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, + XMMRegister xmm_0, XMMRegister xmm_1) { + int permconst[] = {1, 14}; + XMMRegister wsrc = src; + XMMRegister wdst = xmm_0; + XMMRegister wtmp = (xmm_1 == xnoreg) ? 
xmm_0: xmm_1; + + int vlen_enc = Assembler::AVX_128bit; + if (vlen == 16) { + vlen_enc = Assembler::AVX_256bit; + } + + for (int i = log2(vlen) - 1; i >=0; i--) { + if (i == 0 && !is_dst_valid) { + wdst = dst; + } + if (i == 3) { + vextracti64x4_high(wtmp, wsrc); + } else if (i == 2) { + vextracti128_high(wtmp, wsrc); + } else { // i = [0,1] + vpermilps(wtmp, wsrc, permconst[i], vlen_enc); + } + vminmax_fp(opcode, T_FLOAT, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc); + wsrc = wdst; + vlen_enc = Assembler::AVX_128bit; + } + if (is_dst_valid) { + vminmax_fp(opcode, T_FLOAT, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit); + } +} + +void C2_MacroAssembler::reduceDoubleMinMax(int opcode, int vlen, bool is_dst_valid, XMMRegister dst, XMMRegister src, + XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, + XMMRegister xmm_0, XMMRegister xmm_1) { + XMMRegister wsrc = src; + XMMRegister wdst = xmm_0; + XMMRegister wtmp = (xmm_1 == xnoreg) ? xmm_0: xmm_1; + int vlen_enc = Assembler::AVX_128bit; + if (vlen == 8) { + vlen_enc = Assembler::AVX_256bit; + } + for (int i = log2(vlen) - 1; i >=0; i--) { + if (i == 0 && !is_dst_valid) { + wdst = dst; + } + if (i == 1) { + vextracti128_high(wtmp, wsrc); + } else if (i == 2) { + vextracti64x4_high(wtmp, wsrc); + } else { + assert(i == 0, "%d", i); + vpermilpd(wtmp, wsrc, 1, vlen_enc); + } + vminmax_fp(opcode, T_DOUBLE, wdst, wtmp, wsrc, tmp, atmp, btmp, vlen_enc); + wsrc = wdst; + vlen_enc = Assembler::AVX_128bit; + } + if (is_dst_valid) { + vminmax_fp(opcode, T_DOUBLE, dst, wdst, dst, tmp, atmp, btmp, Assembler::AVX_128bit); + } +} + +void C2_MacroAssembler::extract(BasicType bt, Register dst, XMMRegister src, int idx) { + switch (bt) { + case T_BYTE: pextrb(dst, src, idx); break; + case T_SHORT: pextrw(dst, src, idx); break; + case T_INT: pextrd(dst, src, idx); break; + case T_LONG: pextrq(dst, src, idx); break; + + default: + assert(false,"Should not reach here."); + break; + } +} + +XMMRegister C2_MacroAssembler::get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex) { + int esize = type2aelembytes(typ); + int elem_per_lane = 16/esize; + int lane = elemindex / elem_per_lane; + int eindex = elemindex % elem_per_lane; + + if (lane >= 2) { + assert(UseAVX > 2, "required"); + vextractf32x4(dst, src, lane & 3); + return dst; + } else if (lane > 0) { + assert(UseAVX > 0, "required"); + vextractf128(dst, src, lane); + return dst; + } else { + return src; + } +} + +void C2_MacroAssembler::get_elem(BasicType typ, Register dst, XMMRegister src, int elemindex) { + int esize = type2aelembytes(typ); + int elem_per_lane = 16/esize; + int eindex = elemindex % elem_per_lane; + assert(is_integral_type(typ),"required"); + + if (eindex == 0) { + if (typ == T_LONG) { + movq(dst, src); + } else { + movdl(dst, src); + if (typ == T_BYTE) + movsbl(dst, dst); + else if (typ == T_SHORT) + movswl(dst, dst); + } + } else { + extract(typ, dst, src, eindex); + } +} + +void C2_MacroAssembler::get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, Register tmp, XMMRegister vtmp) { + int esize = type2aelembytes(typ); + int elem_per_lane = 16/esize; + int eindex = elemindex % elem_per_lane; + assert((typ == T_FLOAT || typ == T_DOUBLE),"required"); + + if (eindex == 0) { + movq(dst, src); + } else { + if (typ == T_FLOAT) { + if (UseAVX == 0) { + movdqu(dst, src); + pshufps(dst, dst, eindex); + } else { + vpshufps(dst, src, src, eindex, Assembler::AVX_128bit); + } + } else { + if (UseAVX == 0) { + movdqu(dst, src); + psrldq(dst, eindex*esize); 
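// get_lane/get_elem above address an element by splitting its index into a
// 128-bit lane number and an offset within that lane; one lane holds 16/esize
// elements.  Scalar form of that arithmetic; illustrative only:
static void split_element_index(int elemindex, int esize, int& lane, int& eindex) {
  int elem_per_lane = 16 / esize;       // e.g. 8 shorts or 4 ints per 128-bit lane
  lane   = elemindex / elem_per_lane;   // selects the vextractf128 / vextractf32x4 source lane
  eindex = elemindex % elem_per_lane;   // index handed to pextr* or the shuffles
}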
+ } else { + vpsrldq(dst, src, eindex*esize, Assembler::AVX_128bit); + } + movq(dst, dst); + } + } + // Zero upper bits + if (typ == T_FLOAT) { + if (UseAVX == 0) { + assert((vtmp != xnoreg) && (tmp != noreg), "required."); + movdqu(vtmp, ExternalAddress(StubRoutines::x86::vector_32_bit_mask()), tmp); + pand(dst, vtmp); + } else { + assert((tmp != noreg), "required."); + vpand(dst, dst, ExternalAddress(StubRoutines::x86::vector_32_bit_mask()), Assembler::AVX_128bit, tmp); + } + } +} + +void C2_MacroAssembler::evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch) { + switch(typ) { + case T_BYTE: + evpcmpb(kdmask, ksmask, src1, adr, comparison, vector_len, scratch); + break; + case T_SHORT: + evpcmpw(kdmask, ksmask, src1, adr, comparison, vector_len, scratch); + break; + case T_INT: + case T_FLOAT: + evpcmpd(kdmask, ksmask, src1, adr, comparison, vector_len, scratch); + break; + case T_LONG: + case T_DOUBLE: + evpcmpq(kdmask, ksmask, src1, adr, comparison, vector_len, scratch); + break; + default: + assert(false,"Should not reach here."); + break; + } +} + +void C2_MacroAssembler::evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len) { + switch(typ) { + case T_BYTE: + evpblendmb(dst, kmask, src1, src2, merge, vector_len); + break; + case T_SHORT: + evpblendmw(dst, kmask, src1, src2, merge, vector_len); + break; + case T_INT: + case T_FLOAT: + evpblendmd(dst, kmask, src1, src2, merge, vector_len); + break; + case T_LONG: + case T_DOUBLE: + evpblendmq(dst, kmask, src1, src2, merge, vector_len); + break; + default: + assert(false,"Should not reach here."); + break; + } +} + //------------------------------------------------------------------------------------------- // IndexOf for constant substrings with size >= 8 chars @@ -1850,7 +2724,7 @@ void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, Regist pmovmskb(tmp, vec3); } bsfl(ch, tmp); - addl(result, ch); + addptr(result, ch); bind(FOUND_SEQ_CHAR); subptr(result, str1); @@ -1859,6 +2733,99 @@ void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1, Regist bind(DONE_LABEL); } // string_indexof_char +void C2_MacroAssembler::stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result, + XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) { + ShortBranchVerifier sbv(this); + assert(UseSSE42Intrinsics, "SSE4.2 intrinsics are required"); + + int stride = 16; + + Label FOUND_CHAR, SCAN_TO_CHAR_INIT, SCAN_TO_CHAR_LOOP, + SCAN_TO_16_CHAR, SCAN_TO_16_CHAR_LOOP, SCAN_TO_32_CHAR_LOOP, + RET_NOT_FOUND, SCAN_TO_16_CHAR_INIT, + FOUND_SEQ_CHAR, DONE_LABEL; + + movptr(result, str1); + if (UseAVX >= 2) { + cmpl(cnt1, stride); + jcc(Assembler::less, SCAN_TO_CHAR_INIT); + cmpl(cnt1, stride*2); + jcc(Assembler::less, SCAN_TO_16_CHAR_INIT); + movdl(vec1, ch); + vpbroadcastb(vec1, vec1, Assembler::AVX_256bit); + vpxor(vec2, vec2); + movl(tmp, cnt1); + andl(tmp, 0xFFFFFFE0); //vector count (in chars) + andl(cnt1,0x0000001F); //tail count (in chars) + + bind(SCAN_TO_32_CHAR_LOOP); + vmovdqu(vec3, Address(result, 0)); + vpcmpeqb(vec3, vec3, vec1, Assembler::AVX_256bit); + vptest(vec2, vec3); + jcc(Assembler::carryClear, FOUND_CHAR); + addptr(result, 32); + subl(tmp, stride*2); + jcc(Assembler::notZero, SCAN_TO_32_CHAR_LOOP); + jmp(SCAN_TO_16_CHAR); + + bind(SCAN_TO_16_CHAR_INIT); + movdl(vec1, ch); + pxor(vec2, vec2); + pshufb(vec1, 
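// evpblend above dispatches to the masked blends that pick, per lane, from
// src2 where the k-mask bit is set and from src1 where it is clear.  Scalar
// model for a single 32-bit lane (evpblendmd); illustrative only:
#include <cstdint>

static int32_t blend_lane(bool mask_bit, int32_t src1_lane, int32_t src2_lane) {
  return mask_bit ? src2_lane : src1_lane;
}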
vec2); + } + + bind(SCAN_TO_16_CHAR); + cmpl(cnt1, stride); + jcc(Assembler::less, SCAN_TO_CHAR_INIT);//less than 16 entires left + if (UseAVX < 2) { + movdl(vec1, ch); + pxor(vec2, vec2); + pshufb(vec1, vec2); + } + movl(tmp, cnt1); + andl(tmp, 0xFFFFFFF0); //vector count (in bytes) + andl(cnt1,0x0000000F); //tail count (in bytes) + + bind(SCAN_TO_16_CHAR_LOOP); + movdqu(vec3, Address(result, 0)); + pcmpeqb(vec3, vec1); + ptest(vec2, vec3); + jcc(Assembler::carryClear, FOUND_CHAR); + addptr(result, 16); + subl(tmp, stride); + jcc(Assembler::notZero, SCAN_TO_16_CHAR_LOOP);//last 16 items... + + bind(SCAN_TO_CHAR_INIT); + testl(cnt1, cnt1); + jcc(Assembler::zero, RET_NOT_FOUND); + bind(SCAN_TO_CHAR_LOOP); + load_unsigned_byte(tmp, Address(result, 0)); + cmpl(ch, tmp); + jccb(Assembler::equal, FOUND_SEQ_CHAR); + addptr(result, 1); + subl(cnt1, 1); + jccb(Assembler::zero, RET_NOT_FOUND); + jmp(SCAN_TO_CHAR_LOOP); + + bind(RET_NOT_FOUND); + movl(result, -1); + jmpb(DONE_LABEL); + + bind(FOUND_CHAR); + if (UseAVX >= 2) { + vpmovmskb(tmp, vec3); + } else { + pmovmskb(tmp, vec3); + } + bsfl(ch, tmp); + addptr(result, ch); + + bind(FOUND_SEQ_CHAR); + subptr(result, str1); + + bind(DONE_LABEL); +} // stringL_indexof_char + // helper function for string_compare void C2_MacroAssembler::load_next_elements(Register elem1, Register elem2, Register str1, Register str2, Address::ScaleFactor scale, Address::ScaleFactor scale1, diff --git a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp index f16b193a21d..79ab55a75ad 100644 --- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp @@ -28,6 +28,8 @@ // C2_MacroAssembler contains high-level macros for C2 public: + Assembler::AvxVectorLen vector_length_encoding(int vlen_in_bytes); + // special instructions for EVEX void setvectmask(Register dst, Register src); void restorevectmask(); @@ -71,25 +73,69 @@ void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr); void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, Register scr); void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr); + + void pminmax(int opcode, BasicType elem_bt, XMMRegister dst, XMMRegister src, + XMMRegister tmp = xnoreg); + void vpminmax(int opcode, BasicType elem_bt, + XMMRegister dst, XMMRegister src1, XMMRegister src2, + int vlen_enc); + + void vminmax_fp(int opcode, BasicType elem_bt, + XMMRegister dst, XMMRegister a, XMMRegister b, + XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, + int vlen_enc); + void evminmax_fp(int opcode, BasicType elem_bt, + XMMRegister dst, XMMRegister a, XMMRegister b, + KRegister ktmp, XMMRegister atmp, XMMRegister btmp, + int vlen_enc); + void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len); void vextendbw(bool sign, XMMRegister dst, XMMRegister src); - void vshiftd(int opcode, XMMRegister dst, XMMRegister src); + void vextendbd(bool sign, XMMRegister dst, XMMRegister src, int vector_len); + void vextendwd(bool sign, XMMRegister dst, XMMRegister src, int vector_len); + + void vshiftd(int opcode, XMMRegister dst, XMMRegister shift); void vshiftd_imm(int opcode, XMMRegister dst, int shift); - void vshiftd(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); void vshiftd_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len); - 
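// stringL_indexof_char above is the Latin-1 counterpart of string_indexof_char:
// it yields the index of the first byte equal to ch, or -1, scanning 32 bytes
// per iteration under AVX2 (16 under SSE4.2) before a byte-wise tail loop.
// Scalar reference for the result it must produce; illustrative only:
#include <cstddef>

static int indexof_char_latin1(const unsigned char* str, size_t cnt, unsigned char ch) {
  for (size_t i = 0; i < cnt; i++) {
    if (str[i] == ch) {
      return static_cast<int>(i);
    }
  }
  return -1;
}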
void vshiftw(int opcode, XMMRegister dst, XMMRegister src); - void vshiftw(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); - void vshiftq(int opcode, XMMRegister dst, XMMRegister src); + void vshiftw(int opcode, XMMRegister dst, XMMRegister shift); + void vshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); + void vshiftq(int opcode, XMMRegister dst, XMMRegister shift); void vshiftq_imm(int opcode, XMMRegister dst, int shift); - void vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void vshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); void vshiftq_imm(int opcode, XMMRegister dst, XMMRegister nds, int shift, int vector_len); void vprotate_imm(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, int shift, int vector_len); void vprotate_var(int opcode, BasicType etype, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len); - // Reductions for vectors of ints, longs, floats, and doubles. + void varshiftd(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); + void varshiftw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc); + void varshiftq(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vlen_enc, XMMRegister vtmp = xnoreg); + void varshiftbw(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch); + void evarshiftb(int opcode, XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len, XMMRegister vtmp, Register scratch); + + void insert(BasicType typ, XMMRegister dst, Register val, int idx); + void vinsert(BasicType typ, XMMRegister dst, XMMRegister src, Register val, int idx); + void vgather(BasicType typ, XMMRegister dst, Register base, XMMRegister idx, XMMRegister mask, int vector_len); + void evgather(BasicType typ, XMMRegister dst, KRegister mask, Register base, XMMRegister idx, int vector_len); + void evscatter(BasicType typ, Register base, XMMRegister idx, KRegister mask, XMMRegister src, int vector_len); + + // extract + void extract(BasicType typ, Register dst, XMMRegister src, int idx); + XMMRegister get_lane(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex); + void get_elem(BasicType typ, Register dst, XMMRegister src, int elemindex); + void get_elem(BasicType typ, XMMRegister dst, XMMRegister src, int elemindex, Register tmp = noreg, XMMRegister vtmp = xnoreg); - // dst = src1 + reduce(op, src2) using vtmp as temps + // blend + void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch = rscratch1); + void evpblend(BasicType typ, XMMRegister dst, KRegister kmask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len); + + void load_vector_mask(XMMRegister dst, XMMRegister src, int vlen_in_bytes, BasicType elem_bt); + void load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes); + + // Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles. 
+ + // dst = src1 reduce(op, src2) using vtmp as temps void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); #ifdef _LP64 void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); @@ -99,38 +145,71 @@ void reduce_fp(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2 = xnoreg); + void reduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); + void mulreduceB(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); + void reduceS(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); + void reduceFloatMinMax(int opcode, int vlen, bool is_dst_valid, + XMMRegister dst, XMMRegister src, + XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg); + void reduceDoubleMinMax(int opcode, int vlen, bool is_dst_valid, + XMMRegister dst, XMMRegister src, + XMMRegister tmp, XMMRegister atmp, XMMRegister btmp, XMMRegister xmm_0, XMMRegister xmm_1 = xnoreg); private: void reduceF(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); void reduceD(int opcode, int vlen, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); + // Int Reduction void reduce2I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); void reduce4I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); void reduce8I (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); void reduce16I(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); + // Byte Reduction + void reduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); + void reduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); + void reduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); + void reduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); + void mulreduce8B (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); + void mulreduce16B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); + void mulreduce32B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); + void mulreduce64B(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); + + // Short Reduction + void reduce4S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); + void reduce8S (int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); + void reduce16S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); + void reduce32S(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); + + // Long Reduction #ifdef _LP64 void reduce2L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); void reduce4L(int opcode, Register dst, 
Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); void reduce8L(int opcode, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2); #endif // _LP64 + // Float Reduction void reduce2F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp); void reduce4F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp); void reduce8F (int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); void reduce16F(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); + // Double Reduction void reduce2D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp); void reduce4D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); void reduce8D(int opcode, XMMRegister dst, XMMRegister src, XMMRegister vtmp1, XMMRegister vtmp2); - void reduce_operation_128(int opcode, XMMRegister dst, XMMRegister src); - void reduce_operation_256(int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2); + // Base reduction instruction + void reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src); + void reduce_operation_256(BasicType typ, int opcode, XMMRegister dst, XMMRegister src1, XMMRegister src2); public: void string_indexof_char(Register str1, Register cnt1, Register ch, Register result, XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp); + void stringL_indexof_char(Register str1, Register cnt1, Register ch, Register result, + XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp); + // IndexOf strings. // Small strings are loaded through stack if they cross page boundary. void string_indexof(Register str1, Register str2, diff --git a/src/hotspot/cpu/x86/c2_globals_x86.hpp b/src/hotspot/cpu/x86/c2_globals_x86.hpp index 6513be7b53e..31e77b52568 100644 --- a/src/hotspot/cpu/x86/c2_globals_x86.hpp +++ b/src/hotspot/cpu/x86/c2_globals_x86.hpp @@ -31,8 +31,6 @@ // Sets the default values for platform dependent flags used by the server compiler. // (see c2_globals.hpp). Alpha-sorted. define_pd_global(bool, BackgroundCompilation, true); -define_pd_global(bool, UseTLAB, true); -define_pd_global(bool, ResizeTLAB, true); define_pd_global(bool, CICompileOSR, true); define_pd_global(bool, InlineIntrinsics, true); define_pd_global(bool, PreferInterpreterNativeStubs, false); diff --git a/src/hotspot/cpu/x86/c2_safepointPollStubTable_x86.cpp b/src/hotspot/cpu/x86/c2_safepointPollStubTable_x86.cpp new file mode 100644 index 00000000000..c3d4850a5db --- /dev/null +++ b/src/hotspot/cpu/x86/c2_safepointPollStubTable_x86.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "opto/compile.hpp" +#include "opto/node.hpp" +#include "opto/output.hpp" +#include "runtime/sharedRuntime.hpp" + +#define __ masm. +void C2SafepointPollStubTable::emit_stub_impl(MacroAssembler& masm, C2SafepointPollStub* entry) const { + assert(SharedRuntime::polling_page_return_handler_blob() != NULL, + "polling page return stub not created yet"); + address stub = SharedRuntime::polling_page_return_handler_blob()->entry_point(); + + RuntimeAddress callback_addr(stub); + + __ bind(entry->_stub_label); + InternalAddress safepoint_pc(masm.pc() - masm.offset() + entry->_safepoint_offset); +#ifdef _LP64 + __ lea(rscratch1, safepoint_pc); + __ movptr(Address(r15_thread, JavaThread::saved_exception_pc_offset()), rscratch1); +#else + const Register tmp1 = rcx; + const Register tmp2 = rdx; + __ push(tmp1); + __ push(tmp2); + + __ lea(tmp1, safepoint_pc); + __ get_thread(tmp2); + __ movptr(Address(tmp2, JavaThread::saved_exception_pc_offset()), tmp1); + + __ pop(tmp2); + __ pop(tmp1); +#endif + __ jump(callback_addr); +} +#undef __ diff --git a/src/hotspot/cpu/x86/frame_x86.cpp b/src/hotspot/cpu/x86/frame_x86.cpp index c433eabf993..1e9bf12cd2b 100644 --- a/src/hotspot/cpu/x86/frame_x86.cpp +++ b/src/hotspot/cpu/x86/frame_x86.cpp @@ -36,6 +36,7 @@ #include "runtime/monitorChunk.hpp" #include "runtime/os.inline.hpp" #include "runtime/signature.hpp" +#include "runtime/stackWatermarkSet.hpp" #include "runtime/stubCodeGenerator.hpp" #include "runtime/stubRoutines.hpp" #include "vmreg_x86.inline.hpp" @@ -469,8 +470,8 @@ frame frame::sender_for_compiled_frame(RegisterMap* map) const { //------------------------------------------------------------------------------ -// frame::sender -frame frame::sender(RegisterMap* map) const { +// frame::sender_raw +frame frame::sender_raw(RegisterMap* map) const { // Default is we done have to follow them. 
The sender_for_xxx will // update it accordingly map->set_include_argument_oops(false); @@ -487,6 +488,16 @@ frame frame::sender(RegisterMap* map) const { return frame(sender_sp(), link(), sender_pc()); } +frame frame::sender(RegisterMap* map) const { + frame result = sender_raw(map); + + if (map->process_frames()) { + StackWatermarkSet::on_iteration(map->thread(), result); + } + + return result; +} + bool frame::is_interpreted_frame_valid(JavaThread* thread) const { assert(is_interpreted_frame(), "Not an interpreted frame"); // These are reasonable sanity checks diff --git a/src/hotspot/cpu/x86/frame_x86.hpp b/src/hotspot/cpu/x86/frame_x86.hpp index ffe5e92275d..26dbb2aa956 100644 --- a/src/hotspot/cpu/x86/frame_x86.hpp +++ b/src/hotspot/cpu/x86/frame_x86.hpp @@ -156,4 +156,7 @@ static jint interpreter_frame_expression_stack_direction() { return -1; } + // returns the sending frame, without applying any barriers + frame sender_raw(RegisterMap* map) const; + #endif // CPU_X86_FRAME_X86_HPP diff --git a/src/hotspot/cpu/x86/gc/shenandoah/c1/shenandoahBarrierSetC1_x86.cpp b/src/hotspot/cpu/x86/gc/shenandoah/c1/shenandoahBarrierSetC1_x86.cpp index 58dcd9ed5fb..2aac0608207 100644 --- a/src/hotspot/cpu/x86/gc/shenandoah/c1/shenandoahBarrierSetC1_x86.cpp +++ b/src/hotspot/cpu/x86/gc/shenandoah/c1/shenandoahBarrierSetC1_x86.cpp @@ -111,7 +111,8 @@ LIR_Opr ShenandoahBarrierSetC1::atomic_xchg_at_resolved(LIRAccess& access, LIRIt __ xchg(access.resolved_addr(), result, result, LIR_OprFact::illegalOpr); if (access.is_oop()) { - result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), false); + ShenandoahBarrierSet::AccessKind kind = ShenandoahBarrierSet::access_kind(access.decorators(), access.type()); + result = load_reference_barrier(access.gen(), result, LIR_OprFact::addressConst(0), kind); LIR_Opr tmp = gen->new_register(type); __ move(result, tmp); result = tmp; diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp index 5ce3cc95e93..40f16ef2731 100644 --- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.cpp @@ -32,7 +32,6 @@ #include "gc/shenandoah/shenandoahThreadLocalData.hpp" #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" #include "interpreter/interpreter.hpp" -#include "interpreter/interp_masm.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/thread.hpp" #include "utilities/macros.hpp" @@ -44,8 +43,6 @@ #define __ masm-> -address ShenandoahBarrierSetAssembler::_shenandoah_lrb = NULL; - static void save_xmm_registers(MacroAssembler* masm) { __ subptr(rsp, 64); __ movdbl(Address(rsp, 0), xmm0); @@ -271,11 +268,14 @@ void ShenandoahBarrierSetAssembler::satb_write_barrier_pre(MacroAssembler* masm, __ bind(done); } -void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address src) { +void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address src, ShenandoahBarrierSet::AccessKind kind) { assert(ShenandoahLoadRefBarrier, "Should be enabled"); - Label done; + Label heap_stable, not_cset; + __ block_comment("load_reference_barrier { "); + + // Check if GC is active #ifdef _LP64 Register thread = r15_thread; #else @@ -289,138 +289,130 @@ void ShenandoahBarrierSetAssembler::load_reference_barrier_not_null(MacroAssembl Address gc_state(thread, 
in_bytes(ShenandoahThreadLocalData::gc_state_offset())); __ testb(gc_state, ShenandoahHeap::HAS_FORWARDED); - __ jccb(Assembler::zero, done); - - // Use rsi for src address - const Register src_addr = rsi; - // Setup address parameter first, if it does not clobber oop in dst - bool need_addr_setup = (src_addr != dst); - - if (need_addr_setup) { - __ push(src_addr); - __ lea(src_addr, src); - - if (dst != rax) { - // Move obj into rax and save rax - __ push(rax); - __ movptr(rax, dst); - } - } else { - // dst == rsi - __ push(rax); - __ movptr(rax, dst); - - // we can clobber it, since it is outgoing register - __ lea(src_addr, src); - } - - save_xmm_registers(masm); - __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, ShenandoahBarrierSetAssembler::shenandoah_lrb()))); - restore_xmm_registers(masm); - - if (need_addr_setup) { - if (dst != rax) { - __ movptr(dst, rax); - __ pop(rax); + __ jcc(Assembler::zero, heap_stable); + + Register tmp1 = noreg, tmp2 = noreg; + if (kind == ShenandoahBarrierSet::AccessKind::NORMAL) { + // Test for object in cset + // Allocate temporary registers + for (int i = 0; i < 8; i++) { + Register r = as_Register(i); + if (r != rsp && r != rbp && r != dst && r != src.base() && r != src.index()) { + if (tmp1 == noreg) { + tmp1 = r; + } else { + tmp2 = r; + break; + } + } } - __ pop(src_addr); - } else { - __ movptr(dst, rax); - __ pop(rax); + assert(tmp1 != noreg, "tmp1 allocated"); + assert(tmp2 != noreg, "tmp2 allocated"); + assert_different_registers(tmp1, tmp2, src.base(), src.index()); + assert_different_registers(tmp1, tmp2, dst); + + __ push(tmp1); + __ push(tmp2); + + // Optimized cset-test + __ movptr(tmp1, dst); + __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint()); + __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr()); + __ movbool(tmp1, Address(tmp1, tmp2, Address::times_1)); + __ testbool(tmp1); + __ jcc(Assembler::zero, not_cset); + } + + uint num_saved_regs = 4 + (dst != rax ? 1 : 0) LP64_ONLY(+4); + __ subptr(rsp, num_saved_regs * wordSize); + uint slot = num_saved_regs; + if (dst != rax) { + __ movptr(Address(rsp, (--slot) * wordSize), rax); } - - __ bind(done); - -#ifndef _LP64 - __ pop(thread); + __ movptr(Address(rsp, (--slot) * wordSize), rcx); + __ movptr(Address(rsp, (--slot) * wordSize), rdx); + __ movptr(Address(rsp, (--slot) * wordSize), rdi); + __ movptr(Address(rsp, (--slot) * wordSize), rsi); +#ifdef _LP64 + __ movptr(Address(rsp, (--slot) * wordSize), r8); + __ movptr(Address(rsp, (--slot) * wordSize), r9); + __ movptr(Address(rsp, (--slot) * wordSize), r10); + __ movptr(Address(rsp, (--slot) * wordSize), r11); + // r12-r15 are callee saved in all calling conventions #endif -} - -void ShenandoahBarrierSetAssembler::load_reference_barrier_native(MacroAssembler* masm, Register dst, Address src) { - if (!ShenandoahLoadRefBarrier) { - return; - } - - Label done; - Label not_null; - Label slow_path; - __ block_comment("load_reference_barrier_native { "); - - // null check - __ testptr(dst, dst); - __ jcc(Assembler::notZero, not_null); - __ jmp(done); - __ bind(not_null); - + assert(slot == 0, "must use all slots"); + // Shuffle registers such that dst is in c_rarg0 and addr in c_rarg1. 
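// --- Illustrative sketch (editor's note, not part of the patch) -------------
// Scalar form of the "Optimized cset-test" emitted above: Shenandoah keeps one
// byte per heap region, and an object is in the collection set iff the byte
// indexed by (obj >> region-size-shift) is non-zero. The helper name and the
// plain-pointer parameter are invented for illustration only.
static inline bool in_cset_fast(const void* obj) {
  const uint8_t* bitmap = (const uint8_t*) ShenandoahHeap::in_cset_fast_test_addr();
  const size_t index = ((uintptr_t) obj) >> ShenandoahHeapRegion::region_size_bytes_shift_jint();
  return bitmap[index] != 0;
}
// ---------------------------------------------------------------------------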
#ifdef _LP64 - Register thread = r15_thread; + Register arg0 = c_rarg0, arg1 = c_rarg1; #else - Register thread = rcx; - if (thread == dst) { - thread = rbx; - } - __ push(thread); - __ get_thread(thread); -#endif - assert_different_registers(dst, thread); - - Address gc_state(thread, in_bytes(ShenandoahThreadLocalData::gc_state_offset())); - __ testb(gc_state, ShenandoahHeap::EVACUATION); -#ifndef _LP64 - __ pop(thread); + Register arg0 = rdi, arg1 = rsi; #endif - __ jccb(Assembler::notZero, slow_path); - __ jmp(done); - __ bind(slow_path); - - if (dst != rax) { - __ push(rax); + if (dst == arg1) { + __ lea(arg0, src); + __ xchgptr(arg1, arg0); + } else { + __ lea(arg1, src); + __ movptr(arg0, dst); } - __ push(rcx); - __ push(rdx); - __ push(rdi); - __ push(rsi); -#ifdef _LP64 - __ push(r8); - __ push(r9); - __ push(r10); - __ push(r11); - __ push(r12); - __ push(r13); - __ push(r14); - __ push(r15); -#endif - - assert_different_registers(dst, rsi); - __ lea(rsi, src); save_xmm_registers(masm); - __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native), dst, rsi); + switch (kind) { + case ShenandoahBarrierSet::AccessKind::NORMAL: + if (UseCompressedOops) { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), arg0, arg1); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), arg0, arg1); + } + break; + case ShenandoahBarrierSet::AccessKind::WEAK: + if (UseCompressedOops) { + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow), arg0, arg1); + } else { + __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), arg0, arg1); + } + break; + case ShenandoahBarrierSet::AccessKind::NATIVE: + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), arg0, arg1); + break; + default: + ShouldNotReachHere(); + } restore_xmm_registers(masm); #ifdef _LP64 - __ pop(r15); - __ pop(r14); - __ pop(r13); - __ pop(r12); - __ pop(r11); - __ pop(r10); - __ pop(r9); - __ pop(r8); + __ movptr(r11, Address(rsp, (slot++) * wordSize)); + __ movptr(r10, Address(rsp, (slot++) * wordSize)); + __ movptr(r9, Address(rsp, (slot++) * wordSize)); + __ movptr(r8, Address(rsp, (slot++) * wordSize)); #endif - __ pop(rsi); - __ pop(rdi); - __ pop(rdx); - __ pop(rcx); + __ movptr(rsi, Address(rsp, (slot++) * wordSize)); + __ movptr(rdi, Address(rsp, (slot++) * wordSize)); + __ movptr(rdx, Address(rsp, (slot++) * wordSize)); + __ movptr(rcx, Address(rsp, (slot++) * wordSize)); if (dst != rax) { __ movptr(dst, rax); - __ pop(rax); + __ movptr(rax, Address(rsp, (slot++) * wordSize)); } - __ bind(done); - __ block_comment("load_reference_barrier_native { "); + assert(slot == num_saved_regs, "must use all slots"); + __ addptr(rsp, num_saved_regs * wordSize); + + __ bind(not_cset); + + if (kind == ShenandoahBarrierSet::AccessKind::NORMAL) { + __ pop(tmp2); + __ pop(tmp1); + } + + __ bind(heap_stable); + + __ block_comment("} load_reference_barrier"); + +#ifndef _LP64 + __ pop(thread); +#endif } void ShenandoahBarrierSetAssembler::storeval_barrier(MacroAssembler* masm, Register dst, Register tmp) { @@ -464,16 +456,6 @@ void ShenandoahBarrierSetAssembler::storeval_barrier_impl(MacroAssembler* masm, } } -void ShenandoahBarrierSetAssembler::load_reference_barrier(MacroAssembler* masm, Register dst, Address src) { - if (ShenandoahLoadRefBarrier) { - Label done; - __ testptr(dst, dst); - __ 
jcc(Assembler::zero, done); - load_reference_barrier_not_null(masm, dst, src); - __ bind(done); - } -} - // // Arguments: // @@ -504,7 +486,7 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet d // Preserve src location for LRB if (dst == src.base() || dst == src.index()) { - // Use tmp1 for dst if possible, as it is not used in BarrierAssembler::load_at() + // Use tmp1 for dst if possible, as it is not used in BarrierAssembler::load_at() if (tmp1->is_valid() && tmp1 != src.base() && tmp1 != src.index()) { dst = tmp1; use_tmp1_for_dst = true; @@ -517,11 +499,8 @@ void ShenandoahBarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet d BarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); - if (ShenandoahBarrierSet::use_load_reference_barrier_native(decorators, type)) { - load_reference_barrier_native(masm, dst, src); - } else { - load_reference_barrier(masm, dst, src); - } + ShenandoahBarrierSet::AccessKind kind = ShenandoahBarrierSet::access_kind(decorators, type); + load_reference_barrier(masm, dst, src, kind); // Move loaded oop to final destination if (dst != result_dst) { @@ -638,7 +617,8 @@ void ShenandoahBarrierSetAssembler::cmpxchg_oop(MacroAssembler* masm, bool exchange, Register tmp1, Register tmp2) { assert(ShenandoahCASBarrier, "Should only be used when CAS barrier is enabled"); assert(oldval == rax, "must be in rax for implicit use in cmpxchg"); - assert_different_registers(oldval, newval, tmp1, tmp2); + assert_different_registers(oldval, tmp1, tmp2); + assert_different_registers(newval, tmp1, tmp2); Label L_success, L_failure; @@ -870,10 +850,18 @@ void ShenandoahBarrierSetAssembler::gen_load_reference_barrier_stub(LIR_Assemble __ bind(slow_path); ce->store_parameter(res, 0); ce->store_parameter(addr, 1); - if (stub->is_native()) { - __ call(RuntimeAddress(bs->load_reference_barrier_native_rt_code_blob()->code_begin())); - } else { - __ call(RuntimeAddress(bs->load_reference_barrier_rt_code_blob()->code_begin())); + switch (stub->kind()) { + case ShenandoahBarrierSet::AccessKind::NORMAL: + __ call(RuntimeAddress(bs->load_reference_barrier_normal_rt_code_blob()->code_begin())); + break; + case ShenandoahBarrierSet::AccessKind::WEAK: + __ call(RuntimeAddress(bs->load_reference_barrier_weak_rt_code_blob()->code_begin())); + break; + case ShenandoahBarrierSet::AccessKind::NATIVE: + __ call(RuntimeAddress(bs->load_reference_barrier_native_rt_code_blob()->code_begin())); + break; + default: + ShouldNotReachHere(); } __ jmp(*stub->continuation()); } @@ -938,7 +926,7 @@ void ShenandoahBarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAss __ epilogue(); } -void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, bool is_native) { +void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, ShenandoahBarrierSet::AccessKind kind) { __ prologue("shenandoah_load_reference_barrier", false); // arg0 : object to be resolved @@ -947,20 +935,40 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s #ifdef _LP64 __ load_parameter(0, c_rarg0); __ load_parameter(1, c_rarg1); - if (is_native) { - __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native), c_rarg0, c_rarg1); - } else if (UseCompressedOops) { - __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), c_rarg0, c_rarg1); - } else { - __ 
call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), c_rarg0, c_rarg1); + switch (kind) { + case ShenandoahBarrierSet::AccessKind::NORMAL: + if (UseCompressedOops) { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), c_rarg0, c_rarg1); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), c_rarg0, c_rarg1); + } + break; + case ShenandoahBarrierSet::AccessKind::WEAK: + if (UseCompressedOops) { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow), c_rarg0, c_rarg1); + } else { + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), c_rarg0, c_rarg1); + } + break; + case ShenandoahBarrierSet::AccessKind::NATIVE: + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), c_rarg0, c_rarg1); + break; + default: + ShouldNotReachHere(); } #else __ load_parameter(0, rax); __ load_parameter(1, rbx); - if (is_native) { - __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_native), rax, rbx); - } else { - __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rbx); + switch (kind) { + case ShenandoahBarrierSet::AccessKind::NORMAL: + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rbx); + break; + case ShenandoahBarrierSet::AccessKind::WEAK: + case ShenandoahBarrierSet::AccessKind::NATIVE: + __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak), rax, rbx); + break; + default: + ShouldNotReachHere(); } #endif @@ -972,104 +980,3 @@ void ShenandoahBarrierSetAssembler::generate_c1_load_reference_barrier_runtime_s #undef __ #endif // COMPILER1 - -address ShenandoahBarrierSetAssembler::shenandoah_lrb() { - assert(_shenandoah_lrb != NULL, "need load reference barrier stub"); - return _shenandoah_lrb; -} - -#define __ cgen->assembler()-> - -/* - * Incoming parameters: - * rax: oop - * rsi: load address - */ -address ShenandoahBarrierSetAssembler::generate_shenandoah_lrb(StubCodeGenerator* cgen) { - __ align(CodeEntryAlignment); - StubCodeMark mark(cgen, "StubRoutines", "shenandoah_lrb"); - address start = __ pc(); - - Label slow_path; - - // We use RDI, which also serves as argument register for slow call. - // RAX always holds the src object ptr, except after the slow call, - // then it holds the result. R8/RBX is used as temporary register. - - Register tmp1 = rdi; - Register tmp2 = LP64_ONLY(r8) NOT_LP64(rbx); - - __ push(tmp1); - __ push(tmp2); - - // Check for object being in the collection set. 
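// --- Illustrative sketch (editor's note, not part of the patch) -------------
// The kind/encoding dispatch used by the switches above, folded into a single
// helper for readability; the function itself is invented, but the runtime
// entry points and AccessKind values are the ones this patch uses.
static address lrb_entry_for(ShenandoahBarrierSet::AccessKind kind, bool compressed_oops) {
  switch (kind) {
  case ShenandoahBarrierSet::AccessKind::NORMAL:
    return compressed_oops
        ? CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow)
        : CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier);
  case ShenandoahBarrierSet::AccessKind::WEAK:
    return compressed_oops
        ? CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak_narrow)
        : CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak);
  case ShenandoahBarrierSet::AccessKind::NATIVE:
    // NATIVE accesses take the uncompressed weak entry in this patch.
    return CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_weak);
  default:
    ShouldNotReachHere();
    return NULL;
  }
}
// ---------------------------------------------------------------------------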
- __ mov(tmp1, rax); - __ shrptr(tmp1, ShenandoahHeapRegion::region_size_bytes_shift_jint()); - __ movptr(tmp2, (intptr_t) ShenandoahHeap::in_cset_fast_test_addr()); - __ movbool(tmp2, Address(tmp2, tmp1, Address::times_1)); - __ testbool(tmp2); - __ jccb(Assembler::notZero, slow_path); - __ pop(tmp2); - __ pop(tmp1); - __ ret(0); - - __ bind(slow_path); - - __ push(rcx); - __ push(rdx); - __ push(rdi); -#ifdef _LP64 - __ push(r8); - __ push(r9); - __ push(r10); - __ push(r11); - __ push(r12); - __ push(r13); - __ push(r14); - __ push(r15); -#endif - __ push(rbp); - __ movptr(rbp, rsp); - __ andptr(rsp, -StackAlignmentInBytes); - __ push_FPU_state(); - if (UseCompressedOops) { - __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier_narrow), rax, rsi); - } else { - __ call_VM_leaf(CAST_FROM_FN_PTR(address, ShenandoahRuntime::load_reference_barrier), rax, rsi); - } - __ pop_FPU_state(); - __ movptr(rsp, rbp); - __ pop(rbp); -#ifdef _LP64 - __ pop(r15); - __ pop(r14); - __ pop(r13); - __ pop(r12); - __ pop(r11); - __ pop(r10); - __ pop(r9); - __ pop(r8); -#endif - __ pop(rdi); - __ pop(rdx); - __ pop(rcx); - - __ pop(tmp2); - __ pop(tmp1); - __ ret(0); - - return start; -} - -#undef __ - -void ShenandoahBarrierSetAssembler::barrier_stubs_init() { - if (ShenandoahLoadRefBarrier) { - int stub_code_size = 4096; - ResourceMark rm; - BufferBlob* bb = BufferBlob::create("shenandoah_barrier_stubs", stub_code_size); - CodeBuffer buf(bb); - StubCodeGenerator cgen(&buf); - _shenandoah_lrb = generate_shenandoah_lrb(&cgen); - } -} diff --git a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp index 60aa3b4600d..108b5670206 100644 --- a/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/gc/shenandoah/shenandoahBarrierSetAssembler_x86.hpp @@ -27,6 +27,8 @@ #include "asm/macroAssembler.hpp" #include "gc/shared/barrierSetAssembler.hpp" +#include "gc/shenandoah/shenandoahBarrierSet.hpp" + #ifdef COMPILER1 class LIR_Assembler; class ShenandoahPreBarrierStub; @@ -38,8 +40,6 @@ class StubCodeGenerator; class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { private: - static address _shenandoah_lrb; - void satb_write_barrier_pre(MacroAssembler* masm, Register obj, Register pre_val, @@ -56,25 +56,18 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { bool tosca_live, bool expand_call); - void load_reference_barrier_not_null(MacroAssembler* masm, Register dst, Address src); - void storeval_barrier_impl(MacroAssembler* masm, Register dst, Register tmp); - address generate_shenandoah_lrb(StubCodeGenerator* cgen); - public: - static address shenandoah_lrb(); - void storeval_barrier(MacroAssembler* masm, Register dst, Register tmp); #ifdef COMPILER1 void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub); void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub); void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); - void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, bool is_native); + void generate_c1_load_reference_barrier_runtime_stub(StubAssembler* sasm, ShenandoahBarrierSet::AccessKind kind); #endif - void load_reference_barrier(MacroAssembler* masm, Register dst, Address src); - void load_reference_barrier_native(MacroAssembler* masm, Register dst, Address src); + void load_reference_barrier(MacroAssembler* masm, Register 
dst, Address src, ShenandoahBarrierSet::AccessKind kind); void cmpxchg_oop(MacroAssembler* masm, Register res, Address addr, Register oldval, Register newval, @@ -87,8 +80,6 @@ class ShenandoahBarrierSetAssembler: public BarrierSetAssembler { Address dst, Register val, Register tmp1, Register tmp2); virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, Register obj, Register tmp, Label& slowpath); - virtual void barrier_stubs_init(); - }; #endif // CPU_X86_GC_SHENANDOAH_SHENANDOAHBARRIERSETASSEMBLER_X86_HPP diff --git a/src/hotspot/cpu/x86/gc/z/zGlobals_x86.hpp b/src/hotspot/cpu/x86/gc/z/zGlobals_x86.hpp index 83c8caa6a58..db558d8cb2a 100644 --- a/src/hotspot/cpu/x86/gc/z/zGlobals_x86.hpp +++ b/src/hotspot/cpu/x86/gc/z/zGlobals_x86.hpp @@ -24,10 +24,9 @@ #ifndef CPU_X86_GC_Z_ZGLOBALS_X86_HPP #define CPU_X86_GC_Z_ZGLOBALS_X86_HPP -const size_t ZPlatformGranuleSizeShift = 21; // 2MB -const size_t ZPlatformHeapViews = 3; -const size_t ZPlatformNMethodDisarmedOffset = 4; -const size_t ZPlatformCacheLineSize = 64; +const size_t ZPlatformGranuleSizeShift = 21; // 2MB +const size_t ZPlatformHeapViews = 3; +const size_t ZPlatformCacheLineSize = 64; size_t ZPlatformAddressOffsetBits(); size_t ZPlatformAddressMetadataShift(); diff --git a/src/hotspot/cpu/x86/interp_masm_x86.cpp b/src/hotspot/cpu/x86/interp_masm_x86.cpp index 738771e800a..140dcfc2f06 100644 --- a/src/hotspot/cpu/x86/interp_masm_x86.cpp +++ b/src/hotspot/cpu/x86/interp_masm_x86.cpp @@ -605,6 +605,10 @@ void InterpreterMacroAssembler::push_i(Register r) { push(r); } +void InterpreterMacroAssembler::push_i_or_ptr(Register r) { + push(r); +} + void InterpreterMacroAssembler::push_f(XMMRegister r) { subptr(rsp, wordSize); movflt(Address(rsp, 0), r); @@ -853,7 +857,7 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, Label no_safepoint, dispatch; if (table != safepoint_table && generate_poll) { NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); - testb(Address(r15_thread, Thread::polling_page_offset()), SafepointMechanism::poll_bit()); + testb(Address(r15_thread, Thread::polling_word_offset()), SafepointMechanism::poll_bit()); jccb(Assembler::zero, no_safepoint); lea(rscratch1, ExternalAddress((address)safepoint_table)); @@ -872,7 +876,7 @@ void InterpreterMacroAssembler::dispatch_base(TosState state, Label no_safepoint; const Register thread = rcx; get_thread(thread); - testb(Address(thread, Thread::polling_page_offset()), SafepointMechanism::poll_bit()); + testb(Address(thread, Thread::polling_word_offset()), SafepointMechanism::poll_bit()); jccb(Assembler::zero, no_safepoint); ArrayAddress dispatch_addr(ExternalAddress((address)safepoint_table), index); @@ -961,6 +965,7 @@ void InterpreterMacroAssembler::narrow(Register result) { // remove activation // +// Apply stack watermark barrier. // Unlock the receiver if this is a synchronized method. // Unlock any Java monitors from syncronized blocks. // Remove the activation from the stack. @@ -987,7 +992,23 @@ void InterpreterMacroAssembler::remove_activation( const Register rmon = LP64_ONLY(c_rarg1) NOT_LP64(rcx); // monitor pointers need different register // because rdx may have the result in it - NOT_LP64(get_thread(rcx);) + NOT_LP64(get_thread(rthread);) + + // The below poll is for the stack watermark barrier. It allows fixing up frames lazily, + // that would normally not be safe to use. Such bad returns into unsafe territory of + // the stack, will call InterpreterRuntime::at_unwind. 
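// --- Illustrative sketch (editor's note, not part of the patch) -------------
// The unwind poll being added here, restated in plain C++: if the return poll
// trips, let the runtime process the caller's frame before we return into it.
// Only InterpreterRuntime::at_unwind is a real entry point; the wrapper and
// its bool parameter are invented for illustration.
static void interpreter_unwind_poll(JavaThread* thread, bool return_poll_tripped) {
  if (return_poll_tripped) {                 // safepoint_poll(..., at_return = true, in_nmethod = false)
    InterpreterRuntime::at_unwind(thread);   // fix up the unprocessed caller frame lazily
  }
}
// ---------------------------------------------------------------------------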
+ Label slow_path; + Label fast_path; + safepoint_poll(slow_path, rthread, true /* at_return */, false /* in_nmethod */); + jmp(fast_path); + bind(slow_path); + push(state); + set_last_Java_frame(rthread, noreg, rbp, (address)pc()); + super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::at_unwind), rthread); + NOT_LP64(get_thread(rthread);) // call_VM clobbered it, restore + reset_last_Java_frame(rthread, true); + pop(state); + bind(fast_path); // get the value of _do_not_unlock_if_synchronized into rdx const Address do_not_unlock_if_synchronized(rthread, @@ -1938,7 +1959,7 @@ void InterpreterMacroAssembler::profile_switch_case(Register index, void InterpreterMacroAssembler::_interp_verify_oop(Register reg, TosState state, const char* file, int line) { if (state == atos) { - MacroAssembler::_verify_oop(reg, "broken oop", file, line); + MacroAssembler::_verify_oop_checked(reg, "broken oop", file, line); } } diff --git a/src/hotspot/cpu/x86/interp_masm_x86.hpp b/src/hotspot/cpu/x86/interp_masm_x86.hpp index 3e2e33278a1..288b1bd1dfe 100644 --- a/src/hotspot/cpu/x86/interp_masm_x86.hpp +++ b/src/hotspot/cpu/x86/interp_masm_x86.hpp @@ -139,9 +139,18 @@ class InterpreterMacroAssembler: public MacroAssembler { // Expression stack void pop_ptr(Register r = rax); void pop_i(Register r = rax); + + // On x86, pushing a ptr or an int is semantically identical, but we + // maintain a distinction for clarity and for making it easier to change + // semantics in the future void push_ptr(Register r = rax); void push_i(Register r = rax); + // push_i_or_ptr is provided for when explicitly allowing either a ptr or + // an int might have some advantage, while still documenting the fact that a + // ptr might be pushed to the stack. + void push_i_or_ptr(Register r = rax); + void push_f(XMMRegister r); void pop_f(XMMRegister r); void pop_d(XMMRegister r); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.cpp b/src/hotspot/cpu/x86/macroAssembler_x86.cpp index 8b19ddab7b8..d7fabfbbedb 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp @@ -112,6 +112,7 @@ void MacroAssembler::cmpklass(Address src1, Metadata* obj) { cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate()); } + void MacroAssembler::cmpklass(Register src1, Metadata* obj) { cmp_literal32(src1, (int32_t)obj, metadata_Relocation::spec_for_immediate()); } @@ -369,11 +370,6 @@ void MacroAssembler::pushptr(AddressLiteral src) { } } -void MacroAssembler::set_word_if_not_zero(Register dst) { - xorl(dst, dst); - set_byte_if_not_zero(dst); -} - static void pass_arg0(MacroAssembler* masm, Register arg) { masm->push(arg); } @@ -713,8 +709,12 @@ void MacroAssembler::movptr(Register dst, ArrayAddress src) { // src should NEVER be a real pointer. 
Use AddressLiteral for true pointers void MacroAssembler::movptr(Address dst, intptr_t src) { - mov64(rscratch1, src); - movq(dst, rscratch1); + if (is_simm32(src)) { + movptr(dst, checked_cast(src)); + } else { + mov64(rscratch1, src); + movq(dst, rscratch1); + } } // These are mostly for initializing NULL @@ -2495,6 +2495,7 @@ void MacroAssembler::movdqu(XMMRegister dst, Address src) { void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) { assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15"); + if (dst->encoding() == src->encoding()) return; Assembler::movdqu(dst, src); } @@ -2519,6 +2520,7 @@ void MacroAssembler::vmovdqu(XMMRegister dst, Address src) { void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) { assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15"); + if (dst->encoding() == src->encoding()) return; Assembler::vmovdqu(dst, src); } @@ -2532,6 +2534,64 @@ void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scrat } } + +void MacroAssembler::kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg) { + if (reachable(src)) { + kmovwl(dst, as_Address(src)); + } else { + lea(scratch_reg, src); + kmovwl(dst, Address(scratch_reg, 0)); + } +} + +void MacroAssembler::evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, + int vector_len, Register scratch_reg) { + if (reachable(src)) { + if (mask == k0) { + Assembler::evmovdqub(dst, as_Address(src), merge, vector_len); + } else { + Assembler::evmovdqub(dst, mask, as_Address(src), merge, vector_len); + } + } else { + lea(scratch_reg, src); + if (mask == k0) { + Assembler::evmovdqub(dst, Address(scratch_reg, 0), merge, vector_len); + } else { + Assembler::evmovdqub(dst, mask, Address(scratch_reg, 0), merge, vector_len); + } + } +} + +void MacroAssembler::evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, + int vector_len, Register scratch_reg) { + if (reachable(src)) { + Assembler::evmovdquw(dst, mask, as_Address(src), merge, vector_len); + } else { + lea(scratch_reg, src); + Assembler::evmovdquw(dst, mask, Address(scratch_reg, 0), merge, vector_len); + } +} + +void MacroAssembler::evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, + int vector_len, Register scratch_reg) { + if (reachable(src)) { + Assembler::evmovdqul(dst, mask, as_Address(src), merge, vector_len); + } else { + lea(scratch_reg, src); + Assembler::evmovdqul(dst, mask, Address(scratch_reg, 0), merge, vector_len); + } +} + +void MacroAssembler::evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, + int vector_len, Register scratch_reg) { + if (reachable(src)) { + Assembler::evmovdquq(dst, mask, as_Address(src), merge, vector_len); + } else { + lea(scratch_reg, src); + Assembler::evmovdquq(dst, mask, Address(scratch_reg, 0), merge, vector_len); + } +} + void MacroAssembler::evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch) { if (reachable(src)) { Assembler::evmovdquq(dst, as_Address(src), vector_len); @@ -2699,16 +2759,15 @@ void MacroAssembler::save_rax(Register tmp) { else if (tmp != rax) mov(tmp, rax); } -void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg, Register temp_reg) { -#ifdef _LP64 - assert(thread_reg == r15_thread, "should be"); -#else - if (thread_reg == noreg) { - thread_reg = temp_reg; - get_thread(thread_reg); +void 
MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg, bool at_return, bool in_nmethod) { + if (at_return) { + // Note that when in_nmethod is set, the stack pointer is incremented before the poll. Therefore, + // we may safely use rsp instead to perform the stack watermark check. + cmpptr(in_nmethod ? rsp : rbp, Address(thread_reg, Thread::polling_word_offset())); + jcc(Assembler::above, slow_path); + return; } -#endif - testb(Address(thread_reg, Thread::polling_page_offset()), SafepointMechanism::poll_bit()); + testb(Address(thread_reg, Thread::polling_word_offset()), SafepointMechanism::poll_bit()); jcc(Assembler::notZero, slow_path); // handshake bit set implies poll } @@ -3018,6 +3077,98 @@ void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, Assembler::vpcmpeqw(dst, nds, src, vector_len); } +void MacroAssembler::evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, + AddressLiteral src, int vector_len, Register scratch_reg) { + if (reachable(src)) { + Assembler::evpcmpeqd(kdst, mask, nds, as_Address(src), vector_len); + } else { + lea(scratch_reg, src); + Assembler::evpcmpeqd(kdst, mask, nds, Address(scratch_reg, 0), vector_len); + } +} + +void MacroAssembler::evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, + int comparison, int vector_len, Register scratch_reg) { + if (reachable(src)) { + Assembler::evpcmpd(kdst, mask, nds, as_Address(src), comparison, vector_len); + } else { + lea(scratch_reg, src); + Assembler::evpcmpd(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len); + } +} + +void MacroAssembler::evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, + int comparison, int vector_len, Register scratch_reg) { + if (reachable(src)) { + Assembler::evpcmpq(kdst, mask, nds, as_Address(src), comparison, vector_len); + } else { + lea(scratch_reg, src); + Assembler::evpcmpq(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len); + } +} + +void MacroAssembler::evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, + int comparison, int vector_len, Register scratch_reg) { + if (reachable(src)) { + Assembler::evpcmpb(kdst, mask, nds, as_Address(src), comparison, vector_len); + } else { + lea(scratch_reg, src); + Assembler::evpcmpb(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len); + } +} + +void MacroAssembler::evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, + int comparison, int vector_len, Register scratch_reg) { + if (reachable(src)) { + Assembler::evpcmpw(kdst, mask, nds, as_Address(src), comparison, vector_len); + } else { + lea(scratch_reg, src); + Assembler::evpcmpw(kdst, mask, nds, Address(scratch_reg, 0), comparison, vector_len); + } +} + +void MacroAssembler::vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len) { + if (width == Assembler::Q) { + Assembler::vpcmpCCq(dst, nds, src, cond_encoding, vector_len); + } else { + Assembler::vpcmpCCbwd(dst, nds, src, cond_encoding, vector_len); + } +} + +void MacroAssembler::vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg) { + int eq_cond_enc = 0x29; + int gt_cond_enc = 0x37; + if (width != Assembler::Q) { + eq_cond_enc = 0x74 + width; + gt_cond_enc = 0x64 + width; + } + switch (cond) { + case eq: + vpcmpCC(dst, nds, src, eq_cond_enc, width, vector_len); + break; + case neq: + vpcmpCC(dst, nds, src, eq_cond_enc, 
width, vector_len); + vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg); + break; + case le: + vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len); + vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg); + break; + case nlt: + vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len); + vpxor(dst, dst, ExternalAddress(StubRoutines::x86::vector_all_bits_set()), vector_len, scratch_reg); + break; + case lt: + vpcmpCC(dst, src, nds, gt_cond_enc, width, vector_len); + break; + case nle: + vpcmpCC(dst, nds, src, gt_cond_enc, width, vector_len); + break; + default: + assert(false, "Should not reach here"); + } +} + void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) { assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15"); Assembler::vpmovzxbw(dst, src, vector_len); @@ -3142,6 +3293,16 @@ void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src } } +void MacroAssembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src, + bool merge, int vector_len, Register scratch_reg) { + if (reachable(src)) { + Assembler::evpord(dst, mask, nds, as_Address(src), merge, vector_len); + } else { + lea(scratch_reg, src); + Assembler::evpord(dst, mask, nds, Address(scratch_reg, 0), merge, vector_len); + } +} + void MacroAssembler::vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) { if (reachable(src)) { vdivsd(dst, nds, as_Address(src)); @@ -3238,7 +3399,14 @@ void MacroAssembler::vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, } } -//------------------------------------------------------------------------------------------- +void MacroAssembler::vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) { + if (reachable(src)) { + Assembler::vpermd(dst, nds, as_Address(src), vector_len); + } else { + lea(scratch_reg, src); + Assembler::vpermd(dst, nds, Address(scratch_reg, 0), vector_len); + } +} void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { const int32_t inverted_jweak_mask = ~static_cast(JNIHandles::weak_tag_mask); @@ -3761,44 +3929,6 @@ void MacroAssembler::vallones(XMMRegister dst, int vector_len) { } } -RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, - Register tmp, - int offset) { - intptr_t value = *delayed_value_addr; - if (value != 0) - return RegisterOrConstant(value + offset); - - // load indirectly to solve generation ordering problem - movptr(tmp, ExternalAddress((address) delayed_value_addr)); - -#ifdef ASSERT - { Label L; - testptr(tmp, tmp); - if (WizardMode) { - const char* buf = NULL; - { - ResourceMark rm; - stringStream ss; - ss.print("DelayedValue=" INTPTR_FORMAT, delayed_value_addr[1]); - buf = code_string(ss.as_string()); - } - jcc(Assembler::notZero, L); - STOP(buf); - } else { - jccb(Assembler::notZero, L); - hlt(); - } - bind(L); - } -#endif - - if (offset != 0) - addptr(tmp, offset); - - return RegisterOrConstant(tmp); -} - - Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, int extra_slot_offset) { // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
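// --- Illustrative sketch (editor's note, not part of the patch) -------------
// Per-lane scalar analogue of the predicate synthesis in vpcmpCCW() above:
// pre-AVX-512 integer SIMD only provides "equal" and "greater-than" compares,
// so the remaining predicates are formed by swapping operands and/or inverting
// the result (the vpxor with vector_all_bits_set). The local enum mirrors the
// cases handled there; none of this is VM code.
enum LanePred { lp_eq, lp_neq, lp_le, lp_nlt, lp_lt, lp_nle };
static inline bool lane_cmp(int32_t a, int32_t b, LanePred p) {
  switch (p) {
  case lp_eq:  return  (a == b);
  case lp_neq: return !(a == b);  // eq, then invert
  case lp_le:  return !(a >  b);  // gt, then invert
  case lp_nlt: return !(b >  a);  // gt with swapped operands, then invert
  case lp_lt:  return  (b >  a);  // gt with swapped operands
  case lp_nle: return  (a >  b);  // gt
  default:     return false;
  }
}
// ---------------------------------------------------------------------------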
@@ -3820,7 +3950,6 @@ Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, return Address(rsp, scale_reg, scale_factor, offset); } - void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) { if (!VerifyOops) return; @@ -3913,6 +4042,9 @@ class ControlWord { case 1: rc = "round down"; break; case 2: rc = "round up "; break; case 3: rc = "chop "; break; + default: + rc = NULL; // silence compiler warnings + fatal("Unknown rounding control: %d", rounding_control()); }; // precision control const char* pc; @@ -3921,6 +4053,9 @@ class ControlWord { case 1: pc = "reserved"; break; case 2: pc = "53 bits "; break; case 3: pc = "64 bits "; break; + default: + pc = NULL; // silence compiler warnings + fatal("Unknown precision control: %d", precision_control()); }; // flags char f[9]; @@ -5764,7 +5899,7 @@ void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register bind(VECTOR64_LOOP); // AVX512 code to compare 64 byte vectors. - evmovdqub(rymm0, Address(obja, result), Assembler::AVX_512bit); + evmovdqub(rymm0, Address(obja, result), false, Assembler::AVX_512bit); evpcmpeqb(k7, rymm0, Address(objb, result), Assembler::AVX_512bit); kortestql(k7, k7); jcc(Assembler::aboveEqual, VECTOR64_NOT_EQUAL); // mismatch @@ -5783,7 +5918,7 @@ void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register notq(tmp2); kmovql(k3, tmp2); - evmovdqub(rymm0, k3, Address(obja, result), Assembler::AVX_512bit); + evmovdqub(rymm0, k3, Address(obja, result), false, Assembler::AVX_512bit); evpcmpeqb(k7, k3, rymm0, Address(objb, result), Assembler::AVX_512bit); ktestql(k7, k3); @@ -7578,7 +7713,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le notl(result); kmovdl(k3, result); - evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit); + evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit); evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit); ktestd(k2, k3); jcc(Assembler::carryClear, return_zero); @@ -7603,7 +7738,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le negptr(len); bind(copy_32_loop); - evmovdquw(tmp1Reg, Address(src, len, Address::times_2), Assembler::AVX_512bit); + evmovdquw(tmp1Reg, Address(src, len, Address::times_2), /*merge*/ false, Assembler::AVX_512bit); evpcmpuw(k2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit); kortestdl(k2, k2); jcc(Assembler::carryClear, return_zero); @@ -7628,7 +7763,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le kmovdl(k3, result); - evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit); + evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit); evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit); ktestd(k2, k3); jcc(Assembler::carryClear, return_zero); @@ -7773,7 +7908,7 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len // inflate 32 chars per iter bind(copy_32_loop); vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_512bit); - evmovdquw(Address(dst, len, Address::times_2), tmp1, Assembler::AVX_512bit); + evmovdquw(Address(dst, len, Address::times_2), tmp1, /*merge*/ false, Assembler::AVX_512bit); addptr(len, 32); jcc(Assembler::notZero, copy_32_loop); @@ -7788,7 +7923,7 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len notl(tmp3_aliased); kmovdl(k2, tmp3_aliased); evpmovzxbw(tmp1, 
k2, Address(src, 0), Assembler::AVX_512bit); - evmovdquw(Address(dst, 0), k2, tmp1, Assembler::AVX_512bit); + evmovdquw(Address(dst, 0), k2, tmp1, /*merge*/ true, Assembler::AVX_512bit); jmp(done); bind(avx3_threshold); @@ -7963,6 +8098,7 @@ void MacroAssembler::cache_wbsync(bool is_pre) sfence(); } } + #endif // _LP64 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) { diff --git a/src/hotspot/cpu/x86/macroAssembler_x86.hpp b/src/hotspot/cpu/x86/macroAssembler_x86.hpp index 3d009d69945..e7419fc916b 100644 --- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp +++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp @@ -583,22 +583,30 @@ class MacroAssembler: public Assembler { // method handles (JSR 292) Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); - //---- - void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0 - // Debugging // only if +VerifyOops void _verify_oop(Register reg, const char* s, const char* file, int line); void _verify_oop_addr(Address addr, const char* s, const char* file, int line); + void _verify_oop_checked(Register reg, const char* s, const char* file, int line) { + if (VerifyOops) { + _verify_oop(reg, s, file, line); + } + } + void _verify_oop_addr_checked(Address reg, const char* s, const char* file, int line) { + if (VerifyOops) { + _verify_oop_addr(reg, s, file, line); + } + } + // TODO: verify method and klass metadata (compare against vptr?) void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} -#define verify_oop(reg) _verify_oop(reg, "broken oop " #reg, __FILE__, __LINE__) -#define verify_oop_msg(reg, msg) _verify_oop(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__) -#define verify_oop_addr(addr) _verify_oop_addr(addr, "broken oop addr " #addr, __FILE__, __LINE__) +#define verify_oop(reg) _verify_oop_checked(reg, "broken oop " #reg, __FILE__, __LINE__) +#define verify_oop_msg(reg, msg) _verify_oop_checked(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__) +#define verify_oop_addr(addr) _verify_oop_addr_checked(addr, "broken oop addr " #addr, __FILE__, __LINE__) #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) @@ -643,13 +651,7 @@ class MacroAssembler: public Assembler { // Check for reserved stack access in method being exited (for JIT) void reserved_stack_check(); - virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, - Register tmp, - int offset); - - // If thread_reg is != noreg the code assumes the register passed contains - // the thread (required on 64 bit). 
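// --- Illustrative sketch (editor's note, not part of the patch) -------------
// What the reworked safepoint_poll() (new signature below, implementation in
// macroAssembler_x86.cpp above) checks in its two modes, written as scalar
// C++; polling_word is the value at Thread::polling_word_offset(), everything
// else here is invented for illustration.
static inline bool safepoint_poll_hit(uintptr_t polling_word, uintptr_t sp_or_fp,
                                      bool at_return, uintptr_t poll_bit) {
  if (at_return) {
    // Stack watermark check: frames above the watermark have not been
    // processed yet, so returning into them must take the slow path.
    return sp_or_fp > polling_word;
  }
  // Ordinary poll: the poll bit is armed for safepoints and handshakes.
  return (polling_word & poll_bit) != 0;
}
// ---------------------------------------------------------------------------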
- void safepoint_poll(Label& slow_path, Register thread_reg, Register temp_reg); + void safepoint_poll(Label& slow_path, Register thread_reg, bool at_return, bool in_nmethod); void verify_tlab(); @@ -1078,15 +1080,59 @@ class MacroAssembler: public Assembler { void movdqu(XMMRegister dst, Address src); void movdqu(XMMRegister dst, XMMRegister src); void movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg = rscratch1); + + void kmovwl(KRegister dst, Register src) { Assembler::kmovwl(dst, src); } + void kmovwl(Register dst, KRegister src) { Assembler::kmovwl(dst, src); } + void kmovwl(KRegister dst, Address src) { Assembler::kmovwl(dst, src); } + void kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg = rscratch1); + // AVX Unaligned forms void vmovdqu(Address dst, XMMRegister src); void vmovdqu(XMMRegister dst, Address src); void vmovdqu(XMMRegister dst, XMMRegister src); void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1); + + // AVX512 Unaligned + void evmovdqub(Address dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); } + void evmovdqub(XMMRegister dst, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); } + void evmovdqub(XMMRegister dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqub(dst, src, merge, vector_len); } + void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqub(dst, mask, src, merge, vector_len); } + void evmovdqub(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg); + + void evmovdquw(Address dst, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, src, merge, vector_len); } + void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); } + void evmovdquw(XMMRegister dst, Address src, bool merge, int vector_len) { Assembler::evmovdquw(dst, src, merge, vector_len); } + void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquw(dst, mask, src, merge, vector_len); } + void evmovdquw(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg); + + void evmovdqul(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); } + void evmovdqul(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdqul(dst, src, vector_len); } + void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) { + if (dst->encoding() == src->encoding()) return; + Assembler::evmovdqul(dst, src, vector_len); + } + void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); } + void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdqul(dst, mask, src, merge, vector_len); } + void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { + if (dst->encoding() == src->encoding() && mask == k0) return; + Assembler::evmovdqul(dst, mask, src, merge, vector_len); + } + void evmovdqul(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg); + void evmovdquq(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); } - void evmovdquq(XMMRegister 
dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); } void evmovdquq(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); } void evmovdquq(XMMRegister dst, AddressLiteral src, int vector_len, Register rscratch); + void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) { + if (dst->encoding() == src->encoding()) return; + Assembler::evmovdquq(dst, src, vector_len); + } + void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); } + void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len) { Assembler::evmovdquq(dst, mask, src, merge, vector_len); } + void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) { + if (dst->encoding() == src->encoding() && mask == k0) return; + Assembler::evmovdquq(dst, mask, src, merge, vector_len); + } + void evmovdquq(XMMRegister dst, KRegister mask, AddressLiteral src, bool merge, int vector_len, Register scratch_reg); // Move Aligned Double Quadword void movdqa(XMMRegister dst, Address src) { Assembler::movdqa(dst, src); } @@ -1208,6 +1254,30 @@ class MacroAssembler: public Assembler { void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len); + void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg); + + // Vector compares + void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, + int comparison, int vector_len) { Assembler::evpcmpd(kdst, mask, nds, src, comparison, vector_len); } + void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, + int comparison, int vector_len, Register scratch_reg); + void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, + int comparison, int vector_len) { Assembler::evpcmpq(kdst, mask, nds, src, comparison, vector_len); } + void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, + int comparison, int vector_len, Register scratch_reg); + void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, + int comparison, int vector_len) { Assembler::evpcmpb(kdst, mask, nds, src, comparison, vector_len); } + void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, + int comparison, int vector_len, Register scratch_reg); + void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, + int comparison, int vector_len) { Assembler::evpcmpw(kdst, mask, nds, src, comparison, vector_len); } + void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, AddressLiteral src, + int comparison, int vector_len, Register scratch_reg); + + + // Emit comparison instruction for the specified comparison predicate. 
+ void vpcmpCCW(XMMRegister dst, XMMRegister nds, XMMRegister src, ComparisonPredicate cond, Width width, int vector_len, Register scratch_reg); + void vpcmpCC(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, Width width, int vector_len); void vpmovzxbw(XMMRegister dst, Address src, int vector_len); void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vpmovzxbw(dst, src, vector_len); } @@ -1236,6 +1306,7 @@ class MacroAssembler: public Assembler { void vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len); void vptest(XMMRegister dst, XMMRegister src); + void vptest(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::vptest(dst, src, vector_len); } void punpcklbw(XMMRegister dst, XMMRegister src); void punpcklbw(XMMRegister dst, Address src) { Assembler::punpcklbw(dst, src); } @@ -1254,6 +1325,8 @@ class MacroAssembler: public Assembler { void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); } void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1); + void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, AddressLiteral src, bool merge, int vector_len, Register scratch_reg); + void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); } void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); } void vdivsd(XMMRegister dst, XMMRegister nds, AddressLiteral src); @@ -1309,6 +1382,9 @@ class MacroAssembler: public Assembler { void vpxor(XMMRegister dst, XMMRegister src) { Assembler::vpxor(dst, dst, src, true); } void vpxor(XMMRegister dst, Address src) { Assembler::vpxor(dst, dst, src, true); } + void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpermd(dst, nds, src, vector_len); } + void vpermd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg); + void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) { if (UseAVX > 2 && VM_Version::supports_avx512novl()) { Assembler::vinserti32x4(dst, dst, src, imm8); @@ -1727,6 +1803,35 @@ class MacroAssembler: public Assembler { void cache_wb(Address line); void cache_wbsync(bool is_pre); + +#if COMPILER2_OR_JVMCI + void arraycopy_avx3_special_cases(XMMRegister xmm, KRegister mask, Register from, + Register to, Register count, int shift, + Register index, Register temp, + bool use64byteVector, Label& L_entry, Label& L_exit); + + void arraycopy_avx3_special_cases_conjoint(XMMRegister xmm, KRegister mask, Register from, + Register to, Register start_index, Register end_index, + Register count, int shift, Register temp, + bool use64byteVector, Label& L_entry, Label& L_exit); + + void copy64_masked_avx(Register dst, Register src, XMMRegister xmm, + KRegister mask, Register length, Register index, + Register temp, int shift = Address::times_1, int offset = 0, + bool use64byteVector = false); + + void copy32_masked_avx(Register dst, Register src, XMMRegister xmm, + KRegister mask, Register length, Register index, + Register temp, int shift = Address::times_1, int offset = 0); + + void copy32_avx(Register dst, Register src, Register index, XMMRegister xmm, + int shift = Address::times_1, int offset = 0); + + void copy64_avx(Register dst, Register src, Register index, XMMRegister xmm, + bool conjoint, int shift = Address::times_1, int offset = 0, + bool use64byteVector = 
false); +#endif // COMPILER2_OR_JVMCI + #endif // _LP64 void vallones(XMMRegister dst, int vector_len); diff --git a/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp b/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp new file mode 100644 index 00000000000..4368dee7329 --- /dev/null +++ b/src/hotspot/cpu/x86/macroAssembler_x86_arrayCopy_avx3.cpp @@ -0,0 +1,253 @@ +/* +* Copyright (c) 2020, Intel Corporation. +* +* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +* +* This code is free software; you can redistribute it and/or modify it +* under the terms of the GNU General Public License version 2 only, as +* published by the Free Software Foundation. +* +* This code is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +* version 2 for more details (a copy is included in the LICENSE file that +* accompanied this code). +* +* You should have received a copy of the GNU General Public License version +* 2 along with this work; if not, write to the Free Software Foundation, +* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. +* +* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA +* or visit www.oracle.com if you need additional information or have any +* questions. +* +*/ + +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/macroAssembler.inline.hpp" + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) /* nothing */ +#else +#define BLOCK_COMMENT(str) block_comment(str) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +#ifdef _LP64 + +#if COMPILER2_OR_JVMCI + +void MacroAssembler::arraycopy_avx3_special_cases(XMMRegister xmm, KRegister mask, Register from, + Register to, Register count, int shift, + Register index, Register temp, + bool use64byteVector, Label& L_entry, Label& L_exit) { + Label L_entry_64, L_entry_96, L_entry_128; + Label L_entry_160, L_entry_192; + + int size_mat[][6] = { + /* T_BYTE */ {32 , 64, 96 , 128 , 160 , 192 }, + /* T_SHORT*/ {16 , 32, 48 , 64 , 80 , 96 }, + /* T_INT */ {8 , 16, 24 , 32 , 40 , 48 }, + /* T_LONG */ {4 , 8, 12 , 16 , 20 , 24 } + }; + + // Case A) Special case for length less than equal to 32 bytes. + cmpq(count, size_mat[shift][0]); + jccb(Assembler::greater, L_entry_64); + copy32_masked_avx(to, from, xmm, mask, count, index, temp, shift); + jmp(L_exit); + + // Case B) Special case for length less than equal to 64 bytes. + BIND(L_entry_64); + cmpq(count, size_mat[shift][1]); + jccb(Assembler::greater, L_entry_96); + copy64_masked_avx(to, from, xmm, mask, count, index, temp, shift, 0, use64byteVector); + jmp(L_exit); + + // Case C) Special case for length less than equal to 96 bytes. + BIND(L_entry_96); + cmpq(count, size_mat[shift][2]); + jccb(Assembler::greater, L_entry_128); + copy64_avx(to, from, index, xmm, false, shift, 0, use64byteVector); + subq(count, 64 >> shift); + copy32_masked_avx(to, from, xmm, mask, count, index, temp, shift, 64); + jmp(L_exit); + + // Case D) Special case for length less than equal to 128 bytes. 
+ BIND(L_entry_128); + cmpq(count, size_mat[shift][3]); + jccb(Assembler::greater, L_entry_160); + copy64_avx(to, from, index, xmm, false, shift, 0, use64byteVector); + copy32_avx(to, from, index, xmm, shift, 64); + subq(count, 96 >> shift); + copy32_masked_avx(to, from, xmm, mask, count, index, temp, shift, 96); + jmp(L_exit); + + // Case E) Special case for length less than equal to 160 bytes. + BIND(L_entry_160); + cmpq(count, size_mat[shift][4]); + jccb(Assembler::greater, L_entry_192); + copy64_avx(to, from, index, xmm, false, shift, 0, use64byteVector); + copy64_avx(to, from, index, xmm, false, shift, 64, use64byteVector); + subq(count, 128 >> shift); + copy32_masked_avx(to, from, xmm, mask, count, index, temp, shift, 128); + jmp(L_exit); + + // Case F) Special case for length less than equal to 192 bytes. + BIND(L_entry_192); + cmpq(count, size_mat[shift][5]); + jcc(Assembler::greater, L_entry); + copy64_avx(to, from, index, xmm, false, shift, 0, use64byteVector); + copy64_avx(to, from, index, xmm, false, shift, 64, use64byteVector); + copy32_avx(to, from, index, xmm, shift, 128); + subq(count, 160 >> shift); + copy32_masked_avx(to, from, xmm, mask, count, index, temp, shift, 160); + jmp(L_exit); +} + +void MacroAssembler::arraycopy_avx3_special_cases_conjoint(XMMRegister xmm, KRegister mask, Register from, + Register to, Register start_index, Register end_index, + Register count, int shift, Register temp, + bool use64byteVector, Label& L_entry, Label& L_exit) { + Label L_entry_64, L_entry_96, L_entry_128; + Label L_entry_160, L_entry_192; + bool avx3 = MaxVectorSize > 32 && AVX3Threshold == 0; + + int size_mat[][6] = { + /* T_BYTE */ {32 , 64, 96 , 128 , 160 , 192 }, + /* T_SHORT*/ {16 , 32, 48 , 64 , 80 , 96 }, + /* T_INT */ {8 , 16, 24 , 32 , 40 , 48 }, + /* T_LONG */ {4 , 8, 12 , 16 , 20 , 24 } + }; + + // Case A) Special case for length less than equal to 32 bytes. + cmpq(count, size_mat[shift][0]); + jccb(Assembler::greater, L_entry_64); + copy32_masked_avx(to, from, xmm, mask, count, start_index, temp, shift); + jmp(L_exit); + + // Case B) Special case for length less than equal to 64 bytes. + BIND(L_entry_64); + cmpq(count, size_mat[shift][1]); + jccb(Assembler::greater, L_entry_96); + if (avx3) { + copy64_masked_avx(to, from, xmm, mask, count, start_index, temp, shift, 0, true); + } else { + copy32_avx(to, from, end_index, xmm, shift, -32); + subq(count, 32 >> shift); + copy32_masked_avx(to, from, xmm, mask, count, start_index, temp, shift); + } + jmp(L_exit); + + // Case C) Special case for length less than equal to 96 bytes. + BIND(L_entry_96); + cmpq(count, size_mat[shift][2]); + jccb(Assembler::greater, L_entry_128); + copy64_avx(to, from, end_index, xmm, true, shift, -64, use64byteVector); + subq(count, 64 >> shift); + copy32_masked_avx(to, from, xmm, mask, count, start_index, temp, shift); + jmp(L_exit); + + // Case D) Special case for length less than equal to 128 bytes. + BIND(L_entry_128); + cmpq(count, size_mat[shift][3]); + jccb(Assembler::greater, L_entry_160); + copy64_avx(to, from, end_index, xmm, true, shift, -64, use64byteVector); + copy32_avx(to, from, end_index, xmm, shift, -96); + subq(count, 96 >> shift); + copy32_masked_avx(to, from, xmm, mask, count, start_index, temp, shift); + jmp(L_exit); + + // Case E) Special case for length less than equal to 160 bytes. 
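// --- Illustrative sketch (editor's note, not part of the patch) -------------
// The copy32_masked_avx()/copy64_masked_avx() helpers used by these special
// cases (and defined further down) build an opmask with exactly n low bits set
// (n = elements left), so one masked load/store copies the irregular tail
// without a scalar loop. Scalar equivalents of the two mask recipes:
static inline uint64_t tail_mask32(unsigned n) {   // copy32_masked_avx, n < 64
  return (uint64_t(1) << n) - 1;                   //   mov 1; shlx by n; dec
}
static inline uint64_t tail_mask64(unsigned n) {   // copy64_masked_avx, 1 <= n <= 64
  return ~uint64_t(0) >> (64 - n);                 //   mov -1; shrx by (64 - n)
}
// ---------------------------------------------------------------------------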
+ BIND(L_entry_160); + cmpq(count, size_mat[shift][4]); + jccb(Assembler::greater, L_entry_192); + copy64_avx(to, from, end_index, xmm, true, shift, -64, use64byteVector); + copy64_avx(to, from, end_index, xmm, true, shift, -128, use64byteVector); + subq(count, 128 >> shift); + copy32_masked_avx(to, from, xmm, mask, count, start_index, temp, shift); + jmp(L_exit); + + // Case F) Special case for length less than equal to 192 bytes. + BIND(L_entry_192); + cmpq(count, size_mat[shift][5]); + jcc(Assembler::greater, L_entry); + copy64_avx(to, from, end_index, xmm, true, shift, -64, use64byteVector); + copy64_avx(to, from, end_index, xmm, true, shift, -128, use64byteVector); + copy32_avx(to, from, end_index, xmm, shift, -160); + subq(count, 160 >> shift); + copy32_masked_avx(to, from, xmm, mask, count, start_index, temp, shift); + jmp(L_exit); +} + +void MacroAssembler::copy64_masked_avx(Register dst, Register src, XMMRegister xmm, + KRegister mask, Register length, Register index, + Register temp, int shift, int offset, + bool use64byteVector) { + BasicType type[] = { T_BYTE, T_SHORT, T_INT, T_LONG}; + assert(MaxVectorSize >= 32, "vector length should be >= 32"); + if (!use64byteVector) { + copy32_avx(dst, src, index, xmm, shift, offset); + subptr(length, 32 >> shift); + copy32_masked_avx(dst, src, xmm, mask, length, index, temp, shift, offset+32); + } else { + Address::ScaleFactor scale = (Address::ScaleFactor)(shift); + assert(MaxVectorSize == 64, "vector length != 64"); + negptr(length); + addq(length, 64); + mov64(temp, -1); + shrxq(temp, temp, length); + kmovql(mask, temp); + evmovdqu(xmm, mask, Address(src, index, scale, offset), Assembler::AVX_512bit, type[shift]); + evmovdqu(Address(dst, index, scale, offset), mask, xmm, Assembler::AVX_512bit, type[shift]); + } +} + + +void MacroAssembler::copy32_masked_avx(Register dst, Register src, XMMRegister xmm, + KRegister mask, Register length, Register index, + Register temp, int shift, int offset) { + assert(MaxVectorSize >= 32, "vector length should be >= 32"); + BasicType type[] = { T_BYTE, T_SHORT, T_INT, T_LONG}; + Address::ScaleFactor scale = (Address::ScaleFactor)(shift); + mov64(temp, 1); + shlxq(temp, temp, length); + decq(temp); + kmovql(mask, temp); + evmovdqu(xmm, mask, Address(src, index, scale, offset), Assembler::AVX_256bit, type[shift]); + evmovdqu(Address(dst, index, scale, offset), mask, xmm, Assembler::AVX_256bit, type[shift]); +} + + +void MacroAssembler::copy32_avx(Register dst, Register src, Register index, XMMRegister xmm, + int shift, int offset) { + assert(MaxVectorSize >= 32, "vector length should be >= 32"); + Address::ScaleFactor scale = (Address::ScaleFactor)(shift); + vmovdqu(xmm, Address(src, index, scale, offset)); + vmovdqu(Address(dst, index, scale, offset), xmm); +} + + +void MacroAssembler::copy64_avx(Register dst, Register src, Register index, XMMRegister xmm, + bool conjoint, int shift, int offset, bool use64byteVector) { + assert(MaxVectorSize == 64 || MaxVectorSize == 32, "vector length mismatch"); + if (!use64byteVector) { + if (conjoint) { + copy32_avx(dst, src, index, xmm, shift, offset+32); + copy32_avx(dst, src, index, xmm, shift, offset); + } else { + copy32_avx(dst, src, index, xmm, shift, offset); + copy32_avx(dst, src, index, xmm, shift, offset+32); + } + } else { + Address::ScaleFactor scale = (Address::ScaleFactor)(shift); + evmovdquq(xmm, Address(src, index, scale, offset), Assembler::AVX_512bit); + evmovdquq(Address(dst, index, scale, offset), xmm, Assembler::AVX_512bit); + } +} + +#endif // 
COMPILER2_OR_JVMCI + +#endif diff --git a/src/hotspot/cpu/x86/methodHandles_x86.hpp b/src/hotspot/cpu/x86/methodHandles_x86.hpp index bb333781a62..444d0495666 100644 --- a/src/hotspot/cpu/x86/methodHandles_x86.hpp +++ b/src/hotspot/cpu/x86/methodHandles_x86.hpp @@ -27,7 +27,7 @@ // Adapters enum /* platform_dependent_constants */ { - adapter_code_size = NOT_LP64(16000 DEBUG_ONLY(+ 25000)) LP64_ONLY(32000 DEBUG_ONLY(+ 150000)) + adapter_code_size = 4000 DEBUG_ONLY(+ 6000) }; // Additional helper methods for MethodHandles code generation: diff --git a/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp b/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp index 066d1ae98cb..3e2b3a118c7 100644 --- a/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp +++ b/src/hotspot/cpu/x86/sharedRuntime_x86_32.cpp @@ -37,6 +37,7 @@ #include "memory/resourceArea.hpp" #include "oops/compiledICHolder.hpp" #include "oops/klass.inline.hpp" +#include "prims/methodHandles.hpp" #include "runtime/safepointMechanism.hpp" #include "runtime/sharedRuntime.hpp" #include "runtime/vframeArray.hpp" @@ -1213,265 +1214,6 @@ void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_ty } } - -static void save_or_restore_arguments(MacroAssembler* masm, - const int stack_slots, - const int total_in_args, - const int arg_save_area, - OopMap* map, - VMRegPair* in_regs, - BasicType* in_sig_bt) { - // if map is non-NULL then the code should store the values, - // otherwise it should load them. - int handle_index = 0; - // Save down double word first - for ( int i = 0; i < total_in_args; i++) { - if (in_regs[i].first()->is_XMMRegister() && in_sig_bt[i] == T_DOUBLE) { - int slot = handle_index * VMRegImpl::slots_per_word + arg_save_area; - int offset = slot * VMRegImpl::stack_slot_size; - handle_index += 2; - assert(handle_index <= stack_slots, "overflow"); - if (map != NULL) { - __ movdbl(Address(rsp, offset), in_regs[i].first()->as_XMMRegister()); - } else { - __ movdbl(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset)); - } - } - if (in_regs[i].first()->is_Register() && in_sig_bt[i] == T_LONG) { - int slot = handle_index * VMRegImpl::slots_per_word + arg_save_area; - int offset = slot * VMRegImpl::stack_slot_size; - handle_index += 2; - assert(handle_index <= stack_slots, "overflow"); - if (map != NULL) { - __ movl(Address(rsp, offset), in_regs[i].first()->as_Register()); - if (in_regs[i].second()->is_Register()) { - __ movl(Address(rsp, offset + 4), in_regs[i].second()->as_Register()); - } - } else { - __ movl(in_regs[i].first()->as_Register(), Address(rsp, offset)); - if (in_regs[i].second()->is_Register()) { - __ movl(in_regs[i].second()->as_Register(), Address(rsp, offset + 4)); - } - } - } - } - // Save or restore single word registers - for ( int i = 0; i < total_in_args; i++) { - if (in_regs[i].first()->is_Register()) { - int slot = handle_index++ * VMRegImpl::slots_per_word + arg_save_area; - int offset = slot * VMRegImpl::stack_slot_size; - assert(handle_index <= stack_slots, "overflow"); - if (in_sig_bt[i] == T_ARRAY && map != NULL) { - map->set_oop(VMRegImpl::stack2reg(slot));; - } - - // Value is in an input register pass we must flush it to the stack - const Register reg = in_regs[i].first()->as_Register(); - switch (in_sig_bt[i]) { - case T_ARRAY: - if (map != NULL) { - __ movptr(Address(rsp, offset), reg); - } else { - __ movptr(reg, Address(rsp, offset)); - } - break; - case T_BOOLEAN: - case T_CHAR: - case T_BYTE: - case T_SHORT: - case T_INT: - if (map != NULL) { - __ movl(Address(rsp, offset), reg); - } 
else { - __ movl(reg, Address(rsp, offset)); - } - break; - case T_OBJECT: - default: ShouldNotReachHere(); - } - } else if (in_regs[i].first()->is_XMMRegister()) { - if (in_sig_bt[i] == T_FLOAT) { - int slot = handle_index++ * VMRegImpl::slots_per_word + arg_save_area; - int offset = slot * VMRegImpl::stack_slot_size; - assert(handle_index <= stack_slots, "overflow"); - if (map != NULL) { - __ movflt(Address(rsp, offset), in_regs[i].first()->as_XMMRegister()); - } else { - __ movflt(in_regs[i].first()->as_XMMRegister(), Address(rsp, offset)); - } - } - } else if (in_regs[i].first()->is_stack()) { - if (in_sig_bt[i] == T_ARRAY && map != NULL) { - int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); - map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots)); - } - } - } -} - -// Registers need to be saved for runtime call -static Register caller_saved_registers[] = { - rcx, rdx, rsi, rdi -}; - -// Save caller saved registers except r1 and r2 -static void save_registers_except(MacroAssembler* masm, Register r1, Register r2) { - int reg_len = (int)(sizeof(caller_saved_registers) / sizeof(Register)); - for (int index = 0; index < reg_len; index ++) { - Register this_reg = caller_saved_registers[index]; - if (this_reg != r1 && this_reg != r2) { - __ push(this_reg); - } - } -} - -// Restore caller saved registers except r1 and r2 -static void restore_registers_except(MacroAssembler* masm, Register r1, Register r2) { - int reg_len = (int)(sizeof(caller_saved_registers) / sizeof(Register)); - for (int index = reg_len - 1; index >= 0; index --) { - Register this_reg = caller_saved_registers[index]; - if (this_reg != r1 && this_reg != r2) { - __ pop(this_reg); - } - } -} - -// Pin object, return pinned object or null in rax -static void gen_pin_object(MacroAssembler* masm, - Register thread, VMRegPair reg) { - __ block_comment("gen_pin_object {"); - - Label is_null; - Register tmp_reg = rax; - VMRegPair tmp(tmp_reg->as_VMReg()); - if (reg.first()->is_stack()) { - // Load the arg up from the stack - simple_move32(masm, reg, tmp); - reg = tmp; - } else { - __ movl(tmp_reg, reg.first()->as_Register()); - } - __ testptr(reg.first()->as_Register(), reg.first()->as_Register()); - __ jccb(Assembler::equal, is_null); - - // Save registers that may be used by runtime call - Register arg = reg.first()->is_Register() ? reg.first()->as_Register() : noreg; - save_registers_except(masm, arg, thread); - - __ call_VM_leaf( - CAST_FROM_FN_PTR(address, SharedRuntime::pin_object), - thread, reg.first()->as_Register()); - - // Restore saved registers - restore_registers_except(masm, arg, thread); - - __ bind(is_null); - __ block_comment("} gen_pin_object"); -} - -// Unpin object -static void gen_unpin_object(MacroAssembler* masm, - Register thread, VMRegPair reg) { - __ block_comment("gen_unpin_object {"); - Label is_null; - - // temp register - __ push(rax); - Register tmp_reg = rax; - VMRegPair tmp(tmp_reg->as_VMReg()); - - simple_move32(masm, reg, tmp); - - __ testptr(rax, rax); - __ jccb(Assembler::equal, is_null); - - // Save registers that may be used by runtime call - Register arg = reg.first()->is_Register() ? 
reg.first()->as_Register() : noreg; - save_registers_except(masm, arg, thread); - - __ call_VM_leaf( - CAST_FROM_FN_PTR(address, SharedRuntime::unpin_object), - thread, rax); - - // Restore saved registers - restore_registers_except(masm, arg, thread); - __ bind(is_null); - __ pop(rax); - __ block_comment("} gen_unpin_object"); -} - -// Check GCLocker::needs_gc and enter the runtime if it's true. This -// keeps a new JNI critical region from starting until a GC has been -// forced. Save down any oops in registers and describe them in an -// OopMap. -static void check_needs_gc_for_critical_native(MacroAssembler* masm, - Register thread, - int stack_slots, - int total_c_args, - int total_in_args, - int arg_save_area, -