diff --git a/jdk/make/data/characterdata/CharacterData02.java.template b/jdk/make/data/characterdata/CharacterData02.java.template index ea50e8f5219..32388f266e5 100644 --- a/jdk/make/data/characterdata/CharacterData02.java.template +++ b/jdk/make/data/characterdata/CharacterData02.java.template @@ -103,11 +103,21 @@ class CharacterData02 extends CharacterData { } boolean isJavaIdentifierStart(int ch) { + // isJavaIdentifierStart strictly conforms to code points assigned + // in Unicode 6.2. + if(Character.UnicodeBlock.of(ch) == + Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E) + return false; int props = getProperties(ch); return ((props & $$maskIdentifierInfo) >= $$lowJavaStart); } boolean isJavaIdentifierPart(int ch) { + // isJavaIdentifierPart strictly conforms to code points assigned + // in Unicode 6.2. + if(Character.UnicodeBlock.of(ch) == + Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E) + return false; int props = getProperties(ch); return ((props & $$nonzeroJavaPart) != 0); } diff --git a/jdk/make/data/unicodedata/UnicodeData.txt b/jdk/make/data/unicodedata/UnicodeData.txt index a121b68d62e..3ed5585528a 100644 --- a/jdk/make/data/unicodedata/UnicodeData.txt +++ b/jdk/make/data/unicodedata/UnicodeData.txt @@ -23550,6 +23550,8 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 2B734;;Lo;0;L;;;;;N;;;;; 2B740;;Lo;0;L;;;;;N;;;;; 2B81D;;Lo;0;L;;;;;N;;;;; +2B820;;Lo;0;L;;;;;N;;;;; +2CEA1;;Lo;0;L;;;;;N;;;;; 2F800;CJK COMPATIBILITY IDEOGRAPH-2F800;Lo;0;L;4E3D;;;;N;;;;; 2F801;CJK COMPATIBILITY IDEOGRAPH-2F801;Lo;0;L;4E38;;;;N;;;;; 2F802;CJK COMPATIBILITY IDEOGRAPH-2F802;Lo;0;L;4E41;;;;N;;;;; diff --git a/jdk/src/share/classes/java/lang/Character.java b/jdk/src/share/classes/java/lang/Character.java index e7f994619ba..fb0f33d6777 100644 --- a/jdk/src/share/classes/java/lang/Character.java +++ b/jdk/src/share/classes/java/lang/Character.java @@ -58,8 +58,10 @@ * block from version 10.0 of the Unicode Standard. 
Second, the Java SE 8 Platform * allows an implementation of class {@code Character} to use the code points * in the range of {@code U+9FCD} to {@code U+9FEF} from version 11.0 of the - * Unicode Standard, in order for the class to allow the "Implementation - * Level 1" of the Chinese GB18030-2022 standard. Third, the Java SE 8 Platform + * Unicode Standard and in the {@code CJK Unified Ideographs Extension E} block + * from version 8.0 of the Unicode Standard, in order for the class to allow the + * "Implementation Level 2" of the Chinese GB18030-2022 standard. + * Third, the Java SE 8 Platform * allows an implementation of class {@code Character} to use the Japanese Era * code point, {@code U+32FF}, from the Unicode Standard version 12.1. * Consequently, the @@ -2575,7 +2577,18 @@ private UnicodeBlock(String idName, String... aliases) { "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS", "ARABICMATHEMATICALALPHABETICSYMBOLS"); - private static final int[] blockStarts = { + /** + * Constant for the "CJK Unified Ideographs Extension E" Unicode + * character block. + * @apiNote This field is defined in Java SE 8 Maintenance Release 5. + * @since 1.8 + */ + public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = + new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E", + "CJK UNIFIED IDEOGRAPHS EXTENSION E", + "CJKUNIFIEDIDEOGRAPHSEXTENSIONE"); + + private static final int blockStarts[] = { 0x0000, // 0000..007F; Basic Latin 0x0080, // 0080..00FF; Latin-1 Supplement 0x0100, // 0100..017F; Latin Extended-A @@ -2823,7 +2836,8 @@ private UnicodeBlock(String idName, String... 
aliases) { 0x2A6E0, // unassigned 0x2A700, // 2A700..2B73F; CJK Unified Ideographs Extension C 0x2B740, // 2B740..2B81F; CJK Unified Ideographs Extension D - 0x2B820, // unassigned + 0x2B820, // 2B820..2CEAF; CJK Unified Ideographs Extension E + 0x2CEB0, // unassigned 0x2F800, // 2F800..2FA1F; CJK Compatibility Ideographs Supplement 0x2FA20, // unassigned 0xE0000, // E0000..E007F; Tags @@ -3082,6 +3096,7 @@ private UnicodeBlock(String idName, String... aliases) { null, CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C, CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D, + CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E, null, CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, null, diff --git a/jdk/test/java/lang/Character/CheckScript.java b/jdk/test/java/lang/Character/CheckScript.java index 713e3c8e886..b232f8ed715 100644 --- a/jdk/test/java/lang/Character/CheckScript.java +++ b/jdk/test/java/lang/Character/CheckScript.java @@ -1,6 +1,6 @@ /* - * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2010, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -24,7 +24,7 @@ /** * @test - * @bug 6945564 6959267 7033561 7070436 7198195 + * @bug 6945564 6959267 7033561 7070436 7198195 8305681 * @summary Check that the j.l.Character.UnicodeScript */ diff --git a/jdk/test/java/lang/Character/Scripts.txt b/jdk/test/java/lang/Character/Scripts.txt index d9d21393f44..be0817e1400 100644 --- a/jdk/test/java/lang/Character/Scripts.txt +++ b/jdk/test/java/lang/Character/Scripts.txt @@ -1439,9 +1439,10 @@ FA70..FAD9 ; Han # Lo [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILI 20000..2A6D6 ; Han # Lo [42711] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6D6 2A700..2B734 ; Han # Lo [4149] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B734 2B740..2B81D ; Han # Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D +2B820..2CEA1 ; Han # Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 2F800..2FA1D ; Han # Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D -# Total code points: 75998 +# Total code points: 81760 # ================================================ diff --git a/jdk/test/java/lang/Character/TestIsJavaIdentifierMethods.java b/jdk/test/java/lang/Character/TestIsJavaIdentifierMethods.java index 8a0bad21a8b..5c219c6d3d6 100644 --- a/jdk/test/java/lang/Character/TestIsJavaIdentifierMethods.java +++ b/jdk/test/java/lang/Character/TestIsJavaIdentifierMethods.java @@ -25,19 +25,26 @@ * @test * @summary Test behavior of isJavaIdentifierXX, testIsJavaLetter, and * testIsJavaLetterOrDigit methods for all code points. - * @bug 8218915 + * @bug 8218915 8301400 8305681 */ public class TestIsJavaIdentifierMethods { // Unassigned code points not present in Unicode 6.2 (which Java SE 8 // is based upon), including: various currency symbol sign code points - // (Nordic Mark ... 
Bitcoin), Japanese Era Square character code point, - // and 35 CJK Unified Ideograph code points from GB18030-2022 + // (Nordic Mark ... Bitcoin), the Japanese Era Square character code point, and + // code points for GB18030-2022 level 1 and 2 implementation including + // (35 code points from CJK Unified Ideographs and all of CJK Unified Ideographs + // Extension E). private static final int CS_SIGNS_CODEPOINT_START = 0x20BB; private static final int CS_SIGNS_CODEPOINT_END = 0x20BF; private static final int JAPANESE_ERA_CODEPOINT = 0x32FF; - private static final int GB18030_2022_CODEPOINT_START = 0x9FCD; - private static final int GB18030_2022_CODEPOINT_END = 0x9FEF; + // GB18030_2022 Code Points + private static final int CJK_GB18030_LEVEL1_START = 0x9FCD; + private static final int CJK_GB18030_LEVEL1_END = 0x9FEF; + // Extension E code points are greater than U+FFFF, + // and thus only the int methods need to be tested + private static final int CJK_EXTENSION_E_START = 0x2B820; + private static final int CJK_EXTENSION_E_END = 0x2CEAF; public static void main(String[] args) { testIsJavaIdentifierPart_int(); @@ -77,7 +84,8 @@ public static void testIsJavaIdentifierPart_int() { // value of variable "expected" is considered false. if (cp != JAPANESE_ERA_CODEPOINT && !(cp >= CS_SIGNS_CODEPOINT_START && cp <= CS_SIGNS_CODEPOINT_END) && - !(cp >= GB18030_2022_CODEPOINT_START && cp <= GB18030_2022_CODEPOINT_END)) { + !(cp >= CJK_GB18030_LEVEL1_START && cp <= CJK_GB18030_LEVEL1_END) && + !(cp >= CJK_EXTENSION_E_START && cp <= CJK_EXTENSION_E_END)) { byte type = (byte) Character.getType(cp); expected = Character.isLetter(cp) || type == Character.CURRENCY_SYMBOL @@ -125,7 +133,7 @@ public static void testIsJavaIdentifierPart_char() { // value of variable "expected" is considered false. 
if (i != JAPANESE_ERA_CODEPOINT && !(i >= CS_SIGNS_CODEPOINT_START && i <= CS_SIGNS_CODEPOINT_END) && - !(i >= GB18030_2022_CODEPOINT_START && i <= GB18030_2022_CODEPOINT_END)) { + !(i >= CJK_GB18030_LEVEL1_START && i <= CJK_GB18030_LEVEL1_END)) { byte type = (byte) Character.getType(ch); expected = Character.isLetter(ch) || type == Character.CURRENCY_SYMBOL @@ -168,7 +176,8 @@ public static void testIsJavaIdentifierStart_int() { // value of variable "expected" is considered false. if (cp != JAPANESE_ERA_CODEPOINT && !(cp >= CS_SIGNS_CODEPOINT_START && cp <= CS_SIGNS_CODEPOINT_END) && - !(cp >= GB18030_2022_CODEPOINT_START && cp <= GB18030_2022_CODEPOINT_END)) { + !(cp >= CJK_GB18030_LEVEL1_START && cp <= CJK_GB18030_LEVEL1_END) && + !(cp >= CJK_EXTENSION_E_START && cp <= CJK_EXTENSION_E_END)) { byte type = (byte) Character.getType(cp); expected = Character.isLetter(cp) || type == Character.LETTER_NUMBER @@ -208,7 +217,7 @@ public static void testIsJavaIdentifierStart_char() { // value of variable "expected" is considered false. if (i != JAPANESE_ERA_CODEPOINT && !(i >= CS_SIGNS_CODEPOINT_START && i <= CS_SIGNS_CODEPOINT_END) && - !(i >= GB18030_2022_CODEPOINT_START && i <= GB18030_2022_CODEPOINT_END)) { + !(i >= CJK_GB18030_LEVEL1_START && i <= CJK_GB18030_LEVEL1_END)) { byte type = (byte) Character.getType(ch); expected = Character.isLetter(ch) || type == Character.LETTER_NUMBER @@ -248,7 +257,7 @@ public static void testIsJavaLetter() { // value of variable "expected" is considered false. if (i != JAPANESE_ERA_CODEPOINT && !(i >= CS_SIGNS_CODEPOINT_START && i <= CS_SIGNS_CODEPOINT_END) && - !(i >= GB18030_2022_CODEPOINT_START && i <= GB18030_2022_CODEPOINT_END)) { + !(i >= CJK_GB18030_LEVEL1_START && i <= CJK_GB18030_LEVEL1_END)) { byte type = (byte) Character.getType(ch); expected = Character.isLetter(ch) || type == Character.LETTER_NUMBER @@ -292,7 +301,7 @@ public static void testIsJavaLetterOrDigit() { // value of variable "expected" is considered false. 
if (i != JAPANESE_ERA_CODEPOINT && !(i >= CS_SIGNS_CODEPOINT_START && i <= CS_SIGNS_CODEPOINT_END) && - !(i >= GB18030_2022_CODEPOINT_START && i <= GB18030_2022_CODEPOINT_END)) { + !(i >= CJK_GB18030_LEVEL1_START && i <= CJK_GB18030_LEVEL1_END)) { byte type = (byte) Character.getType(ch); expected = Character.isLetter(ch) || type == Character.CURRENCY_SYMBOL