Skip to content

Commit 4365206

Browse files
committed
Unicode 15: new block Arabic_Extended_C (10EC0..10EFF) with default Bidi_Class=AL
1 parent ee84402 commit 4365206

File tree

4 files changed

+16
-10
lines changed

4 files changed

+16
-10
lines changed

unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# DerivedBidiClass-15.0.0.txt
2-
# Date: 2021-12-09, 17:39:32 GMT
2+
# Date: 2021-12-09, 22:21:24 GMT
33
# © 2021 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use, see https://www.unicode.org/terms_of_use.html
@@ -15,14 +15,14 @@
1515
#
1616
# The unassigned code points that default to AL are in the ranges:
1717
# [\u0600-\u07BF \u0860-\u08FF \uFB50-\uFDCF \uFDF0-\uFDFF \uFE70-\uFEFF
18-
# \U00010D00-\U00010D3F \U00010F30-\U00010F6F
18+
# \U00010D00-\U00010D3F \U00010EC0-\U00010EFF \U00010F30-\U00010F6F
1919
# \U0001EC70-\U0001ECBF \U0001ED00-\U0001ED4F \U0001EE00-\U0001EEFF]
2020
#
2121
# This includes code points in the Arabic, Syriac, and Thaana blocks, among others.
2222
#
2323
# The unassigned code points that default to R are in the ranges:
2424
# [\u0590-\u05FF \u07C0-\u085F \uFB1D-\uFB4F
25-
# \U00010800-\U00010CFF \U00010D40-\U00010F2F \U00010F70-\U00010FFF
25+
# \U00010800-\U00010CFF \U00010D40-\U00010EBF \U00010F00-\U00010F2F \U00010F70-\U00010FFF
2626
# \U0001E800-\U0001EC6F \U0001ECC0-\U0001ECFF \U0001ED50-\U0001EDFF \U0001EF00-\U0001EFFF]
2727
#
2828
# This includes code points in the Hebrew, NKo, and Phoenician blocks, among others.
@@ -1244,7 +1244,7 @@ FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE AL
12441244
10EAD ; R # Pd YEZIDI HYPHENATION MARK
12451245
10EAE..10EAF ; R # Cn [2] <reserved-10EAE>..<reserved-10EAF>
12461246
10EB0..10EB1 ; R # Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
1247-
10EB2..10EFC ; R # Cn [75] <reserved-10EB2>..<reserved-10EFC>
1247+
10EB2..10EBF ; R # Cn [14] <reserved-10EB2>..<reserved-10EBF>
12481248
10F00..10F1C ; R # Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
12491249
10F1D..10F26 ; R # No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF
12501250
10F27 ; R # Lo OLD SOGDIAN LIGATURE AYIN-DALETH
@@ -1272,7 +1272,7 @@ FB46..FB4F ; R # Lo [10] HEBREW LETTER TSADI WITH DAGESH..HEBREW LIGATURE AL
12721272
1ED50..1EDFF ; R # Cn [176] <reserved-1ED50>..<reserved-1EDFF>
12731273
1EF00..1EFFF ; R # Cn [256] <reserved-1EF00>..<reserved-1EFFF>
12741274

1275-
# Total code points: 3708
1275+
# Total code points: 3647
12761276

12771277
# ================================================
12781278

@@ -2388,6 +2388,7 @@ FEFD..FEFE ; AL # Cn [2] <reserved-FEFD>..<reserved-FEFE>
23882388
10D00..10D23 ; AL # Lo [36] HANIFI ROHINGYA LETTER A..HANIFI ROHINGYA MARK NA KHONNA
23892389
10D28..10D2F ; AL # Cn [8] <reserved-10D28>..<reserved-10D2F>
23902390
10D3A..10D3F ; AL # Cn [6] <reserved-10D3A>..<reserved-10D3F>
2391+
10EC0..10EFC ; AL # Cn [61] <reserved-10EC0>..<reserved-10EFC>
23912392
10F30..10F45 ; AL # Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
23922393
10F51..10F54 ; AL # No [4] SOGDIAN NUMBER ONE..SOGDIAN NUMBER ONE HUNDRED
23932394
10F55..10F59 ; AL # Po [5] SOGDIAN PUNCTUATION TWO VERTICAL BARS..SOGDIAN PUNCTUATION HALF CIRCLE WITH DOT
@@ -2472,7 +2473,7 @@ FEFD..FEFE ; AL # Cn [2] <reserved-FEFD>..<reserved-FEFE>
24722473
1EEBC..1EEEF ; AL # Cn [52] <reserved-1EEBC>..<reserved-1EEEF>
24732474
1EEF2..1EEFF ; AL # Cn [14] <reserved-1EEF2>..<reserved-1EEFF>
24742475

2475-
# Total code points: 1708
2476+
# Total code points: 1769
24762477

24772478
# ================================================
24782479

unicodetools/src/main/java/org/unicode/text/UCD/UCD.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,11 @@ public byte getBidiClass(int codePoint) {
491491
// New block 0870..089F "Arabic Extended-B" defaults to bc=AL.
492492
blockData.keySet("Arabic_Extended_B", BIDI_AL_SET);
493493
}
494+
if (versionInfo.getMajor() >= 15) {
495+
// Unicode 15:
496+
// New block 10EC0..10EFF "Arabic Extended-C" defaults to bc=AL.
497+
blockData.keySet("Arabic_Extended_C", BIDI_AL_SET);
498+
}
494499
BIDI_R_Delta.removeAll(BIDI_R_SET).removeAll(BIDI_AL_SET);
495500
if (SHOW_LOADING) {
496501
System.out.println("R: Adding " + BIDI_R_Delta);

unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -238,14 +238,14 @@ Property: Bidi_Class
238238
#
239239
# The unassigned code points that default to AL are in the ranges:
240240
# [\u0600-\u07BF \u0860-\u08FF \uFB50-\uFDCF \uFDF0-\uFDFF \uFE70-\uFEFF
241-
# \U00010D00-\U00010D3F \U00010F30-\U00010F6F
241+
# \U00010D00-\U00010D3F \U00010EC0-\U00010EFF \U00010F30-\U00010F6F
242242
# \U0001EC70-\U0001ECBF \U0001ED00-\U0001ED4F \U0001EE00-\U0001EEFF]
243243
#
244244
# This includes code points in the Arabic, Syriac, and Thaana blocks, among others.
245245
#
246246
# The unassigned code points that default to R are in the ranges:
247247
# [\u0590-\u05FF \u07C0-\u085F \uFB1D-\uFB4F
248-
# \U00010800-\U00010CFF \U00010D40-\U00010F2F \U00010F70-\U00010FFF
248+
# \U00010800-\U00010CFF \U00010D40-\U00010EBF \U00010F00-\U00010F2F \U00010F70-\U00010FFF
249249
# \U0001E800-\U0001EC6F \U0001ECC0-\U0001ECFF \U0001ED50-\U0001EDFF \U0001EF00-\U0001EFFF]
250250
#
251251
# This includes code points in the Hebrew, NKo, and Phoenician blocks, among others.

unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -205,8 +205,8 @@ Let $BMExclusions =[≠ ∤ ∦ ≢ \u2ADC]
205205
In [\p{dt=canonical}-$BMExclusions] Bidi_M * \P{bc=NSM} * dm = Bidi_M * \P{bc=NSM}
206206

207207
# Additional BIDI invariant constants
208-
Let $AL_blocks = [\u0600-\u07BF \u0860-\u08FF \uFB50-\uFDCF \uFDF0-\uFDFF \uFE70-\uFEFF \U00010D00-\U00010D3F \U00010F30-\U00010F6F \U0001EC70-\U0001ECBF \U0001ED00-\U0001ED4F \U0001EE00-\U0001EEFF]
209-
Let $R_blocks = [\u0590-\u05FF \u07C0-\u085F \uFB1D-\uFB4F \U00010800-\U00010CFF \U00010D40-\U00010F2F \U00010F70-\U00010FFF \U0001E800-\U0001EC6F \U0001ECC0-\U0001ECFF \U0001ED50-\U0001EDFF \U0001EF00-\U0001EFFF]
208+
Let $AL_blocks = [\u0600-\u07BF \u0860-\u08FF \uFB50-\uFDCF \uFDF0-\uFDFF \uFE70-\uFEFF \U00010D00-\U00010D3F \U00010EC0-\U00010EFF \U00010F30-\U00010F6F \U0001EC70-\U0001ECBF \U0001ED00-\U0001ED4F \U0001EE00-\U0001EEFF]
209+
Let $R_blocks = [\u0590-\u05FF \u07C0-\u085F \uFB1D-\uFB4F \U00010800-\U00010CFF \U00010D40-\U00010EBF \U00010F00-\U00010F2F \U00010F70-\U00010FFF \U0001E800-\U0001EC6F \U0001ECC0-\U0001ECFF \U0001ED50-\U0001EDFF \U0001EF00-\U0001EFFF]
210210
# 6.1.0 updated blocks
211211
# 10.0 updated blocks (Syriac Supplement is bc=AL)
212212
# 11.0 updated blocks (Hanifi Rohingya, Sogdian, Indic Siyaq Numbers are bc=AL); Old Sogdian is bc=R

0 commit comments

Comments
 (0)