diff --git a/Lib/fontTools/unicodedata/Blocks.py b/Lib/fontTools/unicodedata/Blocks.py index 0755074b74..2b30be67b7 100644 --- a/Lib/fontTools/unicodedata/Blocks.py +++ b/Lib/fontTools/unicodedata/Blocks.py @@ -4,9 +4,9 @@ # Source: https://unicode.org/Public/UNIDATA/Blocks.txt # License: http://unicode.org/copyright.html#License # -# Blocks-13.0.0.txt -# Date: 2019-07-10, 19:06:00 GMT [KW] -# © 2019 Unicode®, Inc. +# Blocks-14.0.0.txt +# Date: 2021-01-22, 23:29:00 GMT [KW] +# © 2021 Unicode®, Inc. # For terms of use, see http://www.unicode.org/terms_of_use.html # # Unicode Character Database @@ -37,7 +37,7 @@ 0x0800, # .. 0x083F ; Samaritan 0x0840, # .. 0x085F ; Mandaic 0x0860, # .. 0x086F ; Syriac Supplement - 0x0870, # .. 0x089F ; No_Block + 0x0870, # .. 0x089F ; Arabic Extended-B 0x08A0, # .. 0x08FF ; Arabic Extended-A 0x0900, # .. 0x097F ; Devanagari 0x0980, # .. 0x09FF ; Bengali @@ -204,9 +204,11 @@ 0x104B0, # .. 0x104FF ; Osage 0x10500, # .. 0x1052F ; Elbasan 0x10530, # .. 0x1056F ; Caucasian Albanian - 0x10570, # .. 0x105FF ; No_Block + 0x10570, # .. 0x105BF ; Vithkuqi + 0x105C0, # .. 0x105FF ; No_Block 0x10600, # .. 0x1077F ; Linear A - 0x10780, # .. 0x107FF ; No_Block + 0x10780, # .. 0x107BF ; Latin Extended-F + 0x107C0, # .. 0x107FF ; No_Block 0x10800, # .. 0x1083F ; Cypriot Syllabary 0x10840, # .. 0x1085F ; Imperial Aramaic 0x10860, # .. 0x1087F ; Palmyrene @@ -238,7 +240,7 @@ 0x10EC0, # .. 0x10EFF ; No_Block 0x10F00, # .. 0x10F2F ; Old Sogdian 0x10F30, # .. 0x10F6F ; Sogdian - 0x10F70, # .. 0x10FAF ; No_Block + 0x10F70, # .. 0x10FAF ; Old Uyghur 0x10FB0, # .. 0x10FDF ; Chorasmian 0x10FE0, # .. 0x10FFF ; Elymaic 0x11000, # .. 0x1107F ; Brahmi @@ -262,8 +264,8 @@ 0x11660, # .. 0x1167F ; Mongolian Supplement 0x11680, # .. 0x116CF ; Takri 0x116D0, # .. 0x116FF ; No_Block - 0x11700, # .. 0x1173F ; Ahom - 0x11740, # .. 0x117FF ; No_Block + 0x11700, # .. 0x1174F ; Ahom + 0x11750, # .. 0x117FF ; No_Block 0x11800, # .. 0x1184F ; Dogra 0x11850, # .. 0x1189F ; No_Block 0x118A0, # .. 0x118FF ; Warang Citi @@ -272,7 +274,7 @@ 0x119A0, # .. 0x119FF ; Nandinagari 0x11A00, # .. 0x11A4F ; Zanabazar Square 0x11A50, # .. 0x11AAF ; Soyombo - 0x11AB0, # .. 0x11ABF ; No_Block + 0x11AB0, # .. 0x11ABF ; Unified Canadian Aboriginal Syllabics Extended-A 0x11AC0, # .. 0x11AFF ; Pau Cin Hau 0x11B00, # .. 0x11BFF ; No_Block 0x11C00, # .. 0x11C6F ; Bhaiksuki @@ -288,7 +290,8 @@ 0x12000, # .. 0x123FF ; Cuneiform 0x12400, # .. 0x1247F ; Cuneiform Numbers and Punctuation 0x12480, # .. 0x1254F ; Early Dynastic Cuneiform - 0x12550, # .. 0x12FFF ; No_Block + 0x12550, # .. 0x12F8F ; No_Block + 0x12F90, # .. 0x12FFF ; Cypro-Minoan 0x13000, # .. 0x1342F ; Egyptian Hieroglyphs 0x13430, # .. 0x1343F ; Egyptian Hieroglyph Format Controls 0x13440, # .. 0x143FF ; No_Block @@ -296,7 +299,7 @@ 0x14680, # .. 0x167FF ; No_Block 0x16800, # .. 0x16A3F ; Bamum Supplement 0x16A40, # .. 0x16A6F ; Mro - 0x16A70, # .. 0x16ACF ; No_Block + 0x16A70, # .. 0x16ACF ; Tangsa 0x16AD0, # .. 0x16AFF ; Bassa Vah 0x16B00, # .. 0x16B8F ; Pahawh Hmong 0x16B90, # .. 0x16E3F ; No_Block @@ -308,8 +311,9 @@ 0x17000, # .. 0x187FF ; Tangut 0x18800, # .. 0x18AFF ; Tangut Components 0x18B00, # .. 0x18CFF ; Khitan Small Script - 0x18D00, # .. 0x18D8F ; Tangut Supplement - 0x18D90, # .. 0x1AFFF ; No_Block + 0x18D00, # .. 0x18D7F ; Tangut Supplement + 0x18D80, # .. 0x1AFEF ; No_Block + 0x1AFF0, # .. 0x1AFFF ; Kana Extended-B 0x1B000, # .. 0x1B0FF ; Kana Supplement 0x1B100, # .. 0x1B12F ; Kana Extended-A 0x1B130, # .. 0x1B16F ; Small Kana Extension @@ -317,7 +321,9 @@ 0x1B300, # .. 0x1BBFF ; No_Block 0x1BC00, # .. 0x1BC9F ; Duployan 0x1BCA0, # .. 0x1BCAF ; Shorthand Format Controls - 0x1BCB0, # .. 0x1CFFF ; No_Block + 0x1BCB0, # .. 0x1CEFF ; No_Block + 0x1CF00, # .. 0x1CFCF ; Znamenny Musical Notation + 0x1CFD0, # .. 0x1CFFF ; No_Block 0x1D000, # .. 0x1D0FF ; Byzantine Musical Symbols 0x1D100, # .. 0x1D1FF ; Musical Symbols 0x1D200, # .. 0x1D24F ; Ancient Greek Musical Notation @@ -328,13 +334,16 @@ 0x1D380, # .. 0x1D3FF ; No_Block 0x1D400, # .. 0x1D7FF ; Mathematical Alphanumeric Symbols 0x1D800, # .. 0x1DAAF ; Sutton SignWriting - 0x1DAB0, # .. 0x1DFFF ; No_Block + 0x1DAB0, # .. 0x1DEFF ; No_Block + 0x1DF00, # .. 0x1DFFF ; Latin Extended-G 0x1E000, # .. 0x1E02F ; Glagolitic Supplement 0x1E030, # .. 0x1E0FF ; No_Block 0x1E100, # .. 0x1E14F ; Nyiakeng Puachue Hmong - 0x1E150, # .. 0x1E2BF ; No_Block + 0x1E150, # .. 0x1E28F ; No_Block + 0x1E290, # .. 0x1E2BF ; Toto 0x1E2C0, # .. 0x1E2FF ; Wancho - 0x1E300, # .. 0x1E7FF ; No_Block + 0x1E300, # .. 0x1E7DF ; No_Block + 0x1E7E0, # .. 0x1E7FF ; Ethiopic Extended-B 0x1E800, # .. 0x1E8DF ; Mende Kikakui 0x1E8E0, # .. 0x1E8FF ; No_Block 0x1E900, # .. 0x1E95F ; Adlam @@ -382,366 +391,375 @@ ] VALUES = [ - 'Basic Latin', # 0000..007F - 'Latin-1 Supplement', # 0080..00FF - 'Latin Extended-A', # 0100..017F - 'Latin Extended-B', # 0180..024F - 'IPA Extensions', # 0250..02AF - 'Spacing Modifier Letters', # 02B0..02FF - 'Combining Diacritical Marks', # 0300..036F - 'Greek and Coptic', # 0370..03FF - 'Cyrillic', # 0400..04FF - 'Cyrillic Supplement', # 0500..052F - 'Armenian', # 0530..058F - 'Hebrew', # 0590..05FF - 'Arabic', # 0600..06FF - 'Syriac', # 0700..074F - 'Arabic Supplement', # 0750..077F - 'Thaana', # 0780..07BF - 'NKo', # 07C0..07FF - 'Samaritan', # 0800..083F - 'Mandaic', # 0840..085F - 'Syriac Supplement', # 0860..086F - 'No_Block', # 0870..089F - 'Arabic Extended-A', # 08A0..08FF - 'Devanagari', # 0900..097F - 'Bengali', # 0980..09FF - 'Gurmukhi', # 0A00..0A7F - 'Gujarati', # 0A80..0AFF - 'Oriya', # 0B00..0B7F - 'Tamil', # 0B80..0BFF - 'Telugu', # 0C00..0C7F - 'Kannada', # 0C80..0CFF - 'Malayalam', # 0D00..0D7F - 'Sinhala', # 0D80..0DFF - 'Thai', # 0E00..0E7F - 'Lao', # 0E80..0EFF - 'Tibetan', # 0F00..0FFF - 'Myanmar', # 1000..109F - 'Georgian', # 10A0..10FF - 'Hangul Jamo', # 1100..11FF - 'Ethiopic', # 1200..137F - 'Ethiopic Supplement', # 1380..139F - 'Cherokee', # 13A0..13FF - 'Unified Canadian Aboriginal Syllabics', # 1400..167F - 'Ogham', # 1680..169F - 'Runic', # 16A0..16FF - 'Tagalog', # 1700..171F - 'Hanunoo', # 1720..173F - 'Buhid', # 1740..175F - 'Tagbanwa', # 1760..177F - 'Khmer', # 1780..17FF - 'Mongolian', # 1800..18AF - 'Unified Canadian Aboriginal Syllabics Extended', # 18B0..18FF - 'Limbu', # 1900..194F - 'Tai Le', # 1950..197F - 'New Tai Lue', # 1980..19DF - 'Khmer Symbols', # 19E0..19FF - 'Buginese', # 1A00..1A1F - 'Tai Tham', # 1A20..1AAF - 'Combining Diacritical Marks Extended', # 1AB0..1AFF - 'Balinese', # 1B00..1B7F - 'Sundanese', # 1B80..1BBF - 'Batak', # 1BC0..1BFF - 'Lepcha', # 1C00..1C4F - 'Ol Chiki', # 1C50..1C7F - 'Cyrillic Extended-C', # 1C80..1C8F - 'Georgian Extended', # 1C90..1CBF - 'Sundanese Supplement', # 1CC0..1CCF - 'Vedic Extensions', # 1CD0..1CFF - 'Phonetic Extensions', # 1D00..1D7F - 'Phonetic Extensions Supplement', # 1D80..1DBF - 'Combining Diacritical Marks Supplement', # 1DC0..1DFF - 'Latin Extended Additional', # 1E00..1EFF - 'Greek Extended', # 1F00..1FFF - 'General Punctuation', # 2000..206F - 'Superscripts and Subscripts', # 2070..209F - 'Currency Symbols', # 20A0..20CF - 'Combining Diacritical Marks for Symbols', # 20D0..20FF - 'Letterlike Symbols', # 2100..214F - 'Number Forms', # 2150..218F - 'Arrows', # 2190..21FF - 'Mathematical Operators', # 2200..22FF - 'Miscellaneous Technical', # 2300..23FF - 'Control Pictures', # 2400..243F - 'Optical Character Recognition', # 2440..245F - 'Enclosed Alphanumerics', # 2460..24FF - 'Box Drawing', # 2500..257F - 'Block Elements', # 2580..259F - 'Geometric Shapes', # 25A0..25FF - 'Miscellaneous Symbols', # 2600..26FF - 'Dingbats', # 2700..27BF - 'Miscellaneous Mathematical Symbols-A', # 27C0..27EF - 'Supplemental Arrows-A', # 27F0..27FF - 'Braille Patterns', # 2800..28FF - 'Supplemental Arrows-B', # 2900..297F - 'Miscellaneous Mathematical Symbols-B', # 2980..29FF - 'Supplemental Mathematical Operators', # 2A00..2AFF - 'Miscellaneous Symbols and Arrows', # 2B00..2BFF - 'Glagolitic', # 2C00..2C5F - 'Latin Extended-C', # 2C60..2C7F - 'Coptic', # 2C80..2CFF - 'Georgian Supplement', # 2D00..2D2F - 'Tifinagh', # 2D30..2D7F - 'Ethiopic Extended', # 2D80..2DDF - 'Cyrillic Extended-A', # 2DE0..2DFF - 'Supplemental Punctuation', # 2E00..2E7F - 'CJK Radicals Supplement', # 2E80..2EFF - 'Kangxi Radicals', # 2F00..2FDF - 'No_Block', # 2FE0..2FEF - 'Ideographic Description Characters', # 2FF0..2FFF - 'CJK Symbols and Punctuation', # 3000..303F - 'Hiragana', # 3040..309F - 'Katakana', # 30A0..30FF - 'Bopomofo', # 3100..312F - 'Hangul Compatibility Jamo', # 3130..318F - 'Kanbun', # 3190..319F - 'Bopomofo Extended', # 31A0..31BF - 'CJK Strokes', # 31C0..31EF - 'Katakana Phonetic Extensions', # 31F0..31FF - 'Enclosed CJK Letters and Months', # 3200..32FF - 'CJK Compatibility', # 3300..33FF - 'CJK Unified Ideographs Extension A', # 3400..4DBF - 'Yijing Hexagram Symbols', # 4DC0..4DFF - 'CJK Unified Ideographs', # 4E00..9FFF - 'Yi Syllables', # A000..A48F - 'Yi Radicals', # A490..A4CF - 'Lisu', # A4D0..A4FF - 'Vai', # A500..A63F - 'Cyrillic Extended-B', # A640..A69F - 'Bamum', # A6A0..A6FF - 'Modifier Tone Letters', # A700..A71F - 'Latin Extended-D', # A720..A7FF - 'Syloti Nagri', # A800..A82F - 'Common Indic Number Forms', # A830..A83F - 'Phags-pa', # A840..A87F - 'Saurashtra', # A880..A8DF - 'Devanagari Extended', # A8E0..A8FF - 'Kayah Li', # A900..A92F - 'Rejang', # A930..A95F - 'Hangul Jamo Extended-A', # A960..A97F - 'Javanese', # A980..A9DF - 'Myanmar Extended-B', # A9E0..A9FF - 'Cham', # AA00..AA5F - 'Myanmar Extended-A', # AA60..AA7F - 'Tai Viet', # AA80..AADF - 'Meetei Mayek Extensions', # AAE0..AAFF - 'Ethiopic Extended-A', # AB00..AB2F - 'Latin Extended-E', # AB30..AB6F - 'Cherokee Supplement', # AB70..ABBF - 'Meetei Mayek', # ABC0..ABFF - 'Hangul Syllables', # AC00..D7AF - 'Hangul Jamo Extended-B', # D7B0..D7FF - 'High Surrogates', # D800..DB7F - 'High Private Use Surrogates', # DB80..DBFF - 'Low Surrogates', # DC00..DFFF - 'Private Use Area', # E000..F8FF - 'CJK Compatibility Ideographs', # F900..FAFF - 'Alphabetic Presentation Forms', # FB00..FB4F - 'Arabic Presentation Forms-A', # FB50..FDFF - 'Variation Selectors', # FE00..FE0F - 'Vertical Forms', # FE10..FE1F - 'Combining Half Marks', # FE20..FE2F - 'CJK Compatibility Forms', # FE30..FE4F - 'Small Form Variants', # FE50..FE6F - 'Arabic Presentation Forms-B', # FE70..FEFF - 'Halfwidth and Fullwidth Forms', # FF00..FFEF - 'Specials', # FFF0..FFFF - 'Linear B Syllabary', # 10000..1007F - 'Linear B Ideograms', # 10080..100FF - 'Aegean Numbers', # 10100..1013F - 'Ancient Greek Numbers', # 10140..1018F - 'Ancient Symbols', # 10190..101CF - 'Phaistos Disc', # 101D0..101FF - 'No_Block', # 10200..1027F - 'Lycian', # 10280..1029F - 'Carian', # 102A0..102DF - 'Coptic Epact Numbers', # 102E0..102FF - 'Old Italic', # 10300..1032F - 'Gothic', # 10330..1034F - 'Old Permic', # 10350..1037F - 'Ugaritic', # 10380..1039F - 'Old Persian', # 103A0..103DF - 'No_Block', # 103E0..103FF - 'Deseret', # 10400..1044F - 'Shavian', # 10450..1047F - 'Osmanya', # 10480..104AF - 'Osage', # 104B0..104FF - 'Elbasan', # 10500..1052F - 'Caucasian Albanian', # 10530..1056F - 'No_Block', # 10570..105FF - 'Linear A', # 10600..1077F - 'No_Block', # 10780..107FF - 'Cypriot Syllabary', # 10800..1083F - 'Imperial Aramaic', # 10840..1085F - 'Palmyrene', # 10860..1087F - 'Nabataean', # 10880..108AF - 'No_Block', # 108B0..108DF - 'Hatran', # 108E0..108FF - 'Phoenician', # 10900..1091F - 'Lydian', # 10920..1093F - 'No_Block', # 10940..1097F - 'Meroitic Hieroglyphs', # 10980..1099F - 'Meroitic Cursive', # 109A0..109FF - 'Kharoshthi', # 10A00..10A5F - 'Old South Arabian', # 10A60..10A7F - 'Old North Arabian', # 10A80..10A9F - 'No_Block', # 10AA0..10ABF - 'Manichaean', # 10AC0..10AFF - 'Avestan', # 10B00..10B3F - 'Inscriptional Parthian', # 10B40..10B5F - 'Inscriptional Pahlavi', # 10B60..10B7F - 'Psalter Pahlavi', # 10B80..10BAF - 'No_Block', # 10BB0..10BFF - 'Old Turkic', # 10C00..10C4F - 'No_Block', # 10C50..10C7F - 'Old Hungarian', # 10C80..10CFF - 'Hanifi Rohingya', # 10D00..10D3F - 'No_Block', # 10D40..10E5F - 'Rumi Numeral Symbols', # 10E60..10E7F - 'Yezidi', # 10E80..10EBF - 'No_Block', # 10EC0..10EFF - 'Old Sogdian', # 10F00..10F2F - 'Sogdian', # 10F30..10F6F - 'No_Block', # 10F70..10FAF - 'Chorasmian', # 10FB0..10FDF - 'Elymaic', # 10FE0..10FFF - 'Brahmi', # 11000..1107F - 'Kaithi', # 11080..110CF - 'Sora Sompeng', # 110D0..110FF - 'Chakma', # 11100..1114F - 'Mahajani', # 11150..1117F - 'Sharada', # 11180..111DF - 'Sinhala Archaic Numbers', # 111E0..111FF - 'Khojki', # 11200..1124F - 'No_Block', # 11250..1127F - 'Multani', # 11280..112AF - 'Khudawadi', # 112B0..112FF - 'Grantha', # 11300..1137F - 'No_Block', # 11380..113FF - 'Newa', # 11400..1147F - 'Tirhuta', # 11480..114DF - 'No_Block', # 114E0..1157F - 'Siddham', # 11580..115FF - 'Modi', # 11600..1165F - 'Mongolian Supplement', # 11660..1167F - 'Takri', # 11680..116CF - 'No_Block', # 116D0..116FF - 'Ahom', # 11700..1173F - 'No_Block', # 11740..117FF - 'Dogra', # 11800..1184F - 'No_Block', # 11850..1189F - 'Warang Citi', # 118A0..118FF - 'Dives Akuru', # 11900..1195F - 'No_Block', # 11960..1199F - 'Nandinagari', # 119A0..119FF - 'Zanabazar Square', # 11A00..11A4F - 'Soyombo', # 11A50..11AAF - 'No_Block', # 11AB0..11ABF - 'Pau Cin Hau', # 11AC0..11AFF - 'No_Block', # 11B00..11BFF - 'Bhaiksuki', # 11C00..11C6F - 'Marchen', # 11C70..11CBF - 'No_Block', # 11CC0..11CFF - 'Masaram Gondi', # 11D00..11D5F - 'Gunjala Gondi', # 11D60..11DAF - 'No_Block', # 11DB0..11EDF - 'Makasar', # 11EE0..11EFF - 'No_Block', # 11F00..11FAF - 'Lisu Supplement', # 11FB0..11FBF - 'Tamil Supplement', # 11FC0..11FFF - 'Cuneiform', # 12000..123FF - 'Cuneiform Numbers and Punctuation', # 12400..1247F - 'Early Dynastic Cuneiform', # 12480..1254F - 'No_Block', # 12550..12FFF - 'Egyptian Hieroglyphs', # 13000..1342F - 'Egyptian Hieroglyph Format Controls', # 13430..1343F - 'No_Block', # 13440..143FF - 'Anatolian Hieroglyphs', # 14400..1467F - 'No_Block', # 14680..167FF - 'Bamum Supplement', # 16800..16A3F - 'Mro', # 16A40..16A6F - 'No_Block', # 16A70..16ACF - 'Bassa Vah', # 16AD0..16AFF - 'Pahawh Hmong', # 16B00..16B8F - 'No_Block', # 16B90..16E3F - 'Medefaidrin', # 16E40..16E9F - 'No_Block', # 16EA0..16EFF - 'Miao', # 16F00..16F9F - 'No_Block', # 16FA0..16FDF - 'Ideographic Symbols and Punctuation', # 16FE0..16FFF - 'Tangut', # 17000..187FF - 'Tangut Components', # 18800..18AFF - 'Khitan Small Script', # 18B00..18CFF - 'Tangut Supplement', # 18D00..18D8F - 'No_Block', # 18D90..1AFFF - 'Kana Supplement', # 1B000..1B0FF - 'Kana Extended-A', # 1B100..1B12F - 'Small Kana Extension', # 1B130..1B16F - 'Nushu', # 1B170..1B2FF - 'No_Block', # 1B300..1BBFF - 'Duployan', # 1BC00..1BC9F - 'Shorthand Format Controls', # 1BCA0..1BCAF - 'No_Block', # 1BCB0..1CFFF - 'Byzantine Musical Symbols', # 1D000..1D0FF - 'Musical Symbols', # 1D100..1D1FF - 'Ancient Greek Musical Notation', # 1D200..1D24F - 'No_Block', # 1D250..1D2DF - 'Mayan Numerals', # 1D2E0..1D2FF - 'Tai Xuan Jing Symbols', # 1D300..1D35F - 'Counting Rod Numerals', # 1D360..1D37F - 'No_Block', # 1D380..1D3FF - 'Mathematical Alphanumeric Symbols', # 1D400..1D7FF - 'Sutton SignWriting', # 1D800..1DAAF - 'No_Block', # 1DAB0..1DFFF - 'Glagolitic Supplement', # 1E000..1E02F - 'No_Block', # 1E030..1E0FF - 'Nyiakeng Puachue Hmong', # 1E100..1E14F - 'No_Block', # 1E150..1E2BF - 'Wancho', # 1E2C0..1E2FF - 'No_Block', # 1E300..1E7FF - 'Mende Kikakui', # 1E800..1E8DF - 'No_Block', # 1E8E0..1E8FF - 'Adlam', # 1E900..1E95F - 'No_Block', # 1E960..1EC6F - 'Indic Siyaq Numbers', # 1EC70..1ECBF - 'No_Block', # 1ECC0..1ECFF - 'Ottoman Siyaq Numbers', # 1ED00..1ED4F - 'No_Block', # 1ED50..1EDFF - 'Arabic Mathematical Alphabetic Symbols', # 1EE00..1EEFF - 'No_Block', # 1EF00..1EFFF - 'Mahjong Tiles', # 1F000..1F02F - 'Domino Tiles', # 1F030..1F09F - 'Playing Cards', # 1F0A0..1F0FF - 'Enclosed Alphanumeric Supplement', # 1F100..1F1FF - 'Enclosed Ideographic Supplement', # 1F200..1F2FF - 'Miscellaneous Symbols and Pictographs', # 1F300..1F5FF - 'Emoticons', # 1F600..1F64F - 'Ornamental Dingbats', # 1F650..1F67F - 'Transport and Map Symbols', # 1F680..1F6FF - 'Alchemical Symbols', # 1F700..1F77F - 'Geometric Shapes Extended', # 1F780..1F7FF - 'Supplemental Arrows-C', # 1F800..1F8FF - 'Supplemental Symbols and Pictographs', # 1F900..1F9FF - 'Chess Symbols', # 1FA00..1FA6F - 'Symbols and Pictographs Extended-A', # 1FA70..1FAFF - 'Symbols for Legacy Computing', # 1FB00..1FBFF - 'No_Block', # 1FC00..1FFFF - 'CJK Unified Ideographs Extension B', # 20000..2A6DF - 'No_Block', # 2A6E0..2A6FF - 'CJK Unified Ideographs Extension C', # 2A700..2B73F - 'CJK Unified Ideographs Extension D', # 2B740..2B81F - 'CJK Unified Ideographs Extension E', # 2B820..2CEAF - 'CJK Unified Ideographs Extension F', # 2CEB0..2EBEF - 'No_Block', # 2EBF0..2F7FF - 'CJK Compatibility Ideographs Supplement', # 2F800..2FA1F - 'No_Block', # 2FA20..2FFFF - 'CJK Unified Ideographs Extension G', # 30000..3134F - 'No_Block', # 31350..DFFFF - 'Tags', # E0000..E007F - 'No_Block', # E0080..E00FF - 'Variation Selectors Supplement', # E0100..E01EF - 'No_Block', # E01F0..EFFFF - 'Supplementary Private Use Area-A', # F0000..FFFFF - 'Supplementary Private Use Area-B', # 100000..10FFFF + 'Basic Latin', # 0000..007F + 'Latin-1 Supplement', # 0080..00FF + 'Latin Extended-A', # 0100..017F + 'Latin Extended-B', # 0180..024F + 'IPA Extensions', # 0250..02AF + 'Spacing Modifier Letters', # 02B0..02FF + 'Combining Diacritical Marks', # 0300..036F + 'Greek and Coptic', # 0370..03FF + 'Cyrillic', # 0400..04FF + 'Cyrillic Supplement', # 0500..052F + 'Armenian', # 0530..058F + 'Hebrew', # 0590..05FF + 'Arabic', # 0600..06FF + 'Syriac', # 0700..074F + 'Arabic Supplement', # 0750..077F + 'Thaana', # 0780..07BF + 'NKo', # 07C0..07FF + 'Samaritan', # 0800..083F + 'Mandaic', # 0840..085F + 'Syriac Supplement', # 0860..086F + 'Arabic Extended-B', # 0870..089F + 'Arabic Extended-A', # 08A0..08FF + 'Devanagari', # 0900..097F + 'Bengali', # 0980..09FF + 'Gurmukhi', # 0A00..0A7F + 'Gujarati', # 0A80..0AFF + 'Oriya', # 0B00..0B7F + 'Tamil', # 0B80..0BFF + 'Telugu', # 0C00..0C7F + 'Kannada', # 0C80..0CFF + 'Malayalam', # 0D00..0D7F + 'Sinhala', # 0D80..0DFF + 'Thai', # 0E00..0E7F + 'Lao', # 0E80..0EFF + 'Tibetan', # 0F00..0FFF + 'Myanmar', # 1000..109F + 'Georgian', # 10A0..10FF + 'Hangul Jamo', # 1100..11FF + 'Ethiopic', # 1200..137F + 'Ethiopic Supplement', # 1380..139F + 'Cherokee', # 13A0..13FF + 'Unified Canadian Aboriginal Syllabics', # 1400..167F + 'Ogham', # 1680..169F + 'Runic', # 16A0..16FF + 'Tagalog', # 1700..171F + 'Hanunoo', # 1720..173F + 'Buhid', # 1740..175F + 'Tagbanwa', # 1760..177F + 'Khmer', # 1780..17FF + 'Mongolian', # 1800..18AF + 'Unified Canadian Aboriginal Syllabics Extended', # 18B0..18FF + 'Limbu', # 1900..194F + 'Tai Le', # 1950..197F + 'New Tai Lue', # 1980..19DF + 'Khmer Symbols', # 19E0..19FF + 'Buginese', # 1A00..1A1F + 'Tai Tham', # 1A20..1AAF + 'Combining Diacritical Marks Extended', # 1AB0..1AFF + 'Balinese', # 1B00..1B7F + 'Sundanese', # 1B80..1BBF + 'Batak', # 1BC0..1BFF + 'Lepcha', # 1C00..1C4F + 'Ol Chiki', # 1C50..1C7F + 'Cyrillic Extended-C', # 1C80..1C8F + 'Georgian Extended', # 1C90..1CBF + 'Sundanese Supplement', # 1CC0..1CCF + 'Vedic Extensions', # 1CD0..1CFF + 'Phonetic Extensions', # 1D00..1D7F + 'Phonetic Extensions Supplement', # 1D80..1DBF + 'Combining Diacritical Marks Supplement', # 1DC0..1DFF + 'Latin Extended Additional', # 1E00..1EFF + 'Greek Extended', # 1F00..1FFF + 'General Punctuation', # 2000..206F + 'Superscripts and Subscripts', # 2070..209F + 'Currency Symbols', # 20A0..20CF + 'Combining Diacritical Marks for Symbols', # 20D0..20FF + 'Letterlike Symbols', # 2100..214F + 'Number Forms', # 2150..218F + 'Arrows', # 2190..21FF + 'Mathematical Operators', # 2200..22FF + 'Miscellaneous Technical', # 2300..23FF + 'Control Pictures', # 2400..243F + 'Optical Character Recognition', # 2440..245F + 'Enclosed Alphanumerics', # 2460..24FF + 'Box Drawing', # 2500..257F + 'Block Elements', # 2580..259F + 'Geometric Shapes', # 25A0..25FF + 'Miscellaneous Symbols', # 2600..26FF + 'Dingbats', # 2700..27BF + 'Miscellaneous Mathematical Symbols-A', # 27C0..27EF + 'Supplemental Arrows-A', # 27F0..27FF + 'Braille Patterns', # 2800..28FF + 'Supplemental Arrows-B', # 2900..297F + 'Miscellaneous Mathematical Symbols-B', # 2980..29FF + 'Supplemental Mathematical Operators', # 2A00..2AFF + 'Miscellaneous Symbols and Arrows', # 2B00..2BFF + 'Glagolitic', # 2C00..2C5F + 'Latin Extended-C', # 2C60..2C7F + 'Coptic', # 2C80..2CFF + 'Georgian Supplement', # 2D00..2D2F + 'Tifinagh', # 2D30..2D7F + 'Ethiopic Extended', # 2D80..2DDF + 'Cyrillic Extended-A', # 2DE0..2DFF + 'Supplemental Punctuation', # 2E00..2E7F + 'CJK Radicals Supplement', # 2E80..2EFF + 'Kangxi Radicals', # 2F00..2FDF + 'No_Block', # 2FE0..2FEF + 'Ideographic Description Characters', # 2FF0..2FFF + 'CJK Symbols and Punctuation', # 3000..303F + 'Hiragana', # 3040..309F + 'Katakana', # 30A0..30FF + 'Bopomofo', # 3100..312F + 'Hangul Compatibility Jamo', # 3130..318F + 'Kanbun', # 3190..319F + 'Bopomofo Extended', # 31A0..31BF + 'CJK Strokes', # 31C0..31EF + 'Katakana Phonetic Extensions', # 31F0..31FF + 'Enclosed CJK Letters and Months', # 3200..32FF + 'CJK Compatibility', # 3300..33FF + 'CJK Unified Ideographs Extension A', # 3400..4DBF + 'Yijing Hexagram Symbols', # 4DC0..4DFF + 'CJK Unified Ideographs', # 4E00..9FFF + 'Yi Syllables', # A000..A48F + 'Yi Radicals', # A490..A4CF + 'Lisu', # A4D0..A4FF + 'Vai', # A500..A63F + 'Cyrillic Extended-B', # A640..A69F + 'Bamum', # A6A0..A6FF + 'Modifier Tone Letters', # A700..A71F + 'Latin Extended-D', # A720..A7FF + 'Syloti Nagri', # A800..A82F + 'Common Indic Number Forms', # A830..A83F + 'Phags-pa', # A840..A87F + 'Saurashtra', # A880..A8DF + 'Devanagari Extended', # A8E0..A8FF + 'Kayah Li', # A900..A92F + 'Rejang', # A930..A95F + 'Hangul Jamo Extended-A', # A960..A97F + 'Javanese', # A980..A9DF + 'Myanmar Extended-B', # A9E0..A9FF + 'Cham', # AA00..AA5F + 'Myanmar Extended-A', # AA60..AA7F + 'Tai Viet', # AA80..AADF + 'Meetei Mayek Extensions', # AAE0..AAFF + 'Ethiopic Extended-A', # AB00..AB2F + 'Latin Extended-E', # AB30..AB6F + 'Cherokee Supplement', # AB70..ABBF + 'Meetei Mayek', # ABC0..ABFF + 'Hangul Syllables', # AC00..D7AF + 'Hangul Jamo Extended-B', # D7B0..D7FF + 'High Surrogates', # D800..DB7F + 'High Private Use Surrogates', # DB80..DBFF + 'Low Surrogates', # DC00..DFFF + 'Private Use Area', # E000..F8FF + 'CJK Compatibility Ideographs', # F900..FAFF + 'Alphabetic Presentation Forms', # FB00..FB4F + 'Arabic Presentation Forms-A', # FB50..FDFF + 'Variation Selectors', # FE00..FE0F + 'Vertical Forms', # FE10..FE1F + 'Combining Half Marks', # FE20..FE2F + 'CJK Compatibility Forms', # FE30..FE4F + 'Small Form Variants', # FE50..FE6F + 'Arabic Presentation Forms-B', # FE70..FEFF + 'Halfwidth and Fullwidth Forms', # FF00..FFEF + 'Specials', # FFF0..FFFF + 'Linear B Syllabary', # 10000..1007F + 'Linear B Ideograms', # 10080..100FF + 'Aegean Numbers', # 10100..1013F + 'Ancient Greek Numbers', # 10140..1018F + 'Ancient Symbols', # 10190..101CF + 'Phaistos Disc', # 101D0..101FF + 'No_Block', # 10200..1027F + 'Lycian', # 10280..1029F + 'Carian', # 102A0..102DF + 'Coptic Epact Numbers', # 102E0..102FF + 'Old Italic', # 10300..1032F + 'Gothic', # 10330..1034F + 'Old Permic', # 10350..1037F + 'Ugaritic', # 10380..1039F + 'Old Persian', # 103A0..103DF + 'No_Block', # 103E0..103FF + 'Deseret', # 10400..1044F + 'Shavian', # 10450..1047F + 'Osmanya', # 10480..104AF + 'Osage', # 104B0..104FF + 'Elbasan', # 10500..1052F + 'Caucasian Albanian', # 10530..1056F + 'Vithkuqi', # 10570..105BF + 'No_Block', # 105C0..105FF + 'Linear A', # 10600..1077F + 'Latin Extended-F', # 10780..107BF + 'No_Block', # 107C0..107FF + 'Cypriot Syllabary', # 10800..1083F + 'Imperial Aramaic', # 10840..1085F + 'Palmyrene', # 10860..1087F + 'Nabataean', # 10880..108AF + 'No_Block', # 108B0..108DF + 'Hatran', # 108E0..108FF + 'Phoenician', # 10900..1091F + 'Lydian', # 10920..1093F + 'No_Block', # 10940..1097F + 'Meroitic Hieroglyphs', # 10980..1099F + 'Meroitic Cursive', # 109A0..109FF + 'Kharoshthi', # 10A00..10A5F + 'Old South Arabian', # 10A60..10A7F + 'Old North Arabian', # 10A80..10A9F + 'No_Block', # 10AA0..10ABF + 'Manichaean', # 10AC0..10AFF + 'Avestan', # 10B00..10B3F + 'Inscriptional Parthian', # 10B40..10B5F + 'Inscriptional Pahlavi', # 10B60..10B7F + 'Psalter Pahlavi', # 10B80..10BAF + 'No_Block', # 10BB0..10BFF + 'Old Turkic', # 10C00..10C4F + 'No_Block', # 10C50..10C7F + 'Old Hungarian', # 10C80..10CFF + 'Hanifi Rohingya', # 10D00..10D3F + 'No_Block', # 10D40..10E5F + 'Rumi Numeral Symbols', # 10E60..10E7F + 'Yezidi', # 10E80..10EBF + 'No_Block', # 10EC0..10EFF + 'Old Sogdian', # 10F00..10F2F + 'Sogdian', # 10F30..10F6F + 'Old Uyghur', # 10F70..10FAF + 'Chorasmian', # 10FB0..10FDF + 'Elymaic', # 10FE0..10FFF + 'Brahmi', # 11000..1107F + 'Kaithi', # 11080..110CF + 'Sora Sompeng', # 110D0..110FF + 'Chakma', # 11100..1114F + 'Mahajani', # 11150..1117F + 'Sharada', # 11180..111DF + 'Sinhala Archaic Numbers', # 111E0..111FF + 'Khojki', # 11200..1124F + 'No_Block', # 11250..1127F + 'Multani', # 11280..112AF + 'Khudawadi', # 112B0..112FF + 'Grantha', # 11300..1137F + 'No_Block', # 11380..113FF + 'Newa', # 11400..1147F + 'Tirhuta', # 11480..114DF + 'No_Block', # 114E0..1157F + 'Siddham', # 11580..115FF + 'Modi', # 11600..1165F + 'Mongolian Supplement', # 11660..1167F + 'Takri', # 11680..116CF + 'No_Block', # 116D0..116FF + 'Ahom', # 11700..1174F + 'No_Block', # 11750..117FF + 'Dogra', # 11800..1184F + 'No_Block', # 11850..1189F + 'Warang Citi', # 118A0..118FF + 'Dives Akuru', # 11900..1195F + 'No_Block', # 11960..1199F + 'Nandinagari', # 119A0..119FF + 'Zanabazar Square', # 11A00..11A4F + 'Soyombo', # 11A50..11AAF + 'Unified Canadian Aboriginal Syllabics Extended-A', # 11AB0..11ABF + 'Pau Cin Hau', # 11AC0..11AFF + 'No_Block', # 11B00..11BFF + 'Bhaiksuki', # 11C00..11C6F + 'Marchen', # 11C70..11CBF + 'No_Block', # 11CC0..11CFF + 'Masaram Gondi', # 11D00..11D5F + 'Gunjala Gondi', # 11D60..11DAF + 'No_Block', # 11DB0..11EDF + 'Makasar', # 11EE0..11EFF + 'No_Block', # 11F00..11FAF + 'Lisu Supplement', # 11FB0..11FBF + 'Tamil Supplement', # 11FC0..11FFF + 'Cuneiform', # 12000..123FF + 'Cuneiform Numbers and Punctuation', # 12400..1247F + 'Early Dynastic Cuneiform', # 12480..1254F + 'No_Block', # 12550..12F8F + 'Cypro-Minoan', # 12F90..12FFF + 'Egyptian Hieroglyphs', # 13000..1342F + 'Egyptian Hieroglyph Format Controls', # 13430..1343F + 'No_Block', # 13440..143FF + 'Anatolian Hieroglyphs', # 14400..1467F + 'No_Block', # 14680..167FF + 'Bamum Supplement', # 16800..16A3F + 'Mro', # 16A40..16A6F + 'Tangsa', # 16A70..16ACF + 'Bassa Vah', # 16AD0..16AFF + 'Pahawh Hmong', # 16B00..16B8F + 'No_Block', # 16B90..16E3F + 'Medefaidrin', # 16E40..16E9F + 'No_Block', # 16EA0..16EFF + 'Miao', # 16F00..16F9F + 'No_Block', # 16FA0..16FDF + 'Ideographic Symbols and Punctuation', # 16FE0..16FFF + 'Tangut', # 17000..187FF + 'Tangut Components', # 18800..18AFF + 'Khitan Small Script', # 18B00..18CFF + 'Tangut Supplement', # 18D00..18D7F + 'No_Block', # 18D80..1AFEF + 'Kana Extended-B', # 1AFF0..1AFFF + 'Kana Supplement', # 1B000..1B0FF + 'Kana Extended-A', # 1B100..1B12F + 'Small Kana Extension', # 1B130..1B16F + 'Nushu', # 1B170..1B2FF + 'No_Block', # 1B300..1BBFF + 'Duployan', # 1BC00..1BC9F + 'Shorthand Format Controls', # 1BCA0..1BCAF + 'No_Block', # 1BCB0..1CEFF + 'Znamenny Musical Notation', # 1CF00..1CFCF + 'No_Block', # 1CFD0..1CFFF + 'Byzantine Musical Symbols', # 1D000..1D0FF + 'Musical Symbols', # 1D100..1D1FF + 'Ancient Greek Musical Notation', # 1D200..1D24F + 'No_Block', # 1D250..1D2DF + 'Mayan Numerals', # 1D2E0..1D2FF + 'Tai Xuan Jing Symbols', # 1D300..1D35F + 'Counting Rod Numerals', # 1D360..1D37F + 'No_Block', # 1D380..1D3FF + 'Mathematical Alphanumeric Symbols', # 1D400..1D7FF + 'Sutton SignWriting', # 1D800..1DAAF + 'No_Block', # 1DAB0..1DEFF + 'Latin Extended-G', # 1DF00..1DFFF + 'Glagolitic Supplement', # 1E000..1E02F + 'No_Block', # 1E030..1E0FF + 'Nyiakeng Puachue Hmong', # 1E100..1E14F + 'No_Block', # 1E150..1E28F + 'Toto', # 1E290..1E2BF + 'Wancho', # 1E2C0..1E2FF + 'No_Block', # 1E300..1E7DF + 'Ethiopic Extended-B', # 1E7E0..1E7FF + 'Mende Kikakui', # 1E800..1E8DF + 'No_Block', # 1E8E0..1E8FF + 'Adlam', # 1E900..1E95F + 'No_Block', # 1E960..1EC6F + 'Indic Siyaq Numbers', # 1EC70..1ECBF + 'No_Block', # 1ECC0..1ECFF + 'Ottoman Siyaq Numbers', # 1ED00..1ED4F + 'No_Block', # 1ED50..1EDFF + 'Arabic Mathematical Alphabetic Symbols', # 1EE00..1EEFF + 'No_Block', # 1EF00..1EFFF + 'Mahjong Tiles', # 1F000..1F02F + 'Domino Tiles', # 1F030..1F09F + 'Playing Cards', # 1F0A0..1F0FF + 'Enclosed Alphanumeric Supplement', # 1F100..1F1FF + 'Enclosed Ideographic Supplement', # 1F200..1F2FF + 'Miscellaneous Symbols and Pictographs', # 1F300..1F5FF + 'Emoticons', # 1F600..1F64F + 'Ornamental Dingbats', # 1F650..1F67F + 'Transport and Map Symbols', # 1F680..1F6FF + 'Alchemical Symbols', # 1F700..1F77F + 'Geometric Shapes Extended', # 1F780..1F7FF + 'Supplemental Arrows-C', # 1F800..1F8FF + 'Supplemental Symbols and Pictographs', # 1F900..1F9FF + 'Chess Symbols', # 1FA00..1FA6F + 'Symbols and Pictographs Extended-A', # 1FA70..1FAFF + 'Symbols for Legacy Computing', # 1FB00..1FBFF + 'No_Block', # 1FC00..1FFFF + 'CJK Unified Ideographs Extension B', # 20000..2A6DF + 'No_Block', # 2A6E0..2A6FF + 'CJK Unified Ideographs Extension C', # 2A700..2B73F + 'CJK Unified Ideographs Extension D', # 2B740..2B81F + 'CJK Unified Ideographs Extension E', # 2B820..2CEAF + 'CJK Unified Ideographs Extension F', # 2CEB0..2EBEF + 'No_Block', # 2EBF0..2F7FF + 'CJK Compatibility Ideographs Supplement', # 2F800..2FA1F + 'No_Block', # 2FA20..2FFFF + 'CJK Unified Ideographs Extension G', # 30000..3134F + 'No_Block', # 31350..DFFFF + 'Tags', # E0000..E007F + 'No_Block', # E0080..E00FF + 'Variation Selectors Supplement', # E0100..E01EF + 'No_Block', # E01F0..EFFFF + 'Supplementary Private Use Area-A', # F0000..FFFFF + 'Supplementary Private Use Area-B', # 100000..10FFFF ] diff --git a/Lib/fontTools/unicodedata/ScriptExtensions.py b/Lib/fontTools/unicodedata/ScriptExtensions.py index b4e09cd277..b078c13e6b 100644 --- a/Lib/fontTools/unicodedata/ScriptExtensions.py +++ b/Lib/fontTools/unicodedata/ScriptExtensions.py @@ -4,9 +4,9 @@ # Source: https://unicode.org/Public/UNIDATA/ScriptExtensions.txt # License: http://unicode.org/copyright.html#License # -# ScriptExtensions-13.0.0.txt -# Date: 2020-01-22, 00:07:43 GMT -# © 2020 Unicode®, Inc. +# ScriptExtensions-14.0.0.txt +# Date: 2021-06-04, 02:19:38 GMT +# © 2021 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see http://www.unicode.org/terms_of_use.html # @@ -17,10 +17,10 @@ # with more than one script, but with a limited number of scripts. # For each code point, there is one or more property values. Each such value is a Script property value. # For more information, see: -# UAX #24, Unicode Script Property: http://www.unicode.org/reports/tr24/ +# UAX #24, Unicode Script Property: https://www.unicode.org/reports/tr24/ # Especially the sections: -# http://www.unicode.org/reports/tr24/#Assignment_Script_Values -# http://www.unicode.org/reports/tr24/#Assignment_ScriptX_Values +# https://www.unicode.org/reports/tr24/#Assignment_Script_Values +# https://www.unicode.org/reports/tr24/#Assignment_ScriptX_Values # # Each Script_Extensions value in this file consists of a set # of one or more abbreviated Script property values. The ordering of the @@ -53,14 +53,14 @@ 0x0485, # .. 0x0486 ; {'Cyrl', 'Latn'} 0x0487, # .. 0x0487 ; {'Cyrl', 'Glag'} 0x0488, # .. 0x060B ; None - 0x060C, # .. 0x060C ; {'Arab', 'Rohg', 'Syrc', 'Thaa', 'Yezi'} + 0x060C, # .. 0x060C ; {'Arab', 'Nkoo', 'Rohg', 'Syrc', 'Thaa', 'Yezi'} 0x060D, # .. 0x061A ; None - 0x061B, # .. 0x061B ; {'Arab', 'Rohg', 'Syrc', 'Thaa', 'Yezi'} + 0x061B, # .. 0x061B ; {'Arab', 'Nkoo', 'Rohg', 'Syrc', 'Thaa', 'Yezi'} 0x061C, # .. 0x061C ; {'Arab', 'Syrc', 'Thaa'} 0x061D, # .. 0x061E ; None - 0x061F, # .. 0x061F ; {'Arab', 'Rohg', 'Syrc', 'Thaa', 'Yezi'} + 0x061F, # .. 0x061F ; {'Adlm', 'Arab', 'Nkoo', 'Rohg', 'Syrc', 'Thaa', 'Yezi'} 0x0620, # .. 0x063F ; None - 0x0640, # .. 0x0640 ; {'Adlm', 'Arab', 'Mand', 'Mani', 'Phlp', 'Rohg', 'Sogd', 'Syrc'} + 0x0640, # .. 0x0640 ; {'Adlm', 'Arab', 'Mand', 'Mani', 'Ougr', 'Phlp', 'Rohg', 'Sogd', 'Syrc'} 0x0641, # .. 0x064A ; None 0x064B, # .. 0x0655 ; {'Arab', 'Syrc'} 0x0656, # .. 0x065F ; None @@ -129,7 +129,9 @@ 0x1DC0, # .. 0x1DC1 ; {'Grek'} 0x1DC2, # .. 0x1DF7 ; None 0x1DF8, # .. 0x1DF8 ; {'Cyrl', 'Syrc'} - 0x1DF9, # .. 0x202E ; None + 0x1DF9, # .. 0x1DF9 ; None + 0x1DFA, # .. 0x1DFA ; {'Syrc'} + 0x1DFB, # .. 0x202E ; None 0x202F, # .. 0x202F ; {'Latn', 'Mong'} 0x2030, # .. 0x20EF ; None 0x20F0, # .. 0x20F0 ; {'Deva', 'Gran', 'Latn'} @@ -197,7 +199,9 @@ 0xA92E, # .. 0xA92E ; {'Kali', 'Latn', 'Mymr'} 0xA92F, # .. 0xA9CE ; None 0xA9CF, # .. 0xA9CF ; {'Bugi', 'Java'} - 0xA9D0, # .. 0xFDF1 ; None + 0xA9D0, # .. 0xFD3D ; None + 0xFD3E, # .. 0xFD3F ; {'Arab', 'Nkoo'} + 0xFD40, # .. 0xFDF1 ; None 0xFDF2, # .. 0xFDF2 ; {'Arab', 'Thaa'} 0xFDF3, # .. 0xFDFC ; None 0xFDFD, # .. 0xFDFD ; {'Arab', 'Thaa'} @@ -210,14 +214,17 @@ 0xFF71, # .. 0xFF9D ; None 0xFF9E, # .. 0xFF9F ; {'Hira', 'Kana'} 0xFFA0, # .. 0x100FF ; None - 0x10100, # .. 0x10102 ; {'Cprt', 'Linb'} + 0x10100, # .. 0x10101 ; {'Cpmn', 'Cprt', 'Linb'} + 0x10102, # .. 0x10102 ; {'Cprt', 'Linb'} 0x10103, # .. 0x10106 ; None 0x10107, # .. 0x10133 ; {'Cprt', 'Lina', 'Linb'} 0x10134, # .. 0x10136 ; None 0x10137, # .. 0x1013F ; {'Cprt', 'Linb'} 0x10140, # .. 0x102DF ; None 0x102E0, # .. 0x102FB ; {'Arab', 'Copt'} - 0x102FC, # .. 0x11300 ; None + 0x102FC, # .. 0x10AF1 ; None + 0x10AF2, # .. 0x10AF2 ; {'Mani', 'Ougr'} + 0x10AF3, # .. 0x11300 ; None 0x11301, # .. 0x11301 ; {'Gran', 'Taml'} 0x11302, # .. 0x11302 ; None 0x11303, # .. 0x11303 ; {'Gran', 'Taml'} @@ -249,14 +256,14 @@ {'Cyrl', 'Latn'}, # 0485..0486 {'Cyrl', 'Glag'}, # 0487..0487 None, # 0488..060B - {'Arab', 'Rohg', 'Syrc', 'Thaa', 'Yezi'}, # 060C..060C + {'Arab', 'Nkoo', 'Rohg', 'Syrc', 'Thaa', 'Yezi'}, # 060C..060C None, # 060D..061A - {'Arab', 'Rohg', 'Syrc', 'Thaa', 'Yezi'}, # 061B..061B + {'Arab', 'Nkoo', 'Rohg', 'Syrc', 'Thaa', 'Yezi'}, # 061B..061B {'Arab', 'Syrc', 'Thaa'}, # 061C..061C None, # 061D..061E - {'Arab', 'Rohg', 'Syrc', 'Thaa', 'Yezi'}, # 061F..061F + {'Adlm', 'Arab', 'Nkoo', 'Rohg', 'Syrc', 'Thaa', 'Yezi'}, # 061F..061F None, # 0620..063F - {'Adlm', 'Arab', 'Mand', 'Mani', 'Phlp', 'Rohg', 'Sogd', 'Syrc'}, # 0640..0640 + {'Adlm', 'Arab', 'Mand', 'Mani', 'Ougr', 'Phlp', 'Rohg', 'Sogd', 'Syrc'}, # 0640..0640 None, # 0641..064A {'Arab', 'Syrc'}, # 064B..0655 None, # 0656..065F @@ -325,7 +332,9 @@ {'Grek'}, # 1DC0..1DC1 None, # 1DC2..1DF7 {'Cyrl', 'Syrc'}, # 1DF8..1DF8 - None, # 1DF9..202E + None, # 1DF9..1DF9 + {'Syrc'}, # 1DFA..1DFA + None, # 1DFB..202E {'Latn', 'Mong'}, # 202F..202F None, # 2030..20EF {'Deva', 'Gran', 'Latn'}, # 20F0..20F0 @@ -393,7 +402,9 @@ {'Kali', 'Latn', 'Mymr'}, # A92E..A92E None, # A92F..A9CE {'Bugi', 'Java'}, # A9CF..A9CF - None, # A9D0..FDF1 + None, # A9D0..FD3D + {'Arab', 'Nkoo'}, # FD3E..FD3F + None, # FD40..FDF1 {'Arab', 'Thaa'}, # FDF2..FDF2 None, # FDF3..FDFC {'Arab', 'Thaa'}, # FDFD..FDFD @@ -406,14 +417,17 @@ None, # FF71..FF9D {'Hira', 'Kana'}, # FF9E..FF9F None, # FFA0..100FF - {'Cprt', 'Linb'}, # 10100..10102 + {'Cpmn', 'Cprt', 'Linb'}, # 10100..10101 + {'Cprt', 'Linb'}, # 10102..10102 None, # 10103..10106 {'Cprt', 'Lina', 'Linb'}, # 10107..10133 None, # 10134..10136 {'Cprt', 'Linb'}, # 10137..1013F None, # 10140..102DF {'Arab', 'Copt'}, # 102E0..102FB - None, # 102FC..11300 + None, # 102FC..10AF1 + {'Mani', 'Ougr'}, # 10AF2..10AF2 + None, # 10AF3..11300 {'Gran', 'Taml'}, # 11301..11301 None, # 11302..11302 {'Gran', 'Taml'}, # 11303..11303 diff --git a/Lib/fontTools/unicodedata/Scripts.py b/Lib/fontTools/unicodedata/Scripts.py index 12f9a0e311..18cada93a9 100644 --- a/Lib/fontTools/unicodedata/Scripts.py +++ b/Lib/fontTools/unicodedata/Scripts.py @@ -4,19 +4,19 @@ # Source: https://unicode.org/Public/UNIDATA/Scripts.txt # License: http://unicode.org/copyright.html#License # -# Scripts-13.0.0.txt -# Date: 2020-01-22, 00:07:43 GMT -# © 2020 Unicode®, Inc. +# Scripts-14.0.0.txt +# Date: 2021-07-10, 00:35:31 GMT +# © 2021 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see http://www.unicode.org/terms_of_use.html # # Unicode Character Database # For documentation, see http://www.unicode.org/reports/tr44/ # For more information, see: -# UAX #24, Unicode Script Property: http://www.unicode.org/reports/tr24/ +# UAX #24, Unicode Script Property: https://www.unicode.org/reports/tr24/ # Especially the sections: -# http://www.unicode.org/reports/tr24/#Assignment_Script_Values -# http://www.unicode.org/reports/tr24/#Assignment_ScriptX_Values +# https://www.unicode.org/reports/tr24/#Assignment_Script_Values +# https://www.unicode.org/reports/tr24/#Assignment_ScriptX_Values # @@ -84,9 +84,7 @@ 0x060C, # .. 0x060C ; Common 0x060D, # .. 0x061A ; Arabic 0x061B, # .. 0x061B ; Common - 0x061C, # .. 0x061C ; Arabic - 0x061D, # .. 0x061D ; Unknown - 0x061E, # .. 0x061E ; Arabic + 0x061C, # .. 0x061E ; Arabic 0x061F, # .. 0x061F ; Common 0x0620, # .. 0x063F ; Arabic 0x0640, # .. 0x0640 ; Common @@ -117,12 +115,12 @@ 0x085E, # .. 0x085E ; Mandaic 0x085F, # .. 0x085F ; Unknown 0x0860, # .. 0x086A ; Syriac - 0x086B, # .. 0x089F ; Unknown - 0x08A0, # .. 0x08B4 ; Arabic - 0x08B5, # .. 0x08B5 ; Unknown - 0x08B6, # .. 0x08C7 ; Arabic - 0x08C8, # .. 0x08D2 ; Unknown - 0x08D3, # .. 0x08E1 ; Arabic + 0x086B, # .. 0x086F ; Unknown + 0x0870, # .. 0x088E ; Arabic + 0x088F, # .. 0x088F ; Unknown + 0x0890, # .. 0x0891 ; Arabic + 0x0892, # .. 0x0897 ; Unknown + 0x0898, # .. 0x08E1 ; Arabic 0x08E2, # .. 0x08E2 ; Common 0x08E3, # .. 0x08FF ; Arabic 0x0900, # .. 0x0950 ; Devanagari @@ -285,8 +283,8 @@ 0x0C12, # .. 0x0C28 ; Telugu 0x0C29, # .. 0x0C29 ; Unknown 0x0C2A, # .. 0x0C39 ; Telugu - 0x0C3A, # .. 0x0C3C ; Unknown - 0x0C3D, # .. 0x0C44 ; Telugu + 0x0C3A, # .. 0x0C3B ; Unknown + 0x0C3C, # .. 0x0C44 ; Telugu 0x0C45, # .. 0x0C45 ; Unknown 0x0C46, # .. 0x0C48 ; Telugu 0x0C49, # .. 0x0C49 ; Unknown @@ -295,7 +293,9 @@ 0x0C55, # .. 0x0C56 ; Telugu 0x0C57, # .. 0x0C57 ; Unknown 0x0C58, # .. 0x0C5A ; Telugu - 0x0C5B, # .. 0x0C5F ; Unknown + 0x0C5B, # .. 0x0C5C ; Unknown + 0x0C5D, # .. 0x0C5D ; Telugu + 0x0C5E, # .. 0x0C5F ; Unknown 0x0C60, # .. 0x0C63 ; Telugu 0x0C64, # .. 0x0C65 ; Unknown 0x0C66, # .. 0x0C6F ; Telugu @@ -318,8 +318,8 @@ 0x0CCA, # .. 0x0CCD ; Kannada 0x0CCE, # .. 0x0CD4 ; Unknown 0x0CD5, # .. 0x0CD6 ; Kannada - 0x0CD7, # .. 0x0CDD ; Unknown - 0x0CDE, # .. 0x0CDE ; Kannada + 0x0CD7, # .. 0x0CDC ; Unknown + 0x0CDD, # .. 0x0CDE ; Kannada 0x0CDF, # .. 0x0CDF ; Unknown 0x0CE0, # .. 0x0CE3 ; Kannada 0x0CE4, # .. 0x0CE5 ; Unknown @@ -464,10 +464,9 @@ 0x16EB, # .. 0x16ED ; Common 0x16EE, # .. 0x16F8 ; Runic 0x16F9, # .. 0x16FF ; Unknown - 0x1700, # .. 0x170C ; Tagalog - 0x170D, # .. 0x170D ; Unknown - 0x170E, # .. 0x1714 ; Tagalog - 0x1715, # .. 0x171F ; Unknown + 0x1700, # .. 0x1715 ; Tagalog + 0x1716, # .. 0x171E ; Unknown + 0x171F, # .. 0x171F ; Tagalog 0x1720, # .. 0x1734 ; Hanunoo 0x1735, # .. 0x1736 ; Common 0x1737, # .. 0x173F ; Unknown @@ -489,9 +488,7 @@ 0x1802, # .. 0x1803 ; Common 0x1804, # .. 0x1804 ; Mongolian 0x1805, # .. 0x1805 ; Common - 0x1806, # .. 0x180E ; Mongolian - 0x180F, # .. 0x180F ; Unknown - 0x1810, # .. 0x1819 ; Mongolian + 0x1806, # .. 0x1819 ; Mongolian 0x181A, # .. 0x181F ; Unknown 0x1820, # .. 0x1878 ; Mongolian 0x1879, # .. 0x187F ; Unknown @@ -533,12 +530,12 @@ 0x1A9A, # .. 0x1A9F ; Unknown 0x1AA0, # .. 0x1AAD ; Tai_Tham 0x1AAE, # .. 0x1AAF ; Unknown - 0x1AB0, # .. 0x1AC0 ; Inherited - 0x1AC1, # .. 0x1AFF ; Unknown - 0x1B00, # .. 0x1B4B ; Balinese - 0x1B4C, # .. 0x1B4F ; Unknown - 0x1B50, # .. 0x1B7C ; Balinese - 0x1B7D, # .. 0x1B7F ; Unknown + 0x1AB0, # .. 0x1ACE ; Inherited + 0x1ACF, # .. 0x1AFF ; Unknown + 0x1B00, # .. 0x1B4C ; Balinese + 0x1B4D, # .. 0x1B4F ; Unknown + 0x1B50, # .. 0x1B7E ; Balinese + 0x1B7F, # .. 0x1B7F ; Unknown 0x1B80, # .. 0x1BBF ; Sundanese 0x1BC0, # .. 0x1BF3 ; Batak 0x1BF4, # .. 0x1BFB ; Unknown @@ -580,9 +577,7 @@ 0x1D78, # .. 0x1D78 ; Cyrillic 0x1D79, # .. 0x1DBE ; Latin 0x1DBF, # .. 0x1DBF ; Greek - 0x1DC0, # .. 0x1DF9 ; Inherited - 0x1DFA, # .. 0x1DFA ; Unknown - 0x1DFB, # .. 0x1DFF ; Inherited + 0x1DC0, # .. 0x1DFF ; Inherited 0x1E00, # .. 0x1EFF ; Latin 0x1F00, # .. 0x1F15 ; Greek 0x1F16, # .. 0x1F17 ; Unknown @@ -629,8 +624,8 @@ 0x208F, # .. 0x208F ; Unknown 0x2090, # .. 0x209C ; Latin 0x209D, # .. 0x209F ; Unknown - 0x20A0, # .. 0x20BF ; Common - 0x20C0, # .. 0x20CF ; Unknown + 0x20A0, # .. 0x20C0 ; Common + 0x20C1, # .. 0x20CF ; Unknown 0x20D0, # .. 0x20F0 ; Inherited 0x20F1, # .. 0x20FF ; Unknown 0x2100, # .. 0x2125 ; Common @@ -656,10 +651,7 @@ 0x2B76, # .. 0x2B95 ; Common 0x2B96, # .. 0x2B96 ; Unknown 0x2B97, # .. 0x2BFF ; Common - 0x2C00, # .. 0x2C2E ; Glagolitic - 0x2C2F, # .. 0x2C2F ; Unknown - 0x2C30, # .. 0x2C5E ; Glagolitic - 0x2C5F, # .. 0x2C5F ; Unknown + 0x2C00, # .. 0x2C5F ; Glagolitic 0x2C60, # .. 0x2C7F ; Latin 0x2C80, # .. 0x2CF3 ; Coptic 0x2CF4, # .. 0x2CF8 ; Unknown @@ -694,8 +686,8 @@ 0x2DD8, # .. 0x2DDE ; Ethiopic 0x2DDF, # .. 0x2DDF ; Unknown 0x2DE0, # .. 0x2DFF ; Cyrillic - 0x2E00, # .. 0x2E52 ; Common - 0x2E53, # .. 0x2E7F ; Unknown + 0x2E00, # .. 0x2E5D ; Common + 0x2E5E, # .. 0x2E7F ; Unknown 0x2E80, # .. 0x2E99 ; Han 0x2E9A, # .. 0x2E9A ; Unknown 0x2E9B, # .. 0x2EF3 ; Han @@ -746,8 +738,7 @@ 0x3358, # .. 0x33FF ; Common 0x3400, # .. 0x4DBF ; Han 0x4DC0, # .. 0x4DFF ; Common - 0x4E00, # .. 0x9FFC ; Han - 0x9FFD, # .. 0x9FFF ; Unknown + 0x4E00, # .. 0x9FFF ; Han 0xA000, # .. 0xA48C ; Yi 0xA48D, # .. 0xA48F ; Unknown 0xA490, # .. 0xA4C6 ; Yi @@ -761,11 +752,15 @@ 0xA700, # .. 0xA721 ; Common 0xA722, # .. 0xA787 ; Latin 0xA788, # .. 0xA78A ; Common - 0xA78B, # .. 0xA7BF ; Latin - 0xA7C0, # .. 0xA7C1 ; Unknown - 0xA7C2, # .. 0xA7CA ; Latin - 0xA7CB, # .. 0xA7F4 ; Unknown - 0xA7F5, # .. 0xA7FF ; Latin + 0xA78B, # .. 0xA7CA ; Latin + 0xA7CB, # .. 0xA7CF ; Unknown + 0xA7D0, # .. 0xA7D1 ; Latin + 0xA7D2, # .. 0xA7D2 ; Unknown + 0xA7D3, # .. 0xA7D3 ; Latin + 0xA7D4, # .. 0xA7D4 ; Unknown + 0xA7D5, # .. 0xA7D9 ; Latin + 0xA7DA, # .. 0xA7F1 ; Unknown + 0xA7F2, # .. 0xA7FF ; Latin 0xA800, # .. 0xA82C ; Syloti_Nagri 0xA82D, # .. 0xA82F ; Unknown 0xA830, # .. 0xA839 ; Common @@ -853,17 +848,17 @@ 0xFB43, # .. 0xFB44 ; Hebrew 0xFB45, # .. 0xFB45 ; Unknown 0xFB46, # .. 0xFB4F ; Hebrew - 0xFB50, # .. 0xFBC1 ; Arabic - 0xFBC2, # .. 0xFBD2 ; Unknown + 0xFB50, # .. 0xFBC2 ; Arabic + 0xFBC3, # .. 0xFBD2 ; Unknown 0xFBD3, # .. 0xFD3D ; Arabic 0xFD3E, # .. 0xFD3F ; Common - 0xFD40, # .. 0xFD4F ; Unknown - 0xFD50, # .. 0xFD8F ; Arabic + 0xFD40, # .. 0xFD8F ; Arabic 0xFD90, # .. 0xFD91 ; Unknown 0xFD92, # .. 0xFDC7 ; Arabic - 0xFDC8, # .. 0xFDEF ; Unknown - 0xFDF0, # .. 0xFDFD ; Arabic - 0xFDFE, # .. 0xFDFF ; Unknown + 0xFDC8, # .. 0xFDCE ; Unknown + 0xFDCF, # .. 0xFDCF ; Arabic + 0xFDD0, # .. 0xFDEF ; Unknown + 0xFDF0, # .. 0xFDFF ; Arabic 0xFE00, # .. 0xFE0F ; Inherited 0xFE10, # .. 0xFE19 ; Common 0xFE1A, # .. 0xFE1F ; Unknown @@ -970,13 +965,34 @@ 0x10530, # .. 0x10563 ; Caucasian_Albanian 0x10564, # .. 0x1056E ; Unknown 0x1056F, # .. 0x1056F ; Caucasian_Albanian - 0x10570, # .. 0x105FF ; Unknown + 0x10570, # .. 0x1057A ; Vithkuqi + 0x1057B, # .. 0x1057B ; Unknown + 0x1057C, # .. 0x1058A ; Vithkuqi + 0x1058B, # .. 0x1058B ; Unknown + 0x1058C, # .. 0x10592 ; Vithkuqi + 0x10593, # .. 0x10593 ; Unknown + 0x10594, # .. 0x10595 ; Vithkuqi + 0x10596, # .. 0x10596 ; Unknown + 0x10597, # .. 0x105A1 ; Vithkuqi + 0x105A2, # .. 0x105A2 ; Unknown + 0x105A3, # .. 0x105B1 ; Vithkuqi + 0x105B2, # .. 0x105B2 ; Unknown + 0x105B3, # .. 0x105B9 ; Vithkuqi + 0x105BA, # .. 0x105BA ; Unknown + 0x105BB, # .. 0x105BC ; Vithkuqi + 0x105BD, # .. 0x105FF ; Unknown 0x10600, # .. 0x10736 ; Linear_A 0x10737, # .. 0x1073F ; Unknown 0x10740, # .. 0x10755 ; Linear_A 0x10756, # .. 0x1075F ; Unknown 0x10760, # .. 0x10767 ; Linear_A - 0x10768, # .. 0x107FF ; Unknown + 0x10768, # .. 0x1077F ; Unknown + 0x10780, # .. 0x10785 ; Latin + 0x10786, # .. 0x10786 ; Unknown + 0x10787, # .. 0x107B0 ; Latin + 0x107B1, # .. 0x107B1 ; Unknown + 0x107B2, # .. 0x107BA ; Latin + 0x107BB, # .. 0x107FF ; Unknown 0x10800, # .. 0x10805 ; Cypriot 0x10806, # .. 0x10807 ; Unknown 0x10808, # .. 0x10808 ; Cypriot @@ -1074,18 +1090,20 @@ 0x10F00, # .. 0x10F27 ; Old_Sogdian 0x10F28, # .. 0x10F2F ; Unknown 0x10F30, # .. 0x10F59 ; Sogdian - 0x10F5A, # .. 0x10FAF ; Unknown + 0x10F5A, # .. 0x10F6F ; Unknown + 0x10F70, # .. 0x10F89 ; Old_Uyghur + 0x10F8A, # .. 0x10FAF ; Unknown 0x10FB0, # .. 0x10FCB ; Chorasmian 0x10FCC, # .. 0x10FDF ; Unknown 0x10FE0, # .. 0x10FF6 ; Elymaic 0x10FF7, # .. 0x10FFF ; Unknown 0x11000, # .. 0x1104D ; Brahmi 0x1104E, # .. 0x11051 ; Unknown - 0x11052, # .. 0x1106F ; Brahmi - 0x11070, # .. 0x1107E ; Unknown + 0x11052, # .. 0x11075 ; Brahmi + 0x11076, # .. 0x1107E ; Unknown 0x1107F, # .. 0x1107F ; Brahmi - 0x11080, # .. 0x110C1 ; Kaithi - 0x110C2, # .. 0x110CC ; Unknown + 0x11080, # .. 0x110C2 ; Kaithi + 0x110C3, # .. 0x110CC ; Unknown 0x110CD, # .. 0x110CD ; Kaithi 0x110CE, # .. 0x110CF ; Unknown 0x110D0, # .. 0x110E8 ; Sora_Sompeng @@ -1169,16 +1187,16 @@ 0x1165A, # .. 0x1165F ; Unknown 0x11660, # .. 0x1166C ; Mongolian 0x1166D, # .. 0x1167F ; Unknown - 0x11680, # .. 0x116B8 ; Takri - 0x116B9, # .. 0x116BF ; Unknown + 0x11680, # .. 0x116B9 ; Takri + 0x116BA, # .. 0x116BF ; Unknown 0x116C0, # .. 0x116C9 ; Takri 0x116CA, # .. 0x116FF ; Unknown 0x11700, # .. 0x1171A ; Ahom 0x1171B, # .. 0x1171C ; Unknown 0x1171D, # .. 0x1172B ; Ahom 0x1172C, # .. 0x1172F ; Unknown - 0x11730, # .. 0x1173F ; Ahom - 0x11740, # .. 0x117FF ; Unknown + 0x11730, # .. 0x11746 ; Ahom + 0x11747, # .. 0x117FF ; Unknown 0x11800, # .. 0x1183B ; Dogra 0x1183C, # .. 0x1189F ; Unknown 0x118A0, # .. 0x118F2 ; Warang_Citi @@ -1209,7 +1227,8 @@ 0x11A00, # .. 0x11A47 ; Zanabazar_Square 0x11A48, # .. 0x11A4F ; Unknown 0x11A50, # .. 0x11AA2 ; Soyombo - 0x11AA3, # .. 0x11ABF ; Unknown + 0x11AA3, # .. 0x11AAF ; Unknown + 0x11AB0, # .. 0x11ABF ; Canadian_Aboriginal 0x11AC0, # .. 0x11AF8 ; Pau_Cin_Hau 0x11AF9, # .. 0x11BFF ; Unknown 0x11C00, # .. 0x11C08 ; Bhaiksuki @@ -1266,7 +1285,9 @@ 0x12470, # .. 0x12474 ; Cuneiform 0x12475, # .. 0x1247F ; Unknown 0x12480, # .. 0x12543 ; Cuneiform - 0x12544, # .. 0x12FFF ; Unknown + 0x12544, # .. 0x12F8F ; Unknown + 0x12F90, # .. 0x12FF2 ; Cypro_Minoan + 0x12FF3, # .. 0x12FFF ; Unknown 0x13000, # .. 0x1342E ; Egyptian_Hieroglyphs 0x1342F, # .. 0x1342F ; Unknown 0x13430, # .. 0x13438 ; Egyptian_Hieroglyphs @@ -1280,7 +1301,10 @@ 0x16A60, # .. 0x16A69 ; Mro 0x16A6A, # .. 0x16A6D ; Unknown 0x16A6E, # .. 0x16A6F ; Mro - 0x16A70, # .. 0x16ACF ; Unknown + 0x16A70, # .. 0x16ABE ; Tangsa + 0x16ABF, # .. 0x16ABF ; Unknown + 0x16AC0, # .. 0x16AC9 ; Tangsa + 0x16ACA, # .. 0x16ACF ; Unknown 0x16AD0, # .. 0x16AED ; Bassa_Vah 0x16AEE, # .. 0x16AEF ; Unknown 0x16AF0, # .. 0x16AF5 ; Bassa_Vah @@ -1305,7 +1329,7 @@ 0x16FA0, # .. 0x16FDF ; Unknown 0x16FE0, # .. 0x16FE0 ; Tangut 0x16FE1, # .. 0x16FE1 ; Nushu - 0x16FE2, # .. 0x16FE3 ; Common + 0x16FE2, # .. 0x16FE3 ; Han 0x16FE4, # .. 0x16FE4 ; Khitan_Small_Script 0x16FE5, # .. 0x16FEF ; Unknown 0x16FF0, # .. 0x16FF1 ; Han @@ -1316,10 +1340,17 @@ 0x18B00, # .. 0x18CD5 ; Khitan_Small_Script 0x18CD6, # .. 0x18CFF ; Unknown 0x18D00, # .. 0x18D08 ; Tangut - 0x18D09, # .. 0x1AFFF ; Unknown + 0x18D09, # .. 0x1AFEF ; Unknown + 0x1AFF0, # .. 0x1AFF3 ; Katakana + 0x1AFF4, # .. 0x1AFF4 ; Unknown + 0x1AFF5, # .. 0x1AFFB ; Katakana + 0x1AFFC, # .. 0x1AFFC ; Unknown + 0x1AFFD, # .. 0x1AFFE ; Katakana + 0x1AFFF, # .. 0x1AFFF ; Unknown 0x1B000, # .. 0x1B000 ; Katakana - 0x1B001, # .. 0x1B11E ; Hiragana - 0x1B11F, # .. 0x1B14F ; Unknown + 0x1B001, # .. 0x1B11F ; Hiragana + 0x1B120, # .. 0x1B122 ; Katakana + 0x1B123, # .. 0x1B14F ; Unknown 0x1B150, # .. 0x1B152 ; Hiragana 0x1B153, # .. 0x1B163 ; Unknown 0x1B164, # .. 0x1B167 ; Katakana @@ -1336,7 +1367,13 @@ 0x1BC9A, # .. 0x1BC9B ; Unknown 0x1BC9C, # .. 0x1BC9F ; Duployan 0x1BCA0, # .. 0x1BCA3 ; Common - 0x1BCA4, # .. 0x1CFFF ; Unknown + 0x1BCA4, # .. 0x1CEFF ; Unknown + 0x1CF00, # .. 0x1CF2D ; Inherited + 0x1CF2E, # .. 0x1CF2F ; Unknown + 0x1CF30, # .. 0x1CF46 ; Inherited + 0x1CF47, # .. 0x1CF4F ; Unknown + 0x1CF50, # .. 0x1CFC3 ; Common + 0x1CFC4, # .. 0x1CFFF ; Unknown 0x1D000, # .. 0x1D0F5 ; Common 0x1D0F6, # .. 0x1D0FF ; Unknown 0x1D100, # .. 0x1D126 ; Common @@ -1349,8 +1386,8 @@ 0x1D185, # .. 0x1D18B ; Inherited 0x1D18C, # .. 0x1D1A9 ; Common 0x1D1AA, # .. 0x1D1AD ; Inherited - 0x1D1AE, # .. 0x1D1E8 ; Common - 0x1D1E9, # .. 0x1D1FF ; Unknown + 0x1D1AE, # .. 0x1D1EA ; Common + 0x1D1EB, # .. 0x1D1FF ; Unknown 0x1D200, # .. 0x1D245 ; Greek 0x1D246, # .. 0x1D2DF ; Unknown 0x1D2E0, # .. 0x1D2F3 ; Common @@ -1405,7 +1442,9 @@ 0x1DA9B, # .. 0x1DA9F ; SignWriting 0x1DAA0, # .. 0x1DAA0 ; Unknown 0x1DAA1, # .. 0x1DAAF ; SignWriting - 0x1DAB0, # .. 0x1DFFF ; Unknown + 0x1DAB0, # .. 0x1DEFF ; Unknown + 0x1DF00, # .. 0x1DF1E ; Latin + 0x1DF1F, # .. 0x1DFFF ; Unknown 0x1E000, # .. 0x1E006 ; Glagolitic 0x1E007, # .. 0x1E007 ; Unknown 0x1E008, # .. 0x1E018 ; Glagolitic @@ -1423,11 +1462,21 @@ 0x1E140, # .. 0x1E149 ; Nyiakeng_Puachue_Hmong 0x1E14A, # .. 0x1E14D ; Unknown 0x1E14E, # .. 0x1E14F ; Nyiakeng_Puachue_Hmong - 0x1E150, # .. 0x1E2BF ; Unknown + 0x1E150, # .. 0x1E28F ; Unknown + 0x1E290, # .. 0x1E2AE ; Toto + 0x1E2AF, # .. 0x1E2BF ; Unknown 0x1E2C0, # .. 0x1E2F9 ; Wancho 0x1E2FA, # .. 0x1E2FE ; Unknown 0x1E2FF, # .. 0x1E2FF ; Wancho - 0x1E300, # .. 0x1E7FF ; Unknown + 0x1E300, # .. 0x1E7DF ; Unknown + 0x1E7E0, # .. 0x1E7E6 ; Ethiopic + 0x1E7E7, # .. 0x1E7E7 ; Unknown + 0x1E7E8, # .. 0x1E7EB ; Ethiopic + 0x1E7EC, # .. 0x1E7EC ; Unknown + 0x1E7ED, # .. 0x1E7EE ; Ethiopic + 0x1E7EF, # .. 0x1E7EF ; Unknown + 0x1E7F0, # .. 0x1E7FE ; Ethiopic + 0x1E7FF, # .. 0x1E7FF ; Unknown 0x1E800, # .. 0x1E8C4 ; Mende_Kikakui 0x1E8C5, # .. 0x1E8C6 ; Unknown 0x1E8C7, # .. 0x1E8D6 ; Mende_Kikakui @@ -1537,8 +1586,8 @@ 0x1F260, # .. 0x1F265 ; Common 0x1F266, # .. 0x1F2FF ; Unknown 0x1F300, # .. 0x1F6D7 ; Common - 0x1F6D8, # .. 0x1F6DF ; Unknown - 0x1F6E0, # .. 0x1F6EC ; Common + 0x1F6D8, # .. 0x1F6DC ; Unknown + 0x1F6DD, # .. 0x1F6EC ; Common 0x1F6ED, # .. 0x1F6EF ; Unknown 0x1F6F0, # .. 0x1F6FC ; Common 0x1F6FD, # .. 0x1F6FF ; Unknown @@ -1547,7 +1596,9 @@ 0x1F780, # .. 0x1F7D8 ; Common 0x1F7D9, # .. 0x1F7DF ; Unknown 0x1F7E0, # .. 0x1F7EB ; Common - 0x1F7EC, # .. 0x1F7FF ; Unknown + 0x1F7EC, # .. 0x1F7EF ; Unknown + 0x1F7F0, # .. 0x1F7F0 ; Common + 0x1F7F1, # .. 0x1F7FF ; Unknown 0x1F800, # .. 0x1F80B ; Common 0x1F80C, # .. 0x1F80F ; Unknown 0x1F810, # .. 0x1F847 ; Common @@ -1560,38 +1611,38 @@ 0x1F8AE, # .. 0x1F8AF ; Unknown 0x1F8B0, # .. 0x1F8B1 ; Common 0x1F8B2, # .. 0x1F8FF ; Unknown - 0x1F900, # .. 0x1F978 ; Common - 0x1F979, # .. 0x1F979 ; Unknown - 0x1F97A, # .. 0x1F9CB ; Common - 0x1F9CC, # .. 0x1F9CC ; Unknown - 0x1F9CD, # .. 0x1FA53 ; Common + 0x1F900, # .. 0x1FA53 ; Common 0x1FA54, # .. 0x1FA5F ; Unknown 0x1FA60, # .. 0x1FA6D ; Common 0x1FA6E, # .. 0x1FA6F ; Unknown 0x1FA70, # .. 0x1FA74 ; Common 0x1FA75, # .. 0x1FA77 ; Unknown - 0x1FA78, # .. 0x1FA7A ; Common - 0x1FA7B, # .. 0x1FA7F ; Unknown + 0x1FA78, # .. 0x1FA7C ; Common + 0x1FA7D, # .. 0x1FA7F ; Unknown 0x1FA80, # .. 0x1FA86 ; Common 0x1FA87, # .. 0x1FA8F ; Unknown - 0x1FA90, # .. 0x1FAA8 ; Common - 0x1FAA9, # .. 0x1FAAF ; Unknown - 0x1FAB0, # .. 0x1FAB6 ; Common - 0x1FAB7, # .. 0x1FABF ; Unknown - 0x1FAC0, # .. 0x1FAC2 ; Common - 0x1FAC3, # .. 0x1FACF ; Unknown - 0x1FAD0, # .. 0x1FAD6 ; Common - 0x1FAD7, # .. 0x1FAFF ; Unknown + 0x1FA90, # .. 0x1FAAC ; Common + 0x1FAAD, # .. 0x1FAAF ; Unknown + 0x1FAB0, # .. 0x1FABA ; Common + 0x1FABB, # .. 0x1FABF ; Unknown + 0x1FAC0, # .. 0x1FAC5 ; Common + 0x1FAC6, # .. 0x1FACF ; Unknown + 0x1FAD0, # .. 0x1FAD9 ; Common + 0x1FADA, # .. 0x1FADF ; Unknown + 0x1FAE0, # .. 0x1FAE7 ; Common + 0x1FAE8, # .. 0x1FAEF ; Unknown + 0x1FAF0, # .. 0x1FAF6 ; Common + 0x1FAF7, # .. 0x1FAFF ; Unknown 0x1FB00, # .. 0x1FB92 ; Common 0x1FB93, # .. 0x1FB93 ; Unknown 0x1FB94, # .. 0x1FBCA ; Common 0x1FBCB, # .. 0x1FBEF ; Unknown 0x1FBF0, # .. 0x1FBF9 ; Common 0x1FBFA, # .. 0x1FFFF ; Unknown - 0x20000, # .. 0x2A6DD ; Han - 0x2A6DE, # .. 0x2A6FF ; Unknown - 0x2A700, # .. 0x2B734 ; Han - 0x2B735, # .. 0x2B73F ; Unknown + 0x20000, # .. 0x2A6DF ; Han + 0x2A6E0, # .. 0x2A6FF ; Unknown + 0x2A700, # .. 0x2B738 ; Han + 0x2B739, # .. 0x2B73F ; Unknown 0x2B740, # .. 0x2B81D ; Han 0x2B81E, # .. 0x2B81F ; Unknown 0x2B820, # .. 0x2CEA1 ; Han @@ -1674,9 +1725,7 @@ 'Zyyy', # 060C..060C ; Common 'Arab', # 060D..061A ; Arabic 'Zyyy', # 061B..061B ; Common - 'Arab', # 061C..061C ; Arabic - 'Zzzz', # 061D..061D ; Unknown - 'Arab', # 061E..061E ; Arabic + 'Arab', # 061C..061E ; Arabic 'Zyyy', # 061F..061F ; Common 'Arab', # 0620..063F ; Arabic 'Zyyy', # 0640..0640 ; Common @@ -1707,12 +1756,12 @@ 'Mand', # 085E..085E ; Mandaic 'Zzzz', # 085F..085F ; Unknown 'Syrc', # 0860..086A ; Syriac - 'Zzzz', # 086B..089F ; Unknown - 'Arab', # 08A0..08B4 ; Arabic - 'Zzzz', # 08B5..08B5 ; Unknown - 'Arab', # 08B6..08C7 ; Arabic - 'Zzzz', # 08C8..08D2 ; Unknown - 'Arab', # 08D3..08E1 ; Arabic + 'Zzzz', # 086B..086F ; Unknown + 'Arab', # 0870..088E ; Arabic + 'Zzzz', # 088F..088F ; Unknown + 'Arab', # 0890..0891 ; Arabic + 'Zzzz', # 0892..0897 ; Unknown + 'Arab', # 0898..08E1 ; Arabic 'Zyyy', # 08E2..08E2 ; Common 'Arab', # 08E3..08FF ; Arabic 'Deva', # 0900..0950 ; Devanagari @@ -1875,8 +1924,8 @@ 'Telu', # 0C12..0C28 ; Telugu 'Zzzz', # 0C29..0C29 ; Unknown 'Telu', # 0C2A..0C39 ; Telugu - 'Zzzz', # 0C3A..0C3C ; Unknown - 'Telu', # 0C3D..0C44 ; Telugu + 'Zzzz', # 0C3A..0C3B ; Unknown + 'Telu', # 0C3C..0C44 ; Telugu 'Zzzz', # 0C45..0C45 ; Unknown 'Telu', # 0C46..0C48 ; Telugu 'Zzzz', # 0C49..0C49 ; Unknown @@ -1885,7 +1934,9 @@ 'Telu', # 0C55..0C56 ; Telugu 'Zzzz', # 0C57..0C57 ; Unknown 'Telu', # 0C58..0C5A ; Telugu - 'Zzzz', # 0C5B..0C5F ; Unknown + 'Zzzz', # 0C5B..0C5C ; Unknown + 'Telu', # 0C5D..0C5D ; Telugu + 'Zzzz', # 0C5E..0C5F ; Unknown 'Telu', # 0C60..0C63 ; Telugu 'Zzzz', # 0C64..0C65 ; Unknown 'Telu', # 0C66..0C6F ; Telugu @@ -1908,8 +1959,8 @@ 'Knda', # 0CCA..0CCD ; Kannada 'Zzzz', # 0CCE..0CD4 ; Unknown 'Knda', # 0CD5..0CD6 ; Kannada - 'Zzzz', # 0CD7..0CDD ; Unknown - 'Knda', # 0CDE..0CDE ; Kannada + 'Zzzz', # 0CD7..0CDC ; Unknown + 'Knda', # 0CDD..0CDE ; Kannada 'Zzzz', # 0CDF..0CDF ; Unknown 'Knda', # 0CE0..0CE3 ; Kannada 'Zzzz', # 0CE4..0CE5 ; Unknown @@ -2054,10 +2105,9 @@ 'Zyyy', # 16EB..16ED ; Common 'Runr', # 16EE..16F8 ; Runic 'Zzzz', # 16F9..16FF ; Unknown - 'Tglg', # 1700..170C ; Tagalog - 'Zzzz', # 170D..170D ; Unknown - 'Tglg', # 170E..1714 ; Tagalog - 'Zzzz', # 1715..171F ; Unknown + 'Tglg', # 1700..1715 ; Tagalog + 'Zzzz', # 1716..171E ; Unknown + 'Tglg', # 171F..171F ; Tagalog 'Hano', # 1720..1734 ; Hanunoo 'Zyyy', # 1735..1736 ; Common 'Zzzz', # 1737..173F ; Unknown @@ -2079,9 +2129,7 @@ 'Zyyy', # 1802..1803 ; Common 'Mong', # 1804..1804 ; Mongolian 'Zyyy', # 1805..1805 ; Common - 'Mong', # 1806..180E ; Mongolian - 'Zzzz', # 180F..180F ; Unknown - 'Mong', # 1810..1819 ; Mongolian + 'Mong', # 1806..1819 ; Mongolian 'Zzzz', # 181A..181F ; Unknown 'Mong', # 1820..1878 ; Mongolian 'Zzzz', # 1879..187F ; Unknown @@ -2123,12 +2171,12 @@ 'Zzzz', # 1A9A..1A9F ; Unknown 'Lana', # 1AA0..1AAD ; Tai_Tham 'Zzzz', # 1AAE..1AAF ; Unknown - 'Zinh', # 1AB0..1AC0 ; Inherited - 'Zzzz', # 1AC1..1AFF ; Unknown - 'Bali', # 1B00..1B4B ; Balinese - 'Zzzz', # 1B4C..1B4F ; Unknown - 'Bali', # 1B50..1B7C ; Balinese - 'Zzzz', # 1B7D..1B7F ; Unknown + 'Zinh', # 1AB0..1ACE ; Inherited + 'Zzzz', # 1ACF..1AFF ; Unknown + 'Bali', # 1B00..1B4C ; Balinese + 'Zzzz', # 1B4D..1B4F ; Unknown + 'Bali', # 1B50..1B7E ; Balinese + 'Zzzz', # 1B7F..1B7F ; Unknown 'Sund', # 1B80..1BBF ; Sundanese 'Batk', # 1BC0..1BF3 ; Batak 'Zzzz', # 1BF4..1BFB ; Unknown @@ -2170,9 +2218,7 @@ 'Cyrl', # 1D78..1D78 ; Cyrillic 'Latn', # 1D79..1DBE ; Latin 'Grek', # 1DBF..1DBF ; Greek - 'Zinh', # 1DC0..1DF9 ; Inherited - 'Zzzz', # 1DFA..1DFA ; Unknown - 'Zinh', # 1DFB..1DFF ; Inherited + 'Zinh', # 1DC0..1DFF ; Inherited 'Latn', # 1E00..1EFF ; Latin 'Grek', # 1F00..1F15 ; Greek 'Zzzz', # 1F16..1F17 ; Unknown @@ -2219,8 +2265,8 @@ 'Zzzz', # 208F..208F ; Unknown 'Latn', # 2090..209C ; Latin 'Zzzz', # 209D..209F ; Unknown - 'Zyyy', # 20A0..20BF ; Common - 'Zzzz', # 20C0..20CF ; Unknown + 'Zyyy', # 20A0..20C0 ; Common + 'Zzzz', # 20C1..20CF ; Unknown 'Zinh', # 20D0..20F0 ; Inherited 'Zzzz', # 20F1..20FF ; Unknown 'Zyyy', # 2100..2125 ; Common @@ -2246,10 +2292,7 @@ 'Zyyy', # 2B76..2B95 ; Common 'Zzzz', # 2B96..2B96 ; Unknown 'Zyyy', # 2B97..2BFF ; Common - 'Glag', # 2C00..2C2E ; Glagolitic - 'Zzzz', # 2C2F..2C2F ; Unknown - 'Glag', # 2C30..2C5E ; Glagolitic - 'Zzzz', # 2C5F..2C5F ; Unknown + 'Glag', # 2C00..2C5F ; Glagolitic 'Latn', # 2C60..2C7F ; Latin 'Copt', # 2C80..2CF3 ; Coptic 'Zzzz', # 2CF4..2CF8 ; Unknown @@ -2284,8 +2327,8 @@ 'Ethi', # 2DD8..2DDE ; Ethiopic 'Zzzz', # 2DDF..2DDF ; Unknown 'Cyrl', # 2DE0..2DFF ; Cyrillic - 'Zyyy', # 2E00..2E52 ; Common - 'Zzzz', # 2E53..2E7F ; Unknown + 'Zyyy', # 2E00..2E5D ; Common + 'Zzzz', # 2E5E..2E7F ; Unknown 'Hani', # 2E80..2E99 ; Han 'Zzzz', # 2E9A..2E9A ; Unknown 'Hani', # 2E9B..2EF3 ; Han @@ -2336,8 +2379,7 @@ 'Zyyy', # 3358..33FF ; Common 'Hani', # 3400..4DBF ; Han 'Zyyy', # 4DC0..4DFF ; Common - 'Hani', # 4E00..9FFC ; Han - 'Zzzz', # 9FFD..9FFF ; Unknown + 'Hani', # 4E00..9FFF ; Han 'Yiii', # A000..A48C ; Yi 'Zzzz', # A48D..A48F ; Unknown 'Yiii', # A490..A4C6 ; Yi @@ -2351,11 +2393,15 @@ 'Zyyy', # A700..A721 ; Common 'Latn', # A722..A787 ; Latin 'Zyyy', # A788..A78A ; Common - 'Latn', # A78B..A7BF ; Latin - 'Zzzz', # A7C0..A7C1 ; Unknown - 'Latn', # A7C2..A7CA ; Latin - 'Zzzz', # A7CB..A7F4 ; Unknown - 'Latn', # A7F5..A7FF ; Latin + 'Latn', # A78B..A7CA ; Latin + 'Zzzz', # A7CB..A7CF ; Unknown + 'Latn', # A7D0..A7D1 ; Latin + 'Zzzz', # A7D2..A7D2 ; Unknown + 'Latn', # A7D3..A7D3 ; Latin + 'Zzzz', # A7D4..A7D4 ; Unknown + 'Latn', # A7D5..A7D9 ; Latin + 'Zzzz', # A7DA..A7F1 ; Unknown + 'Latn', # A7F2..A7FF ; Latin 'Sylo', # A800..A82C ; Syloti_Nagri 'Zzzz', # A82D..A82F ; Unknown 'Zyyy', # A830..A839 ; Common @@ -2443,17 +2489,17 @@ 'Hebr', # FB43..FB44 ; Hebrew 'Zzzz', # FB45..FB45 ; Unknown 'Hebr', # FB46..FB4F ; Hebrew - 'Arab', # FB50..FBC1 ; Arabic - 'Zzzz', # FBC2..FBD2 ; Unknown + 'Arab', # FB50..FBC2 ; Arabic + 'Zzzz', # FBC3..FBD2 ; Unknown 'Arab', # FBD3..FD3D ; Arabic 'Zyyy', # FD3E..FD3F ; Common - 'Zzzz', # FD40..FD4F ; Unknown - 'Arab', # FD50..FD8F ; Arabic + 'Arab', # FD40..FD8F ; Arabic 'Zzzz', # FD90..FD91 ; Unknown 'Arab', # FD92..FDC7 ; Arabic - 'Zzzz', # FDC8..FDEF ; Unknown - 'Arab', # FDF0..FDFD ; Arabic - 'Zzzz', # FDFE..FDFF ; Unknown + 'Zzzz', # FDC8..FDCE ; Unknown + 'Arab', # FDCF..FDCF ; Arabic + 'Zzzz', # FDD0..FDEF ; Unknown + 'Arab', # FDF0..FDFF ; Arabic 'Zinh', # FE00..FE0F ; Inherited 'Zyyy', # FE10..FE19 ; Common 'Zzzz', # FE1A..FE1F ; Unknown @@ -2560,13 +2606,34 @@ 'Aghb', # 10530..10563 ; Caucasian_Albanian 'Zzzz', # 10564..1056E ; Unknown 'Aghb', # 1056F..1056F ; Caucasian_Albanian - 'Zzzz', # 10570..105FF ; Unknown + 'Vith', # 10570..1057A ; Vithkuqi + 'Zzzz', # 1057B..1057B ; Unknown + 'Vith', # 1057C..1058A ; Vithkuqi + 'Zzzz', # 1058B..1058B ; Unknown + 'Vith', # 1058C..10592 ; Vithkuqi + 'Zzzz', # 10593..10593 ; Unknown + 'Vith', # 10594..10595 ; Vithkuqi + 'Zzzz', # 10596..10596 ; Unknown + 'Vith', # 10597..105A1 ; Vithkuqi + 'Zzzz', # 105A2..105A2 ; Unknown + 'Vith', # 105A3..105B1 ; Vithkuqi + 'Zzzz', # 105B2..105B2 ; Unknown + 'Vith', # 105B3..105B9 ; Vithkuqi + 'Zzzz', # 105BA..105BA ; Unknown + 'Vith', # 105BB..105BC ; Vithkuqi + 'Zzzz', # 105BD..105FF ; Unknown 'Lina', # 10600..10736 ; Linear_A 'Zzzz', # 10737..1073F ; Unknown 'Lina', # 10740..10755 ; Linear_A 'Zzzz', # 10756..1075F ; Unknown 'Lina', # 10760..10767 ; Linear_A - 'Zzzz', # 10768..107FF ; Unknown + 'Zzzz', # 10768..1077F ; Unknown + 'Latn', # 10780..10785 ; Latin + 'Zzzz', # 10786..10786 ; Unknown + 'Latn', # 10787..107B0 ; Latin + 'Zzzz', # 107B1..107B1 ; Unknown + 'Latn', # 107B2..107BA ; Latin + 'Zzzz', # 107BB..107FF ; Unknown 'Cprt', # 10800..10805 ; Cypriot 'Zzzz', # 10806..10807 ; Unknown 'Cprt', # 10808..10808 ; Cypriot @@ -2664,18 +2731,20 @@ 'Sogo', # 10F00..10F27 ; Old_Sogdian 'Zzzz', # 10F28..10F2F ; Unknown 'Sogd', # 10F30..10F59 ; Sogdian - 'Zzzz', # 10F5A..10FAF ; Unknown + 'Zzzz', # 10F5A..10F6F ; Unknown + 'Ougr', # 10F70..10F89 ; Old_Uyghur + 'Zzzz', # 10F8A..10FAF ; Unknown 'Chrs', # 10FB0..10FCB ; Chorasmian 'Zzzz', # 10FCC..10FDF ; Unknown 'Elym', # 10FE0..10FF6 ; Elymaic 'Zzzz', # 10FF7..10FFF ; Unknown 'Brah', # 11000..1104D ; Brahmi 'Zzzz', # 1104E..11051 ; Unknown - 'Brah', # 11052..1106F ; Brahmi - 'Zzzz', # 11070..1107E ; Unknown + 'Brah', # 11052..11075 ; Brahmi + 'Zzzz', # 11076..1107E ; Unknown 'Brah', # 1107F..1107F ; Brahmi - 'Kthi', # 11080..110C1 ; Kaithi - 'Zzzz', # 110C2..110CC ; Unknown + 'Kthi', # 11080..110C2 ; Kaithi + 'Zzzz', # 110C3..110CC ; Unknown 'Kthi', # 110CD..110CD ; Kaithi 'Zzzz', # 110CE..110CF ; Unknown 'Sora', # 110D0..110E8 ; Sora_Sompeng @@ -2759,16 +2828,16 @@ 'Zzzz', # 1165A..1165F ; Unknown 'Mong', # 11660..1166C ; Mongolian 'Zzzz', # 1166D..1167F ; Unknown - 'Takr', # 11680..116B8 ; Takri - 'Zzzz', # 116B9..116BF ; Unknown + 'Takr', # 11680..116B9 ; Takri + 'Zzzz', # 116BA..116BF ; Unknown 'Takr', # 116C0..116C9 ; Takri 'Zzzz', # 116CA..116FF ; Unknown 'Ahom', # 11700..1171A ; Ahom 'Zzzz', # 1171B..1171C ; Unknown 'Ahom', # 1171D..1172B ; Ahom 'Zzzz', # 1172C..1172F ; Unknown - 'Ahom', # 11730..1173F ; Ahom - 'Zzzz', # 11740..117FF ; Unknown + 'Ahom', # 11730..11746 ; Ahom + 'Zzzz', # 11747..117FF ; Unknown 'Dogr', # 11800..1183B ; Dogra 'Zzzz', # 1183C..1189F ; Unknown 'Wara', # 118A0..118F2 ; Warang_Citi @@ -2799,7 +2868,8 @@ 'Zanb', # 11A00..11A47 ; Zanabazar_Square 'Zzzz', # 11A48..11A4F ; Unknown 'Soyo', # 11A50..11AA2 ; Soyombo - 'Zzzz', # 11AA3..11ABF ; Unknown + 'Zzzz', # 11AA3..11AAF ; Unknown + 'Cans', # 11AB0..11ABF ; Canadian_Aboriginal 'Pauc', # 11AC0..11AF8 ; Pau_Cin_Hau 'Zzzz', # 11AF9..11BFF ; Unknown 'Bhks', # 11C00..11C08 ; Bhaiksuki @@ -2856,7 +2926,9 @@ 'Xsux', # 12470..12474 ; Cuneiform 'Zzzz', # 12475..1247F ; Unknown 'Xsux', # 12480..12543 ; Cuneiform - 'Zzzz', # 12544..12FFF ; Unknown + 'Zzzz', # 12544..12F8F ; Unknown + 'Cpmn', # 12F90..12FF2 ; Cypro_Minoan + 'Zzzz', # 12FF3..12FFF ; Unknown 'Egyp', # 13000..1342E ; Egyptian_Hieroglyphs 'Zzzz', # 1342F..1342F ; Unknown 'Egyp', # 13430..13438 ; Egyptian_Hieroglyphs @@ -2870,7 +2942,10 @@ 'Mroo', # 16A60..16A69 ; Mro 'Zzzz', # 16A6A..16A6D ; Unknown 'Mroo', # 16A6E..16A6F ; Mro - 'Zzzz', # 16A70..16ACF ; Unknown + 'Tnsa', # 16A70..16ABE ; Tangsa + 'Zzzz', # 16ABF..16ABF ; Unknown + 'Tnsa', # 16AC0..16AC9 ; Tangsa + 'Zzzz', # 16ACA..16ACF ; Unknown 'Bass', # 16AD0..16AED ; Bassa_Vah 'Zzzz', # 16AEE..16AEF ; Unknown 'Bass', # 16AF0..16AF5 ; Bassa_Vah @@ -2895,7 +2970,7 @@ 'Zzzz', # 16FA0..16FDF ; Unknown 'Tang', # 16FE0..16FE0 ; Tangut 'Nshu', # 16FE1..16FE1 ; Nushu - 'Zyyy', # 16FE2..16FE3 ; Common + 'Hani', # 16FE2..16FE3 ; Han 'Kits', # 16FE4..16FE4 ; Khitan_Small_Script 'Zzzz', # 16FE5..16FEF ; Unknown 'Hani', # 16FF0..16FF1 ; Han @@ -2906,10 +2981,17 @@ 'Kits', # 18B00..18CD5 ; Khitan_Small_Script 'Zzzz', # 18CD6..18CFF ; Unknown 'Tang', # 18D00..18D08 ; Tangut - 'Zzzz', # 18D09..1AFFF ; Unknown + 'Zzzz', # 18D09..1AFEF ; Unknown + 'Kana', # 1AFF0..1AFF3 ; Katakana + 'Zzzz', # 1AFF4..1AFF4 ; Unknown + 'Kana', # 1AFF5..1AFFB ; Katakana + 'Zzzz', # 1AFFC..1AFFC ; Unknown + 'Kana', # 1AFFD..1AFFE ; Katakana + 'Zzzz', # 1AFFF..1AFFF ; Unknown 'Kana', # 1B000..1B000 ; Katakana - 'Hira', # 1B001..1B11E ; Hiragana - 'Zzzz', # 1B11F..1B14F ; Unknown + 'Hira', # 1B001..1B11F ; Hiragana + 'Kana', # 1B120..1B122 ; Katakana + 'Zzzz', # 1B123..1B14F ; Unknown 'Hira', # 1B150..1B152 ; Hiragana 'Zzzz', # 1B153..1B163 ; Unknown 'Kana', # 1B164..1B167 ; Katakana @@ -2926,7 +3008,13 @@ 'Zzzz', # 1BC9A..1BC9B ; Unknown 'Dupl', # 1BC9C..1BC9F ; Duployan 'Zyyy', # 1BCA0..1BCA3 ; Common - 'Zzzz', # 1BCA4..1CFFF ; Unknown + 'Zzzz', # 1BCA4..1CEFF ; Unknown + 'Zinh', # 1CF00..1CF2D ; Inherited + 'Zzzz', # 1CF2E..1CF2F ; Unknown + 'Zinh', # 1CF30..1CF46 ; Inherited + 'Zzzz', # 1CF47..1CF4F ; Unknown + 'Zyyy', # 1CF50..1CFC3 ; Common + 'Zzzz', # 1CFC4..1CFFF ; Unknown 'Zyyy', # 1D000..1D0F5 ; Common 'Zzzz', # 1D0F6..1D0FF ; Unknown 'Zyyy', # 1D100..1D126 ; Common @@ -2939,8 +3027,8 @@ 'Zinh', # 1D185..1D18B ; Inherited 'Zyyy', # 1D18C..1D1A9 ; Common 'Zinh', # 1D1AA..1D1AD ; Inherited - 'Zyyy', # 1D1AE..1D1E8 ; Common - 'Zzzz', # 1D1E9..1D1FF ; Unknown + 'Zyyy', # 1D1AE..1D1EA ; Common + 'Zzzz', # 1D1EB..1D1FF ; Unknown 'Grek', # 1D200..1D245 ; Greek 'Zzzz', # 1D246..1D2DF ; Unknown 'Zyyy', # 1D2E0..1D2F3 ; Common @@ -2995,7 +3083,9 @@ 'Sgnw', # 1DA9B..1DA9F ; SignWriting 'Zzzz', # 1DAA0..1DAA0 ; Unknown 'Sgnw', # 1DAA1..1DAAF ; SignWriting - 'Zzzz', # 1DAB0..1DFFF ; Unknown + 'Zzzz', # 1DAB0..1DEFF ; Unknown + 'Latn', # 1DF00..1DF1E ; Latin + 'Zzzz', # 1DF1F..1DFFF ; Unknown 'Glag', # 1E000..1E006 ; Glagolitic 'Zzzz', # 1E007..1E007 ; Unknown 'Glag', # 1E008..1E018 ; Glagolitic @@ -3013,11 +3103,21 @@ 'Hmnp', # 1E140..1E149 ; Nyiakeng_Puachue_Hmong 'Zzzz', # 1E14A..1E14D ; Unknown 'Hmnp', # 1E14E..1E14F ; Nyiakeng_Puachue_Hmong - 'Zzzz', # 1E150..1E2BF ; Unknown + 'Zzzz', # 1E150..1E28F ; Unknown + 'Toto', # 1E290..1E2AE ; Toto + 'Zzzz', # 1E2AF..1E2BF ; Unknown 'Wcho', # 1E2C0..1E2F9 ; Wancho 'Zzzz', # 1E2FA..1E2FE ; Unknown 'Wcho', # 1E2FF..1E2FF ; Wancho - 'Zzzz', # 1E300..1E7FF ; Unknown + 'Zzzz', # 1E300..1E7DF ; Unknown + 'Ethi', # 1E7E0..1E7E6 ; Ethiopic + 'Zzzz', # 1E7E7..1E7E7 ; Unknown + 'Ethi', # 1E7E8..1E7EB ; Ethiopic + 'Zzzz', # 1E7EC..1E7EC ; Unknown + 'Ethi', # 1E7ED..1E7EE ; Ethiopic + 'Zzzz', # 1E7EF..1E7EF ; Unknown + 'Ethi', # 1E7F0..1E7FE ; Ethiopic + 'Zzzz', # 1E7FF..1E7FF ; Unknown 'Mend', # 1E800..1E8C4 ; Mende_Kikakui 'Zzzz', # 1E8C5..1E8C6 ; Unknown 'Mend', # 1E8C7..1E8D6 ; Mende_Kikakui @@ -3127,8 +3227,8 @@ 'Zyyy', # 1F260..1F265 ; Common 'Zzzz', # 1F266..1F2FF ; Unknown 'Zyyy', # 1F300..1F6D7 ; Common - 'Zzzz', # 1F6D8..1F6DF ; Unknown - 'Zyyy', # 1F6E0..1F6EC ; Common + 'Zzzz', # 1F6D8..1F6DC ; Unknown + 'Zyyy', # 1F6DD..1F6EC ; Common 'Zzzz', # 1F6ED..1F6EF ; Unknown 'Zyyy', # 1F6F0..1F6FC ; Common 'Zzzz', # 1F6FD..1F6FF ; Unknown @@ -3137,7 +3237,9 @@ 'Zyyy', # 1F780..1F7D8 ; Common 'Zzzz', # 1F7D9..1F7DF ; Unknown 'Zyyy', # 1F7E0..1F7EB ; Common - 'Zzzz', # 1F7EC..1F7FF ; Unknown + 'Zzzz', # 1F7EC..1F7EF ; Unknown + 'Zyyy', # 1F7F0..1F7F0 ; Common + 'Zzzz', # 1F7F1..1F7FF ; Unknown 'Zyyy', # 1F800..1F80B ; Common 'Zzzz', # 1F80C..1F80F ; Unknown 'Zyyy', # 1F810..1F847 ; Common @@ -3150,38 +3252,38 @@ 'Zzzz', # 1F8AE..1F8AF ; Unknown 'Zyyy', # 1F8B0..1F8B1 ; Common 'Zzzz', # 1F8B2..1F8FF ; Unknown - 'Zyyy', # 1F900..1F978 ; Common - 'Zzzz', # 1F979..1F979 ; Unknown - 'Zyyy', # 1F97A..1F9CB ; Common - 'Zzzz', # 1F9CC..1F9CC ; Unknown - 'Zyyy', # 1F9CD..1FA53 ; Common + 'Zyyy', # 1F900..1FA53 ; Common 'Zzzz', # 1FA54..1FA5F ; Unknown 'Zyyy', # 1FA60..1FA6D ; Common 'Zzzz', # 1FA6E..1FA6F ; Unknown 'Zyyy', # 1FA70..1FA74 ; Common 'Zzzz', # 1FA75..1FA77 ; Unknown - 'Zyyy', # 1FA78..1FA7A ; Common - 'Zzzz', # 1FA7B..1FA7F ; Unknown + 'Zyyy', # 1FA78..1FA7C ; Common + 'Zzzz', # 1FA7D..1FA7F ; Unknown 'Zyyy', # 1FA80..1FA86 ; Common 'Zzzz', # 1FA87..1FA8F ; Unknown - 'Zyyy', # 1FA90..1FAA8 ; Common - 'Zzzz', # 1FAA9..1FAAF ; Unknown - 'Zyyy', # 1FAB0..1FAB6 ; Common - 'Zzzz', # 1FAB7..1FABF ; Unknown - 'Zyyy', # 1FAC0..1FAC2 ; Common - 'Zzzz', # 1FAC3..1FACF ; Unknown - 'Zyyy', # 1FAD0..1FAD6 ; Common - 'Zzzz', # 1FAD7..1FAFF ; Unknown + 'Zyyy', # 1FA90..1FAAC ; Common + 'Zzzz', # 1FAAD..1FAAF ; Unknown + 'Zyyy', # 1FAB0..1FABA ; Common + 'Zzzz', # 1FABB..1FABF ; Unknown + 'Zyyy', # 1FAC0..1FAC5 ; Common + 'Zzzz', # 1FAC6..1FACF ; Unknown + 'Zyyy', # 1FAD0..1FAD9 ; Common + 'Zzzz', # 1FADA..1FADF ; Unknown + 'Zyyy', # 1FAE0..1FAE7 ; Common + 'Zzzz', # 1FAE8..1FAEF ; Unknown + 'Zyyy', # 1FAF0..1FAF6 ; Common + 'Zzzz', # 1FAF7..1FAFF ; Unknown 'Zyyy', # 1FB00..1FB92 ; Common 'Zzzz', # 1FB93..1FB93 ; Unknown 'Zyyy', # 1FB94..1FBCA ; Common 'Zzzz', # 1FBCB..1FBEF ; Unknown 'Zyyy', # 1FBF0..1FBF9 ; Common 'Zzzz', # 1FBFA..1FFFF ; Unknown - 'Hani', # 20000..2A6DD ; Han - 'Zzzz', # 2A6DE..2A6FF ; Unknown - 'Hani', # 2A700..2B734 ; Han - 'Zzzz', # 2B735..2B73F ; Unknown + 'Hani', # 20000..2A6DF ; Han + 'Zzzz', # 2A6E0..2A6FF ; Unknown + 'Hani', # 2A700..2B738 ; Han + 'Zzzz', # 2B739..2B73F ; Unknown 'Hani', # 2B740..2B81D ; Han 'Zzzz', # 2B81E..2B81F ; Unknown 'Hani', # 2B820..2CEA1 ; Han @@ -3226,6 +3328,7 @@ 'Cher': 'Cherokee', 'Chrs': 'Chorasmian', 'Copt': 'Coptic', + 'Cpmn': 'Cypro_Minoan', 'Cprt': 'Cypriot', 'Cyrl': 'Cyrillic', 'Deva': 'Devanagari', @@ -3305,6 +3408,7 @@ 'Orya': 'Oriya', 'Osge': 'Osage', 'Osma': 'Osmanya', + 'Ougr': 'Old_Uyghur', 'Palm': 'Palmyrene', 'Pauc': 'Pau_Cin_Hau', 'Perm': 'Old_Permic', @@ -3347,8 +3451,11 @@ 'Thai': 'Thai', 'Tibt': 'Tibetan', 'Tirh': 'Tirhuta', + 'Tnsa': 'Tangsa', + 'Toto': 'Toto', 'Ugar': 'Ugaritic', 'Vaii': 'Vai', + 'Vith': 'Vithkuqi', 'Wara': 'Warang_Citi', 'Wcho': 'Wancho', 'Xpeo': 'Old_Persian', diff --git a/Lib/fontTools/unicodedata/__init__.py b/Lib/fontTools/unicodedata/__init__.py index 1c1210f970..f5e5132c03 100644 --- a/Lib/fontTools/unicodedata/__init__.py +++ b/Lib/fontTools/unicodedata/__init__.py @@ -73,7 +73,7 @@ def script_extension(char): >>> script_extension("a") == {'Latn'} True - >>> script_extension(chr(0x060C)) == {'Rohg', 'Syrc', 'Yezi', 'Arab', 'Thaa'} + >>> script_extension(chr(0x060C)) == {'Rohg', 'Syrc', 'Yezi', 'Arab', 'Thaa', 'Nkoo'} True >>> script_extension(chr(0x10FFFF)) == {'Zzzz'} True diff --git a/Tests/unicodedata_test.py b/Tests/unicodedata_test.py index 05f7de684b..2ae11beffe 100644 --- a/Tests/unicodedata_test.py +++ b/Tests/unicodedata_test.py @@ -198,7 +198,7 @@ def test_block(): assert unicodedata.block("\x7F") == "Basic Latin" assert unicodedata.block("\x80") == "Latin-1 Supplement" assert unicodedata.block("\u1c90") == "Georgian Extended" - assert unicodedata.block("\u0870") == "No_Block" + assert unicodedata.block("\u0870") == "Arabic Extended-B" def test_ot_tags_from_script():