Skip to content

Commit 9239e7e

Browse files
committed
data: update to UCD 16
1 parent 7691e49 commit 9239e7e

18 files changed

+2149
-625
lines changed

regex-automata/src/nfa/thompson/compiler.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -230,15 +230,15 @@ impl Config {
230230
/// # if cfg!(miri) { return Ok(()); } // miri takes too long
231231
/// use regex_automata::nfa::thompson::NFA;
232232
///
233-
/// // 300KB isn't enough!
233+
/// // 400KB isn't enough!
234234
/// NFA::compiler()
235-
/// .configure(NFA::config().nfa_size_limit(Some(300_000)))
235+
/// .configure(NFA::config().nfa_size_limit(Some(400_000)))
236236
/// .build(r"\w{20}")
237237
/// .unwrap_err();
238238
///
239-
/// // ... but 400KB probably is.
239+
/// // ... but 500KB probably is.
240240
/// let nfa = NFA::compiler()
241-
/// .configure(NFA::config().nfa_size_limit(Some(400_000)))
241+
/// .configure(NFA::config().nfa_size_limit(Some(500_000)))
242242
/// .build(r"\w{20}")?;
243243
///
244244
/// assert_eq!(nfa.pattern_len(), 1);

regex-automata/src/util/unicode_data/perl_word.rs

Lines changed: 45 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
22
//
3-
// ucd-generate perl-word tmp/ucd-15.0.0/ --chars
3+
// ucd-generate perl-word ucd-16.0.0 --chars
44
//
5-
// Unicode version: 15.0.0.
5+
// Unicode version: 16.0.0.
66
//
7-
// ucd-generate 0.2.15 is available on crates.io.
7+
// ucd-generate 0.3.1 is available on crates.io.
88

99
pub const PERL_WORD: &'static [(char, char)] = &[
1010
('0', '9'),
@@ -59,7 +59,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
5959
('ࡠ', 'ࡪ'),
6060
('ࡰ', 'ࢇ'),
6161
('ࢉ', 'ࢎ'),
62-
('\u{898}', '\u{8e1}'),
62+
('\u{897}', '\u{8e1}'),
6363
('\u{8e3}', '\u{963}'),
6464
('०', '९'),
6565
('ॱ', 'ঃ'),
@@ -158,8 +158,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[
158158
('ಪ', 'ಳ'),
159159
('ವ', 'ಹ'),
160160
('\u{cbc}', 'ೄ'),
161-
('\u{cc6}', 'ೈ'),
162-
('ೊ', '\u{ccd}'),
161+
('\u{cc6}', '\u{cc8}'),
162+
('\u{cca}', '\u{ccd}'),
163163
('\u{cd5}', '\u{cd6}'),
164164
('ೝ', 'ೞ'),
165165
('ೠ', '\u{ce3}'),
@@ -243,8 +243,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[
243243
('ᚁ', 'ᚚ'),
244244
('ᚠ', 'ᛪ'),
245245
('ᛮ', 'ᛸ'),
246-
('ᜀ', '᜕'),
247-
('ᜟ', '᜴'),
246+
('ᜀ', '\u{1715}'),
247+
('ᜟ', '\u{1734}'),
248248
('ᝀ', '\u{1753}'),
249249
('ᝠ', 'ᝬ'),
250250
('ᝮ', 'ᝰ'),
@@ -276,11 +276,11 @@ pub const PERL_WORD: &'static [(char, char)] = &[
276276
('\u{1b00}', 'ᭌ'),
277277
('᭐', '᭙'),
278278
('\u{1b6b}', '\u{1b73}'),
279-
('\u{1b80}', '᯳'),
279+
('\u{1b80}', '\u{1bf3}'),
280280
('ᰀ', '\u{1c37}'),
281281
('᱀', '᱉'),
282282
('ᱍ', 'ᱽ'),
283-
('ᲀ', '\u{1c88}'),
283+
('ᲀ', '\u{1c8a}'),
284284
('Ა', 'Ჺ'),
285285
('Ჽ', 'Ჿ'),
286286
('\u{1cd0}', '\u{1cd2}'),
@@ -367,10 +367,10 @@ pub const PERL_WORD: &'static [(char, char)] = &[
367367
('ꙿ', '\u{a6f1}'),
368368
('ꜗ', 'ꜟ'),
369369
('Ꜣ', 'ꞈ'),
370-
('Ꞌ', '\u{a7ca}'),
370+
('Ꞌ', '\u{a7cd}'),
371371
('Ꟑ', 'ꟑ'),
372372
('ꟓ', 'ꟓ'),
373-
('ꟕ', '\u{a7d9}'),
373+
('ꟕ', '\u{a7dc}'),
374374
('ꟲ', 'ꠧ'),
375375
('\u{a82c}', '\u{a82c}'),
376376
('ꡀ', 'ꡳ'),
@@ -379,9 +379,9 @@ pub const PERL_WORD: &'static [(char, char)] = &[
379379
('\u{a8e0}', 'ꣷ'),
380380
('ꣻ', 'ꣻ'),
381381
('ꣽ', '\u{a92d}'),
382-
('ꤰ', '꥓'),
382+
('ꤰ', '\u{a953}'),
383383
('ꥠ', 'ꥼ'),
384-
('\u{a980}', '꧀'),
384+
('\u{a980}', '\u{a9c0}'),
385385
('ꧏ', '꧙'),
386386
('ꧠ', 'ꧾ'),
387387
('ꨀ', '\u{aa36}'),
@@ -468,6 +468,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
468468
('𐖣', '𐖱'),
469469
('𐖳', '𐖹'),
470470
('𐖻', '𐖼'),
471+
('𐗀', '𐗳'),
471472
('𐘀', '𐜶'),
472473
('𐝀', '𐝕'),
473474
('𐝠', '𐝧'),
@@ -508,10 +509,14 @@ pub const PERL_WORD: &'static [(char, char)] = &[
508509
('𐳀', '𐳲'),
509510
('𐴀', '\u{10d27}'),
510511
('𐴰', '𐴹'),
512+
('𐵀', '𐵥'),
513+
('\u{10d69}', '\u{10d6d}'),
514+
('𐵯', '𐶅'),
511515
('𐺀', '𐺩'),
512516
('\u{10eab}', '\u{10eac}'),
513517
('𐺰', '𐺱'),
514-
('\u{10efd}', '𐼜'),
518+
('𐻂', '𐻄'),
519+
('\u{10efc}', '𐼜'),
515520
('𐼧', '𐼧'),
516521
('𐼰', '\u{10f50}'),
517522
('𐽰', '\u{10f85}'),
@@ -551,12 +556,22 @@ pub const PERL_WORD: &'static [(char, char)] = &[
551556
('𑌵', '𑌹'),
552557
('\u{1133b}', '𑍄'),
553558
('𑍇', '𑍈'),
554-
('𑍋', '𑍍'),
559+
('𑍋', '\u{1134d}'),
555560
('𑍐', '𑍐'),
556561
('\u{11357}', '\u{11357}'),
557562
('𑍝', '𑍣'),
558563
('\u{11366}', '\u{1136c}'),
559564
('\u{11370}', '\u{11374}'),
565+
('𑎀', '𑎉'),
566+
('𑎋', '𑎋'),
567+
('𑎎', '𑎎'),
568+
('𑎐', '𑎵'),
569+
('𑎷', '\u{113c0}'),
570+
('\u{113c2}', '\u{113c2}'),
571+
('\u{113c5}', '\u{113c5}'),
572+
('\u{113c7}', '𑏊'),
573+
('𑏌', '𑏓'),
574+
('\u{113e1}', '\u{113e2}'),
560575
('𑐀', '𑑊'),
561576
('𑑐', '𑑙'),
562577
('\u{1145e}', '𑑡'),
@@ -571,6 +586,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
571586
('𑙐', '𑙙'),
572587
('𑚀', '𑚸'),
573588
('𑛀', '𑛉'),
589+
('𑛐', '𑛣'),
574590
('𑜀', '𑜚'),
575591
('\u{1171d}', '\u{1172b}'),
576592
('𑜰', '𑜹'),
@@ -594,6 +610,8 @@ pub const PERL_WORD: &'static [(char, char)] = &[
594610
('𑩐', '\u{11a99}'),
595611
('𑪝', '𑪝'),
596612
('𑪰', '𑫸'),
613+
('𑯀', '𑯠'),
614+
('𑯰', '𑯹'),
597615
('𑰀', '𑰈'),
598616
('𑰊', '\u{11c36}'),
599617
('\u{11c38}', '𑱀'),
@@ -618,15 +636,17 @@ pub const PERL_WORD: &'static [(char, char)] = &[
618636
('\u{11f00}', '𑼐'),
619637
('𑼒', '\u{11f3a}'),
620638
('𑼾', '\u{11f42}'),
621-
('𑽐', '𑽙'),
639+
('𑽐', '\u{11f5a}'),
622640
('𑾰', '𑾰'),
623641
('𒀀', '𒎙'),
624642
('𒐀', '𒑮'),
625643
('𒒀', '𒕃'),
626644
('𒾐', '𒿰'),
627645
('𓀀', '𓐯'),
628646
('\u{13440}', '\u{13455}'),
647+
('𓑠', '𔏺'),
629648
('𔐀', '𔙆'),
649+
('𖄀', '𖄹'),
630650
('𖠀', '𖨸'),
631651
('𖩀', '𖩞'),
632652
('𖩠', '𖩩'),
@@ -639,16 +659,18 @@ pub const PERL_WORD: &'static [(char, char)] = &[
639659
('𖭐', '𖭙'),
640660
('𖭣', '𖭷'),
641661
('𖭽', '𖮏'),
662+
('𖵀', '𖵬'),
663+
('𖵰', '𖵹'),
642664
('𖹀', '𖹿'),
643665
('𖼀', '𖽊'),
644666
('\u{16f4f}', '𖾇'),
645667
('\u{16f8f}', '𖾟'),
646668
('𖿠', '𖿡'),
647669
('𖿣', '\u{16fe4}'),
648-
('𖿰', '𖿱'),
670+
('\u{16ff0}', '\u{16ff1}'),
649671
('𗀀', '𘟷'),
650672
('𘠀', '𘳕'),
651-
('𘴀', '𘴈'),
673+
('𘳿', '𘴈'),
652674
('𚿰', '𚿳'),
653675
('𚿵', '𚿻'),
654676
('𚿽', '𚿾'),
@@ -663,10 +685,11 @@ pub const PERL_WORD: &'static [(char, char)] = &[
663685
('𛲀', '𛲈'),
664686
('𛲐', '𛲙'),
665687
('\u{1bc9d}', '\u{1bc9e}'),
688+
('𜳰', '𜳹'),
666689
('\u{1cf00}', '\u{1cf2d}'),
667690
('\u{1cf30}', '\u{1cf46}'),
668691
('\u{1d165}', '\u{1d169}'),
669-
('𝅭', '\u{1d172}'),
692+
('\u{1d16d}', '\u{1d172}'),
670693
('\u{1d17b}', '\u{1d182}'),
671694
('\u{1d185}', '\u{1d18b}'),
672695
('\u{1d1aa}', '\u{1d1ad}'),
@@ -724,6 +747,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
724747
('𞊐', '\u{1e2ae}'),
725748
('𞋀', '𞋹'),
726749
('𞓐', '𞓹'),
750+
('𞗐', '𞗺'),
727751
('𞟠', '𞟦'),
728752
('𞟨', '𞟫'),
729753
('𞟭', '𞟮'),
@@ -774,6 +798,7 @@ pub const PERL_WORD: &'static [(char, char)] = &[
774798
('𫝀', '𫠝'),
775799
('𫠠', '𬺡'),
776800
('𬺰', '𮯠'),
801+
('𮯰', '𮹝'),
777802
('丽', '𪘀'),
778803
('𰀀', '𱍊'),
779804
('𱍐', '𲎯'),

regex-syntax/src/hir/translate.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3143,10 +3143,31 @@ mod tests {
31433143
#[cfg(feature = "unicode-script")]
31443144
assert_eq!(
31453145
t(r"[\p{sc:Greek}~~\p{scx:Greek}]"),
3146+
// Class({
3147+
// '·'..='·',
3148+
// '\u{300}'..='\u{301}',
3149+
// '\u{304}'..='\u{304}',
3150+
// '\u{306}'..='\u{306}',
3151+
// '\u{308}'..='\u{308}',
3152+
// '\u{313}'..='\u{313}',
3153+
// '\u{342}'..='\u{342}',
3154+
// '\u{345}'..='\u{345}',
3155+
// 'ʹ'..='ʹ',
3156+
// '\u{1dc0}'..='\u{1dc1}',
3157+
// '⁝'..='⁝',
3158+
// })
31463159
hir_uclass(&[
3160+
('·', '·'),
3161+
('\u{0300}', '\u{0301}'),
3162+
('\u{0304}', '\u{0304}'),
3163+
('\u{0306}', '\u{0306}'),
3164+
('\u{0308}', '\u{0308}'),
3165+
('\u{0313}', '\u{0313}'),
31473166
('\u{0342}', '\u{0342}'),
31483167
('\u{0345}', '\u{0345}'),
3168+
('ʹ', 'ʹ'),
31493169
('\u{1DC0}', '\u{1DC1}'),
3170+
('⁝', '⁝'),
31503171
])
31513172
);
31523173
assert_eq!(t(r"[a-g~~c-j]"), hir_uclass(&[('a', 'b'), ('h', 'j')]));

regex-syntax/src/unicode.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -675,6 +675,8 @@ fn ages(canonical_age: &str) -> Result<impl Iterator<Item = Range>, Error> {
675675
("V13_0", age::V13_0),
676676
("V14_0", age::V14_0),
677677
("V15_0", age::V15_0),
678+
("V15_1", age::V15_1),
679+
("V16_0", age::V16_0),
678680
];
679681
assert_eq!(AGES.len(), age::BY_NAME.len(), "ages are out of sync");
680682

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy