Skip to content

Commit 061dc3c

Browse files
Ensure tables.rs passes rustfmt
1 parent a6a221a commit 061dc3c

File tree

4 files changed

+9281
-24580
lines changed

4 files changed

+9281
-24580
lines changed

.github/workflows/rust.yml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -65,6 +65,9 @@ jobs:
6565
runs-on: ubuntu-latest
6666
steps:
6767
- uses: actions/checkout@v3
68+
- uses: actions/setup-python@v5
69+
with:
70+
python-version: '3.12'
6871
- name: Regen
6972
run: cd scripts && python3 unicode.py
7073
- name: Diff tables

scripts/unicode.py

Lines changed: 26 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
# out-of-line and check the tables.rs and normalization_tests.rs files into git.
2121
import collections
2222
import urllib.request
23+
from itertools import batched
2324

2425
UNICODE_VERSION = "15.1.0"
2526
UCD_URL = "https://www.unicode.org/Public/%s/ucd/" % UNICODE_VERSION
@@ -354,20 +355,26 @@ def is_first_and_last(first, last):
354355
return False
355356
return first[1:-8] == last[1:-7]
356357

357-
def gen_mph_data(name, d, kv_type, kv_callback):
358+
def gen_mph_data(name, d, kv_type, kv_callback, kv_row_width):
358359
(salt, keys) = minimal_perfect_hash(d)
359-
out.write("pub(crate) const %s_SALT: &[u16] = &[\n" % name.upper())
360-
for s in salt:
361-
out.write(" 0x{:x},\n".format(s))
360+
out.write(f"\npub(crate) const {name.upper()}_SALT: &[u16] = &[\n")
361+
for s_row in batched(salt, 13):
362+
out.write(" ")
363+
for s in s_row:
364+
out.write(f" 0x{s:03X},")
365+
out.write("\n")
366+
out.write("];\n")
367+
out.write(f"pub(crate) const {name.upper()}_KV: &[{kv_type}] = &[\n")
368+
for k_row in batched(keys, kv_row_width):
369+
out.write(" ")
370+
for k in k_row:
371+
out.write(f" {kv_callback(k)},")
372+
out.write("\n")
362373
out.write("];\n")
363-
out.write("pub(crate) const {}_KV: &[{}] = &[\n".format(name.upper(), kv_type))
364-
for k in keys:
365-
out.write(" {},\n".format(kv_callback(k)))
366-
out.write("];\n\n")
367374

368375
def gen_combining_class(combining_classes, out):
369376
gen_mph_data('canonical_combining_class', combining_classes, 'u32',
370-
lambda k: "0x{:X}".format(int(combining_classes[k]) | (k << 8)))
377+
lambda k: f"0x{int(combining_classes[k]) | (k << 8):07X}", 8)
371378

372379
def gen_composition_table(canon_comp, out):
373380
table = {}
@@ -376,7 +383,7 @@ def gen_composition_table(canon_comp, out):
376383
table[(c1 << 16) | c2] = c3
377384
(salt, keys) = minimal_perfect_hash(table)
378385
gen_mph_data('COMPOSITION_TABLE', table, '(u32, char)',
379-
lambda k: "(0x%s, '\\u{%s}')" % (hexify(k), hexify(table[k])))
386+
lambda k: f"(0x{k:08X}, '\\u{{{table[k]:06X}}}')", 1)
380387

381388
out.write("pub(crate) fn composition_table_astral(c1: char, c2: char) -> Option<char> {\n")
382389
out.write(" match (c1, c2) {\n")
@@ -403,7 +410,7 @@ def gen_decomposition_tables(canon_decomp, compat_decomp, cjk_compat_variants_de
403410
assert offset < 65536
404411
out.write("];\n")
405412
gen_mph_data(name + '_decomposed', table, "(u32, (u16, u16))",
406-
lambda k: "(0x{:x}, ({}, {}))".format(k, offsets[k], len(table[k])))
413+
lambda k: f"(0x{k:05X}, (0x{offsets[k]:03X}, 0x{len(table[k]):X}))", 1)
407414

408415
def gen_qc_match(prop_table, out):
409416
out.write(" match c {\n")
@@ -421,7 +428,7 @@ def gen_qc_match(prop_table, out):
421428
out.write(" }\n")
422429

423430
def gen_nfc_qc(prop_tables, out):
424-
out.write("#[inline]\n")
431+
out.write("\n#[inline]\n")
425432
out.write("#[allow(ellipsis_inclusive_range_patterns)]\n")
426433
out.write("pub fn qc_nfc(c: char) -> IsNormalized {\n")
427434
gen_qc_match(prop_tables['NFC_QC'], out)
@@ -450,7 +457,7 @@ def gen_nfkd_qc(prop_tables, out):
450457

451458
def gen_combining_mark(general_category_mark, out):
452459
gen_mph_data('combining_mark', general_category_mark, 'u32',
453-
lambda k: '0x{:04x}'.format(k))
460+
lambda k: '0x{:05X}'.format(k), 10)
454461

455462
def gen_public_assigned(general_category_public_assigned, out):
456463
# This could be done as a hash but the table is somewhat small.
@@ -464,17 +471,16 @@ def gen_public_assigned(general_category_public_assigned, out):
464471
out.write(" ")
465472
start = False
466473
else:
467-
out.write(" | ")
474+
out.write("\n | ")
468475
if first == last:
469-
out.write("'\\u{%s}'\n" % hexify(first))
476+
out.write("'\\u{%s}'" % hexify(first))
470477
else:
471-
out.write("'\\u{%s}'..='\\u{%s}'\n" % (hexify(first), hexify(last)))
472-
out.write(" => true,\n")
478+
out.write("'\\u{%s}'..='\\u{%s}'" % (hexify(first), hexify(last)))
479+
out.write(" => true,\n")
473480

474481
out.write(" _ => false,\n")
475482
out.write(" }\n")
476483
out.write("}\n")
477-
out.write("\n")
478484

479485
def gen_stream_safe(leading, trailing, out):
480486
# This could be done as a hash but the table is very small.
@@ -488,10 +494,9 @@ def gen_stream_safe(leading, trailing, out):
488494
out.write(" _ => 0,\n")
489495
out.write(" }\n")
490496
out.write("}\n")
491-
out.write("\n")
492497

493498
gen_mph_data('trailing_nonstarters', trailing, 'u32',
494-
lambda k: "0x{:X}".format(int(trailing[k]) | (k << 8)))
499+
lambda k: f"0x{int(trailing[k]) | (k << 8):07X}", 8)
495500

496501
def gen_tests(tests, out):
497502
out.write("""#[derive(Debug)]
@@ -579,43 +584,33 @@ def minimal_perfect_hash(d):
579584
data = UnicodeData()
580585
with open("tables.rs", "w", newline = "\n") as out:
581586
out.write(PREAMBLE)
582-
out.write("#![cfg_attr(rustfmt, rustfmt::skip)]\n")
583587
out.write("use crate::quick_check::IsNormalized;\n")
584588
out.write("use crate::quick_check::IsNormalized::*;\n")
585589
out.write("\n")
586590

587591
version = "(%s, %s, %s)" % tuple(UNICODE_VERSION.split("."))
588592
out.write("#[allow(unused)]\n")
589-
out.write("pub const UNICODE_VERSION: (u8, u8, u8) = %s;\n\n" % version)
593+
out.write("pub const UNICODE_VERSION: (u8, u8, u8) = %s;\n" % version)
590594

591595
gen_combining_class(data.combining_classes, out)
592-
out.write("\n")
593596

594597
gen_composition_table(data.canon_comp, out)
595-
out.write("\n")
596598

597599
gen_decomposition_tables(data.canon_fully_decomp, data.compat_fully_decomp, data.cjk_compat_variants_fully_decomp, out)
598600

599601
gen_combining_mark(data.general_category_mark, out)
600-
out.write("\n")
601602

602603
gen_public_assigned(data.general_category_public_assigned, out)
603-
out.write("\n")
604604

605605
gen_nfc_qc(data.norm_props, out)
606-
out.write("\n")
607606

608607
gen_nfkc_qc(data.norm_props, out)
609-
out.write("\n")
610608

611609
gen_nfd_qc(data.norm_props, out)
612-
out.write("\n")
613610

614611
gen_nfkd_qc(data.norm_props, out)
615-
out.write("\n")
616612

617613
gen_stream_safe(data.ss_leading, data.ss_trailing, out)
618-
out.write("\n")
619614

620615
with open("normalization_tests.rs", "w", newline = "\n") as out:
621616
out.write(PREAMBLE)

src/lib.rs

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -72,8 +72,6 @@ mod quick_check;
7272
mod recompose;
7373
mod replace;
7474
mod stream_safe;
75-
76-
#[rustfmt::skip]
7775
mod tables;
7876

7977
#[doc(hidden)]

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy