Skip to content

Commit b7f4c29

Browse files
committed
Add documentation
1 parent 52c550c commit b7f4c29

File tree

3 files changed

+162
-60
lines changed

3 files changed

+162
-60
lines changed

scripts/unicode.py

Lines changed: 42 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -190,70 +190,72 @@ def emit_general_category_module(f):
190190
f.write("""
191191
192192
#[derive(Copy, Clone, Hash, Eq, PartialEq, Ord, PartialOrd, Debug)]
193+
/// The most general classification of a character.
193194
pub enum GeneralCategory {
194-
/// an uppercase letter
195+
/// `Lu`, an uppercase letter
195196
LetterUppercase,
196-
/// a lowercase letter
197+
/// `Ll`, a lowercase letter
197198
LetterLowercase,
198-
/// a digraphic character, with first part uppercase
199+
/// `Lt`, a digraphic character, with first part uppercase
199200
LetterTitlecase,
200-
/// a modifier letter
201+
/// `Lm`, a modifier letter
201202
LetterModifier,
202-
/// other letters, including syllables and ideographs
203+
/// `Lo`, other letters, including syllables and ideographs
203204
LetterOther,
204-
/// a nonspacing combining mark (zero advance width)
205+
/// `Mn`, a nonspacing combining mark (zero advance width)
205206
MarkNonspacing,
206-
/// a spacing combining mark (positive advance width)
207+
/// `Mc`, a spacing combining mark (positive advance width)
207208
MarkSpacing,
208-
/// an enclosing combining mark
209+
/// `Me`, an enclosing combining mark
209210
MarkEnclosing,
210-
/// a decimal digit
211+
/// `Nd`, a decimal digit
211212
NumberDecimal,
212-
/// a letterlike numeric character
213+
/// `Nl`, a letterlike numeric character
213214
NumberLetter,
214-
/// a numeric character of other type
215+
/// `No`, a numeric character of other type
215216
NumberOther,
216-
/// a connecting punctuation mark, like a tie
217+
/// `Pc`, a connecting punctuation mark, like a tie
217218
PunctuationConnector,
218-
/// a dash or hyphen punctuation mark
219+
/// `Pd`, a dash or hyphen punctuation mark
219220
PunctuationDash,
220-
/// an opening punctuation mark (of a pair)
221+
/// `Ps`, an opening punctuation mark (of a pair)
221222
PunctuationOpen,
222-
/// a closing punctuation mark (of a pair)
223+
/// `Pe`, a closing punctuation mark (of a pair)
223224
PunctuationClose,
224-
/// an initial quotation mark
225+
/// `Pi`, an initial quotation mark
225226
PunctuationInitial,
226-
/// a final quotation mark
227+
/// `Pf`, a final quotation mark
227228
PunctuationFinal,
228-
/// a punctuation mark of other type
229+
/// `Po`, a punctuation mark of other type
229230
PunctuationOther,
230-
/// a symbol of mathematical use
231+
/// `Sm`, a symbol of mathematical use
231232
SymbolMath,
232-
/// a currency sign
233+
/// `Sc`, a currency sign
233234
SymbolCurrency,
234-
/// a non-letterlike modifier symbol
235+
/// `Sk`, a non-letterlike modifier symbol
235236
SymbolModifier,
236-
/// a symbol of other type
237+
/// `So`, a symbol of other type
237238
SymbolOther,
238-
/// a space character (of various non-zero widths)
239+
/// `Zs`, a space character (of various non-zero widths)
239240
SeparatorSpace,
240-
/// U+2028 LINE SEPARATOR only
241+
/// `Zl`, U+2028 LINE SEPARATOR only
241242
SeparatorLine,
242-
/// U+2029 PARAGRAPH SEPARATOR only
243+
/// `Zp`, U+2029 PARAGRAPH SEPARATOR only
243244
SeparatorParagraph,
244-
/// a C0 or C1 control code
245+
/// `Cc`, a C0 or C1 control code
245246
OtherControl,
246-
/// a format control character
247+
/// `Cf`, a format control character
247248
OtherFormat,
248-
/// a surrogate code point
249+
/// `Cs`, a surrogate code point
249250
OtherSurrogate,
250-
/// a private-use character
251+
/// `Co`, a private-use character
251252
OtherPrivateUse,
252-
/// a reserved unassigned code point or a noncharacter
253+
/// `Cn`, a reserved unassigned code point or a noncharacter
253254
OtherUnassigned,
254255
}
255256
256257
#[derive(Copy, Clone, Hash, Eq, PartialEq, Ord, PartialOrd, Debug)]
258+
/// Groupings of the most general classification of a character.
257259
pub enum GeneralCategoryGroup {
258260
/// Lu | Ll | Lt | Lm | Lo
259261
Letter,
@@ -379,15 +381,25 @@ def emit_emoji_module(f):
379381
380382
#[derive(Copy, Clone, Hash, Eq, PartialEq, Ord, PartialOrd, Debug)]
381383
#[non_exhaustive]
384+
/// The emoji character properties of a character.
382385
pub enum EmojiStatus {
386+
/// `Emoji=NO`, `Emoji_Component=NO`
383387
NonEmoji,
388+
/// `Emoji=NO`, `Emoji_Component=YES`
384389
NonEmojiButEmojiComponent,
390+
/// `Emoji=YES`, `Emoji_Component=NO`; `Emoji_Presentation=YES`
385391
EmojiPresentation,
392+
/// `Emoji=YES`, `Emoji_Component=NO`; `Emoji_Modifier_Base=YES`
386393
EmojiModifierBase,
394+
/// `Emoji=YES`, `Emoji_Component=NO`; `Emoji_Presentation=YES`, `Emoji_Modifier_Base=YES`
387395
EmojiPresentationAndModifierBase,
396+
/// `Emoji=YES`, `Emoji_Component=NO`
388397
EmojiOther,
398+
/// `Emoji=YES`, `Emoji_Component=YES`; `Emoji_Presentation=YES`
389399
EmojiPresentationAndEmojiComponent,
400+
/// `Emoji=YES`, `Emoji_Component=YES`; `Emoji_Presentation=YES`, `Emoji_Modifier=YES`
390401
EmojiPresentationAndModifierAndEmojiComponent,
402+
/// `Emoji=YES`, `Emoji_Component=YES`
391403
EmojiOtherAndEmojiComponent,
392404
}
393405
#[inline]

src/lib.rs

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,70 @@
1+
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
//! Query character Unicode properties according to
12+
//! [Unicode Standard Annex #44](https://www.unicode.org/reports/tr44/)
13+
//! and [Unicode Technical Standard #51](https://www.unicode.org/reports/tr51/)
14+
//! rules.
15+
//!
16+
//! ```rust
17+
//! use unicode_properties::UnicodeEmoji;
18+
//! use unicode_properties::UnicodeGeneralCategory;
19+
//!
20+
//! fn main() {
21+
//! let ch = '🦀'; // U+1F980 CRAB
22+
//! let is_emoji = ch.is_emoji_char();
23+
//! let group = ch.general_category_group();
24+
//! println!("{}({:?})", ch, group);
25+
//! println!("The above char {} for use as emoji char.",
26+
//! if is_emoji { "is recommended" } else { "is not recommended" });
27+
//! }
28+
//! ```
29+
//!
30+
//! # Features
31+
//!
32+
//! ## `general-category`
33+
//!
34+
//! Provides the most general classification of a character,
35+
//! based on its primary characteristic.
36+
//!
37+
//! ## `emoji`
38+
//!
39+
//! Provides the emoji character properties of a character.
40+
//!
41+
#![deny(missing_docs)]
42+
143
#[rustfmt::skip]
244
mod tables;
345

446
#[cfg(feature = "emoji")]
47+
/// Query the emoji character properties of a character.
548
pub mod emoji {
649
pub use crate::tables::emoji::EmojiStatus;
750

51+
/// Query the emoji character properties of a character.
852
pub trait UnicodeEmoji: Sized {
53+
/// Returns the emoji character properties in a status enum.
954
fn emoji_status(self) -> EmojiStatus;
1055

56+
/// Checks whether this character is recommended for use as emoji, i.e. `Emoji=YES`.
1157
fn is_emoji_char(self) -> bool {
1258
crate::tables::emoji::is_emoji_status_for_emoji_char(self.emoji_status())
1359
}
1460

61+
/// Checks whether this character is used in emoji sequences where it is not
62+
/// intended for independent, direct input, i.e. `Emoji_Component=YES`.
1563
fn is_emoji_component(self) -> bool {
1664
crate::tables::emoji::is_emoji_status_for_emoji_component(self.emoji_status())
1765
}
1866

67+
/// Checks whether this character occurs in emoji sequences, i.e. `Emoji=YES | Emoji_Component=YES`.
1968
fn is_emoji_char_or_emoji_component(self) -> bool {
2069
crate::tables::emoji::is_emoji_status_for_emoji_char_or_emoji_component(
2170
self.emoji_status(),
@@ -30,42 +79,66 @@ pub mod emoji {
3079
}
3180

3281
#[inline]
82+
/// Checks whether this character is the U+200D ZERO WIDTH JOINER (ZWJ) character.
83+
///
84+
/// It can be used between the elements of a sequence of characters to indicate that
85+
/// a single glyph should be presented if available.
3386
pub fn is_zwj(c: char) -> bool {
3487
c == '\u{200D}'
3588
}
3689

3790
#[inline]
91+
/// Checks whether this character is the U+FE0F VARIATION SELECTOR-16 (VS16) character, used to
92+
/// request an emoji presentation for an emoji character.
3893
pub fn is_emoji_presentation_selector(c: char) -> bool {
3994
c == '\u{FE0F}'
4095
}
4196

4297
#[inline]
98+
/// Checks whether this character is the U+FE0E VARIATION SELECTOR-15 (VS15) character, used to
99+
/// request a text presentation for an emoji character.
43100
pub fn is_text_presentation_selector(c: char) -> bool {
44101
c == '\u{FE0E}'
45102
}
46103

47104
#[inline]
105+
/// Checks whether this character is one of the Regional Indicator characters.
106+
///
107+
/// A pair of REGIONAL INDICATOR symbols is referred to as an emoji_flag_sequence.
48108
pub fn is_regional_indicator(c: char) -> bool {
49109
matches!(c, '\u{1F1E6}'..='\u{1F1FF}')
50110
}
51111

52112
#[inline]
113+
/// Checks whether this character is one of the Tag Characters.
114+
///
115+
/// These can be used to indicate variants or extensions of emoji characters.
53116
pub fn is_tag_character(c: char) -> bool {
54117
matches!(c, '\u{E0020}'..='\u{E007F}')
55118
}
56119
}
57120

58121
#[cfg(feature = "general-category")]
122+
/// Query the general category property of a character.
59123
pub mod general_category {
60124
pub use crate::tables::general_category::{GeneralCategory, GeneralCategoryGroup};
61125

126+
/// Query the general category property of a character.
127+
///
128+
/// See [General Category Values](https://www.unicode.org/reports/tr44/#General_Category_Values) for more info.
62129
pub trait UnicodeGeneralCategory: Sized {
130+
/// Queries the most general classification of a character.
63131
fn general_category(self) -> GeneralCategory;
64132

133+
/// Queries the grouping of the most general classification of a character.
65134
fn general_category_group(self) -> GeneralCategoryGroup {
66135
crate::tables::general_category::general_category_group(self.general_category())
67136
}
68137

138+
/// Queries whether the most general classification of a character belongs to the `LetterCased` group.
139+
///
140+
/// The `LetterCased` group includes `LetterUppercase`, `LetterLowercase`, and `LetterTitlecase`
141+
/// categories, and is a subset of the `Letter` group.
69142
fn is_letter_cased(self) -> bool {
70143
crate::tables::general_category::general_category_is_letter_cased(
71144
self.general_category(),
@@ -83,16 +156,21 @@ pub mod general_category {
83156
pub use tables::UNICODE_VERSION;
84157

85158
#[cfg(feature = "emoji")]
159+
#[doc(inline)]
86160
pub use emoji::UnicodeEmoji;
87161

88162
#[cfg(feature = "emoji")]
163+
#[doc(inline)]
89164
pub use emoji::EmojiStatus;
90165

91166
#[cfg(feature = "general-category")]
167+
#[doc(inline)]
92168
pub use general_category::GeneralCategory;
93169

94170
#[cfg(feature = "general-category")]
171+
#[doc(inline)]
95172
pub use general_category::GeneralCategoryGroup;
96173

97174
#[cfg(feature = "general-category")]
175+
#[doc(inline)]
98176
pub use general_category::UnicodeGeneralCategory;

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy