Skip to content

Commit

Permalink
Rollup merge of rust-lang#65165 - BO41:char_docs, r=varkor
Browse files Browse the repository at this point in the history
Improve docs on some char boolean methods

simple revival of rust-lang#61794
(also rustfmt on rest of file :)

Documentation for `is_xid_start()` and `is_xid_continue()` couldn't be improved since both methods got remove from this repository

r? @dtolnay
cc @JohnCSimon
  • Loading branch information
Centril authored Oct 13, 2019
2 parents 4dc0b8a + d8c2956 commit 82fb193
Showing 1 changed file with 109 additions and 63 deletions.
172 changes: 109 additions & 63 deletions src/libcore/char/methods.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,9 @@ impl char {

// the code is split up here to improve execution speed for cases where
// the `radix` is constant and 10 or smaller
let val = if radix <= 10 {
let val = if radix <= 10 {
match self {
'0' ..= '9' => self as u32 - '0' as u32,
'0'..='9' => self as u32 - '0' as u32,
_ => return None,
}
} else {
Expand All @@ -130,8 +130,11 @@ impl char {
}
};

if val < radix { Some(val) }
else { None }
if val < radix {
Some(val)
} else {
None
}
}

/// Returns an iterator that yields the hexadecimal Unicode escape of a
Expand Down Expand Up @@ -303,8 +306,8 @@ impl char {
'\r' => EscapeDefaultState::Backslash('r'),
'\n' => EscapeDefaultState::Backslash('n'),
'\\' | '\'' | '"' => EscapeDefaultState::Backslash(self),
'\x20' ..= '\x7e' => EscapeDefaultState::Char(self),
_ => EscapeDefaultState::Unicode(self.escape_unicode())
'\x20'..='\x7e' => EscapeDefaultState::Char(self),
_ => EscapeDefaultState::Unicode(self.escape_unicode()),
};
EscapeDefault { state: init_state }
}
Expand Down Expand Up @@ -436,30 +439,31 @@ impl char {
pub fn encode_utf8(self, dst: &mut [u8]) -> &mut str {
let code = self as u32;
unsafe {
let len =
if code < MAX_ONE_B && !dst.is_empty() {
let len = if code < MAX_ONE_B && !dst.is_empty() {
*dst.get_unchecked_mut(0) = code as u8;
1
} else if code < MAX_TWO_B && dst.len() >= 2 {
*dst.get_unchecked_mut(0) = (code >> 6 & 0x1F) as u8 | TAG_TWO_B;
*dst.get_unchecked_mut(1) = (code & 0x3F) as u8 | TAG_CONT;
2
} else if code < MAX_THREE_B && dst.len() >= 3 {
} else if code < MAX_THREE_B && dst.len() >= 3 {
*dst.get_unchecked_mut(0) = (code >> 12 & 0x0F) as u8 | TAG_THREE_B;
*dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
*dst.get_unchecked_mut(1) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
*dst.get_unchecked_mut(2) = (code & 0x3F) as u8 | TAG_CONT;
3
} else if dst.len() >= 4 {
*dst.get_unchecked_mut(0) = (code >> 18 & 0x07) as u8 | TAG_FOUR_B;
*dst.get_unchecked_mut(1) = (code >> 12 & 0x3F) as u8 | TAG_CONT;
*dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
*dst.get_unchecked_mut(2) = (code >> 6 & 0x3F) as u8 | TAG_CONT;
*dst.get_unchecked_mut(3) = (code & 0x3F) as u8 | TAG_CONT;
4
} else {
panic!("encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
panic!(
"encode_utf8: need {} bytes to encode U+{:X}, but the buffer has {}",
from_u32_unchecked(code).len_utf8(),
code,
dst.len())
dst.len(),
)
};
from_utf8_unchecked_mut(dst.get_unchecked_mut(..len))
}
Expand Down Expand Up @@ -515,15 +519,24 @@ impl char {
*dst.get_unchecked_mut(1) = 0xDC00 | ((code as u16) & 0x3FF);
slice::from_raw_parts_mut(dst.as_mut_ptr(), 2)
} else {
panic!("encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
panic!(
"encode_utf16: need {} units to encode U+{:X}, but the buffer has {}",
from_u32_unchecked(code).len_utf16(),
code,
dst.len())
dst.len(),
)
}
}
}

/// Returns `true` if this `char` is an alphabetic code point, and false if not.
/// Returns `true` if this `char` has the `Alphabetic` property.
///
/// `Alphabetic` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
/// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
///
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
/// [ucd]: https://www.unicode.org/reports/tr44/
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
///
/// # Examples
///
Expand All @@ -547,10 +560,14 @@ impl char {
}
}

/// Returns `true` if this `char` is lowercase.
/// Returns `true` if this `char` has the `Lowercase` property.
///
/// 'Lowercase' is defined according to the terms of the Unicode Derived Core
/// Property `Lowercase`.
/// `Lowercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
/// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
///
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
/// [ucd]: https://www.unicode.org/reports/tr44/
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
///
/// # Examples
///
Expand All @@ -575,10 +592,14 @@ impl char {
}
}

/// Returns `true` if this `char` is uppercase.
/// Returns `true` if this `char` has the `Uppercase` property.
///
/// `Uppercase` is described in Chapter 4 (Character Properties) of the [Unicode Standard] and
/// specified in the [Unicode Character Database][ucd] [`DerivedCoreProperties.txt`].
///
/// 'Uppercase' is defined according to the terms of the Unicode Derived Core
/// Property `Uppercase`.
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
/// [ucd]: https://www.unicode.org/reports/tr44/
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
///
/// # Examples
///
Expand All @@ -603,10 +624,12 @@ impl char {
}
}

/// Returns `true` if this `char` is whitespace.
/// Returns `true` if this `char` has the `White_Space` property.
///
/// 'Whitespace' is defined according to the terms of the Unicode Derived Core
/// Property `White_Space`.
/// `White_Space` is specified in the [Unicode Character Database][ucd] [`PropList.txt`].
///
/// [ucd]: https://www.unicode.org/reports/tr44/
/// [`PropList.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
///
/// # Examples
///
Expand All @@ -630,10 +653,10 @@ impl char {
}
}

/// Returns `true` if this `char` is alphanumeric.
/// Returns `true` if this `char` satisfies either [`is_alphabetic()`] or [`is_numeric()`].
///
/// 'Alphanumeric'-ness is defined in terms of the Unicode General Categories
/// `Nd`, `Nl`, `No` and the Derived Core Property `Alphabetic`.
/// [`is_alphabetic()`]: #method.is_alphabetic
/// [`is_numeric()`]: #method.is_numeric
///
/// # Examples
///
Expand All @@ -655,10 +678,15 @@ impl char {
self.is_alphabetic() || self.is_numeric()
}

/// Returns `true` if this `char` is a control code point.
/// Returns `true` if this `char` has the general category for control codes.
///
/// Control codes (code points with the general category of `Cc`) are described in Chapter 4
/// (Character Properties) of the [Unicode Standard] and specified in the [Unicode Character
/// Database][ucd] [`UnicodeData.txt`].
///
/// 'Control code point' is defined in terms of the Unicode General
/// Category `Cc`.
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
/// [ucd]: https://www.unicode.org/reports/tr44/
/// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
///
/// # Examples
///
Expand All @@ -675,19 +703,29 @@ impl char {
general_category::Cc(self)
}

/// Returns `true` if this `char` is an extended grapheme character.
/// Returns `true` if this `char` has the `Grapheme_Extend` property.
///
/// 'Extended grapheme character' is defined in terms of the Unicode Shaping and Rendering
/// Category `Grapheme_Extend`.
/// `Grapheme_Extend` is described in [Unicode Standard Annex #29 (Unicode Text
/// Segmentation)][uax29] and specified in the [Unicode Character Database][ucd]
/// [`DerivedCoreProperties.txt`].
///
/// [uax29]: https://www.unicode.org/reports/tr29/
/// [ucd]: https://www.unicode.org/reports/tr44/
/// [`DerivedCoreProperties.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
#[inline]
pub(crate) fn is_grapheme_extended(self) -> bool {
derived_property::Grapheme_Extend(self)
}

/// Returns `true` if this `char` is numeric.
/// Returns `true` if this `char` has one of the general categories for numbers.
///
/// The general categories for numbers (`Nd` for decimal digits, `Nl` for letter-like numeric
/// characters, and `No` for other numeric characters) are specified in the [Unicode Character
/// Database][ucd] [`UnicodeData.txt`].
///
/// 'Numeric'-ness is defined in terms of the Unicode General Categories
/// `Nd`, `Nl`, `No`.
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
/// [ucd]: https://www.unicode.org/reports/tr44/
/// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
///
/// # Examples
///
Expand All @@ -713,25 +751,29 @@ impl char {
}
}

/// Returns an iterator that yields the lowercase equivalent of a `char`
/// as one or more `char`s.
/// Returns an iterator that yields the lowercase mapping of this `char` as one or more
/// `char`s.
///
/// If a character does not have a lowercase equivalent, the same character
/// will be returned back by the iterator.
/// If this `char` does not have a lowercase mapping, the iterator yields the same `char`.
///
/// This performs complex unconditional mappings with no tailoring: it maps
/// one Unicode character to its lowercase equivalent according to the
/// [Unicode database] and the additional complex mappings
/// [`SpecialCasing.txt`]. Conditional mappings (based on context or
/// language) are not considered here.
/// If this `char` has a one-to-one lowercase mapping given by the [Unicode Character
/// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
///
/// For a full reference, see [here][reference].
/// [ucd]: https://www.unicode.org/reports/tr44/
/// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
///
/// [Unicode database]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
/// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields
/// the `char`(s) given by [`SpecialCasing.txt`].
///
/// [`SpecialCasing.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt
/// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
///
/// [reference]: http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
/// This operation performs an unconditional mapping without tailoring. That is, the conversion
/// is independent of context and language.
///
/// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
/// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
///
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
///
/// # Examples
///
Expand Down Expand Up @@ -774,25 +816,29 @@ impl char {
ToLowercase(CaseMappingIter::new(conversions::to_lower(self)))
}

/// Returns an iterator that yields the uppercase equivalent of a `char`
/// as one or more `char`s.
/// Returns an iterator that yields the uppercase mapping of this `char` as one or more
/// `char`s.
///
/// If this `char` does not have a uppercase mapping, the iterator yields the same `char`.
///
/// If this `char` has a one-to-one uppercase mapping given by the [Unicode Character
/// Database][ucd] [`UnicodeData.txt`], the iterator yields that `char`.
///
/// If a character does not have an uppercase equivalent, the same character
/// will be returned back by the iterator.
/// [ucd]: https://www.unicode.org/reports/tr44/
/// [`UnicodeData.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
///
/// This performs complex unconditional mappings with no tailoring: it maps
/// one Unicode character to its uppercase equivalent according to the
/// [Unicode database] and the additional complex mappings
/// [`SpecialCasing.txt`]. Conditional mappings (based on context or
/// language) are not considered here.
/// If this `char` requires special considerations (e.g. multiple `char`s) the iterator yields
/// the `char`(s) given by [`SpecialCasing.txt`].
///
/// For a full reference, see [here][reference].
/// [`SpecialCasing.txt`]: https://www.unicode.org/Public/UCD/latest/ucd/SpecialCasing.txt
///
/// [Unicode database]: ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
/// This operation performs an unconditional mapping without tailoring. That is, the conversion
/// is independent of context and language.
///
/// [`SpecialCasing.txt`]: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt
/// In the [Unicode Standard], Chapter 4 (Character Properties) discusses case mapping in
/// general and Chapter 3 (Conformance) discusses the default algorithm for case conversion.
///
/// [reference]: http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
/// [Unicode Standard]: https://www.unicode.org/versions/latest/
///
/// # Examples
///
Expand Down

0 comments on commit 82fb193

Please sign in to comment.