From a0c186c34f67c7eb3b392755091dd3a17f5acbdf Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Sun, 21 Jul 2019 14:50:39 +0300 Subject: [PATCH 1/2] remove XID and Pattern_White_Space unicode tables from libcore They are only used by rustc_lexer, and are not needed elsewhere. So we move the relevant definitions into rustc_lexer (while the actual unicode data comes from the unicode-xid crate) and make the rest of the compiler use it. --- Cargo.lock | 17 +- src/libcore/char/methods.rs | 23 -- src/libcore/unicode/mod.rs | 5 - src/libcore/unicode/tables.rs | 375 ------------------ src/libcore/unicode/unicode.py | 9 +- src/libfmt_macros/Cargo.toml | 2 +- src/libfmt_macros/lib.rs | 10 +- src/librustc_lexer/Cargo.toml | 8 +- src/librustc_lexer/src/lib.rs | 63 ++- src/librustc_mir/Cargo.toml | 1 + src/librustc_mir/borrow_check/move_errors.rs | 5 +- .../borrow_check/mutability_errors.rs | 4 +- src/librustdoc/lib.rs | 1 + src/librustdoc/test.rs | 5 +- src/libsyntax/ext/proc_macro_server.rs | 4 +- src/libsyntax_ext/Cargo.toml | 1 + 16 files changed, 69 insertions(+), 464 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e0d30647809a9..1b916576bc99c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1011,6 +1011,7 @@ dependencies = [ name = "fmt_macros" version = "0.0.0" dependencies = [ + "rustc_lexer", "syntax_pos", ] @@ -2372,7 +2373,7 @@ version = "0.4.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759" dependencies = [ - "unicode-xid", + "unicode-xid 0.1.0", ] [[package]] @@ -3290,7 +3291,7 @@ dependencies = [ name = "rustc_lexer" version = "0.1.0" dependencies = [ - "unicode-xid", + "unicode-xid 0.2.0", ] [[package]] @@ -3368,6 +3369,7 @@ dependencies = [ "rustc_apfloat", "rustc_data_structures", "rustc_errors", + "rustc_lexer", "rustc_target", "serialize", "smallvec", @@ -3976,7 +3978,7 @@ checksum = "641e117d55514d6d918490e47102f7e08d096fdde360247e4a10f7a91a8478d3" dependencies = [ "proc-macro2", "quote", - "unicode-xid", + "unicode-xid 0.1.0", ] [[package]] @@ -3988,7 +3990,7 @@ dependencies = [ "proc-macro2", "quote", "syn", - "unicode-xid", + "unicode-xid 0.1.0", ] [[package]] @@ -4017,6 +4019,7 @@ dependencies = [ "log", "rustc_data_structures", "rustc_errors", + "rustc_lexer", "rustc_target", "smallvec", "syntax", @@ -4532,6 +4535,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" +[[package]] +name = "unicode-xid" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" + [[package]] name = "unicode_categories" version = "0.1.1" diff --git a/src/libcore/char/methods.rs b/src/libcore/char/methods.rs index e91bf53c5b418..a69eb0f6d4b20 100644 --- a/src/libcore/char/methods.rs +++ b/src/libcore/char/methods.rs @@ -547,29 +547,6 @@ impl char { } } - /// Returns `true` if this `char` satisfies the `XID_Start` Unicode property, and false - /// otherwise. - /// - /// `XID_Start` is a Unicode Derived Property specified in - /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications), - /// mostly similar to `ID_Start` but modified for closure under `NFKx`. - #[unstable(feature = "unicode_internals", issue = "0")] - pub fn is_xid_start(self) -> bool { - derived_property::XID_Start(self) - } - - /// Returns `true` if this `char` satisfies the `XID_Continue` Unicode property, and false - /// otherwise. - /// - /// `XID_Continue` is a Unicode Derived Property specified in - /// [UAX #31](http://unicode.org/reports/tr31/#NFKC_Modifications), - /// mostly similar to `ID_Continue` but modified for closure under NFKx. - #[unstable(feature = "unicode_internals", issue = "0")] - #[inline] - pub fn is_xid_continue(self) -> bool { - derived_property::XID_Continue(self) - } - /// Returns `true` if this `char` is lowercase. /// /// 'Lowercase' is defined according to the terms of the Unicode Derived Core diff --git a/src/libcore/unicode/mod.rs b/src/libcore/unicode/mod.rs index 272727def61d6..a3ec9fd51f064 100644 --- a/src/libcore/unicode/mod.rs +++ b/src/libcore/unicode/mod.rs @@ -13,8 +13,3 @@ pub mod derived_property { pub mod conversions { pub use crate::unicode::tables::conversions::{to_lower, to_upper}; } - -// For use in libsyntax -pub mod property { - pub use crate::unicode::tables::property::Pattern_White_Space; -} diff --git a/src/libcore/unicode/tables.rs b/src/libcore/unicode/tables.rs index 3fae3a46ada6b..5b5be48543121 100644 --- a/src/libcore/unicode/tables.rs +++ b/src/libcore/unicode/tables.rs @@ -890,384 +890,9 @@ pub(crate) mod derived_property { Uppercase_table.lookup(c) } - const XID_Continue_table: &super::BoolTrie = &super::BoolTrie { - r1: [ - 0x03ff000000000000, 0x07fffffe87fffffe, 0x04a0040000000000, 0xff7fffffff7fffff, - 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, - 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0x0000501f0003ffc3, - 0xffffffffffffffff, 0xb8dfffffffffffff, 0xfffffffbffffd7c0, 0xffbfffffffffffff, - 0xffffffffffffffff, 0xffffffffffffffff, 0xfffffffffffffcfb, 0xffffffffffffffff, - 0xfffeffffffffffff, 0xffffffff027fffff, 0xbffffffffffe01ff, 0x000787ffffff00b6, - 0xffffffff07ff0000, 0xffffc3ffffffffff, 0xffffffffffffffff, 0x9ffffdff9fefffff, - 0xffffffffffff0000, 0xffffffffffffe7ff, 0x0003ffffffffffff, 0x243fffffffffffff - ], - r2: [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, - 24, 25, 26, 27, 28, 29, 30, 31, 4, 32, 33, 34, 4, 4, 4, 4, 4, 35, 36, 37, 38, 39, 40, - 41, 42, 4, 4, 4, 4, 4, 4, 4, 4, 43, 44, 45, 46, 47, 4, 48, 49, 50, 51, 52, 53, 54, 55, - 56, 57, 58, 59, 60, 4, 61, 4, 62, 63, 64, 65, 66, 4, 4, 4, 67, 4, 4, 4, 4, 68, 69, 70, - 71, 72, 73, 74, 75, 76, 77, 78, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, - 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, - 60, 60, 60, 60, 60, 79, 80, 4, 81, 82, 83, 84, 85, 60, 60, 60, 60, 60, 60, 60, 60, 86, - 42, 87, 88, 89, 4, 90, 91, 60, 60, 60, 60, 60, 60, 60, 60, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 52, 60, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 92, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 93, 94, 4, 4, 4, 4, 95, 96, 4, 97, 98, 4, 99, 100, 101, 62, 4, 102, 103, - 104, 4, 105, 106, 107, 4, 108, 109, 110, 4, 111, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 112, 113, 60, 60, 60, 60, 60, 60, 60, - 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, - 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, - 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, - 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, - 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, - 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 60, 4, 4, 4, 4, 4, 103, 4, 114, - 115, 116, 97, 117, 4, 118, 4, 4, 119, 120, 121, 122, 123, 124, 4, 125, 126, 127, 128, - 129 - ], - r3: &[ - 0x00003fffffffffff, 0x000007ff0fffffff, 0x3fdfffff00000000, 0xfffffffbfff80000, - 0xffffffffffffffff, 0xfffeffcfffffffff, 0xf3c5fdfffff99fef, 0x5003ffcfb080799f, - 0xd36dfdfffff987ee, 0x003fffc05e023987, 0xf3edfdfffffbbfee, 0xfe00ffcf00013bbf, - 0xf3edfdfffff99fee, 0x0002ffcfb0c0399f, 0xc3ffc718d63dc7ec, 0x0000ffc000813dc7, - 0xe3fffdfffffddfff, 0x0000ffcf07603ddf, 0xf3effdfffffddfef, 0x0006ffcf40603ddf, - 0xfffffffffffddfef, 0xfc00ffcf80f07ddf, 0x2ffbfffffc7fffec, 0x000cffc0ff5f847f, - 0x07fffffffffffffe, 0x0000000003ff7fff, 0x3fffffaffffff7d6, 0x00000000f3ff3f5f, - 0xc2a003ff03000001, 0xfffe1ffffffffeff, 0x1ffffffffeffffdf, 0x0000000000000040, - 0xffffffffffff03ff, 0xffffffff3fffffff, 0xf7ffffffffff20bf, 0xffffffff3d7f3dff, - 0x7f3dffffffff3dff, 0xffffffffff7fff3d, 0xffffffffff3dffff, 0x0003fe00e7ffffff, - 0xffffffff0000ffff, 0x3f3fffffffffffff, 0xfffffffffffffffe, 0xffff9fffffffffff, - 0xffffffff07fffffe, 0x01ffc7ffffffffff, 0x001fffff001fdfff, 0x000ddfff000fffff, - 0x000003ff308fffff, 0xffffffff03ff3800, 0x01ffffffffffffff, 0xffff07ffffffffff, - 0x003fffffffffffff, 0x0fff0fff7fffffff, 0x001f3fffffffffc0, 0xffff0fffffffffff, - 0x0000000007ff03ff, 0xffffffff0fffffff, 0x9fffffff7fffffff, 0x3fff008003ff03ff, - 0x0000000000000000, 0x000ff80003ff0fff, 0x000fffffffffffff, 0x00ffffffffffffff, - 0x3fffffffffffe3ff, 0xe7ffffffffff01ff, 0x07fffffffff70000, 0xfbffffffffffffff, - 0xffffffff3f3fffff, 0x3fffffffaaff3f3f, 0x5fdfffffffffffff, 0x1fdc1fff0fcf1fdc, - 0x8000000000000000, 0x8002000000100001, 0x000000001fff0000, 0x0001ffe21fff0000, - 0xf3fffd503f2ffc84, 0xffffffff000043e0, 0x00000000000001ff, 0xffff7fffffffffff, - 0xffffffff7fffffff, 0x000ff81fffffffff, 0xffff20bfffffffff, 0x800080ffffffffff, - 0x7f7f7f7f007fffff, 0xffffffff7f7f7f7f, 0x1f3efffe000000e0, 0xfffffffee67fffff, - 0xf7ffffffffffffff, 0xfffeffffffffffe0, 0x07ffffff00007fff, 0xffff000000000000, - 0x0000ffffffffffff, 0x0000000000001fff, 0x3fffffffffff0000, 0x00000fffffff1fff, - 0xbff0ffffffffffff, 0x0003ffffffffffff, 0xfffffffcff800000, 0xfffffffffffff9ff, - 0xff8000000000007c, 0x000000ffffffffff, 0xe8ffffff03ff003f, 0xffff3fffffffffff, - 0x1fffffff000fffff, 0x7fffffff03ff8001, 0x007fffffffffffff, 0xfc7fffff03ff3fff, - 0x007cffff38000007, 0xffff7f7f007e7e7e, 0xffff00fff7ffffff, 0x03ff37ffffffffff, - 0xffff000fffffffff, 0x0ffffffffffff87f, 0x0000000003ffffff, 0x5f7ffdffe0f8007f, - 0xffffffffffffffdb, 0xfffffffffff80000, 0xfffffff03fffffff, 0x3fffffffffffffff, - 0xffffffffffff0000, 0xfffffffffffcffff, 0x03ff0000000000ff, 0x0018ffff0000ffff, - 0xaa8a00000000e000, 0x1fffffffffffffff, 0x87fffffe03ff0000, 0xffffffc007fffffe, - 0x7fffffffffffffff, 0x000000001cfcfcfc - ], - r4: [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 5, 5, 9, 5, 10, 11, 5, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 13, - 14, 7, 15, 16, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 17, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 - ], - r5: &[ - 0, 1, 2, 3, 4, 5, 4, 6, 4, 4, 7, 8, 9, 10, 11, 12, 2, 2, 13, 14, 15, 16, 4, 4, 2, 2, 2, - 2, 17, 18, 4, 4, 19, 20, 21, 22, 23, 4, 24, 4, 25, 26, 27, 28, 29, 30, 31, 4, 2, 32, 33, - 33, 34, 4, 4, 4, 4, 4, 4, 4, 35, 36, 4, 37, 2, 38, 3, 39, 40, 41, 2, 42, 43, 4, 44, 45, - 46, 47, 4, 4, 2, 48, 2, 49, 4, 4, 50, 51, 2, 52, 53, 54, 55, 4, 4, 4, 3, 4, 56, 57, 4, - 4, 58, 59, 60, 61, 62, 53, 4, 4, 4, 4, 63, 64, 65, 4, 66, 67, 68, 4, 4, 4, 4, 37, 4, 4, - 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 69, 4, 2, 70, 2, 2, 2, 71, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 70, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 72, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 2, 2, 2, 2, 2, 2, 2, 2, 53, 73, 4, 74, 17, 75, 76, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, - 4, 4, 2, 77, 78, 79, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 80, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 33, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 21, 81, 2, 2, 2, 2, - 2, 82, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 83, 84, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 85, 86, 4, 4, 87, 4, 4, 4, 4, 4, 4, 2, 88, 89, 90, 91, 92, 2, 2, 2, 2, 93, 94, 95, - 96, 97, 98, 4, 4, 4, 4, 4, 4, 4, 4, 99, 100, 101, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 102, 4, 4, 4, 103, 104, 4, 4, 4, 4, 4, 105, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 106, 2, 107, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 108, 109, 110, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 111, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 2, 2, 2, 11, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 112, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 113, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 114, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 115, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 - ], - r6: &[ - 0xb7ffff7fffffefff, 0x000000003fff3fff, 0xffffffffffffffff, 0x07ffffffffffffff, - 0x0000000000000000, 0x001fffffffffffff, 0x2000000000000000, 0xffffffff1fffffff, - 0x000000010001ffff, 0xffffe000ffffffff, 0x07ffffffffff07ff, 0xffffffff3fffffff, - 0x00000000003eff0f, 0xffff03ff3fffffff, 0x0fffffffff0fffff, 0xffff00ffffffffff, - 0x0000000fffffffff, 0x007fffffffffffff, 0x000000ff003fffff, 0x91bffffffffffd3f, - 0x007fffff003fffff, 0x000000007fffffff, 0x0037ffff00000000, 0x03ffffff003fffff, - 0xc0ffffffffffffff, 0x873ffffffeeff06f, 0x1fffffff00000000, 0x000000001fffffff, - 0x0000007ffffffeff, 0x003fffffffffffff, 0x0007ffff003fffff, 0x000000000003ffff, - 0x00000000000001ff, 0x0007ffffffffffff, 0x03ff00ffffffffff, 0xffff00801fffffff, - 0x000000000001ffff, 0x007fffff00000000, 0x8000ffc00000007f, 0x03ff01ffffff0000, - 0xffdfffffffffffff, 0x004fffffffff0070, 0x0000000017ff1e1f, 0x40fffffffffbffff, - 0xffff01ffbfffbd7f, 0x03ff07ffffffffff, 0xfbedfdfffff99fef, 0x001f1fcfe081399f, - 0x00000000c3ff07ff, 0x0000000003ff00bf, 0xff3fffffffffffff, 0x000000003f000001, - 0x0000000003ff0011, 0x01ffffffffffffff, 0x00000000000003ff, 0x03ff0fffe7ffffff, - 0xffffffff00000000, 0x800003ffffffffff, 0xfffffcff00000000, 0x0000001bfcffffff, - 0x7fffffffffffffff, 0xffffffffffff0080, 0x0000000023ffffff, 0xff7ffffffffffdff, - 0xfffc000003ff0001, 0x007ffefffffcffff, 0xb47ffffffffffb7f, 0xfffffdbf03ff00ff, - 0x000003ff01fb7fff, 0x0000000003ffffff, 0x00007fffffffffff, 0x000000000000000f, - 0x000000000000007f, 0x000003ff7fffffff, 0x001f3fffffff0000, 0xe0fffff803ff000f, - 0x000000000000ffff, 0xffffffffffff87ff, 0x00000000ffff80ff, 0x0000000b00000000, - 0x00ffffffffffffff, 0xffff00f000070000, 0x0fffffffffffffff, 0x1fff07ffffffffff, - 0x0000000063ff01ff, 0xf807e3e000000000, 0x00003c0000000fe7, 0x000000000000001c, - 0xffffffffffdfffff, 0xebffde64dfffffff, 0xffffffffffffffef, 0x7bffffffdfdfe7bf, - 0xfffffffffffdfc5f, 0xffffff3fffffffff, 0xf7fffffff7fffffd, 0xffdfffffffdfffff, - 0xffff7fffffff7fff, 0xfffffdfffffffdff, 0xffffffffffffcff7, 0xf87fffffffffffff, - 0x00201fffffffffff, 0x0000fffef8000010, 0x000007dbf9ffff7f, 0x3fff1fffffffffff, - 0x00000000000043ff, 0x03ffffffffffffff, 0x00000000007f001f, 0x0000000003ff0fff, - 0x0af7fe96ffffffef, 0x5ef7f796aa96ea84, 0x0ffffbee0ffffbff, 0x00000000007fffff, - 0xffff0003ffffffff, 0x00000001ffffffff, 0x000000003fffffff, 0x0000ffffffffffff - ], - }; - - pub fn XID_Continue(c: char) -> bool { - XID_Continue_table.lookup(c) - } - - const XID_Start_table: &super::BoolTrie = &super::BoolTrie { - r1: [ - 0x0000000000000000, 0x07fffffe07fffffe, 0x0420040000000000, 0xff7fffffff7fffff, - 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, - 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0x0000501f0003ffc3, - 0x0000000000000000, 0xb8df000000000000, 0xfffffffbffffd740, 0xffbfffffffffffff, - 0xffffffffffffffff, 0xffffffffffffffff, 0xfffffffffffffc03, 0xffffffffffffffff, - 0xfffeffffffffffff, 0xffffffff027fffff, 0x00000000000001ff, 0x000787ffffff0000, - 0xffffffff00000000, 0xfffec000000007ff, 0xffffffffffffffff, 0x9c00c060002fffff, - 0x0000fffffffd0000, 0xffffffffffffe000, 0x0002003fffffffff, 0x043007fffffffc00 - ], - r2: [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, - 24, 23, 25, 26, 27, 28, 29, 3, 30, 31, 32, 33, 34, 34, 34, 34, 34, 35, 36, 37, 38, 39, - 40, 41, 42, 34, 34, 34, 34, 34, 34, 34, 34, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, - 54, 55, 56, 57, 58, 59, 60, 3, 61, 62, 63, 64, 65, 66, 67, 68, 34, 34, 34, 3, 34, 34, - 34, 34, 69, 70, 71, 72, 3, 73, 74, 3, 75, 76, 77, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 78, - 79, 34, 80, 81, 82, 83, 84, 3, 3, 3, 3, 3, 3, 3, 3, 85, 42, 86, 87, 88, 34, 89, 90, 3, - 3, 3, 3, 3, 3, 3, 3, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 53, 3, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 91, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 92, 93, 34, 34, 34, 34, 94, - 95, 96, 91, 97, 34, 98, 99, 100, 48, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, - 111, 112, 34, 113, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, 34, - 34, 34, 34, 114, 115, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 34, 34, 34, 34, 34, - 116, 34, 117, 118, 119, 120, 121, 34, 122, 34, 34, 123, 124, 125, 126, 3, 127, 34, 128, - 129, 130, 131, 132 - ], - r3: &[ - 0x00000110043fffff, 0x000007ff01ffffff, 0x3fdfffff00000000, 0x0000000000000000, - 0x23fffffffffffff0, 0xfffe0003ff010000, 0x23c5fdfffff99fe1, 0x10030003b0004000, - 0x036dfdfffff987e0, 0x001c00005e000000, 0x23edfdfffffbbfe0, 0x0200000300010000, - 0x23edfdfffff99fe0, 0x00020003b0000000, 0x03ffc718d63dc7e8, 0x0000000000010000, - 0x23fffdfffffddfe0, 0x0000000307000000, 0x23effdfffffddfe1, 0x0006000340000000, - 0x27fffffffffddfe0, 0xfc00000380704000, 0x2ffbfffffc7fffe0, 0x000000000000007f, - 0x0005fffffffffffe, 0x2005ffaffffff7d6, 0x00000000f000005f, 0x0000000000000001, - 0x00001ffffffffeff, 0x0000000000001f00, 0x800007ffffffffff, 0xffe1c0623c3f0000, - 0xffffffff00004003, 0xf7ffffffffff20bf, 0xffffffffffffffff, 0xffffffff3d7f3dff, - 0x7f3dffffffff3dff, 0xffffffffff7fff3d, 0xffffffffff3dffff, 0x0000000007ffffff, - 0xffffffff0000ffff, 0x3f3fffffffffffff, 0xfffffffffffffffe, 0xffff9fffffffffff, - 0xffffffff07fffffe, 0x01ffc7ffffffffff, 0x0003ffff0003dfff, 0x0001dfff0003ffff, - 0x000fffffffffffff, 0x0000000010800000, 0xffffffff00000000, 0x01ffffffffffffff, - 0xffff05ffffffffff, 0x003fffffffffffff, 0x000000007fffffff, 0x001f3fffffff0000, - 0xffff0fffffffffff, 0x00000000000003ff, 0xffffffff007fffff, 0x00000000001fffff, - 0x0000008000000000, 0x000fffffffffffe0, 0x0000000000000fe0, 0xfc00c001fffffff8, - 0x0000003fffffffff, 0x0000000fffffffff, 0x3ffffffffc00e000, 0xe7ffffffffff01ff, - 0x046fde0000000000, 0xffffffff3f3fffff, 0x3fffffffaaff3f3f, 0x5fdfffffffffffff, - 0x1fdc1fff0fcf1fdc, 0x8002000000000000, 0x000000001fff0000, 0xf3fffd503f2ffc84, - 0xffffffff000043e0, 0x00000000000001ff, 0xffff7fffffffffff, 0xffffffff7fffffff, - 0x000c781fffffffff, 0xffff20bfffffffff, 0x000080ffffffffff, 0x7f7f7f7f007fffff, - 0x000000007f7f7f7f, 0x1f3e03fe000000e0, 0xfffffffee07fffff, 0xf7ffffffffffffff, - 0xfffeffffffffffe0, 0x07ffffff00007fff, 0xffff000000000000, 0x0000ffffffffffff, - 0x0000000000001fff, 0x3fffffffffff0000, 0x00000c00ffff1fff, 0x80007fffffffffff, - 0xffffffff3fffffff, 0xfffffffcff800000, 0xfffffffffffff9ff, 0xff8000000000007c, - 0x00000007fffff7bb, 0x000ffffffffffffc, 0x68fc000000000000, 0xffff003ffffffc00, - 0x1fffffff0000007f, 0x0007fffffffffff0, 0x7c00ffdf00008000, 0x000001ffffffffff, - 0xc47fffff00000ff7, 0x3e62ffffffffffff, 0x001c07ff38000005, 0xffff7f7f007e7e7e, - 0xffff00fff7ffffff, 0x00000007ffffffff, 0xffff000fffffffff, 0x0ffffffffffff87f, - 0xffff3fffffffffff, 0x0000000003ffffff, 0x5f7ffdffa0f8007f, 0xffffffffffffffdb, - 0x0003ffffffffffff, 0xfffffffffff80000, 0xfffffff03fffffff, 0x3fffffffffffffff, - 0xffffffffffff0000, 0xfffffffffffcffff, 0x03ff0000000000ff, 0xaa8a000000000000, - 0x1fffffffffffffff, 0x07fffffe00000000, 0xffffffc007fffffe, 0x7fffffff3fffffff, - 0x000000001cfcfcfc - ], - r4: [ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 5, 5, 9, 5, 10, 11, 5, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 12, 13, - 14, 7, 15, 16, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 - ], - r5: &[ - 0, 1, 2, 3, 4, 5, 4, 4, 4, 4, 6, 7, 8, 9, 10, 11, 2, 2, 12, 13, 14, 15, 4, 4, 2, 2, 2, - 2, 16, 17, 4, 4, 18, 19, 20, 21, 22, 4, 23, 4, 24, 25, 26, 27, 28, 29, 30, 4, 2, 31, 32, - 32, 15, 4, 4, 4, 4, 4, 4, 4, 33, 34, 4, 35, 36, 4, 37, 38, 39, 40, 41, 42, 43, 4, 44, - 20, 45, 46, 4, 4, 5, 47, 48, 49, 4, 4, 50, 51, 48, 52, 53, 4, 54, 4, 4, 4, 55, 4, 56, - 57, 4, 4, 58, 59, 60, 61, 62, 63, 4, 4, 4, 4, 64, 65, 66, 4, 67, 68, 69, 4, 4, 4, 4, 70, - 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 71, 4, 2, 50, 2, 2, 2, 72, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 50, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 73, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 63, 20, 4, 74, 48, 75, 66, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 2, 4, 4, 2, 76, 77, 78, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 79, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 32, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 20, 80, 2, - 2, 2, 2, 2, 81, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 82, 83, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 84, 85, 86, 87, 88, 2, 2, 2, 2, 89, 90, - 91, 92, 93, 94, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 95, 96, 4, 4, 4, 4, 4, 55, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 97, 2, 98, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 99, 100, 101, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 102, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 2, 2, 2, 10, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 103, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 104, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 105, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 - ], - r6: &[ - 0xb7ffff7fffffefff, 0x000000003fff3fff, 0xffffffffffffffff, 0x07ffffffffffffff, - 0x0000000000000000, 0x001fffffffffffff, 0xffffffff1fffffff, 0x000000000001ffff, - 0xffffe000ffffffff, 0x003fffffffff07ff, 0xffffffff3fffffff, 0x00000000003eff0f, - 0xffff00003fffffff, 0x0fffffffff0fffff, 0xffff00ffffffffff, 0x0000000fffffffff, - 0x007fffffffffffff, 0x000000ff003fffff, 0x91bffffffffffd3f, 0x007fffff003fffff, - 0x000000007fffffff, 0x0037ffff00000000, 0x03ffffff003fffff, 0xc0ffffffffffffff, - 0x003ffffffeef0001, 0x1fffffff00000000, 0x000000001fffffff, 0x0000001ffffffeff, - 0x003fffffffffffff, 0x0007ffff003fffff, 0x000000000003ffff, 0x00000000000001ff, - 0x0007ffffffffffff, 0xffff00801fffffff, 0x000000000000003f, 0x007fffff00000000, - 0x00fffffffffffff8, 0x0000fffffffffff8, 0x000001ffffff0000, 0x0000007ffffffff8, - 0x0047ffffffff0010, 0x0007fffffffffff8, 0x000000001400001e, 0x00000ffffffbffff, - 0xffff01ffbfffbd7f, 0x23edfdfffff99fe0, 0x00000003e0010000, 0x0000000080000780, - 0x0000ffffffffffff, 0x00000000000000b0, 0x00007fffffffffff, 0x000000000f000000, - 0x0000000000000010, 0x010007ffffffffff, 0x0000000007ffffff, 0x00000fffffffffff, - 0xffffffff00000000, 0x80000000ffffffff, 0xfffffcff00000000, 0x0000000a0001ffff, - 0x0407fffffffff801, 0xfffffffff0010000, 0x00000000200003ff, 0x01ffffffffffffff, - 0x00007ffffffffdff, 0xfffc000000000001, 0x000000000000ffff, 0x0001fffffffffb7f, - 0xfffffdbf00000040, 0x00000000010003ff, 0x0007ffff00000000, 0x0000000003ffffff, - 0x000000000000000f, 0x000000000000007f, 0x00003fffffff0000, 0xe0fffff80000000f, - 0x00000000000107ff, 0x00000000fff80000, 0x0000000b00000000, 0x00ffffffffffffff, - 0xffff00f000070000, 0x0fffffffffffffff, 0x1fff07ffffffffff, 0x0000000003ff01ff, - 0xffffffffffdfffff, 0xebffde64dfffffff, 0xffffffffffffffef, 0x7bffffffdfdfe7bf, - 0xfffffffffffdfc5f, 0xffffff3fffffffff, 0xf7fffffff7fffffd, 0xffdfffffffdfffff, - 0xffff7fffffff7fff, 0xfffffdfffffffdff, 0x0000000000000ff7, 0x3f801fffffffffff, - 0x0000000000004000, 0x000000000000001f, 0x000000000000080f, 0x0af7fe96ffffffef, - 0x5ef7f796aa96ea84, 0x0ffffbee0ffffbff, 0x00000000007fffff, 0xffff0003ffffffff, - 0x00000001ffffffff, 0x000000003fffffff - ], - }; - - pub fn XID_Start(c: char) -> bool { - XID_Start_table.lookup(c) - } - } pub(crate) mod property { - const Pattern_White_Space_table: &super::SmallBoolTrie = &super::SmallBoolTrie { - r1: &[ - 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3 - ], - r2: &[ - 0x0000000100003e00, 0x0000000000000000, 0x0000000000000020, 0x000003000000c000 - ], - }; - - pub fn Pattern_White_Space(c: char) -> bool { - Pattern_White_Space_table.lookup(c) - } - const White_Space_table: &super::SmallBoolTrie = &super::SmallBoolTrie { r1: &[ 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, diff --git a/src/libcore/unicode/unicode.py b/src/libcore/unicode/unicode.py index 6de5d9e033b93..89894f7932d79 100755 --- a/src/libcore/unicode/unicode.py +++ b/src/libcore/unicode/unicode.py @@ -728,7 +728,7 @@ def generate_property_module(mod, grouped_categories, category_subset): yield "pub(crate) mod %s {\n" % mod for cat in sorted(category_subset): - if cat in ("Cc", "White_Space", "Pattern_White_Space"): + if cat in ("Cc", "White_Space"): generator = generate_small_bool_trie("%s_table" % cat, grouped_categories[cat]) else: generator = generate_bool_trie("%s_table" % cat, grouped_categories[cat]) @@ -841,19 +841,18 @@ def main(): unicode_data = load_unicode_data(get_path(UnicodeFiles.UNICODE_DATA)) load_special_casing(get_path(UnicodeFiles.SPECIAL_CASING), unicode_data) - want_derived = {"XID_Start", "XID_Continue", "Alphabetic", "Lowercase", "Uppercase", + want_derived = {"Alphabetic", "Lowercase", "Uppercase", "Cased", "Case_Ignorable", "Grapheme_Extend"} derived = load_properties(get_path(UnicodeFiles.DERIVED_CORE_PROPERTIES), want_derived) props = load_properties(get_path(UnicodeFiles.PROPS), - {"White_Space", "Join_Control", "Noncharacter_Code_Point", - "Pattern_White_Space"}) + {"White_Space", "Join_Control", "Noncharacter_Code_Point"}) # Category tables for (name, categories, category_subset) in ( ("general_category", unicode_data.general_categories, ["N", "Cc"]), ("derived_property", derived, want_derived), - ("property", props, ["White_Space", "Pattern_White_Space"]) + ("property", props, ["White_Space"]) ): for fragment in generate_property_module(name, categories, category_subset): buf.write(fragment) diff --git a/src/libfmt_macros/Cargo.toml b/src/libfmt_macros/Cargo.toml index 82a9e34c065b1..fff4ec716dfda 100644 --- a/src/libfmt_macros/Cargo.toml +++ b/src/libfmt_macros/Cargo.toml @@ -10,4 +10,4 @@ path = "lib.rs" [dependencies] syntax_pos = { path = "../libsyntax_pos" } - +rustc_lexer = { path = "../librustc_lexer" } diff --git a/src/libfmt_macros/lib.rs b/src/libfmt_macros/lib.rs index 985abaf2c1bd5..98fa2bd061516 100644 --- a/src/libfmt_macros/lib.rs +++ b/src/libfmt_macros/lib.rs @@ -23,6 +23,7 @@ use std::string; use std::iter; use syntax_pos::{InnerSpan, Symbol}; +use rustc_lexer::character_properties::{is_id_start, is_id_continue}; #[derive(Copy, Clone)] struct InnerOffset(usize); @@ -597,12 +598,11 @@ impl<'a> Parser<'a> { } } - /// Parses a word starting at the current position. A word is considered to - /// be an alphabetic character followed by any number of alphanumeric - /// characters. + /// Parses a word starting at the current position. A word is the same as + /// Rust identifier, except that it can't start with `_` character. fn word(&mut self) -> &'a str { let start = match self.cur.peek() { - Some(&(pos, c)) if c.is_xid_start() => { + Some(&(pos, c)) if c != '_' && is_id_start(c) => { self.cur.next(); pos } @@ -611,7 +611,7 @@ impl<'a> Parser<'a> { } }; while let Some(&(pos, c)) = self.cur.peek() { - if c.is_xid_continue() { + if is_id_continue(c) { self.cur.next(); } else { return &self.input[start..pos]; diff --git a/src/librustc_lexer/Cargo.toml b/src/librustc_lexer/Cargo.toml index 0dbcda618ecac..675d3065c5b28 100644 --- a/src/librustc_lexer/Cargo.toml +++ b/src/librustc_lexer/Cargo.toml @@ -4,12 +4,12 @@ name = "rustc_lexer" version = "0.1.0" edition = "2018" -# Note that this crate purposefully does not depend on other rustc crates -[dependencies] -unicode-xid = { version = "0.1.0", optional = true } - # Note: do not remove this blank `[lib]` section. # This will be used when publishing this crate as `rustc-ap-rustc_lexer`. [lib] doctest = false name = "rustc_lexer" + +# Note that this crate purposefully does not depend on other rustc crates +[dependencies] +unicode-xid = "0.2.0" diff --git a/src/librustc_lexer/src/lib.rs b/src/librustc_lexer/src/lib.rs index 41b47befaf141..26e5e6fc8c4b1 100644 --- a/src/librustc_lexer/src/lib.rs +++ b/src/librustc_lexer/src/lib.rs @@ -1,6 +1,5 @@ -// We want to be able to build this crate with a stable compiler, so feature -// flags should be optional. -#![cfg_attr(not(feature = "unicode-xid"), feature(unicode_internals))] +// We want to be able to build this crate with a stable compiler, so no +// `#![feature]` attributes should be added. mod cursor; pub mod unescape; @@ -507,23 +506,39 @@ impl Cursor<'_> { } pub mod character_properties { - // this is Pattern_White_Space - #[cfg(feature = "unicode-xid")] + // See [UAX #31](http://unicode.org/reports/tr31) for definitions of these + // classes. + + // This is Pattern_White_Space. + // + // Note that this set is stable (ie, it doesn't change with different + // Unicode versions), so it's ok to just hard-code the values. pub fn is_whitespace(c: char) -> bool { match c { - '\u{0009}' | '\u{000A}' | '\u{000B}' | '\u{000C}' | '\u{000D}' | '\u{0020}' - | '\u{0085}' | '\u{200E}' | '\u{200F}' | '\u{2028}' | '\u{2029}' => true, + // Usual ASCII suspects + | '\u{0009}' // \t + | '\u{000A}' // \n + | '\u{000B}' // vertical tab + | '\u{000C}' // form feed + | '\u{000D}' // \r + | '\u{0020}' // space + + // NEXT LINE from latin1 + | '\u{0085}' + + // Bidi markers + | '\u{200E}' // LEFT-TO-RIGHT MARK + | '\u{200F}' // RIGHT-TO-LEFT MARK + + // Dedicated whitespace characters from Unicode + | '\u{2028}' // LINE SEPARATOR + | '\u{2029}' // PARAGRAPH SEPARATOR + => true, _ => false, } } - #[cfg(not(feature = "unicode-xid"))] - pub fn is_whitespace(c: char) -> bool { - core::unicode::property::Pattern_White_Space(c) - } - - // this is XID_Start OR '_' (which formally is not a XID_Start) - #[cfg(feature = "unicode-xid")] + // This is XID_Start OR '_' (which formally is not a XID_Start). pub fn is_id_start(c: char) -> bool { ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') @@ -531,16 +546,7 @@ pub mod character_properties { || (c > '\x7f' && unicode_xid::UnicodeXID::is_xid_start(c)) } - #[cfg(not(feature = "unicode-xid"))] - pub fn is_id_start(c: char) -> bool { - ('a' <= c && c <= 'z') - || ('A' <= c && c <= 'Z') - || c == '_' - || (c > '\x7f' && c.is_xid_start()) - } - - // this is XID_Continue - #[cfg(feature = "unicode-xid")] + // This is XID_Continue. pub fn is_id_continue(c: char) -> bool { ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') @@ -548,13 +554,4 @@ pub mod character_properties { || c == '_' || (c > '\x7f' && unicode_xid::UnicodeXID::is_xid_continue(c)) } - - #[cfg(not(feature = "unicode-xid"))] - pub fn is_id_continue(c: char) -> bool { - ('a' <= c && c <= 'z') - || ('A' <= c && c <= 'Z') - || ('0' <= c && c <= '9') - || c == '_' - || (c > '\x7f' && c.is_xid_continue()) - } } diff --git a/src/librustc_mir/Cargo.toml b/src/librustc_mir/Cargo.toml index 21008c737289f..4e68708adfc3e 100644 --- a/src/librustc_mir/Cargo.toml +++ b/src/librustc_mir/Cargo.toml @@ -20,6 +20,7 @@ rustc = { path = "../librustc" } rustc_target = { path = "../librustc_target" } rustc_data_structures = { path = "../librustc_data_structures" } rustc_errors = { path = "../librustc_errors" } +rustc_lexer = { path = "../librustc_lexer" } rustc_serialize = { path = "../libserialize", package = "serialize" } syntax = { path = "../libsyntax" } syntax_pos = { path = "../libsyntax_pos" } diff --git a/src/librustc_mir/borrow_check/move_errors.rs b/src/librustc_mir/borrow_check/move_errors.rs index f10ff71b15e68..abcb70b719741 100644 --- a/src/librustc_mir/borrow_check/move_errors.rs +++ b/src/librustc_mir/borrow_check/move_errors.rs @@ -1,8 +1,7 @@ -use core::unicode::property::Pattern_White_Space; - use rustc::mir::*; use rustc::ty; use rustc_errors::{DiagnosticBuilder,Applicability}; +use rustc_lexer::character_properties::is_whitespace; use syntax_pos::Span; use crate::borrow_check::MirBorrowckCtxt; @@ -526,7 +525,7 @@ impl<'a, 'tcx> MirBorrowckCtxt<'a, 'tcx> { let suggestion; let to_remove; if pat_snippet.starts_with("mut") - && pat_snippet["mut".len()..].starts_with(Pattern_White_Space) + && pat_snippet["mut".len()..].starts_with(is_whitespace) { suggestion = pat_snippet["mut".len()..].trim_start(); to_remove = "&mut"; diff --git a/src/librustc_mir/borrow_check/mutability_errors.rs b/src/librustc_mir/borrow_check/mutability_errors.rs index 5a5534922aa8a..32bf82c8bcde3 100644 --- a/src/librustc_mir/borrow_check/mutability_errors.rs +++ b/src/librustc_mir/borrow_check/mutability_errors.rs @@ -1,4 +1,4 @@ -use core::unicode::property::Pattern_White_Space; +use rustc_lexer::character_properties::is_whitespace; use rustc::hir; use rustc::hir::Node; use rustc::mir::{self, BindingForm, ClearCrossCrate, Local, Location, Body}; @@ -715,7 +715,7 @@ fn annotate_struct_field( fn suggest_ref_mut(tcx: TyCtxt<'_>, binding_span: Span) -> Option { let hi_src = tcx.sess.source_map().span_to_snippet(binding_span).ok()?; if hi_src.starts_with("ref") - && hi_src["ref".len()..].starts_with(Pattern_White_Space) + && hi_src["ref".len()..].starts_with(is_whitespace) { let replacement = format!("ref mut{}", &hi_src["ref".len()..]); Some(replacement) diff --git a/src/librustdoc/lib.rs b/src/librustdoc/lib.rs index dfa0db0d23b74..301946733dc55 100644 --- a/src/librustdoc/lib.rs +++ b/src/librustdoc/lib.rs @@ -33,6 +33,7 @@ extern crate rustc_interface; extern crate rustc_metadata; extern crate rustc_target; extern crate rustc_typeck; +extern crate rustc_lexer; extern crate serialize; extern crate syntax; extern crate syntax_pos; diff --git a/src/librustdoc/test.rs b/src/librustdoc/test.rs index adcc9930b6c33..1105e47d74824 100644 --- a/src/librustdoc/test.rs +++ b/src/librustdoc/test.rs @@ -4,6 +4,7 @@ use rustc::hir; use rustc::hir::intravisit; use rustc::session::{self, config, DiagnosticOutput}; use rustc::util::common::ErrorReported; +use rustc_lexer::character_properties::{is_id_start, is_id_continue}; use syntax::ast; use syntax::with_globals; use syntax::source_map::SourceMap; @@ -763,8 +764,8 @@ impl Tester for Collector { // We use these headings as test names, so it's good if // they're valid identifiers. let name = name.chars().enumerate().map(|(i, c)| { - if (i == 0 && c.is_xid_start()) || - (i != 0 && c.is_xid_continue()) { + if (i == 0 && is_id_start(c)) || + (i != 0 && is_id_continue(c)) { c } else { '_' diff --git a/src/libsyntax/ext/proc_macro_server.rs b/src/libsyntax/ext/proc_macro_server.rs index 1a26b17dac782..35feb6680f9cc 100644 --- a/src/libsyntax/ext/proc_macro_server.rs +++ b/src/libsyntax/ext/proc_macro_server.rs @@ -6,6 +6,7 @@ use crate::tokenstream::{self, DelimSpan, IsJoint::*, TokenStream, TreeAndJoint} use errors::{Diagnostic, DiagnosticBuilder}; use rustc_data_structures::sync::Lrc; +use rustc_lexer::character_properties::{is_id_start, is_id_continue}; use syntax_pos::{BytePos, FileName, MultiSpan, Pos, SourceFile, Span}; use syntax_pos::symbol::{kw, sym, Symbol}; @@ -322,8 +323,7 @@ impl Ident { fn is_valid(string: &str) -> bool { let mut chars = string.chars(); if let Some(start) = chars.next() { - (start == '_' || start.is_xid_start()) - && chars.all(|cont| cont == '_' || cont.is_xid_continue()) + is_id_start(start) && chars.all(is_id_continue) } else { false } diff --git a/src/libsyntax_ext/Cargo.toml b/src/libsyntax_ext/Cargo.toml index 73310df305b32..791ee94b4fa77 100644 --- a/src/libsyntax_ext/Cargo.toml +++ b/src/libsyntax_ext/Cargo.toml @@ -18,3 +18,4 @@ rustc_target = { path = "../librustc_target" } smallvec = { version = "0.6.7", features = ["union", "may_dangle"] } syntax = { path = "../libsyntax" } syntax_pos = { path = "../libsyntax_pos" } +rustc_lexer = { path = "../librustc_lexer" } From 206fe8e1c37d55d0bf3a82baaa23eb5fb148880b Mon Sep 17 00:00:00 2001 From: Aleksey Kladov Date: Wed, 4 Sep 2019 13:16:36 +0300 Subject: [PATCH 2/2] flatten rustc_lexer::character_properties module On the call site, `rustc_lexer::is_whitespace` reads much better than `character_properties::is_whitespace`. --- src/libfmt_macros/lib.rs | 5 +- src/librustc_lexer/src/lib.rs | 135 +++++++++--------- src/librustc_mir/borrow_check/move_errors.rs | 3 +- .../borrow_check/mutability_errors.rs | 3 +- src/librustdoc/test.rs | 5 +- src/libsyntax/ext/proc_macro_server.rs | 3 +- src/libsyntax/tests.rs | 14 +- 7 files changed, 82 insertions(+), 86 deletions(-) diff --git a/src/libfmt_macros/lib.rs b/src/libfmt_macros/lib.rs index 98fa2bd061516..f9c1be20b8bc1 100644 --- a/src/libfmt_macros/lib.rs +++ b/src/libfmt_macros/lib.rs @@ -23,7 +23,6 @@ use std::string; use std::iter; use syntax_pos::{InnerSpan, Symbol}; -use rustc_lexer::character_properties::{is_id_start, is_id_continue}; #[derive(Copy, Clone)] struct InnerOffset(usize); @@ -602,7 +601,7 @@ impl<'a> Parser<'a> { /// Rust identifier, except that it can't start with `_` character. fn word(&mut self) -> &'a str { let start = match self.cur.peek() { - Some(&(pos, c)) if c != '_' && is_id_start(c) => { + Some(&(pos, c)) if c != '_' && rustc_lexer::is_id_start(c) => { self.cur.next(); pos } @@ -611,7 +610,7 @@ impl<'a> Parser<'a> { } }; while let Some(&(pos, c)) = self.cur.peek() { - if is_id_continue(c) { + if rustc_lexer::is_id_continue(c) { self.cur.next(); } else { return &self.input[start..pos]; diff --git a/src/librustc_lexer/src/lib.rs b/src/librustc_lexer/src/lib.rs index 26e5e6fc8c4b1..30a5175d8cdb0 100644 --- a/src/librustc_lexer/src/lib.rs +++ b/src/librustc_lexer/src/lib.rs @@ -102,6 +102,62 @@ pub fn tokenize(mut input: &str) -> impl Iterator + '_ { }) } +// See [UAX #31](http://unicode.org/reports/tr31) for definitions of these +// classes. + +/// True if `c` is considered a whitespace according to Rust language definition. +pub fn is_whitespace(c: char) -> bool { + // This is Pattern_White_Space. + // + // Note that this set is stable (ie, it doesn't change with different + // Unicode versions), so it's ok to just hard-code the values. + + match c { + // Usual ASCII suspects + | '\u{0009}' // \t + | '\u{000A}' // \n + | '\u{000B}' // vertical tab + | '\u{000C}' // form feed + | '\u{000D}' // \r + | '\u{0020}' // space + + // NEXT LINE from latin1 + | '\u{0085}' + + // Bidi markers + | '\u{200E}' // LEFT-TO-RIGHT MARK + | '\u{200F}' // RIGHT-TO-LEFT MARK + + // Dedicated whitespace characters from Unicode + | '\u{2028}' // LINE SEPARATOR + | '\u{2029}' // PARAGRAPH SEPARATOR + => true, + _ => false, + } +} + +/// True if `c` is valid as a first character of an identifier. +pub fn is_id_start(c: char) -> bool { + // This is XID_Start OR '_' (which formally is not a XID_Start). + // We also add fast-path for ascii idents + ('a' <= c && c <= 'z') + || ('A' <= c && c <= 'Z') + || c == '_' + || (c > '\x7f' && unicode_xid::UnicodeXID::is_xid_start(c)) +} + +/// True if `c` is valid as a non-first character of an identifier. +pub fn is_id_continue(c: char) -> bool { + // This is exactly XID_Continue. + // We also add fast-path for ascii idents + ('a' <= c && c <= 'z') + || ('A' <= c && c <= 'Z') + || ('0' <= c && c <= '9') + || c == '_' + || (c > '\x7f' && unicode_xid::UnicodeXID::is_xid_continue(c)) +} + + impl Cursor<'_> { fn advance_token(&mut self) -> Token { let first_char = self.bump().unwrap(); @@ -111,9 +167,9 @@ impl Cursor<'_> { '*' => self.block_comment(), _ => Slash, }, - c if character_properties::is_whitespace(c) => self.whitespace(), + c if is_whitespace(c) => self.whitespace(), 'r' => match (self.nth_char(0), self.nth_char(1)) { - ('#', c1) if character_properties::is_id_start(c1) => self.raw_ident(), + ('#', c1) if is_id_start(c1) => self.raw_ident(), ('#', _) | ('"', _) => { let (n_hashes, started, terminated) = self.raw_double_quoted_string(); let suffix_start = self.len_consumed(); @@ -158,7 +214,7 @@ impl Cursor<'_> { } _ => self.ident(), }, - c if character_properties::is_id_start(c) => self.ident(), + c if is_id_start(c) => self.ident(), c @ '0'..='9' => { let literal_kind = self.number(c); let suffix_start = self.len_consumed(); @@ -246,8 +302,8 @@ impl Cursor<'_> { } fn whitespace(&mut self) -> TokenKind { - debug_assert!(character_properties::is_whitespace(self.prev())); - while character_properties::is_whitespace(self.nth_char(0)) { + debug_assert!(is_whitespace(self.prev())); + while is_whitespace(self.nth_char(0)) { self.bump(); } Whitespace @@ -257,19 +313,19 @@ impl Cursor<'_> { debug_assert!( self.prev() == 'r' && self.nth_char(0) == '#' - && character_properties::is_id_start(self.nth_char(1)) + && is_id_start(self.nth_char(1)) ); self.bump(); self.bump(); - while character_properties::is_id_continue(self.nth_char(0)) { + while is_id_continue(self.nth_char(0)) { self.bump(); } RawIdent } fn ident(&mut self) -> TokenKind { - debug_assert!(character_properties::is_id_start(self.prev())); - while character_properties::is_id_continue(self.nth_char(0)) { + debug_assert!(is_id_start(self.prev())); + while is_id_continue(self.nth_char(0)) { self.bump(); } Ident @@ -314,7 +370,7 @@ impl Cursor<'_> { // integer literal followed by field/method access or a range pattern // (`0..2` and `12.foo()`) '.' if self.nth_char(1) != '.' - && !character_properties::is_id_start(self.nth_char(1)) => + && !is_id_start(self.nth_char(1)) => { // might have stuff after the ., and if it does, it needs to start // with a number @@ -344,7 +400,7 @@ impl Cursor<'_> { fn lifetime_or_char(&mut self) -> TokenKind { debug_assert!(self.prev() == '\''); let mut starts_with_number = false; - if (character_properties::is_id_start(self.nth_char(0)) + if (is_id_start(self.nth_char(0)) || self.nth_char(0).is_digit(10) && { starts_with_number = true; true @@ -352,7 +408,7 @@ impl Cursor<'_> { && self.nth_char(1) != '\'' { self.bump(); - while character_properties::is_id_continue(self.nth_char(0)) { + while is_id_continue(self.nth_char(0)) { self.bump(); } @@ -494,64 +550,13 @@ impl Cursor<'_> { } fn eat_literal_suffix(&mut self) { - if !character_properties::is_id_start(self.nth_char(0)) { + if !is_id_start(self.nth_char(0)) { return; } self.bump(); - while character_properties::is_id_continue(self.nth_char(0)) { + while is_id_continue(self.nth_char(0)) { self.bump(); } } } - -pub mod character_properties { - // See [UAX #31](http://unicode.org/reports/tr31) for definitions of these - // classes. - - // This is Pattern_White_Space. - // - // Note that this set is stable (ie, it doesn't change with different - // Unicode versions), so it's ok to just hard-code the values. - pub fn is_whitespace(c: char) -> bool { - match c { - // Usual ASCII suspects - | '\u{0009}' // \t - | '\u{000A}' // \n - | '\u{000B}' // vertical tab - | '\u{000C}' // form feed - | '\u{000D}' // \r - | '\u{0020}' // space - - // NEXT LINE from latin1 - | '\u{0085}' - - // Bidi markers - | '\u{200E}' // LEFT-TO-RIGHT MARK - | '\u{200F}' // RIGHT-TO-LEFT MARK - - // Dedicated whitespace characters from Unicode - | '\u{2028}' // LINE SEPARATOR - | '\u{2029}' // PARAGRAPH SEPARATOR - => true, - _ => false, - } - } - - // This is XID_Start OR '_' (which formally is not a XID_Start). - pub fn is_id_start(c: char) -> bool { - ('a' <= c && c <= 'z') - || ('A' <= c && c <= 'Z') - || c == '_' - || (c > '\x7f' && unicode_xid::UnicodeXID::is_xid_start(c)) - } - - // This is XID_Continue. - pub fn is_id_continue(c: char) -> bool { - ('a' <= c && c <= 'z') - || ('A' <= c && c <= 'Z') - || ('0' <= c && c <= '9') - || c == '_' - || (c > '\x7f' && unicode_xid::UnicodeXID::is_xid_continue(c)) - } -} diff --git a/src/librustc_mir/borrow_check/move_errors.rs b/src/librustc_mir/borrow_check/move_errors.rs index abcb70b719741..0d13db2f5a413 100644 --- a/src/librustc_mir/borrow_check/move_errors.rs +++ b/src/librustc_mir/borrow_check/move_errors.rs @@ -1,7 +1,6 @@ use rustc::mir::*; use rustc::ty; use rustc_errors::{DiagnosticBuilder,Applicability}; -use rustc_lexer::character_properties::is_whitespace; use syntax_pos::Span; use crate::borrow_check::MirBorrowckCtxt; @@ -525,7 +524,7 @@ impl<'a, 'tcx> MirBorrowckCtxt<'a, 'tcx> { let suggestion; let to_remove; if pat_snippet.starts_with("mut") - && pat_snippet["mut".len()..].starts_with(is_whitespace) + && pat_snippet["mut".len()..].starts_with(rustc_lexer::is_whitespace) { suggestion = pat_snippet["mut".len()..].trim_start(); to_remove = "&mut"; diff --git a/src/librustc_mir/borrow_check/mutability_errors.rs b/src/librustc_mir/borrow_check/mutability_errors.rs index 32bf82c8bcde3..8f2ce80aafa22 100644 --- a/src/librustc_mir/borrow_check/mutability_errors.rs +++ b/src/librustc_mir/borrow_check/mutability_errors.rs @@ -1,4 +1,3 @@ -use rustc_lexer::character_properties::is_whitespace; use rustc::hir; use rustc::hir::Node; use rustc::mir::{self, BindingForm, ClearCrossCrate, Local, Location, Body}; @@ -715,7 +714,7 @@ fn annotate_struct_field( fn suggest_ref_mut(tcx: TyCtxt<'_>, binding_span: Span) -> Option { let hi_src = tcx.sess.source_map().span_to_snippet(binding_span).ok()?; if hi_src.starts_with("ref") - && hi_src["ref".len()..].starts_with(is_whitespace) + && hi_src["ref".len()..].starts_with(rustc_lexer::is_whitespace) { let replacement = format!("ref mut{}", &hi_src["ref".len()..]); Some(replacement) diff --git a/src/librustdoc/test.rs b/src/librustdoc/test.rs index 1105e47d74824..000d2843adce3 100644 --- a/src/librustdoc/test.rs +++ b/src/librustdoc/test.rs @@ -4,7 +4,6 @@ use rustc::hir; use rustc::hir::intravisit; use rustc::session::{self, config, DiagnosticOutput}; use rustc::util::common::ErrorReported; -use rustc_lexer::character_properties::{is_id_start, is_id_continue}; use syntax::ast; use syntax::with_globals; use syntax::source_map::SourceMap; @@ -764,8 +763,8 @@ impl Tester for Collector { // We use these headings as test names, so it's good if // they're valid identifiers. let name = name.chars().enumerate().map(|(i, c)| { - if (i == 0 && is_id_start(c)) || - (i != 0 && is_id_continue(c)) { + if (i == 0 && rustc_lexer::is_id_start(c)) || + (i != 0 && rustc_lexer::is_id_continue(c)) { c } else { '_' diff --git a/src/libsyntax/ext/proc_macro_server.rs b/src/libsyntax/ext/proc_macro_server.rs index 35feb6680f9cc..544ec789d80a9 100644 --- a/src/libsyntax/ext/proc_macro_server.rs +++ b/src/libsyntax/ext/proc_macro_server.rs @@ -6,7 +6,6 @@ use crate::tokenstream::{self, DelimSpan, IsJoint::*, TokenStream, TreeAndJoint} use errors::{Diagnostic, DiagnosticBuilder}; use rustc_data_structures::sync::Lrc; -use rustc_lexer::character_properties::{is_id_start, is_id_continue}; use syntax_pos::{BytePos, FileName, MultiSpan, Pos, SourceFile, Span}; use syntax_pos::symbol::{kw, sym, Symbol}; @@ -323,7 +322,7 @@ impl Ident { fn is_valid(string: &str) -> bool { let mut chars = string.chars(); if let Some(start) = chars.next() { - is_id_start(start) && chars.all(is_id_continue) + rustc_lexer::is_id_start(start) && chars.all(rustc_lexer::is_id_continue) } else { false } diff --git a/src/libsyntax/tests.rs b/src/libsyntax/tests.rs index c472212bc2096..9b90b31f2d2a2 100644 --- a/src/libsyntax/tests.rs +++ b/src/libsyntax/tests.rs @@ -63,7 +63,7 @@ crate fn matches_codepattern(a : &str, b : &str) -> bool { (None, None) => return true, (None, _) => return false, (Some(&a), None) => { - if is_pattern_whitespace(a) { + if rustc_lexer::is_whitespace(a) { break // trailing whitespace check is out of loop for borrowck } else { return false @@ -72,11 +72,11 @@ crate fn matches_codepattern(a : &str, b : &str) -> bool { (Some(&a), Some(&b)) => (a, b) }; - if is_pattern_whitespace(a) && is_pattern_whitespace(b) { + if rustc_lexer::is_whitespace(a) && rustc_lexer::is_whitespace(b) { // skip whitespace for a and b scan_for_non_ws_or_end(&mut a_iter); scan_for_non_ws_or_end(&mut b_iter); - } else if is_pattern_whitespace(a) { + } else if rustc_lexer::is_whitespace(a) { // skip whitespace for a scan_for_non_ws_or_end(&mut a_iter); } else if a == b { @@ -88,20 +88,16 @@ crate fn matches_codepattern(a : &str, b : &str) -> bool { } // check if a has *only* trailing whitespace - a_iter.all(is_pattern_whitespace) + a_iter.all(rustc_lexer::is_whitespace) } /// Advances the given peekable `Iterator` until it reaches a non-whitespace character fn scan_for_non_ws_or_end>(iter: &mut Peekable) { - while iter.peek().copied().map(|c| is_pattern_whitespace(c)) == Some(true) { + while iter.peek().copied().map(|c| rustc_lexer::is_whitespace(c)) == Some(true) { iter.next(); } } -fn is_pattern_whitespace(c: char) -> bool { - rustc_lexer::character_properties::is_whitespace(c) -} - /// Identify a position in the text by the Nth occurrence of a string. struct Position { string: &'static str,