Auto merge of #22466 - Kimundi:str_pattern_ai_safe, r=aturon

This is not a complete implementation of the RFC: - only existing methods got updated, no new ones added - doc comments are not extensive enough yet - optimizations got lost and need to be reimplemented See rust-lang/rfcs#528 Technically a [breaking-change]
rust-lang · Feb 22, 2015 · 67eb38e · 67eb38e
2 parents dcc6ce2 + c8dd2d0
commit 67eb38e
Show file tree

Hide file tree

Showing 20 changed files with 1,076 additions and 350 deletions.
diff --git a/src/compiletest/errors.rs b/src/compiletest/errors.rs
@@ -58,7 +58,7 @@ pub fn load_errors(testfile: &Path) -> Vec<ExpectedError> {
 fn parse_expected(last_nonfollow_error: Option<uint>,
                   line_num: uint,
                   line: &str) -> Option<(WhichLine, ExpectedError)> {
-    let start = match line.find_str("//~") { Some(i) => i, None => return None };
+    let start = match line.find("//~") { Some(i) => i, None => return None };
     let (follow, adjusts) = if line.char_at(start + 3) == '|' {
         (true, 0)
     } else {

diff --git a/src/compiletest/header.rs b/src/compiletest/header.rs
@@ -330,7 +330,7 @@ fn parse_name_directive(line: &str, directive: &str) -> bool {
 pub fn parse_name_value_directive(line: &str, directive: &str)
                                   -> Option<String> {
     let keycolon = format!("{}:", directive);
-    match line.find_str(&keycolon) {
+    match line.find(&keycolon) {
         Some(colon) => {
             let value = line[(colon + keycolon.len()) .. line.len()].to_string();
             debug!("{}: {}", directive, value);

diff --git a/src/compiletest/runtest.rs b/src/compiletest/runtest.rs
@@ -847,7 +847,7 @@ fn check_debugger_output(debugger_run_result: &ProcRes, check_lines: &[String])
             check_lines.iter().map(|s| {
                 s
                  .trim()
-                 .split_str("[...]")
+                 .split("[...]")
                  .map(|x| x.to_string())
                  .collect()
             }).collect();
@@ -866,7 +866,7 @@ fn check_debugger_output(debugger_run_result: &ProcRes, check_lines: &[String])
                         None
                     }
                 } else {
-                    rest.find_str(frag)
+                    rest.find(frag)
                 };
                 match found {
                     None => {

diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs
@@ -82,6 +82,8 @@ pub use core::str::{SplitN, RSplitN};
 pub use core::str::{from_utf8, CharEq, Chars, CharIndices, Bytes};
 pub use core::str::{from_utf8_unchecked, from_c_str, ParseBoolError};
 pub use unicode::str::{Words, Graphemes, GraphemeIndices};
+pub use core::str::Pattern;
+pub use core::str::{Searcher, ReverseSearcher, DoubleEndedSearcher, SearchStep};
 
 /*
 Section: Creating a string
@@ -530,7 +532,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
     /// assert!("bananas".contains("nana"));
     /// ```
     #[stable(feature = "rust1", since = "1.0.0")]
-    fn contains(&self, pat: &str) -> bool {
+    fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
         core_str::StrExt::contains(&self[..], pat)
     }
 
@@ -545,9 +547,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
     /// ```rust
     /// assert!("hello".contains_char('e'));
     /// ```
-    #[unstable(feature = "collections",
-               reason = "might get removed in favour of a more generic contains()")]
-    fn contains_char<P: CharEq>(&self, pat: P) -> bool {
+    #[unstable(feature = "collections")]
+    #[deprecated(since = "1.0.0", reason = "use `contains()` with a char")]
+    fn contains_char<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
         core_str::StrExt::contains_char(&self[..], pat)
     }
 
@@ -603,7 +605,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
     /// assert_eq!(v, vec![""]);
     /// ```
     #[stable(feature = "rust1", since = "1.0.0")]
-    fn split<P: CharEq>(&self, pat: P) -> Split<P> {
+    fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> {
         core_str::StrExt::split(&self[..], pat)
     }
 
@@ -630,7 +632,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
     /// assert_eq!(v, vec![""]);
     /// ```
     #[stable(feature = "rust1", since = "1.0.0")]
-    fn splitn<P: CharEq>(&self, count: usize, pat: P) -> SplitN<P> {
+    fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> {
         core_str::StrExt::splitn(&self[..], count, pat)
     }
 
@@ -658,8 +660,8 @@ pub trait StrExt: Index<RangeFull, Output = str> {
     /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect();
     /// assert_eq!(v, vec!["leopard", "tiger", "", "lion"]);
     /// ```
-    #[unstable(feature = "collections", reason = "might get removed")]
-    fn split_terminator<P: CharEq>(&self, pat: P) -> SplitTerminator<P> {
+    #[stable(feature = "rust1", since = "1.0.0")]
+    fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> {
         core_str::StrExt::split_terminator(&self[..], pat)
     }
 
@@ -680,7 +682,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
     /// assert_eq!(v, vec!["leopard", "tiger", "lionX"]);
     /// ```
     #[stable(feature = "rust1", since = "1.0.0")]
-    fn rsplitn<P: CharEq>(&self, count: usize, pat: P) -> RSplitN<P> {
+    fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P> {
         core_str::StrExt::rsplitn(&self[..], count, pat)
     }
 
@@ -706,7 +708,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
     /// ```
     #[unstable(feature = "collections",
                reason = "might have its iterator type changed")]
-    fn match_indices<'a>(&'a self, pat: &'a str) -> MatchIndices<'a> {
+    // NB: Right now MatchIndices yields `(usize, usize)`,
+    // but it would be more consistent and useful to return `(usize, &str)`
+    fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> {
         core_str::StrExt::match_indices(&self[..], pat)
     }
 
@@ -721,9 +725,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
     /// let v: Vec<&str> = "1abcabc2".split_str("abc").collect();
     /// assert_eq!(v, vec!["1", "", "2"]);
     /// ```
-    #[unstable(feature = "collections",
-               reason = "might get removed in the future in favor of a more generic split()")]
-    fn split_str<'a>(&'a self, pat: &'a str) -> SplitStr<'a> {
+    #[unstable(feature = "collections")]
+    #[deprecated(since = "1.0.0", reason = "use `split()` with a `&str`")]
+    fn split_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitStr<'a, P> {
         core_str::StrExt::split_str(&self[..], pat)
     }
 
@@ -825,7 +829,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
     /// assert!("banana".starts_with("ba"));
     /// ```
     #[stable(feature = "rust1", since = "1.0.0")]
-    fn starts_with(&self, pat: &str) -> bool {
+    fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
         core_str::StrExt::starts_with(&self[..], pat)
     }
 
@@ -837,7 +841,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
     /// assert!("banana".ends_with("nana"));
     /// ```
     #[stable(feature = "rust1", since = "1.0.0")]
-    fn ends_with(&self, pat: &str) -> bool {
+    fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool
+        where P::Searcher: ReverseSearcher<'a>
+    {
         core_str::StrExt::ends_with(&self[..], pat)
     }
 
@@ -857,7 +863,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
     /// assert_eq!("123foo1bar123".trim_matches(|c: char| c.is_numeric()), "foo1bar");
     /// ```
     #[stable(feature = "rust1", since = "1.0.0")]
-    fn trim_matches<P: CharEq>(&self, pat: P) -> &str {
+    fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
+        where P::Searcher: DoubleEndedSearcher<'a>
+    {
         core_str::StrExt::trim_matches(&self[..], pat)
     }
 
@@ -877,7 +885,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
     /// assert_eq!("123foo1bar123".trim_left_matches(|c: char| c.is_numeric()), "foo1bar123");
     /// ```
     #[stable(feature = "rust1", since = "1.0.0")]
-    fn trim_left_matches<P: CharEq>(&self, pat: P) -> &str {
+    fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
         core_str::StrExt::trim_left_matches(&self[..], pat)
     }
 
@@ -897,7 +905,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
     /// assert_eq!("123foo1bar123".trim_right_matches(|c: char| c.is_numeric()), "123foo1bar");
     /// ```
     #[stable(feature = "rust1", since = "1.0.0")]
-    fn trim_right_matches<P: CharEq>(&self, pat: P) -> &str {
+    fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
+        where P::Searcher: ReverseSearcher<'a>
+    {
         core_str::StrExt::trim_right_matches(&self[..], pat)
     }
 
@@ -1074,7 +1084,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
     /// assert_eq!(s.find(x), None);
     /// ```
     #[stable(feature = "rust1", since = "1.0.0")]
-    fn find<P: CharEq>(&self, pat: P) -> Option<usize> {
+    fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
         core_str::StrExt::find(&self[..], pat)
     }
 
@@ -1102,7 +1112,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
     /// assert_eq!(s.rfind(x), None);
     /// ```
     #[stable(feature = "rust1", since = "1.0.0")]
-    fn rfind<P: CharEq>(&self, pat: P) -> Option<usize> {
+    fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>
+        where P::Searcher: ReverseSearcher<'a>
+    {
         core_str::StrExt::rfind(&self[..], pat)
     }
 
@@ -1125,9 +1137,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
     /// assert_eq!(s.find_str("老虎 L"), Some(6));
     /// assert_eq!(s.find_str("muffin man"), None);
     /// ```
-    #[unstable(feature = "collections",
-               reason = "might get removed in favor of a more generic find in the future")]
-    fn find_str(&self, needle: &str) -> Option<usize> {
+    #[unstable(feature = "collections")]
+    #[deprecated(since = "1.0.0", reason = "use `find()` with a `&str`")]
+    fn find_str<'a, P: Pattern<'a>>(&'a self, needle: P) -> Option<usize> {
         core_str::StrExt::find_str(&self[..], needle)
     }
 
@@ -2887,22 +2899,6 @@ mod bench {
         b.iter(|| assert_eq!(s.split('V').count(), 3));
     }
 
-    #[bench]
-    fn split_unicode_not_ascii(b: &mut Bencher) {
-        struct NotAscii(char);
-        impl CharEq for NotAscii {
-            fn matches(&mut self, c: char) -> bool {
-                let NotAscii(cc) = *self;
-                cc == c
-            }
-            fn only_ascii(&self) -> bool { false }
-        }
-        let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
-
-        b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
-    }
-
-
     #[bench]
     fn split_ascii(b: &mut Bencher) {
         let s = "Mary had a little lamb, Little lamb, little-lamb.";
@@ -2911,23 +2907,6 @@ mod bench {
         b.iter(|| assert_eq!(s.split(' ').count(), len));
     }
 
-    #[bench]
-    fn split_not_ascii(b: &mut Bencher) {
-        struct NotAscii(char);
-        impl CharEq for NotAscii {
-            #[inline]
-            fn matches(&mut self, c: char) -> bool {
-                let NotAscii(cc) = *self;
-                cc == c
-            }
-            fn only_ascii(&self) -> bool { false }
-        }
-        let s = "Mary had a little lamb, Little lamb, little-lamb.";
-        let len = s.split(' ').count();
-
-        b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
-    }
-
     #[bench]
     fn split_extern_fn(b: &mut Bencher) {
         let s = "Mary had a little lamb, Little lamb, little-lamb.";

diff --git a/src/libcore/char.rs b/src/libcore/char.rs
@@ -22,13 +22,13 @@ use option::Option;
 use slice::SliceExt;
 
 // UTF-8 ranges and tags for encoding characters
-static TAG_CONT: u8    = 0b1000_0000u8;
-static TAG_TWO_B: u8   = 0b1100_0000u8;
-static TAG_THREE_B: u8 = 0b1110_0000u8;
-static TAG_FOUR_B: u8  = 0b1111_0000u8;
-static MAX_ONE_B: u32   =     0x80u32;
-static MAX_TWO_B: u32   =    0x800u32;
-static MAX_THREE_B: u32 =  0x10000u32;
+const TAG_CONT: u8    = 0b1000_0000u8;
+const TAG_TWO_B: u8   = 0b1100_0000u8;
+const TAG_THREE_B: u8 = 0b1110_0000u8;
+const TAG_FOUR_B: u8  = 0b1111_0000u8;
+const MAX_ONE_B: u32   =     0x80u32;
+const MAX_TWO_B: u32   =    0x800u32;
+const MAX_THREE_B: u32 =  0x10000u32;
 
 /*
     Lu  Uppercase_Letter        an uppercase letter
@@ -398,11 +398,14 @@ impl CharExt for char {
     #[stable(feature = "rust1", since = "1.0.0")]
     fn len_utf8(self) -> usize {
         let code = self as u32;
-        match () {
-            _ if code < MAX_ONE_B   => 1,
-            _ if code < MAX_TWO_B   => 2,
-            _ if code < MAX_THREE_B => 3,
-            _  => 4,
+        if code < MAX_ONE_B {
+            1
+        } else if code < MAX_TWO_B {
+            2
+        } else if code < MAX_THREE_B {
+            3
+        } else {
+            4
         }
     }
 

diff --git a/src/libcore/slice.rs b/src/libcore/slice.rs
@@ -657,6 +657,8 @@ macro_rules! iterator {
             fn next(&mut self) -> Option<$elem> {
                 // could be implemented with slices, but this avoids bounds checks
                 unsafe {
+                    ::intrinsics::assume(!self.ptr.is_null());
+                    ::intrinsics::assume(!self.end.is_null());
                     if self.ptr == self.end {
                         None
                     } else {
@@ -693,6 +695,8 @@ macro_rules! iterator {
             fn next_back(&mut self) -> Option<$elem> {
                 // could be implemented with slices, but this avoids bounds checks
                 unsafe {
+                    ::intrinsics::assume(!self.ptr.is_null());
+                    ::intrinsics::assume(!self.end.is_null());
                     if self.end == self.ptr {
                         None
                     } else {