Skip to content

Commit

Permalink
Auto merge of #22466 - Kimundi:str_pattern_ai_safe, r=aturon
Browse files: browse the repository at this point in the history
This is not a complete implementation of the RFC:

- only existing methods got updated, no new ones added
- doc comments are not extensive enough yet
- optimizations got lost and need to be reimplemented

See rust-lang/rfcs#528

Technically a [breaking-change].
  • Loading branch information
bors committed Feb 22, 2015
2 parents dcc6ce2 + c8dd2d0 commit 67eb38e
Show file tree
Hide file tree
Showing 20 changed files with 1,076 additions and 350 deletions.
2 changes: 1 addition & 1 deletion src/compiletest/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ pub fn load_errors(testfile: &Path) -> Vec<ExpectedError> {
fn parse_expected(last_nonfollow_error: Option<uint>,
line_num: uint,
line: &str) -> Option<(WhichLine, ExpectedError)> {
let start = match line.find_str("//~") { Some(i) => i, None => return None };
let start = match line.find("//~") { Some(i) => i, None => return None };
let (follow, adjusts) = if line.char_at(start + 3) == '|' {
(true, 0)
} else {
Expand Down
2 changes: 1 addition & 1 deletion src/compiletest/header.rs
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,7 @@ fn parse_name_directive(line: &str, directive: &str) -> bool {
pub fn parse_name_value_directive(line: &str, directive: &str)
-> Option<String> {
let keycolon = format!("{}:", directive);
match line.find_str(&keycolon) {
match line.find(&keycolon) {
Some(colon) => {
let value = line[(colon + keycolon.len()) .. line.len()].to_string();
debug!("{}: {}", directive, value);
Expand Down
4 changes: 2 additions & 2 deletions src/compiletest/runtest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -847,7 +847,7 @@ fn check_debugger_output(debugger_run_result: &ProcRes, check_lines: &[String])
check_lines.iter().map(|s| {
s
.trim()
.split_str("[...]")
.split("[...]")
.map(|x| x.to_string())
.collect()
}).collect();
Expand All @@ -866,7 +866,7 @@ fn check_debugger_output(debugger_run_result: &ProcRes, check_lines: &[String])
None
}
} else {
rest.find_str(frag)
rest.find(frag)
};
match found {
None => {
Expand Down
91 changes: 35 additions & 56 deletions src/libcollections/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ pub use core::str::{SplitN, RSplitN};
pub use core::str::{from_utf8, CharEq, Chars, CharIndices, Bytes};
pub use core::str::{from_utf8_unchecked, from_c_str, ParseBoolError};
pub use unicode::str::{Words, Graphemes, GraphemeIndices};
pub use core::str::Pattern;
pub use core::str::{Searcher, ReverseSearcher, DoubleEndedSearcher, SearchStep};

/*
Section: Creating a string
Expand Down Expand Up @@ -530,7 +532,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert!("bananas".contains("nana"));
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
fn contains(&self, pat: &str) -> bool {
fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
core_str::StrExt::contains(&self[..], pat)
}

Expand All @@ -545,9 +547,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// ```rust
/// assert!("hello".contains_char('e'));
/// ```
#[unstable(feature = "collections",
reason = "might get removed in favour of a more generic contains()")]
fn contains_char<P: CharEq>(&self, pat: P) -> bool {
#[unstable(feature = "collections")]
#[deprecated(since = "1.0.0", reason = "use `contains()` with a char")]
fn contains_char<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
core_str::StrExt::contains_char(&self[..], pat)
}

Expand Down Expand Up @@ -603,7 +605,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert_eq!(v, vec![""]);
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
fn split<P: CharEq>(&self, pat: P) -> Split<P> {
fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> {
core_str::StrExt::split(&self[..], pat)
}

Expand All @@ -630,7 +632,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert_eq!(v, vec![""]);
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
fn splitn<P: CharEq>(&self, count: usize, pat: P) -> SplitN<P> {
fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> {
core_str::StrExt::splitn(&self[..], count, pat)
}

Expand Down Expand Up @@ -658,8 +660,8 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect();
/// assert_eq!(v, vec!["leopard", "tiger", "", "lion"]);
/// ```
#[unstable(feature = "collections", reason = "might get removed")]
fn split_terminator<P: CharEq>(&self, pat: P) -> SplitTerminator<P> {
#[stable(feature = "rust1", since = "1.0.0")]
fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> {
core_str::StrExt::split_terminator(&self[..], pat)
}

Expand All @@ -680,7 +682,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert_eq!(v, vec!["leopard", "tiger", "lionX"]);
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
fn rsplitn<P: CharEq>(&self, count: usize, pat: P) -> RSplitN<P> {
fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P> {
core_str::StrExt::rsplitn(&self[..], count, pat)
}

Expand All @@ -706,7 +708,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// ```
#[unstable(feature = "collections",
reason = "might have its iterator type changed")]
fn match_indices<'a>(&'a self, pat: &'a str) -> MatchIndices<'a> {
// NB: Right now MatchIndices yields `(usize, usize)`,
// but it would be more consistent and useful to return `(usize, &str)`
fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> {
core_str::StrExt::match_indices(&self[..], pat)
}

Expand All @@ -721,9 +725,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// let v: Vec<&str> = "1abcabc2".split_str("abc").collect();
/// assert_eq!(v, vec!["1", "", "2"]);
/// ```
#[unstable(feature = "collections",
reason = "might get removed in the future in favor of a more generic split()")]
fn split_str<'a>(&'a self, pat: &'a str) -> SplitStr<'a> {
#[unstable(feature = "collections")]
#[deprecated(since = "1.0.0", reason = "use `split()` with a `&str`")]
fn split_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitStr<'a, P> {
core_str::StrExt::split_str(&self[..], pat)
}

Expand Down Expand Up @@ -825,7 +829,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert!("banana".starts_with("ba"));
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
fn starts_with(&self, pat: &str) -> bool {
fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
core_str::StrExt::starts_with(&self[..], pat)
}

Expand All @@ -837,7 +841,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert!("banana".ends_with("nana"));
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
fn ends_with(&self, pat: &str) -> bool {
fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool
where P::Searcher: ReverseSearcher<'a>
{
core_str::StrExt::ends_with(&self[..], pat)
}

Expand All @@ -857,7 +863,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert_eq!("123foo1bar123".trim_matches(|c: char| c.is_numeric()), "foo1bar");
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
fn trim_matches<P: CharEq>(&self, pat: P) -> &str {
fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
where P::Searcher: DoubleEndedSearcher<'a>
{
core_str::StrExt::trim_matches(&self[..], pat)
}

Expand All @@ -877,7 +885,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert_eq!("123foo1bar123".trim_left_matches(|c: char| c.is_numeric()), "foo1bar123");
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
fn trim_left_matches<P: CharEq>(&self, pat: P) -> &str {
fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
core_str::StrExt::trim_left_matches(&self[..], pat)
}

Expand All @@ -897,7 +905,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert_eq!("123foo1bar123".trim_right_matches(|c: char| c.is_numeric()), "123foo1bar");
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
fn trim_right_matches<P: CharEq>(&self, pat: P) -> &str {
fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
where P::Searcher: ReverseSearcher<'a>
{
core_str::StrExt::trim_right_matches(&self[..], pat)
}

Expand Down Expand Up @@ -1074,7 +1084,7 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert_eq!(s.find(x), None);
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
fn find<P: CharEq>(&self, pat: P) -> Option<usize> {
fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
core_str::StrExt::find(&self[..], pat)
}

Expand Down Expand Up @@ -1102,7 +1112,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert_eq!(s.rfind(x), None);
/// ```
#[stable(feature = "rust1", since = "1.0.0")]
fn rfind<P: CharEq>(&self, pat: P) -> Option<usize> {
fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>
where P::Searcher: ReverseSearcher<'a>
{
core_str::StrExt::rfind(&self[..], pat)
}

Expand All @@ -1125,9 +1137,9 @@ pub trait StrExt: Index<RangeFull, Output = str> {
/// assert_eq!(s.find_str("老虎 L"), Some(6));
/// assert_eq!(s.find_str("muffin man"), None);
/// ```
#[unstable(feature = "collections",
reason = "might get removed in favor of a more generic find in the future")]
fn find_str(&self, needle: &str) -> Option<usize> {
#[unstable(feature = "collections")]
#[deprecated(since = "1.0.0", reason = "use `find()` with a `&str`")]
fn find_str<'a, P: Pattern<'a>>(&'a self, needle: P) -> Option<usize> {
core_str::StrExt::find_str(&self[..], needle)
}

Expand Down Expand Up @@ -2887,22 +2899,6 @@ mod bench {
b.iter(|| assert_eq!(s.split('V').count(), 3));
}

#[bench]
fn split_unicode_not_ascii(b: &mut Bencher) {
struct NotAscii(char);
impl CharEq for NotAscii {
fn matches(&mut self, c: char) -> bool {
let NotAscii(cc) = *self;
cc == c
}
fn only_ascii(&self) -> bool { false }
}
let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";

b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
}


#[bench]
fn split_ascii(b: &mut Bencher) {
let s = "Mary had a little lamb, Little lamb, little-lamb.";
Expand All @@ -2911,23 +2907,6 @@ mod bench {
b.iter(|| assert_eq!(s.split(' ').count(), len));
}

#[bench]
fn split_not_ascii(b: &mut Bencher) {
struct NotAscii(char);
impl CharEq for NotAscii {
#[inline]
fn matches(&mut self, c: char) -> bool {
let NotAscii(cc) = *self;
cc == c
}
fn only_ascii(&self) -> bool { false }
}
let s = "Mary had a little lamb, Little lamb, little-lamb.";
let len = s.split(' ').count();

b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
}

#[bench]
fn split_extern_fn(b: &mut Bencher) {
let s = "Mary had a little lamb, Little lamb, little-lamb.";
Expand Down
27 changes: 15 additions & 12 deletions src/libcore/char.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@ use option::Option;
use slice::SliceExt;

// UTF-8 ranges and tags for encoding characters
static TAG_CONT: u8 = 0b1000_0000u8;
static TAG_TWO_B: u8 = 0b1100_0000u8;
static TAG_THREE_B: u8 = 0b1110_0000u8;
static TAG_FOUR_B: u8 = 0b1111_0000u8;
static MAX_ONE_B: u32 = 0x80u32;
static MAX_TWO_B: u32 = 0x800u32;
static MAX_THREE_B: u32 = 0x10000u32;
const TAG_CONT: u8 = 0b1000_0000u8;
const TAG_TWO_B: u8 = 0b1100_0000u8;
const TAG_THREE_B: u8 = 0b1110_0000u8;
const TAG_FOUR_B: u8 = 0b1111_0000u8;
const MAX_ONE_B: u32 = 0x80u32;
const MAX_TWO_B: u32 = 0x800u32;
const MAX_THREE_B: u32 = 0x10000u32;

/*
Lu Uppercase_Letter an uppercase letter
Expand Down Expand Up @@ -398,11 +398,14 @@ impl CharExt for char {
#[stable(feature = "rust1", since = "1.0.0")]
fn len_utf8(self) -> usize {
let code = self as u32;
match () {
_ if code < MAX_ONE_B => 1,
_ if code < MAX_TWO_B => 2,
_ if code < MAX_THREE_B => 3,
_ => 4,
if code < MAX_ONE_B {
1
} else if code < MAX_TWO_B {
2
} else if code < MAX_THREE_B {
3
} else {
4
}
}

Expand Down
4 changes: 4 additions & 0 deletions src/libcore/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -657,6 +657,8 @@ macro_rules! iterator {
fn next(&mut self) -> Option<$elem> {
// could be implemented with slices, but this avoids bounds checks
unsafe {
::intrinsics::assume(!self.ptr.is_null());
::intrinsics::assume(!self.end.is_null());
if self.ptr == self.end {
None
} else {
Expand Down Expand Up @@ -693,6 +695,8 @@ macro_rules! iterator {
fn next_back(&mut self) -> Option<$elem> {
// could be implemented with slices, but this avoids bounds checks
unsafe {
::intrinsics::assume(!self.ptr.is_null());
::intrinsics::assume(!self.end.is_null());
if self.end == self.ptr {
None
} else {
Expand Down
Loading

0 comments on commit 67eb38e

Please sign in to comment.