From 8a5a08e04d16678d9f843069a5fd28197a663619 Mon Sep 17 00:00:00 2001 From: Gareth Smith Date: Thu, 1 Aug 2013 23:48:22 +0100 Subject: [PATCH] Replace os::glob with extra::glob, which is written in rust, fixing issue #6100. --- src/libextra/extra.rs | 1 + src/libextra/glob.rs | 348 ++++++++++++++++++++++++++++++++++++++++++ src/libstd/os.rs | 82 ---------- 3 files changed, 349 insertions(+), 82 deletions(-) create mode 100644 src/libextra/glob.rs diff --git a/src/libextra/extra.rs b/src/libextra/extra.rs index f4fb7bcd76c99..735c5f0136aca 100644 --- a/src/libextra/extra.rs +++ b/src/libextra/extra.rs @@ -83,6 +83,7 @@ pub mod getopts; pub mod json; pub mod md4; pub mod tempfile; +pub mod glob; pub mod term; pub mod time; pub mod arena; diff --git a/src/libextra/glob.rs b/src/libextra/glob.rs new file mode 100644 index 0000000000000..f57a857b272e1 --- /dev/null +++ b/src/libextra/glob.rs @@ -0,0 +1,348 @@ +// Copyright 2013 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + + +use std::{os, uint}; + +use sort; + + +/** + * An iterator that yields Paths from the filesystem that match a particular + * pattern - see the glob function for more details. + */ +pub struct GlobIterator { + priv root: Path, + priv dir_patterns: ~[~[PatternToken]], + priv todo: ~[Path] +} + +enum PatternToken { + Char(char), + AnyChar, + AnySequence, + AnyWithin(~[char]), + AnyExcept(~[char]), +} + +/** + * Return an iterator that produces all the paths that match the given pattern, + * which may be absolute or relative to the current working directory. + * + * This function accepts Unix shell style patterns: + * '?' matches any single character. + * '*' matches any (possibly empty) sequence of characters. + * '[...]' matches any character inside the brackets, unless the first character + * is '!' in which case it matches any character except those between + * the '!' and the ']'. + * + * The metacharacters '?', '*', '[', ']' can be matched by using brackets (e.g. '[?]') + * + * Paths are yielded in alphabetical order, as absolute paths. + */ +pub fn glob(pattern: &str) -> GlobIterator { + + // note that this relies on the glob meta characters not + // having any special meaning in actual pathnames + let path = Path(pattern); + let dir_patterns = path.components.map(|s| compile_pattern(*s)); + + let root = if path.is_absolute() { + Path {components: ~[], .. path} // preserve windows path host/device + } else { + os::getcwd() + }; + let todo = list_dir_sorted(&root); + + GlobIterator { + root: root, + dir_patterns: dir_patterns, + todo: todo, + } +} + +impl Iterator for GlobIterator { + + fn next(&mut self) -> Option { + loop { + if self.dir_patterns.is_empty() || self.todo.is_empty() { + return None; + } + + let path = self.todo.pop(); + let pattern_index = path.components.len() - self.root.components.len() - 1; + + if pattern_matches(*path.components.last(), self.dir_patterns[pattern_index]) { + + if pattern_index == self.dir_patterns.len() - 1 { + // it is not possible for a pattern to match a directory *AND* its children + // so we don't need to check the children + return Some(path); + } else { + self.todo.push_all(list_dir_sorted(&path)); + } + } + } + } + +} + +fn list_dir_sorted(path: &Path) -> ~[Path] { + let mut children = os::list_dir_path(path); + sort::quick_sort(children, |p1, p2| p2.components.last() <= p1.components.last()); + children +} + +fn compile_pattern(pattern_str: &str) -> ~[PatternToken] { + let mut pattern = ~[]; + + let mut pattern_iter = pattern_str.iter(); + loop { + let pchar = match pattern_iter.next() { + None => break, + Some(c) => c, + }; + match pchar { + '?' => { + pattern.push(AnyChar); + } + '*' => { + pattern.push(AnySequence); + } + '[' => { + let mut chars = ~[]; + let is_except = match pattern_iter.next() { + None => false, // let the following loop fail with a message + Some('!') => true, + Some(c) => { + chars.push(c); + false + } + }; + loop { + match pattern_iter.next() { + None => fail!("invalid pattern syntax due to unclosed bracket: %s", + pattern_str), + Some(']') => break, + Some(c) => chars.push(c), + } + } + pattern.push(if is_except { AnyExcept(chars) } else { AnyWithin(chars) }); + } + c => { + pattern.push(Char(c)); + } + } + } + + pattern +} + +fn pattern_matches(mut file: &str, pattern: &[PatternToken]) -> bool { + + for uint::range(0, pattern.len()) |pi| { + match pattern[pi] { + AnySequence => { + loop { + if pattern_matches(file, pattern.slice_from(pi + 1)) { + return true; + } + if file.is_empty() { + return false; + } + file = file.slice_shift_char().second(); + } + } + _ => { + if file.is_empty() { + return false; + } + let (c, next) = file.slice_shift_char(); + let matches = match pattern[pi] { + AnyChar => true, + AnyWithin(ref chars) => chars.contains(&c), + AnyExcept(ref chars) => !chars.contains(&c), + Char(c2) => c == c2, + AnySequence => fail!(), + }; + if !matches { + return false; + } + file = next; + } + } + } + + file.is_empty() +} + +#[cfg(test)] +mod test { + use std::{io, os, unstable}; + use super::*; + + #[test] + fn test_relative_pattern() { + + fn mk_file(path: &str, directory: bool) { + if directory { + os::make_dir(&Path(path), 0xFFFF); + } else { + io::mk_file_writer(&Path(path), [io::Create]); + } + } + + fn abs_path(path: &str) -> Path { + os::getcwd().push_many(Path(path).components) + } + + fn glob_vec(pattern: &str) -> ~[Path] { + glob(pattern).collect() + } + + mk_file("tmp", true); + mk_file("tmp/glob-tests", true); + + do unstable::change_dir_locked(&Path("tmp/glob-tests")) { + + mk_file("aaa", true); + mk_file("aaa/apple", true); + mk_file("aaa/orange", true); + mk_file("aaa/tomato", true); + mk_file("aaa/tomato/tomato.txt", false); + mk_file("aaa/tomato/tomoto.txt", false); + mk_file("bbb", true); + mk_file("bbb/specials", true); + mk_file("bbb/specials/!", false); + + // windows does not allow some meta characters to exist in filenames + if os::consts::FAMILY != os::consts::windows::FAMILY { + mk_file("bbb/specials/*", false); + mk_file("bbb/specials/?", false); + } + + mk_file("bbb/specials/[", false); + mk_file("bbb/specials/]", false); + mk_file("ccc", true); + mk_file("xyz", true); + mk_file("xyz/x", false); + mk_file("xyz/y", false); + mk_file("xyz/z", false); + + assert_eq!(glob_vec(""), ~[]); + assert_eq!(glob_vec("."), ~[]); + assert_eq!(glob_vec(".."), ~[]); + + assert_eq!(glob_vec("aaa"), ~[abs_path("aaa")]); + assert_eq!(glob_vec("aaa/"), ~[abs_path("aaa")]); + assert_eq!(glob_vec("a"), ~[]); + assert_eq!(glob_vec("aa"), ~[]); + assert_eq!(glob_vec("aaaa"), ~[]); + + assert_eq!(glob_vec("aaa/apple"), ~[abs_path("aaa/apple")]); + assert_eq!(glob_vec("aaa/apple/nope"), ~[]); + + // windows should support both / and \ as directory separators + if os::consts::FAMILY == os::consts::windows::FAMILY { + assert_eq!(glob_vec("aaa\\apple"), ~[abs_path("aaa/apple")]); + } + + assert_eq!(glob_vec("???/"), ~[ + abs_path("aaa"), + abs_path("bbb"), + abs_path("ccc"), + abs_path("xyz")]); + + assert_eq!(glob_vec("aaa/tomato/tom?to.txt"), ~[ + abs_path("aaa/tomato/tomato.txt"), + abs_path("aaa/tomato/tomoto.txt")]); + + assert_eq!(glob_vec("xyz/?"), ~[ + abs_path("xyz/x"), + abs_path("xyz/y"), + abs_path("xyz/z")]); + + assert_eq!(glob_vec("a*"), ~[abs_path("aaa")]); + assert_eq!(glob_vec("*a*"), ~[abs_path("aaa")]); + assert_eq!(glob_vec("a*a"), ~[abs_path("aaa")]); + assert_eq!(glob_vec("aaa*"), ~[abs_path("aaa")]); + assert_eq!(glob_vec("*aaa"), ~[abs_path("aaa")]); + assert_eq!(glob_vec("*aaa*"), ~[abs_path("aaa")]); + assert_eq!(glob_vec("*a*a*a*"), ~[abs_path("aaa")]); + assert_eq!(glob_vec("aaa*/"), ~[abs_path("aaa")]); + + assert_eq!(glob_vec("aaa/*"), ~[ + abs_path("aaa/apple"), + abs_path("aaa/orange"), + abs_path("aaa/tomato")]); + + assert_eq!(glob_vec("aaa/*a*"), ~[ + abs_path("aaa/apple"), + abs_path("aaa/orange"), + abs_path("aaa/tomato")]); + + assert_eq!(glob_vec("*/*/*.txt"), ~[ + abs_path("aaa/tomato/tomato.txt"), + abs_path("aaa/tomato/tomoto.txt")]); + + assert_eq!(glob_vec("*/*/t[aob]m?to[.]t[!y]t"), ~[ + abs_path("aaa/tomato/tomato.txt"), + abs_path("aaa/tomato/tomoto.txt")]); + + assert_eq!(glob_vec("aa[a]"), ~[abs_path("aaa")]); + assert_eq!(glob_vec("aa[abc]"), ~[abs_path("aaa")]); + assert_eq!(glob_vec("a[bca]a"), ~[abs_path("aaa")]); + assert_eq!(glob_vec("aa[b]"), ~[]); + assert_eq!(glob_vec("aa[xyz]"), ~[]); + assert_eq!(glob_vec("aa[]]"), ~[]); + + assert_eq!(glob_vec("aa[!b]"), ~[abs_path("aaa")]); + assert_eq!(glob_vec("aa[!bcd]"), ~[abs_path("aaa")]); + assert_eq!(glob_vec("a[!bcd]a"), ~[abs_path("aaa")]); + assert_eq!(glob_vec("aa[!]"), ~[abs_path("aaa")]); + assert_eq!(glob_vec("aa[!a]"), ~[]); + assert_eq!(glob_vec("aa[!abc]"), ~[]); + + assert_eq!(glob_vec("bbb/specials/[[]"), ~[abs_path("bbb/specials/[")]); + assert_eq!(glob_vec("bbb/specials/!"), ~[abs_path("bbb/specials/!")]); + assert_eq!(glob_vec("bbb/specials/[]]"), ~[abs_path("bbb/specials/]")]); + + if os::consts::FAMILY != os::consts::windows::FAMILY { + assert_eq!(glob_vec("bbb/specials/[*]"), ~[abs_path("bbb/specials/*")]); + assert_eq!(glob_vec("bbb/specials/[?]"), ~[abs_path("bbb/specials/?")]); + } + }; + } + + #[test] + fn test_absolute_pattern() { + // assume that the filesystem is not empty! + assert!(glob("/*").next().is_some()); + assert!(glob("//").next().is_none()); + + // check windows absolute paths with host/device components + let root_with_device = (Path {components: ~[], .. os::getcwd()}).to_str() + "*"; + assert!(glob(root_with_device).next().is_some()); + } + + #[test] + fn test_lots_of_files() { + // this is a good test because it touches lots of differently named files + for glob("/*/*/*/*").advance |_p| {} + } + + #[test] + #[should_fail] + #[cfg(not(windows))] + fn test_unclosed_bracket() { + glob("abc[def"); + } +} + diff --git a/src/libstd/os.rs b/src/libstd/os.rs index 3afd946ee264b..bf6db6cb1534f 100644 --- a/src/libstd/os.rs +++ b/src/libstd/os.rs @@ -1227,88 +1227,6 @@ pub fn set_args(new_args: ~[~str]) { local_data::set(overridden_arg_key, overridden_args); } -// FIXME #6100 we should really use an internal implementation of this - using -// the POSIX glob functions isn't portable to windows, probably has slight -// inconsistencies even where it is implemented, and makes extending -// functionality a lot more difficult -// FIXME #6101 also provide a non-allocating version - each_glob or so? -/// Returns a vector of Path objects that match the given glob pattern -#[cfg(target_os = "linux")] -#[cfg(target_os = "android")] -#[cfg(target_os = "freebsd")] -#[cfg(target_os = "macos")] -pub fn glob(pattern: &str) -> ~[Path] { - #[cfg(target_os = "linux")] - #[cfg(target_os = "android")] - fn default_glob_t () -> libc::glob_t { - libc::glob_t { - gl_pathc: 0, - gl_pathv: ptr::null(), - gl_offs: 0, - __unused1: ptr::null(), - __unused2: ptr::null(), - __unused3: ptr::null(), - __unused4: ptr::null(), - __unused5: ptr::null(), - } - } - - #[cfg(target_os = "freebsd")] - fn default_glob_t () -> libc::glob_t { - libc::glob_t { - gl_pathc: 0, - __unused1: 0, - gl_offs: 0, - __unused2: 0, - gl_pathv: ptr::null(), - __unused3: ptr::null(), - __unused4: ptr::null(), - __unused5: ptr::null(), - __unused6: ptr::null(), - __unused7: ptr::null(), - __unused8: ptr::null(), - } - } - - #[cfg(target_os = "macos")] - fn default_glob_t () -> libc::glob_t { - libc::glob_t { - gl_pathc: 0, - __unused1: 0, - gl_offs: 0, - __unused2: 0, - gl_pathv: ptr::null(), - __unused3: ptr::null(), - __unused4: ptr::null(), - __unused5: ptr::null(), - __unused6: ptr::null(), - __unused7: ptr::null(), - __unused8: ptr::null(), - } - } - - let mut g = default_glob_t(); - do pattern.as_c_str |c_pattern| { - unsafe { libc::glob(c_pattern, 0, ptr::null(), &mut g) } - }; - do(|| { - let paths = unsafe { - vec::raw::from_buf_raw(g.gl_pathv, g.gl_pathc as uint) - }; - do paths.map |&c_str| { - Path(unsafe { str::raw::from_c_str(c_str) }) - } - }).finally { - unsafe { libc::globfree(&mut g) }; - } -} - -/// Returns a vector of Path objects that match the given glob pattern -#[cfg(target_os = "win32")] -pub fn glob(_pattern: &str) -> ~[Path] { - fail!("glob() is unimplemented on Windows") -} - #[cfg(target_os = "macos")] extern { // These functions are in crt_externs.h.