From eb4f8c4dce3c45d6e510075a63d2115f7198f7b4 Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Sun, 8 Sep 2024 16:32:58 +0000 Subject: [PATCH] expression: move checksum validation into expression parsing Right now our character validation logic is spread all over the place. We have `expression::check_valid_chars` and `checksum::verify_checksum` and also `checksum::Engine::input` which all do the same checks, sharing the public array `expression::VALID_CHARS` which is arguably an implementation detail and shouldn't be exposed. In fact, whenever we parse any Miniscript object (a script, a descriptor, a policy of either type), we are first validating the checksum and then parsing into a tree, and our tree-parsing re-does character validation that was already done as part of the checksum check. This commit moves the checksum error logic into expression, where it belongs, and folds the checksum error variant into the expression parsing error, where it belongs (removing it from the mega-enum at the root). However it leaves the Taproot parsing alone, which is still a bit of a mess and will be addressed in a later PR. Benchmarks show that this significantly slows down expression parsing (as expected, since it is now doing checksumming) but significantly speeds up descriptor parsing (which is no longer doing multiple checks for character validity). --- src/descriptor/bare.rs | 7 ++-- src/descriptor/checksum.rs | 1 - src/descriptor/mod.rs | 6 ++-- src/descriptor/segwitv0.rs | 7 ++-- src/descriptor/sh.rs | 4 +-- src/descriptor/tr.rs | 6 ++-- src/expression/error.rs | 30 ++++++++++------ src/expression/mod.rs | 71 ++++++-------------------------------- src/lib.rs | 9 ----- src/miniscript/mod.rs | 2 +- src/policy/concrete.rs | 2 -- src/policy/semantic.rs | 2 -- 12 files changed, 41 insertions(+), 106 deletions(-) diff --git a/src/descriptor/bare.rs b/src/descriptor/bare.rs index 176137ad8..63cb8168a 100644 --- a/src/descriptor/bare.rs +++ b/src/descriptor/bare.rs @@ -12,7 +12,6 @@ use core::fmt; use bitcoin::script::{self, PushBytes}; use bitcoin::{Address, Network, ScriptBuf, Weight}; -use super::checksum::verify_checksum; use crate::descriptor::{write_descriptor, DefiniteDescriptorKey}; use crate::expression::{self, FromTree}; use crate::miniscript::context::{ScriptContext, ScriptContextError}; @@ -186,8 +185,7 @@ impl FromTree for Bare { impl core::str::FromStr for Bare { type Err = Error; fn from_str(s: &str) -> Result { - let desc_str = verify_checksum(s)?; - let top = expression::Tree::from_str(desc_str)?; + let top = expression::Tree::from_str(s)?; Self::from_tree(&top) } } @@ -387,8 +385,7 @@ impl FromTree for Pkh { impl core::str::FromStr for Pkh { type Err = Error; fn from_str(s: &str) -> Result { - let desc_str = verify_checksum(s)?; - let top = expression::Tree::from_str(desc_str)?; + let top = expression::Tree::from_str(s)?; Self::from_tree(&top) } } diff --git a/src/descriptor/checksum.rs b/src/descriptor/checksum.rs index 717d32dcb..6ae296433 100644 --- a/src/descriptor/checksum.rs +++ b/src/descriptor/checksum.rs @@ -14,7 +14,6 @@ use core::iter::FromIterator; use bech32::primitives::checksum::PackedFe32; use bech32::{Checksum, Fe32}; -pub use crate::expression::VALID_CHARS; use crate::prelude::*; const CHECKSUM_LENGTH: usize = 8; diff --git a/src/descriptor/mod.rs b/src/descriptor/mod.rs index 16170040b..6c973cc5d 100644 --- a/src/descriptor/mod.rs +++ b/src/descriptor/mod.rs @@ -21,7 +21,6 @@ use bitcoin::{ }; use sync::Arc; -use self::checksum::verify_checksum; use crate::miniscript::decode::Terminal; use crate::miniscript::{satisfy, Legacy, Miniscript, Segwitv0}; use crate::plan::{AssetProvider, Plan}; @@ -988,8 +987,7 @@ impl FromStr for Descriptor { let desc = if s.starts_with("tr(") { Ok(Descriptor::Tr(Tr::from_str(s)?)) } else { - let desc_str = verify_checksum(s)?; - let top = expression::Tree::from_str(desc_str)?; + let top = expression::Tree::from_str(s)?; expression::FromTree::from_tree(&top) }?; @@ -1840,7 +1838,7 @@ mod tests { ($secp: ident,$($desc: expr),*) => { $( match Descriptor::parse_descriptor($secp, $desc) { - Err(Error::Checksum(_)) => {}, + Err(Error::ParseTree(crate::ParseTreeError::Checksum(_))) => {}, Err(e) => panic!("Expected bad checksum for {}, got '{}'", $desc, e), _ => panic!("Invalid checksum treated as valid: {}", $desc), }; diff --git a/src/descriptor/segwitv0.rs b/src/descriptor/segwitv0.rs index 2f2532b85..c8552eb4b 100644 --- a/src/descriptor/segwitv0.rs +++ b/src/descriptor/segwitv0.rs @@ -10,7 +10,6 @@ use core::fmt; use bitcoin::{Address, Network, ScriptBuf, Weight}; -use super::checksum::verify_checksum; use super::SortedMultiVec; use crate::descriptor::{write_descriptor, DefiniteDescriptorKey}; use crate::expression::{self, FromTree}; @@ -288,8 +287,7 @@ impl fmt::Display for Wsh { impl core::str::FromStr for Wsh { type Err = Error; fn from_str(s: &str) -> Result { - let desc_str = verify_checksum(s)?; - let top = expression::Tree::from_str(desc_str)?; + let top = expression::Tree::from_str(s)?; Wsh::::from_tree(&top) } } @@ -505,8 +503,7 @@ impl crate::expression::FromTree for Wpkh { impl core::str::FromStr for Wpkh { type Err = Error; fn from_str(s: &str) -> Result { - let desc_str = verify_checksum(s)?; - let top = expression::Tree::from_str(desc_str)?; + let top = expression::Tree::from_str(s)?; Self::from_tree(&top) } } diff --git a/src/descriptor/sh.rs b/src/descriptor/sh.rs index cf05c1b71..f9f21544d 100644 --- a/src/descriptor/sh.rs +++ b/src/descriptor/sh.rs @@ -13,7 +13,6 @@ use core::fmt; use bitcoin::script::PushBytes; use bitcoin::{script, Address, Network, ScriptBuf, Weight}; -use super::checksum::verify_checksum; use super::{SortedMultiVec, Wpkh, Wsh}; use crate::descriptor::{write_descriptor, DefiniteDescriptorKey}; use crate::expression::{self, FromTree}; @@ -109,8 +108,7 @@ impl crate::expression::FromTree for Sh { impl core::str::FromStr for Sh { type Err = Error; fn from_str(s: &str) -> Result { - let desc_str = verify_checksum(s)?; - let top = expression::Tree::from_str(desc_str)?; + let top = expression::Tree::from_str(s)?; Self::from_tree(&top) } } diff --git a/src/descriptor/tr.rs b/src/descriptor/tr.rs index 30d6c5c74..77cceb7e0 100644 --- a/src/descriptor/tr.rs +++ b/src/descriptor/tr.rs @@ -557,7 +557,9 @@ impl crate::expression::FromTree for Tr { impl core::str::FromStr for Tr { type Err = Error; fn from_str(s: &str) -> Result { - let desc_str = verify_checksum(s)?; + let desc_str = verify_checksum(s) + .map_err(From::from) + .map_err(Error::ParseTree)?; let top = parse_tr_tree(desc_str)?; Self::from_tree(&top) } @@ -587,8 +589,6 @@ impl fmt::Display for Tr { // Helper function to parse string into miniscript tree form fn parse_tr_tree(s: &str) -> Result { - expression::check_valid_chars(s)?; - if s.len() > 3 && &s[..3] == "tr(" && s.as_bytes()[s.len() - 1] == b')' { let rest = &s[3..s.len() - 1]; if !rest.contains(',') { diff --git a/src/expression/error.rs b/src/expression/error.rs index c551340f6..d705e2df5 100644 --- a/src/expression/error.rs +++ b/src/expression/error.rs @@ -4,12 +4,15 @@ use core::fmt; +use crate::descriptor::checksum; use crate::prelude::*; use crate::ThresholdError; /// An error parsing an expression tree. #[derive(Debug, PartialEq, Eq)] pub enum ParseTreeError { + /// Error validating the checksum or character set. + Checksum(checksum::Error), /// Expression tree had depth exceeding our hard cap. MaxRecursionDepthExceeded { /// The depth of the tree that was attempted to be parsed. @@ -17,13 +20,6 @@ pub enum ParseTreeError { /// The maximum depth. maximum: u32, }, - /// Character occurred which was not part of the valid descriptor character set. - InvalidCharacter { - /// The character in question. - ch: char, - /// Its byte-index into the string. - pos: usize, - }, /// After a close-paren, the only valid next characters are close-parens and commas. Got /// something else. ExpectedParenOrComma { @@ -66,15 +62,17 @@ pub enum ParseTreeError { }, } +impl From for ParseTreeError { + fn from(e: checksum::Error) -> Self { Self::Checksum(e) } +} + impl fmt::Display for ParseTreeError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { + ParseTreeError::Checksum(ref e) => e.fmt(f), ParseTreeError::MaxRecursionDepthExceeded { actual, maximum } => { write!(f, "maximum recursion depth exceeded (max {}, got {})", maximum, actual) } - ParseTreeError::InvalidCharacter { ch, pos } => { - write!(f, "character `{}` (position {}) not allowed in descriptor", ch, pos) - } ParseTreeError::ExpectedParenOrComma { ch, pos } => { write!( f, @@ -103,7 +101,17 @@ impl fmt::Display for ParseTreeError { } #[cfg(feature = "std")] impl std::error::Error for ParseTreeError { - fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { None } + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + match self { + ParseTreeError::Checksum(ref e) => Some(e), + ParseTreeError::MaxRecursionDepthExceeded { .. } + | ParseTreeError::ExpectedParenOrComma { .. } + | ParseTreeError::UnmatchedOpenParen { .. } + | ParseTreeError::UnmatchedCloseParen { .. } + | ParseTreeError::MismatchedParens { .. } + | ParseTreeError::TrailingCharacter { .. } => None, + } + } } /// Error parsing a threshold expression. diff --git a/src/expression/mod.rs b/src/expression/mod.rs index ccf81204a..844b3c869 100644 --- a/src/expression/mod.rs +++ b/src/expression/mod.rs @@ -9,32 +9,13 @@ use core::fmt; use core::str::FromStr; pub use self::error::{ParseThresholdError, ParseTreeError}; +use crate::descriptor::checksum::verify_checksum; use crate::prelude::*; use crate::{errstr, Error, Threshold, MAX_RECURSION_DEPTH}; /// Allowed characters are descriptor strings. pub const INPUT_CHARSET: &str = "0123456789()[],'/*abcdefgh@:$%{}IJKLMNOPQRSTUVWXYZ&+-.;<=>?!^_|~ijklmnopqrstuvwxyzABCDEFGH`#\"\\ "; -/// Map of valid characters in descriptor strings. -#[rustfmt::skip] -pub const VALID_CHARS: [Option; 128] = [ - None, None, None, None, None, None, None, None, None, None, None, None, None, - None, None, None, None, None, None, None, None, None, None, None, None, None, - None, None, None, None, None, None, Some(94), Some(59), Some(92), Some(91), - Some(28), Some(29), Some(50), Some(15), Some(10), Some(11), Some(17), Some(51), - Some(14), Some(52), Some(53), Some(16), Some(0), Some(1), Some(2), Some(3), - Some(4), Some(5), Some(6), Some(7), Some(8), Some(9), Some(27), Some(54), - Some(55), Some(56), Some(57), Some(58), Some(26), Some(82), Some(83), - Some(84), Some(85), Some(86), Some(87), Some(88), Some(89), Some(32), Some(33), - Some(34), Some(35), Some(36), Some(37), Some(38), Some(39), Some(40), Some(41), - Some(42), Some(43), Some(44), Some(45), Some(46), Some(47), Some(48), Some(49), - Some(12), Some(93), Some(13), Some(60), Some(61), Some(90), Some(18), Some(19), - Some(20), Some(21), Some(22), Some(23), Some(24), Some(25), Some(64), Some(65), - Some(66), Some(67), Some(68), Some(69), Some(70), Some(71), Some(72), Some(73), - Some(74), Some(75), Some(76), Some(77), Some(78), Some(79), Some(80), Some(81), - Some(30), Some(62), Some(31), Some(63), None, -]; - #[derive(Debug)] /// A token of the form `x(...)` or `x` pub struct Tree<'a> { @@ -145,19 +126,17 @@ impl<'a> Tree<'a> { Self::from_slice_delim(sl, 0u32, '(') } - fn parse_pre_check(s: &str, open: u8, close: u8) -> Result<(), ParseTreeError> { - // First, scan through string to make sure it is well-formed. - let mut max_depth = 0; + /// Check that a string is a well-formed expression string, with optional + /// checksum. + /// + /// Returns the string with the checksum removed. + fn parse_pre_check(s: &str, open: u8, close: u8) -> Result<&str, ParseTreeError> { // Do ASCII check first; after this we can use .bytes().enumerate() rather // than .char_indices(), which is *significantly* faster. - for (pos, ch) in s.char_indices() { - if !(32..128).contains(&u32::from(ch)) { - return Err(ParseTreeError::InvalidCharacter { ch, pos }); - } - } + let s = verify_checksum(s)?; + let mut max_depth = 0; let mut open_paren_stack = Vec::with_capacity(128); - for (pos, ch) in s.bytes().enumerate() { if ch == open { open_paren_stack.push((ch, pos)); @@ -232,7 +211,7 @@ impl<'a> Tree<'a> { }); } - Ok(()) + Ok(s) } pub(crate) fn from_slice_delim( @@ -242,9 +221,9 @@ impl<'a> Tree<'a> { ) -> Result<(Tree<'a>, &'a str), Error> { if depth == 0 { if delim == '{' { - Self::parse_pre_check(sl, b'{', b'}').map_err(Error::ParseTree)?; + sl = Self::parse_pre_check(sl, b'{', b'}').map_err(Error::ParseTree)?; } else { - Self::parse_pre_check(sl, b'(', b')').map_err(Error::ParseTree)?; + sl = Self::parse_pre_check(sl, b'(', b')').map_err(Error::ParseTree)?; } } @@ -288,8 +267,6 @@ impl<'a> Tree<'a> { /// Parses a tree from a string #[allow(clippy::should_implement_trait)] // Cannot use std::str::FromStr because of lifetimes. pub fn from_str(s: &'a str) -> Result, Error> { - check_valid_chars(s)?; - let (top, rem) = Tree::from_slice(s)?; if rem.is_empty() { Ok(top) @@ -328,23 +305,6 @@ impl<'a> Tree<'a> { } } -/// Filter out non-ASCII because we byte-index strings all over the -/// place and Rust gets very upset when you splinch a string. -pub fn check_valid_chars(s: &str) -> Result<(), Error> { - for ch in s.bytes() { - if !ch.is_ascii() { - return Err(Error::Unprintable(ch)); - } - // Index bounds: We know that ch is ASCII, so it is <= 127. - if VALID_CHARS[ch as usize].is_none() { - return Err(Error::Unexpected( - "Only characters in INPUT_CHARSET are allowed".to_string(), - )); - } - } - Ok(()) -} - /// Parse a string as a u32, for timelocks or thresholds pub fn parse_num(s: &str) -> Result { if s.len() > 1 { @@ -418,15 +378,6 @@ mod tests { assert!(parse_num("-6").is_err()); } - #[test] - fn test_valid_char_map() { - let mut valid_chars = [None; 128]; - for (i, ch) in super::INPUT_CHARSET.chars().enumerate() { - valid_chars[ch as usize] = Some(i as u8); - } - assert_eq!(valid_chars, super::VALID_CHARS); - } - #[test] fn parse_tree_basic() { assert_eq!(Tree::from_str("thresh").unwrap(), leaf("thresh")); diff --git a/src/lib.rs b/src/lib.rs index 6c3a13205..f15d99e46 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -492,8 +492,6 @@ pub enum Error { Threshold(ThresholdError), /// Invalid threshold. ParseThreshold(ParseThresholdError), - /// Checksum error parsing a descriptor. - Checksum(descriptor::checksum::Error), /// Invalid expression tree. ParseTree(ParseTreeError), } @@ -557,7 +555,6 @@ impl fmt::Display for Error { Error::RelativeLockTime(ref e) => e.fmt(f), Error::Threshold(ref e) => e.fmt(f), Error::ParseThreshold(ref e) => e.fmt(f), - Error::Checksum(ref e) => e.fmt(f), Error::ParseTree(ref e) => e.fmt(f), } } @@ -609,7 +606,6 @@ impl error::Error for Error { RelativeLockTime(e) => Some(e), Threshold(e) => Some(e), ParseThreshold(e) => Some(e), - Checksum(e) => Some(e), ParseTree(e) => Some(e), } } @@ -650,11 +646,6 @@ impl From for Error { fn from(e: bitcoin::address::P2shError) -> Error { Error::AddrP2shError(e) } } -#[doc(hidden)] -impl From for Error { - fn from(e: descriptor::checksum::Error) -> Error { Error::Checksum(e) } -} - #[doc(hidden)] #[cfg(feature = "compiler")] impl From for Error { diff --git a/src/miniscript/mod.rs b/src/miniscript/mod.rs index 1b2bddc7d..03c242ec8 100644 --- a/src/miniscript/mod.rs +++ b/src/miniscript/mod.rs @@ -1315,7 +1315,7 @@ mod tests { assert!(Segwitv0Script::from_str_insane("🌏") .unwrap_err() .to_string() - .contains("unprintable character")); + .contains("invalid character")); } #[test] diff --git a/src/policy/concrete.rs b/src/policy/concrete.rs index 578d821b4..deebe289c 100644 --- a/src/policy/concrete.rs +++ b/src/policy/concrete.rs @@ -835,8 +835,6 @@ impl fmt::Display for Policy { impl str::FromStr for Policy { type Err = Error; fn from_str(s: &str) -> Result, Error> { - expression::check_valid_chars(s)?; - let tree = expression::Tree::from_str(s)?; let policy: Policy = FromTree::from_tree(&tree)?; policy.check_timelocks().map_err(Error::ConcretePolicy)?; diff --git a/src/policy/semantic.rs b/src/policy/semantic.rs index 62487a54a..2eca31350 100644 --- a/src/policy/semantic.rs +++ b/src/policy/semantic.rs @@ -314,8 +314,6 @@ impl fmt::Display for Policy { impl str::FromStr for Policy { type Err = Error; fn from_str(s: &str) -> Result, Error> { - expression::check_valid_chars(s)?; - let tree = expression::Tree::from_str(s)?; expression::FromTree::from_tree(&tree) }