Skip to content

Commit

Permalink
expression: move checksum validation into expression parsing
Browse files Browse the repository at this point in the history
Right now our character validation logic is spread all over the place.
We have `expression::check_valid_chars` and `checksum::verify_checksum`
and also `checksum::Engine::input` which all do the same checks,
sharing the public array `expression::VALID_CHARS` which is arguably an
implementation detail and shouldn't be exposed.

In fact, whenever we parse any Miniscript object (a script, a
descriptor, a policy of either type), we are first validating the
checksum and then parsing into a tree, and our tree-parsing re-does
character validation that was already done as part of the checksum
check.

This commit moves the checksum error logic into expression, where it
belongs, and folds the checksum error variant into the expression
parsing error, where it belongs (removing it from the mega-enum at the
root).

However it leaves the Taproot parsing alone, which is still a bit of a
mess and will be addressed in a later PR.

Benchmarks show that this significantly slows down expression parsing
(as expected, since it is now doing checksumming) but significantly
speeds up descriptor parsing (which is no longer doing multiple checks
for character validity).
  • Loading branch information
apoelstra committed Nov 12, 2024
1 parent bb2e658 commit eb4f8c4
Show file tree
Hide file tree
Showing 12 changed files with 41 additions and 106 deletions.
7 changes: 2 additions & 5 deletions src/descriptor/bare.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ use core::fmt;
use bitcoin::script::{self, PushBytes};
use bitcoin::{Address, Network, ScriptBuf, Weight};

use super::checksum::verify_checksum;
use crate::descriptor::{write_descriptor, DefiniteDescriptorKey};
use crate::expression::{self, FromTree};
use crate::miniscript::context::{ScriptContext, ScriptContextError};
Expand Down Expand Up @@ -186,8 +185,7 @@ impl<Pk: FromStrKey> FromTree for Bare<Pk> {
impl<Pk: FromStrKey> core::str::FromStr for Bare<Pk> {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let desc_str = verify_checksum(s)?;
let top = expression::Tree::from_str(desc_str)?;
let top = expression::Tree::from_str(s)?;
Self::from_tree(&top)
}
}
Expand Down Expand Up @@ -387,8 +385,7 @@ impl<Pk: FromStrKey> FromTree for Pkh<Pk> {
impl<Pk: FromStrKey> core::str::FromStr for Pkh<Pk> {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let desc_str = verify_checksum(s)?;
let top = expression::Tree::from_str(desc_str)?;
let top = expression::Tree::from_str(s)?;
Self::from_tree(&top)
}
}
Expand Down
1 change: 0 additions & 1 deletion src/descriptor/checksum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ use core::iter::FromIterator;
use bech32::primitives::checksum::PackedFe32;
use bech32::{Checksum, Fe32};

pub use crate::expression::VALID_CHARS;
use crate::prelude::*;

const CHECKSUM_LENGTH: usize = 8;
Expand Down
6 changes: 2 additions & 4 deletions src/descriptor/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ use bitcoin::{
};
use sync::Arc;

use self::checksum::verify_checksum;
use crate::miniscript::decode::Terminal;
use crate::miniscript::{satisfy, Legacy, Miniscript, Segwitv0};
use crate::plan::{AssetProvider, Plan};
Expand Down Expand Up @@ -988,8 +987,7 @@ impl<Pk: FromStrKey> FromStr for Descriptor<Pk> {
let desc = if s.starts_with("tr(") {
Ok(Descriptor::Tr(Tr::from_str(s)?))
} else {
let desc_str = verify_checksum(s)?;
let top = expression::Tree::from_str(desc_str)?;
let top = expression::Tree::from_str(s)?;
expression::FromTree::from_tree(&top)
}?;

Expand Down Expand Up @@ -1840,7 +1838,7 @@ mod tests {
($secp: ident,$($desc: expr),*) => {
$(
match Descriptor::parse_descriptor($secp, $desc) {
Err(Error::Checksum(_)) => {},
Err(Error::ParseTree(crate::ParseTreeError::Checksum(_))) => {},
Err(e) => panic!("Expected bad checksum for {}, got '{}'", $desc, e),
_ => panic!("Invalid checksum treated as valid: {}", $desc),
};
Expand Down
7 changes: 2 additions & 5 deletions src/descriptor/segwitv0.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ use core::fmt;

use bitcoin::{Address, Network, ScriptBuf, Weight};

use super::checksum::verify_checksum;
use super::SortedMultiVec;
use crate::descriptor::{write_descriptor, DefiniteDescriptorKey};
use crate::expression::{self, FromTree};
Expand Down Expand Up @@ -288,8 +287,7 @@ impl<Pk: MiniscriptKey> fmt::Display for Wsh<Pk> {
impl<Pk: FromStrKey> core::str::FromStr for Wsh<Pk> {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let desc_str = verify_checksum(s)?;
let top = expression::Tree::from_str(desc_str)?;
let top = expression::Tree::from_str(s)?;
Wsh::<Pk>::from_tree(&top)
}
}
Expand Down Expand Up @@ -505,8 +503,7 @@ impl<Pk: FromStrKey> crate::expression::FromTree for Wpkh<Pk> {
impl<Pk: FromStrKey> core::str::FromStr for Wpkh<Pk> {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let desc_str = verify_checksum(s)?;
let top = expression::Tree::from_str(desc_str)?;
let top = expression::Tree::from_str(s)?;
Self::from_tree(&top)
}
}
Expand Down
4 changes: 1 addition & 3 deletions src/descriptor/sh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ use core::fmt;
use bitcoin::script::PushBytes;
use bitcoin::{script, Address, Network, ScriptBuf, Weight};

use super::checksum::verify_checksum;
use super::{SortedMultiVec, Wpkh, Wsh};
use crate::descriptor::{write_descriptor, DefiniteDescriptorKey};
use crate::expression::{self, FromTree};
Expand Down Expand Up @@ -109,8 +108,7 @@ impl<Pk: FromStrKey> crate::expression::FromTree for Sh<Pk> {
impl<Pk: FromStrKey> core::str::FromStr for Sh<Pk> {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let desc_str = verify_checksum(s)?;
let top = expression::Tree::from_str(desc_str)?;
let top = expression::Tree::from_str(s)?;
Self::from_tree(&top)
}
}
Expand Down
6 changes: 3 additions & 3 deletions src/descriptor/tr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -557,7 +557,9 @@ impl<Pk: FromStrKey> crate::expression::FromTree for Tr<Pk> {
impl<Pk: FromStrKey> core::str::FromStr for Tr<Pk> {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
let desc_str = verify_checksum(s)?;
let desc_str = verify_checksum(s)
.map_err(From::from)
.map_err(Error::ParseTree)?;
let top = parse_tr_tree(desc_str)?;
Self::from_tree(&top)
}
Expand Down Expand Up @@ -587,8 +589,6 @@ impl<Pk: MiniscriptKey> fmt::Display for Tr<Pk> {

// Helper function to parse string into miniscript tree form
fn parse_tr_tree(s: &str) -> Result<expression::Tree, Error> {
expression::check_valid_chars(s)?;

if s.len() > 3 && &s[..3] == "tr(" && s.as_bytes()[s.len() - 1] == b')' {
let rest = &s[3..s.len() - 1];
if !rest.contains(',') {
Expand Down
30 changes: 19 additions & 11 deletions src/expression/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,22 @@
use core::fmt;

use crate::descriptor::checksum;
use crate::prelude::*;
use crate::ThresholdError;

/// An error parsing an expression tree.
#[derive(Debug, PartialEq, Eq)]
pub enum ParseTreeError {
/// Error validating the checksum or character set.
Checksum(checksum::Error),
/// Expression tree had depth exceeding our hard cap.
MaxRecursionDepthExceeded {
/// The depth of the tree that was attempted to be parsed.
actual: usize,
/// The maximum depth.
maximum: u32,
},
/// Character occurred which was not part of the valid descriptor character set.
InvalidCharacter {
/// The character in question.
ch: char,
/// Its byte-index into the string.
pos: usize,
},
/// After a close-paren, the only valid next characters are close-parens and commas. Got
/// something else.
ExpectedParenOrComma {
Expand Down Expand Up @@ -66,15 +62,17 @@ pub enum ParseTreeError {
},
}

impl From<checksum::Error> for ParseTreeError {
fn from(e: checksum::Error) -> Self { Self::Checksum(e) }
}

impl fmt::Display for ParseTreeError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
ParseTreeError::Checksum(ref e) => e.fmt(f),
ParseTreeError::MaxRecursionDepthExceeded { actual, maximum } => {
write!(f, "maximum recursion depth exceeded (max {}, got {})", maximum, actual)
}
ParseTreeError::InvalidCharacter { ch, pos } => {
write!(f, "character `{}` (position {}) not allowed in descriptor", ch, pos)
}
ParseTreeError::ExpectedParenOrComma { ch, pos } => {
write!(
f,
Expand Down Expand Up @@ -103,7 +101,17 @@ impl fmt::Display for ParseTreeError {
}
#[cfg(feature = "std")]
impl std::error::Error for ParseTreeError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { None }
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
ParseTreeError::Checksum(ref e) => Some(e),
ParseTreeError::MaxRecursionDepthExceeded { .. }
| ParseTreeError::ExpectedParenOrComma { .. }
| ParseTreeError::UnmatchedOpenParen { .. }
| ParseTreeError::UnmatchedCloseParen { .. }
| ParseTreeError::MismatchedParens { .. }
| ParseTreeError::TrailingCharacter { .. } => None,
}
}
}

/// Error parsing a threshold expression.
Expand Down
71 changes: 11 additions & 60 deletions src/expression/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,32 +9,13 @@ use core::fmt;
use core::str::FromStr;

pub use self::error::{ParseThresholdError, ParseTreeError};
use crate::descriptor::checksum::verify_checksum;
use crate::prelude::*;
use crate::{errstr, Error, Threshold, MAX_RECURSION_DEPTH};

/// Allowed characters are descriptor strings.
pub const INPUT_CHARSET: &str = "0123456789()[],'/*abcdefgh@:$%{}IJKLMNOPQRSTUVWXYZ&+-.;<=>?!^_|~ijklmnopqrstuvwxyzABCDEFGH`#\"\\ ";

/// Map of valid characters in descriptor strings.
#[rustfmt::skip]
pub const VALID_CHARS: [Option<u8>; 128] = [
None, None, None, None, None, None, None, None, None, None, None, None, None,
None, None, None, None, None, None, None, None, None, None, None, None, None,
None, None, None, None, None, None, Some(94), Some(59), Some(92), Some(91),
Some(28), Some(29), Some(50), Some(15), Some(10), Some(11), Some(17), Some(51),
Some(14), Some(52), Some(53), Some(16), Some(0), Some(1), Some(2), Some(3),
Some(4), Some(5), Some(6), Some(7), Some(8), Some(9), Some(27), Some(54),
Some(55), Some(56), Some(57), Some(58), Some(26), Some(82), Some(83),
Some(84), Some(85), Some(86), Some(87), Some(88), Some(89), Some(32), Some(33),
Some(34), Some(35), Some(36), Some(37), Some(38), Some(39), Some(40), Some(41),
Some(42), Some(43), Some(44), Some(45), Some(46), Some(47), Some(48), Some(49),
Some(12), Some(93), Some(13), Some(60), Some(61), Some(90), Some(18), Some(19),
Some(20), Some(21), Some(22), Some(23), Some(24), Some(25), Some(64), Some(65),
Some(66), Some(67), Some(68), Some(69), Some(70), Some(71), Some(72), Some(73),
Some(74), Some(75), Some(76), Some(77), Some(78), Some(79), Some(80), Some(81),
Some(30), Some(62), Some(31), Some(63), None,
];

#[derive(Debug)]
/// A token of the form `x(...)` or `x`
pub struct Tree<'a> {
Expand Down Expand Up @@ -145,19 +126,17 @@ impl<'a> Tree<'a> {
Self::from_slice_delim(sl, 0u32, '(')
}

fn parse_pre_check(s: &str, open: u8, close: u8) -> Result<(), ParseTreeError> {
// First, scan through string to make sure it is well-formed.
let mut max_depth = 0;
/// Check that a string is a well-formed expression string, with optional
/// checksum.
///
/// Returns the string with the checksum removed.
fn parse_pre_check(s: &str, open: u8, close: u8) -> Result<&str, ParseTreeError> {
// Do ASCII check first; after this we can use .bytes().enumerate() rather
// than .char_indices(), which is *significantly* faster.
for (pos, ch) in s.char_indices() {
if !(32..128).contains(&u32::from(ch)) {
return Err(ParseTreeError::InvalidCharacter { ch, pos });
}
}
let s = verify_checksum(s)?;

let mut max_depth = 0;
let mut open_paren_stack = Vec::with_capacity(128);

for (pos, ch) in s.bytes().enumerate() {
if ch == open {
open_paren_stack.push((ch, pos));
Expand Down Expand Up @@ -232,7 +211,7 @@ impl<'a> Tree<'a> {
});
}

Ok(())
Ok(s)
}

pub(crate) fn from_slice_delim(
Expand All @@ -242,9 +221,9 @@ impl<'a> Tree<'a> {
) -> Result<(Tree<'a>, &'a str), Error> {
if depth == 0 {
if delim == '{' {
Self::parse_pre_check(sl, b'{', b'}').map_err(Error::ParseTree)?;
sl = Self::parse_pre_check(sl, b'{', b'}').map_err(Error::ParseTree)?;
} else {
Self::parse_pre_check(sl, b'(', b')').map_err(Error::ParseTree)?;
sl = Self::parse_pre_check(sl, b'(', b')').map_err(Error::ParseTree)?;
}
}

Expand Down Expand Up @@ -288,8 +267,6 @@ impl<'a> Tree<'a> {
/// Parses a tree from a string
#[allow(clippy::should_implement_trait)] // Cannot use std::str::FromStr because of lifetimes.
pub fn from_str(s: &'a str) -> Result<Tree<'a>, Error> {
check_valid_chars(s)?;

let (top, rem) = Tree::from_slice(s)?;
if rem.is_empty() {
Ok(top)
Expand Down Expand Up @@ -328,23 +305,6 @@ impl<'a> Tree<'a> {
}
}

/// Filter out non-ASCII because we byte-index strings all over the
/// place and Rust gets very upset when you splinch a string.
pub fn check_valid_chars(s: &str) -> Result<(), Error> {
for ch in s.bytes() {
if !ch.is_ascii() {
return Err(Error::Unprintable(ch));
}
// Index bounds: We know that ch is ASCII, so it is <= 127.
if VALID_CHARS[ch as usize].is_none() {
return Err(Error::Unexpected(
"Only characters in INPUT_CHARSET are allowed".to_string(),
));
}
}
Ok(())
}

/// Parse a string as a u32, for timelocks or thresholds
pub fn parse_num(s: &str) -> Result<u32, Error> {
if s.len() > 1 {
Expand Down Expand Up @@ -418,15 +378,6 @@ mod tests {
assert!(parse_num("-6").is_err());
}

#[test]
fn test_valid_char_map() {
let mut valid_chars = [None; 128];
for (i, ch) in super::INPUT_CHARSET.chars().enumerate() {
valid_chars[ch as usize] = Some(i as u8);
}
assert_eq!(valid_chars, super::VALID_CHARS);
}

#[test]
fn parse_tree_basic() {
assert_eq!(Tree::from_str("thresh").unwrap(), leaf("thresh"));
Expand Down
9 changes: 0 additions & 9 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -492,8 +492,6 @@ pub enum Error {
Threshold(ThresholdError),
/// Invalid threshold.
ParseThreshold(ParseThresholdError),
/// Checksum error parsing a descriptor.
Checksum(descriptor::checksum::Error),
/// Invalid expression tree.
ParseTree(ParseTreeError),
}
Expand Down Expand Up @@ -557,7 +555,6 @@ impl fmt::Display for Error {
Error::RelativeLockTime(ref e) => e.fmt(f),
Error::Threshold(ref e) => e.fmt(f),
Error::ParseThreshold(ref e) => e.fmt(f),
Error::Checksum(ref e) => e.fmt(f),
Error::ParseTree(ref e) => e.fmt(f),
}
}
Expand Down Expand Up @@ -609,7 +606,6 @@ impl error::Error for Error {
RelativeLockTime(e) => Some(e),
Threshold(e) => Some(e),
ParseThreshold(e) => Some(e),
Checksum(e) => Some(e),
ParseTree(e) => Some(e),
}
}
Expand Down Expand Up @@ -650,11 +646,6 @@ impl From<bitcoin::address::P2shError> for Error {
fn from(e: bitcoin::address::P2shError) -> Error { Error::AddrP2shError(e) }
}

#[doc(hidden)]
impl From<descriptor::checksum::Error> for Error {
fn from(e: descriptor::checksum::Error) -> Error { Error::Checksum(e) }
}

#[doc(hidden)]
#[cfg(feature = "compiler")]
impl From<crate::policy::compiler::CompilerError> for Error {
Expand Down
2 changes: 1 addition & 1 deletion src/miniscript/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1315,7 +1315,7 @@ mod tests {
assert!(Segwitv0Script::from_str_insane("🌏")
.unwrap_err()
.to_string()
.contains("unprintable character"));
.contains("invalid character"));
}

#[test]
Expand Down
2 changes: 0 additions & 2 deletions src/policy/concrete.rs
Original file line number Diff line number Diff line change
Expand Up @@ -835,8 +835,6 @@ impl<Pk: MiniscriptKey> fmt::Display for Policy<Pk> {
impl<Pk: FromStrKey> str::FromStr for Policy<Pk> {
type Err = Error;
fn from_str(s: &str) -> Result<Policy<Pk>, Error> {
expression::check_valid_chars(s)?;

let tree = expression::Tree::from_str(s)?;
let policy: Policy<Pk> = FromTree::from_tree(&tree)?;
policy.check_timelocks().map_err(Error::ConcretePolicy)?;
Expand Down
2 changes: 0 additions & 2 deletions src/policy/semantic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -314,8 +314,6 @@ impl<Pk: MiniscriptKey> fmt::Display for Policy<Pk> {
impl<Pk: FromStrKey> str::FromStr for Policy<Pk> {
type Err = Error;
fn from_str(s: &str) -> Result<Policy<Pk>, Error> {
expression::check_valid_chars(s)?;

let tree = expression::Tree::from_str(s)?;
expression::FromTree::from_tree(&tree)
}
Expand Down

0 comments on commit eb4f8c4

Please sign in to comment.