From 34d75f88905b4ee197f82fed37aa05e2b6bc509a Mon Sep 17 00:00:00 2001 From: Andrew Poelstra Date: Sun, 8 Sep 2024 16:47:03 +0000 Subject: [PATCH] expression: unify Taproot and non-Taproot parsing The `expression::Tree` type now parses expression trees containing both {} and () as children. It marks which is which, and it is the responsibility of FromTree implementors to make sure that the correct style is used. This eliminates a ton of ad-hoc string parsing code from descriptor/tr.rs, including 8 instances of Error::Unexpected. It is also the first change that preserves (sorta) a pubkey parsing error rather than converting it to a string. Because of https://github.com/rust-bitcoin/rust-miniscript/issues/734 this inserts a call to `Descriptor::sanity_check` when parsing a string, specifically for Taproot descriptors. This is weird and wrong but we want to address it separately from this PR, whose goal is to preserve all behavior. --- src/descriptor/mod.rs | 38 +++++-- src/descriptor/tr.rs | 234 +++++++++++++++++++----------------------- src/expression/mod.rs | 69 +++++-------- 3 files changed, 158 insertions(+), 183 deletions(-) diff --git a/src/descriptor/mod.rs b/src/descriptor/mod.rs index a1d29b441..f0791dc1d 100644 --- a/src/descriptor/mod.rs +++ b/src/descriptor/mod.rs @@ -21,6 +21,7 @@ use bitcoin::{ }; use sync::Arc; +use crate::expression::FromTree as _; use crate::miniscript::decode::Terminal; use crate::miniscript::{satisfy, Legacy, Miniscript, Segwitv0}; use crate::plan::{AssetProvider, Plan}; @@ -981,17 +982,17 @@ impl crate::expression::FromTree for Descriptor { impl FromStr for Descriptor { type Err = Error; fn from_str(s: &str) -> Result, Error> { - // tr tree parsing has special code - // Tr::from_str will check the checksum - // match "tr(" to handle more extensibly - let desc = if s.starts_with("tr(") { - Ok(Descriptor::Tr(Tr::from_str(s)?)) - } else { - let top = expression::Tree::from_str(s)?; - expression::FromTree::from_tree(&top) - }?; - - Ok(desc) + let top = expression::Tree::from_str(s)?; + let ret = Self::from_tree(&top)?; + if let Descriptor::Tr(ref inner) = ret { + // FIXME preserve weird/broken behavior from 12.x. + // See https://github.com/rust-bitcoin/rust-miniscript/issues/734 + ret.sanity_check()?; + for (_, ms) in inner.iter_scripts() { + ms.ext_check(&crate::miniscript::analyzable::ExtParams::sane())?; + } + } + Ok(ret) } } @@ -1545,6 +1546,21 @@ mod tests { ) } + #[test] + fn tr_named_branch() { + use crate::{ParseError, ParseTreeError}; + + assert!(matches!( + StdDescriptor::from_str( + "tr(0202d44008000010100000000084F0000000dd0dd00000000000201dceddd00d00,abc{0,0})" + ), + Err(Error::Parse(ParseError::Tree(ParseTreeError::IncorrectName { + expected: "", + .. + }))), + )); + } + #[test] fn roundtrip_tests() { let descriptor = Descriptor::::from_str("multi"); diff --git a/src/descriptor/tr.rs b/src/descriptor/tr.rs index 486162deb..cc0dd1945 100644 --- a/src/descriptor/tr.rs +++ b/src/descriptor/tr.rs @@ -1,6 +1,5 @@ // SPDX-License-Identifier: CC0-1.0 -use core::str::FromStr; use core::{cmp, fmt, hash}; #[cfg(not(test))] // https://github.com/rust-lang/rust/issues/121684 @@ -12,9 +11,10 @@ use bitcoin::taproot::{ use bitcoin::{opcodes, Address, Network, ScriptBuf, Weight}; use sync::Arc; -use super::checksum::{self, verify_checksum}; +use super::checksum; use crate::descriptor::DefiniteDescriptorKey; use crate::expression::{self, FromTree}; +use crate::iter::TreeLike as _; use crate::miniscript::satisfy::{Placeholder, Satisfaction, SchnorrSigType, Witness}; use crate::miniscript::Miniscript; use crate::plan::AssetProvider; @@ -23,8 +23,8 @@ use crate::policy::Liftable; use crate::prelude::*; use crate::util::{varint_len, witness_size}; use crate::{ - Error, ForEachKey, FromStrKey, MiniscriptKey, Satisfier, ScriptContext, Tap, Threshold, - ToPublicKey, TranslateErr, Translator, + Error, ForEachKey, FromStrKey, MiniscriptKey, ParseError, Satisfier, ScriptContext, Tap, + Threshold, ToPublicKey, TranslateErr, Translator, }; /// A Taproot Tree representation. @@ -490,79 +490,114 @@ where } } -#[rustfmt::skip] -impl Tr { - // Helper function to parse taproot script path - fn parse_tr_script_spend(tree: &expression::Tree,) -> Result, Error> { - match tree { - expression::Tree { name, args, .. } if !name.is_empty() && args.is_empty() => { - let script = Miniscript::::from_str(name)?; - Ok(TapTree::Leaf(Arc::new(script))) - } - expression::Tree { name, args, .. } if name.is_empty() && args.len() == 2 => { - let left = Self::parse_tr_script_spend(&args[0])?; - let right = Self::parse_tr_script_spend(&args[1])?; - Ok(TapTree::combine(left, right)) - } - _ => { - Err(Error::Unexpected( - "unknown format for script spending paths while parsing taproot descriptor" - .to_string(), - ))}, - } +impl core::str::FromStr for Tr { + type Err = Error; + + fn from_str(s: &str) -> Result { + let expr_tree = expression::Tree::from_str(s)?; + Self::from_tree(&expr_tree) } } impl crate::expression::FromTree for Tr { - fn from_tree(top: &expression::Tree) -> Result { - if top.name == "tr" { - match top.args.len() { - 1 => { - let key = &top.args[0]; - if !key.args.is_empty() { - return Err(Error::Unexpected(format!( - "#{} script associated with `key-path` while parsing taproot descriptor", - key.args.len() - ))); + fn from_tree(expr_tree: &expression::Tree) -> Result { + use crate::expression::{Parens, ParseTreeError}; + + expr_tree + .verify_toplevel("tr", 1..=2) + .map_err(From::from) + .map_err(Error::Parse)?; + + let mut round_paren_depth = 0; + + let mut internal_key = None; + let mut tree_stack = vec![]; + + for item in expr_tree.verbose_pre_order_iter() { + // Top-level "tr" node. + if item.index == 0 { + if item.is_complete { + debug_assert!( + internal_key.is_some(), + "checked above that top-level 'tr' has children" + ); + } + } else if item.index == 1 { + // First child of tr, which must be the internal key + if !item.node.args.is_empty() { + return Err(Error::Parse(ParseError::Tree( + ParseTreeError::IncorrectNumberOfChildren { + description: "internal key", + n_children: item.node.args.len(), + minimum: Some(0), + maximum: Some(0), + }, + ))); + } + internal_key = Some( + Pk::from_str(item.node.name) + .map_err(ParseError::box_from_str) + .map_err(Error::Parse)?, + ); + } else { + // From here on we are into the taptree. + if item.n_children_yielded == 0 { + match item.node.parens { + Parens::Curly => { + if !item.node.name.is_empty() { + return Err(Error::Parse(ParseError::Tree( + ParseTreeError::IncorrectName { + actual: item.node.name.to_owned(), + expected: "", + }, + ))); + } + if round_paren_depth > 0 { + return Err(Error::Parse(ParseError::Tree( + ParseTreeError::IllegalCurlyBrace { + pos: item.node.children_pos, + }, + ))); + } + } + Parens::Round => round_paren_depth += 1, + _ => {} } - Tr::new(expression::terminal(key, Pk::from_str)?, None) } - 2 => { - let key = &top.args[0]; - if !key.args.is_empty() { - return Err(Error::Unexpected(format!( - "#{} script associated with `key-path` while parsing taproot descriptor", - key.args.len() - ))); + if item.is_complete { + if item.node.parens == Parens::Curly { + if item.n_children_yielded == 2 { + let rchild = tree_stack.pop().unwrap(); + let lchild = tree_stack.pop().unwrap(); + tree_stack.push(TapTree::combine(lchild, rchild)); + } else { + return Err(Error::Parse(ParseError::Tree( + ParseTreeError::IncorrectNumberOfChildren { + description: "Taptree node", + n_children: item.n_children_yielded, + minimum: Some(2), + maximum: Some(2), + }, + ))); + } + } else { + if item.node.parens == Parens::Round { + round_paren_depth -= 1; + } + if round_paren_depth == 0 { + let script = Miniscript::from_tree(item.node)?; + // FIXME hack for https://github.com/rust-bitcoin/rust-miniscript/issues/734 + if script.ty.corr.base != crate::miniscript::types::Base::B { + return Err(Error::NonTopLevel(format!("{:?}", script))); + }; + tree_stack.push(TapTree::Leaf(Arc::new(script))); + } } - let tree = &top.args[1]; - let ret = Self::parse_tr_script_spend(tree)?; - Tr::new(expression::terminal(key, Pk::from_str)?, Some(ret)) } - _ => Err(Error::Unexpected(format!( - "{}[#{} args] while parsing taproot descriptor", - top.name, - top.args.len() - ))), } - } else { - Err(Error::Unexpected(format!( - "{}[#{} args] while parsing taproot descriptor", - top.name, - top.args.len() - ))) } - } -} - -impl core::str::FromStr for Tr { - type Err = Error; - fn from_str(s: &str) -> Result { - let desc_str = verify_checksum(s) - .map_err(From::from) - .map_err(Error::ParseTree)?; - let top = parse_tr_tree(desc_str)?; - Self::from_tree(&top) + assert!(tree_stack.len() <= 1); + Tr::new(internal_key.unwrap(), tree_stack.pop()) } } @@ -588,69 +623,6 @@ impl fmt::Display for Tr { } } -// Helper function to parse string into miniscript tree form -fn parse_tr_tree(s: &str) -> Result { - if s.len() > 3 && &s[..3] == "tr(" && s.as_bytes()[s.len() - 1] == b')' { - let rest = &s[3..s.len() - 1]; - if !rest.contains(',') { - let key = expression::Tree::from_str(rest)?; - if !key.args.is_empty() { - return Err(Error::Unexpected("invalid taproot internal key".to_string())); - } - let internal_key = expression::Tree { - name: key.name, - parens: expression::Parens::None, - children_pos: 0, - args: vec![], - }; - return Ok(expression::Tree { - name: "tr", - parens: expression::Parens::Round, - children_pos: 0, - args: vec![internal_key], - }); - } - // use str::split_once() method to refactor this when compiler version bumps up - let (key, script) = split_once(rest, ',') - .ok_or_else(|| Error::BadDescriptor("invalid taproot descriptor".to_string()))?; - - let key = expression::Tree::from_str(key)?; - if !key.args.is_empty() { - return Err(Error::Unexpected("invalid taproot internal key".to_string())); - } - let internal_key = expression::Tree { - name: key.name, - parens: expression::Parens::None, - children_pos: 0, - args: vec![], - }; - let tree = expression::Tree::from_slice_delim(script, expression::Deliminator::Taproot) - .map_err(Error::ParseTree)?; - Ok(expression::Tree { - name: "tr", - parens: expression::Parens::Round, - children_pos: 0, - args: vec![internal_key, tree], - }) - } else { - Err(Error::Unexpected("invalid taproot descriptor".to_string())) - } -} - -fn split_once(inp: &str, delim: char) -> Option<(&str, &str)> { - if inp.is_empty() { - None - } else { - // find the first character that matches delim - let res = inp - .chars() - .position(|ch| ch == delim) - .map(|idx| (&inp[..idx], &inp[idx + 1..])) - .unwrap_or((inp, "")); - Some(res) - } -} - impl Liftable for TapTree { fn lift(&self) -> Result, Error> { fn lift_helper(s: &TapTree) -> Result, Error> { @@ -767,6 +739,8 @@ where #[cfg(test)] mod tests { + use core::str::FromStr; + use super::*; fn descriptor() -> String { diff --git a/src/expression/mod.rs b/src/expression/mod.rs index 6f8367827..8b0a4258f 100644 --- a/src/expression/mod.rs +++ b/src/expression/mod.rs @@ -71,15 +71,6 @@ pub enum Parens { Curly, } -/// Whether to treat `{` and `}` as deliminators when parsing an expression. -#[derive(Copy, Clone, PartialEq, Eq)] -pub enum Deliminator { - /// Use `(` and `)` as parentheses. - NonTaproot, - /// Use `{` and `}` as parentheses. - Taproot, -} - /// A trait for extracting a structure from a Tree representation in token form pub trait FromTree: Sized { /// Extract a structure from Tree representation @@ -156,16 +147,11 @@ impl<'a> Tree<'a> { Ok(()) } - /// Parse an expression with round brackets - pub fn from_slice(sl: &'a str) -> Result, ParseTreeError> { - Self::from_slice_delim(sl, Deliminator::NonTaproot) - } - /// Check that a string is a well-formed expression string, with optional /// checksum. /// /// Returns the string with the checksum removed and its tree depth. - fn parse_pre_check(s: &str, open: u8, close: u8) -> Result<(&str, usize), ParseTreeError> { + fn parse_pre_check(s: &str) -> Result<(&str, usize), ParseTreeError> { // First, scan through string to make sure it is well-formed. // Do ASCII/checksum check first; after this we can use .bytes().enumerate() rather // than .char_indices(), which is *significantly* faster. @@ -174,12 +160,12 @@ impl<'a> Tree<'a> { let mut max_depth = 0; let mut open_paren_stack = Vec::with_capacity(128); for (pos, ch) in s.bytes().enumerate() { - if ch == open { + if ch == b'(' || ch == b'{' { open_paren_stack.push((ch, pos)); if max_depth < open_paren_stack.len() { max_depth = open_paren_stack.len(); } - } else if ch == close { + } else if ch == b')' || ch == b'}' { if let Some((open_ch, open_pos)) = open_paren_stack.pop() { if (open_ch == b'(' && ch == b'}') || (open_ch == b'{' && ch == b')') { return Err(ParseTreeError::MismatchedParens { @@ -250,26 +236,26 @@ impl<'a> Tree<'a> { Ok((s, max_depth)) } - pub(crate) fn from_slice_delim(s: &'a str, delim: Deliminator) -> Result { - let (oparen, cparen) = match delim { - Deliminator::NonTaproot => (b'(', b')'), - Deliminator::Taproot => (b'{', b'}'), - }; + /// Parses a tree from a string + #[allow(clippy::should_implement_trait)] // Cannot use std::str::FromStr because of lifetimes. + pub fn from_str(s: &'a str) -> Result { + Self::from_str_inner(s).map_err(Error::ParseTree) + } + fn from_str_inner(s: &'a str) -> Result { // First, scan through string to make sure it is well-formed. - let (s, max_depth) = Self::parse_pre_check(s, oparen, cparen)?; + let (s, max_depth) = Self::parse_pre_check(s)?; // Now, knowing it is sane and well-formed, we can easily parse it backward, // which will yield a post-order right-to-left iterator of its nodes. let mut stack = Vec::with_capacity(max_depth); let mut children_parens: Option<(Vec<_>, usize, Parens)> = None; let mut node_name_end = s.len(); - let mut tapleaf_depth = 0; for (pos, ch) in s.bytes().enumerate().rev() { - if ch == cparen { + if ch == b')' || ch == b'}' { stack.push(vec![]); node_name_end = pos; - } else if tapleaf_depth == 0 && ch == b',' { + } else if ch == b',' { let (mut args, children_pos, parens) = children_parens .take() @@ -281,7 +267,7 @@ impl<'a> Tree<'a> { Tree { name: &s[pos + 1..node_name_end], children_pos, parens, args }; top.push(new_tree); node_name_end = pos; - } else if ch == oparen { + } else if ch == b'(' || ch == b'{' { let (mut args, children_pos, parens) = children_parens .take() @@ -302,10 +288,6 @@ impl<'a> Tree<'a> { }, )); node_name_end = pos; - } else if delim == Deliminator::Taproot && ch == b'(' { - tapleaf_depth += 1; - } else if delim == Deliminator::Taproot && ch == b')' { - tapleaf_depth -= 1; } } @@ -318,12 +300,6 @@ impl<'a> Tree<'a> { Ok(Tree { name: &s[..node_name_end], children_pos, parens, args }) } - /// Parses a tree from a string - #[allow(clippy::should_implement_trait)] // Cannot use std::str::FromStr because of lifetimes. - pub fn from_str(s: &'a str) -> Result, Error> { - Self::from_slice_delim(s, Deliminator::NonTaproot).map_err(Error::ParseTree) - } - /// Parses an expression tree as a threshold (a term with at least one child, /// the first of which is a positive integer k). /// @@ -427,6 +403,16 @@ mod tests { Tree { name, parens: Parens::Round, children_pos: name.len(), args } } + fn brace_node<'a>(name: &'a str, mut args: Vec>) -> Tree<'a> { + let mut offset = name.len() + 1; // +1 for open paren + for arg in &mut args { + arg.children_pos += offset; + offset += arg.name.len() + 1; // +1 for comma + } + + Tree { name, parens: Parens::Curly, children_pos: name.len(), args } + } + #[test] fn test_parse_num() { assert!(parse_num("0").is_ok()); @@ -518,11 +504,10 @@ mod tests { #[test] fn parse_tree_taproot() { - // This test will change in a later PR which unifies TR and non-TR parsing. - assert!(matches!( - Tree::from_str("a{b(c),d}").unwrap_err(), - Error::ParseTree(ParseTreeError::TrailingCharacter { ch: ',', pos: 6 }), - )); + assert_eq!( + Tree::from_str("a{b(c),d}").unwrap(), + brace_node("a", vec![paren_node("b", vec![leaf("c")]), leaf("d")]), + ); } #[test]