Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor expression parsing and checksum checking #773

Merged
merged 5 commits into from
Nov 13, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 110 additions & 35 deletions src/descriptor/checksum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,44 +16,116 @@ use bech32::{Checksum, Fe32};

pub use crate::expression::VALID_CHARS;
use crate::prelude::*;
use crate::Error;

const CHECKSUM_LENGTH: usize = 8;
const CODE_LENGTH: usize = 32767;

/// Compute the checksum of a descriptor.
/// Map of valid characters in descriptor strings.
///
/// Note that this function does not check if the descriptor string is
/// syntactically correct or not. This only computes the checksum.
pub fn desc_checksum(desc: &str) -> Result<String, Error> {
let mut eng = Engine::new();
eng.input(desc)?;
Ok(eng.checksum())
/// The map starts at 32 (space) and runs up to 126 (tilde).
#[rustfmt::skip]
const CHAR_MAP: [u8; 95] = [
94, 59, 92, 91, 28, 29, 50, 15, 10, 11, 17, 51, 14, 52, 53, 16,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 27, 54, 55, 56, 57, 58,
26, 82, 83, 84, 85, 86, 87, 88, 89, 32, 33, 34, 35, 36, 37, 38,
39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 12, 93, 13, 60, 61,
90, 18, 19, 20, 21, 22, 23, 24, 25, 64, 65, 66, 67, 68, 69, 70,
71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 30, 62, 31, 63,
];

/// Error validating descriptor checksum.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum Error {
/// Character outside of descriptor charset.
InvalidCharacter {
/// The character in question.
ch: char,
/// Its position in the string.
pos: usize,
},
/// Checksum had the incorrect length.
InvalidChecksumLength {
/// The length of the checksum in the string.
actual: usize,
/// The length of a valid descriptor checksum.
expected: usize,
},
/// Checksum was invalid.
InvalidChecksum {
/// The checksum in the string.
actual: [char; CHECKSUM_LENGTH],
/// The checksum that should have been there, assuming the string is valid.
expected: [char; CHECKSUM_LENGTH],
},
}

impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Error::InvalidCharacter { ch, pos } => {
write!(f, "invalid character '{}' (position {})", ch, pos)
}
Error::InvalidChecksumLength { actual, expected } => {
write!(f, "invalid checksum (length {}, expected {})", actual, expected)
}
Error::InvalidChecksum { actual, expected } => {
f.write_str("invalid checksum ")?;
for ch in actual {
ch.fmt(f)?;
}
f.write_str("; expected ")?;
for ch in expected {
ch.fmt(f)?;
}
Ok(())
}
}
}
}

#[cfg(feature = "std")]
impl std::error::Error for Error {
fn cause(&self) -> Option<&dyn std::error::Error> { None }
}

/// Helper function for `FromStr` for various descriptor types.
///
/// Checks and verifies the checksum if it is present and returns the descriptor
/// string without the checksum.
pub(super) fn verify_checksum(s: &str) -> Result<&str, Error> {
for ch in s.as_bytes() {
if *ch < 20 || *ch > 127 {
return Err(Error::Unprintable(*ch));
pub fn verify_checksum(s: &str) -> Result<&str, Error> {
let mut last_hash_pos = s.len();
for (pos, ch) in s.char_indices() {
if !(32..127).contains(&u32::from(ch)) {
sanket1729 marked this conversation as resolved.
Show resolved Hide resolved
return Err(Error::InvalidCharacter { ch, pos });
} else if ch == '#' {
last_hash_pos = pos;
}
}
// After this point we know we have ASCII and can stop using character methods.

if last_hash_pos < s.len() {
let checksum_str = &s[last_hash_pos + 1..];
if checksum_str.len() != CHECKSUM_LENGTH {
return Err(Error::InvalidChecksumLength {
actual: checksum_str.len(),
expected: CHECKSUM_LENGTH,
});
}

let mut eng = Engine::new();
eng.input_unchecked(s[..last_hash_pos].as_bytes());

let mut parts = s.splitn(2, '#');
let desc_str = parts.next().unwrap();
if let Some(checksum_str) = parts.next() {
let expected_sum = desc_checksum(desc_str)?;
if checksum_str != expected_sum {
return Err(Error::BadDescriptor(format!(
"Invalid checksum '{}', expected '{}'",
checksum_str, expected_sum
)));
let expected = eng.checksum_chars();
let mut actual = ['_'; CHECKSUM_LENGTH];
for (act, ch) in actual.iter_mut().zip(checksum_str.chars()) {
*act = ch;
Copy link
Member

@sanket1729 sanket1729 Nov 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From chatgpt; with MSRV 1.63; you can avoid the mut and creating the actual with all '_' as

let mut iter = checksum_str.chars()
let actual: [char; CHECKSUM_LENGTH] = core::array::from_fn(|_| iter.next().expect("Len checked eariler))

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice! I will add a commit to my next PR doing this. I'm thrilled to have from_fn now.

}

if expected != actual {
return Err(Error::InvalidChecksum { actual, expected });
}
}
Ok(desc_str)
Ok(&s[..last_hash_pos])
}

/// An engine to compute a checksum from a string.
Expand All @@ -78,16 +150,18 @@ impl Engine {
/// If this function returns an error, the `Engine` will be left in an indeterminate
/// state! It is safe to continue feeding it data but the result will not be meaningful.
pub fn input(&mut self, s: &str) -> Result<(), Error> {
for ch in s.chars() {
let pos = VALID_CHARS
.get(ch as usize)
.ok_or_else(|| {
Error::BadDescriptor(format!("Invalid character in checksum: '{}'", ch))
})?
.ok_or_else(|| {
Error::BadDescriptor(format!("Invalid character in checksum: '{}'", ch))
})? as u64;
for (pos, ch) in s.char_indices() {
if !(32..127).contains(&u32::from(ch)) {
return Err(Error::InvalidCharacter { ch, pos });
}
}
self.input_unchecked(s.as_bytes());
Ok(())
}

fn input_unchecked(&mut self, s: &[u8]) {
for ch in s {
let pos = u64::from(CHAR_MAP[usize::from(*ch) - 32]);
let fe = Fe32::try_from(pos & 31).expect("pos is valid because of the mask");
self.inner.input_fe(fe);

Expand All @@ -100,7 +174,6 @@ impl Engine {
self.clscount = 0;
}
}
Ok(())
}

/// Obtains the checksum characters of all the data thus-far fed to the
Expand Down Expand Up @@ -192,7 +265,9 @@ mod test {

macro_rules! check_expected {
($desc: expr, $checksum: expr) => {
assert_eq!(desc_checksum($desc).unwrap(), $checksum);
let mut eng = Engine::new();
eng.input_unchecked($desc.as_bytes());
assert_eq!(eng.checksum(), $checksum);
};
}

Expand Down Expand Up @@ -229,8 +304,8 @@ mod test {
let invalid_desc = format!("wpkh(tprv8ZgxMBicQKsPdpkqS7Eair4YxjcuuvDPNYmKX3sCniCf16tHEVrjjiSXEkFRnUH77yXc6ZcwHHcL{}fjdi5qUvw3VDfgYiH5mNsj5izuiu2N/1/2/*)", sparkle_heart);

assert_eq!(
desc_checksum(&invalid_desc).err().unwrap().to_string(),
format!("Invalid descriptor: Invalid character in checksum: '{}'", sparkle_heart)
verify_checksum(&invalid_desc).err().unwrap().to_string(),
format!("invalid character '{}' (position 85)", sparkle_heart)
);
}

Expand Down
13 changes: 6 additions & 7 deletions src/descriptor/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1053,8 +1053,7 @@ mod tests {
use bitcoin::sighash::EcdsaSighashType;
use bitcoin::{bip32, PublicKey, Sequence};

use super::checksum::desc_checksum;
use super::*;
use super::{checksum, *};
use crate::hex_script;
#[cfg(feature = "compiler")]
use crate::policy;
Expand All @@ -1066,10 +1065,10 @@ mod tests {
let desc = Descriptor::<String>::from_str(s).unwrap();
let output = desc.to_string();
let normalize_aliases = s.replace("c:pk_k(", "pk(").replace("c:pk_h(", "pkh(");
assert_eq!(
format!("{}#{}", &normalize_aliases, desc_checksum(&normalize_aliases).unwrap()),
output
);

let mut checksum_eng = checksum::Engine::new();
checksum_eng.input(&normalize_aliases).unwrap();
assert_eq!(format!("{}#{}", &normalize_aliases, checksum_eng.checksum()), output);
}

#[test]
Expand Down Expand Up @@ -1841,7 +1840,7 @@ mod tests {
($secp: ident,$($desc: expr),*) => {
$(
match Descriptor::parse_descriptor($secp, $desc) {
Err(Error::BadDescriptor(_)) => {},
Err(Error::Checksum(_)) => {},
Err(e) => panic!("Expected bad checksum for {}, got '{}'", $desc, e),
_ => panic!("Invalid checksum treated as valid: {}", $desc),
};
Expand Down
9 changes: 9 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,8 @@ pub enum Error {
Threshold(ThresholdError),
/// Invalid threshold.
ParseThreshold(ParseThresholdError),
/// Checksum error parsing a descriptor.
Checksum(descriptor::checksum::Error),
}

// https://github.com/sipa/miniscript/pull/5 for discussion on this number
Expand Down Expand Up @@ -553,6 +555,7 @@ impl fmt::Display for Error {
Error::RelativeLockTime(ref e) => e.fmt(f),
Error::Threshold(ref e) => e.fmt(f),
Error::ParseThreshold(ref e) => e.fmt(f),
Error::Checksum(ref e) => e.fmt(f),
}
}
}
Expand Down Expand Up @@ -603,6 +606,7 @@ impl error::Error for Error {
RelativeLockTime(e) => Some(e),
Threshold(e) => Some(e),
ParseThreshold(e) => Some(e),
Checksum(e) => Some(e),
}
}
}
Expand Down Expand Up @@ -642,6 +646,11 @@ impl From<bitcoin::address::P2shError> for Error {
fn from(e: bitcoin::address::P2shError) -> Error { Error::AddrP2shError(e) }
}

#[doc(hidden)]
impl From<descriptor::checksum::Error> for Error {
fn from(e: descriptor::checksum::Error) -> Error { Error::Checksum(e) }
}

#[doc(hidden)]
#[cfg(feature = "compiler")]
impl From<crate::policy::compiler::CompilerError> for Error {
Expand Down