From 9ffa4cc5cbdc79a7847d57bbb9937dac64fa78d2 Mon Sep 17 00:00:00 2001 From: Jake Fecher Date: Fri, 26 Jul 2024 11:02:48 -0500 Subject: [PATCH 1/9] Add array_to_str_lossy --- .../noirc_evaluator/src/ssa/acir_gen/mod.rs | 9 ++++- .../check_for_underconstrained_values.rs | 1 + .../noirc_evaluator/src/ssa/ir/instruction.rs | 4 +++ .../src/ssa/ir/instruction/call.rs | 34 ++++++++++++++++++- compiler/noirc_evaluator/src/ssa/ir/types.rs | 5 +++ .../src/ssa/opt/remove_enable_side_effects.rs | 1 + .../src/ssa/opt/remove_if_else.rs | 1 + .../src/ssa/ssa_gen/context.rs | 2 +- noir_stdlib/src/array.nr | 17 ++++++++-- noir_stdlib/src/string.nr | 8 +++++ 10 files changed, 77 insertions(+), 5 deletions(-) diff --git a/compiler/noirc_evaluator/src/ssa/acir_gen/mod.rs b/compiler/noirc_evaluator/src/ssa/acir_gen/mod.rs index d7dd5e5dbce..bfb68ff92bc 100644 --- a/compiler/noirc_evaluator/src/ssa/acir_gen/mod.rs +++ b/compiler/noirc_evaluator/src/ssa/acir_gen/mod.rs @@ -2714,7 +2714,14 @@ impl<'a> Context<'a> { .get_or_create_witness_var(input) .map(|val| self.convert_vars_to_values(vec![val], dfg, result_ids))?) } - _ => todo!("expected a black box function"), + Intrinsic::ArrayToStrLossy => todo!("non-constant array_to_str_lossy"), + Intrinsic::AssertConstant => unreachable!("Expected assert_constant to be removed by this point"), + Intrinsic::StaticAssert => unreachable!("Expected static_assert to be removed by this point"), + Intrinsic::StrAsBytes => unreachable!("Expected as_bytes to be removed by this point"), + Intrinsic::FromField => unreachable!("Expected from_field to be removed by this point"), + Intrinsic::AsField => unreachable!("Expected as_field to be removed by this point"), + Intrinsic::IsUnconstrained => unreachable!("Expected is_unconstrained to be removed by this point"), + Intrinsic::DerivePedersenGenerators => unreachable!("DerivePedersenGenerators can only be called with constants"), } } diff --git a/compiler/noirc_evaluator/src/ssa/checks/check_for_underconstrained_values.rs b/compiler/noirc_evaluator/src/ssa/checks/check_for_underconstrained_values.rs index 5831faa7c4d..2e2841a0d84 100644 --- a/compiler/noirc_evaluator/src/ssa/checks/check_for_underconstrained_values.rs +++ b/compiler/noirc_evaluator/src/ssa/checks/check_for_underconstrained_values.rs @@ -202,6 +202,7 @@ impl Context { | Intrinsic::AsWitness | Intrinsic::IsUnconstrained => {} Intrinsic::ArrayLen + | Intrinsic::ArrayToStrLossy | Intrinsic::AsField | Intrinsic::AsSlice | Intrinsic::BlackBox(..) diff --git a/compiler/noirc_evaluator/src/ssa/ir/instruction.rs b/compiler/noirc_evaluator/src/ssa/ir/instruction.rs index 8cbae732ef9..43314e941c0 100644 --- a/compiler/noirc_evaluator/src/ssa/ir/instruction.rs +++ b/compiler/noirc_evaluator/src/ssa/ir/instruction.rs @@ -50,6 +50,7 @@ pub(crate) type InstructionId = Id; #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub(crate) enum Intrinsic { ArrayLen, + ArrayToStrLossy, AsSlice, AssertConstant, StaticAssert, @@ -75,6 +76,7 @@ impl std::fmt::Display for Intrinsic { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Intrinsic::ArrayLen => write!(f, "array_len"), + Intrinsic::ArrayToStrLossy => write!(f, "array_to_str"), Intrinsic::AsSlice => write!(f, "as_slice"), Intrinsic::AssertConstant => write!(f, "assert_constant"), Intrinsic::StaticAssert => write!(f, "static_assert"), @@ -115,6 +117,7 @@ impl Intrinsic { Intrinsic::ToBits(_) | Intrinsic::ToRadix(_) => true, Intrinsic::ArrayLen + | Intrinsic::ArrayToStrLossy | Intrinsic::AsSlice | Intrinsic::SlicePushBack | Intrinsic::SlicePushFront @@ -143,6 +146,7 @@ impl Intrinsic { pub(crate) fn lookup(name: &str) -> Option { match name { "array_len" => Some(Intrinsic::ArrayLen), + "array_to_str_lossy" => Some(Intrinsic::ArrayToStrLossy), "as_slice" => Some(Intrinsic::AsSlice), "assert_constant" => Some(Intrinsic::AssertConstant), "static_assert" => Some(Intrinsic::StaticAssert), diff --git a/compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs b/compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs index ad01edbd0b2..d4709388ad4 100644 --- a/compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs +++ b/compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs @@ -1,5 +1,5 @@ use fxhash::FxHashMap as HashMap; -use std::{collections::VecDeque, rc::Rc}; +use std::{collections::VecDeque, rc::Rc, borrow::Cow}; use acvm::{acir::AcirField, acir::BlackBoxFunc, BlackBoxResolutionError, FieldElement}; use bn254_blackbox_solver::derive_generators; @@ -85,6 +85,12 @@ pub(super) fn simplify_call( SimplifyResult::None } } + Intrinsic::ArrayToStrLossy => { + match simplify_array_to_str_lossy(dfg, arguments[0]) { + Some(string) => SimplifyResult::SimplifiedTo(string), + None => SimplifyResult::None, + } + } Intrinsic::AsSlice => { let array = dfg.get_array_constant(arguments[0]); if let Some((array, array_type)) = array { @@ -321,6 +327,32 @@ pub(super) fn simplify_call( } } +fn simplify_array_to_str_lossy(dfg: &mut DataFlowGraph, value: ValueId) -> Option { + let array = dfg.get_array_constant(value)?.0; + let mut bytes = Vec::with_capacity(array.len()); + + for value in array { + let value = dfg.get_numeric_constant(value)?; + let char: u8 = value.try_to_u32().and_then(|x| x.try_into().ok())?; + bytes.push(char); + } + + // Convert the string to UTF-8. If the string converted as-is with no changes then + // we can reuse the original value id since strings are represented as byte arrays in SSA. + match String::from_utf8_lossy(&bytes) { + Cow::Borrowed(_) => Some(value), + Cow::Owned(new_bytes) => { + let mut new_array = im::Vector::new(); + for byte in new_bytes.bytes() { + let constant = (byte as u128).into(); + new_array.push_back(dfg.make_constant(constant, Type::char())); + } + let typ = Type::str(new_array.len()); + Some(dfg.make_array(new_array, typ)) + }, + } +} + /// Slices have a tuple structure (slice length, slice contents) to enable logic /// that uses dynamic slice lengths (such as with merging slices in the flattening pass). /// This method codegens an update to the slice length. diff --git a/compiler/noirc_evaluator/src/ssa/ir/types.rs b/compiler/noirc_evaluator/src/ssa/ir/types.rs index 56729a5cba9..e38b8bb1d88 100644 --- a/compiler/noirc_evaluator/src/ssa/ir/types.rs +++ b/compiler/noirc_evaluator/src/ssa/ir/types.rs @@ -109,6 +109,11 @@ impl Type { Type::unsigned(8) } + /// Creates the str type, of the given length N + pub(crate) fn str(length: usize) -> Type { + Type::Array(Rc::new(vec![Type::char()]), length) + } + /// Creates the native field type. pub(crate) fn field() -> Type { Type::Numeric(NumericType::NativeField) diff --git a/compiler/noirc_evaluator/src/ssa/opt/remove_enable_side_effects.rs b/compiler/noirc_evaluator/src/ssa/opt/remove_enable_side_effects.rs index 1584b848564..f7b630132dd 100644 --- a/compiler/noirc_evaluator/src/ssa/opt/remove_enable_side_effects.rs +++ b/compiler/noirc_evaluator/src/ssa/opt/remove_enable_side_effects.rs @@ -158,6 +158,7 @@ impl Context { | Intrinsic::SliceRemove => true, Intrinsic::ArrayLen + | Intrinsic::ArrayToStrLossy | Intrinsic::AssertConstant | Intrinsic::StaticAssert | Intrinsic::ApplyRangeConstraint diff --git a/compiler/noirc_evaluator/src/ssa/opt/remove_if_else.rs b/compiler/noirc_evaluator/src/ssa/opt/remove_if_else.rs index 242eea7d6f4..b1abfaeee45 100644 --- a/compiler/noirc_evaluator/src/ssa/opt/remove_if_else.rs +++ b/compiler/noirc_evaluator/src/ssa/opt/remove_if_else.rs @@ -229,6 +229,7 @@ fn slice_capacity_change( | Intrinsic::StaticAssert | Intrinsic::ApplyRangeConstraint | Intrinsic::ArrayLen + | Intrinsic::ArrayToStrLossy | Intrinsic::StrAsBytes | Intrinsic::BlackBox(_) | Intrinsic::FromField diff --git a/compiler/noirc_evaluator/src/ssa/ssa_gen/context.rs b/compiler/noirc_evaluator/src/ssa/ssa_gen/context.rs index 8e55debec1d..e16f6697c70 100644 --- a/compiler/noirc_evaluator/src/ssa/ssa_gen/context.rs +++ b/compiler/noirc_evaluator/src/ssa/ssa_gen/context.rs @@ -242,7 +242,7 @@ impl<'a> FunctionContext<'a> { ast::Type::Integer(Signedness::Signed, bits) => Type::signed((*bits).into()), ast::Type::Integer(Signedness::Unsigned, bits) => Type::unsigned((*bits).into()), ast::Type::Bool => Type::unsigned(1), - ast::Type::String(len) => Type::Array(Rc::new(vec![Type::char()]), *len as usize), + ast::Type::String(len) => Type::str(*len as usize), ast::Type::FmtString(_, _) => { panic!("convert_non_tuple_type called on a fmt string: {typ}") } diff --git a/noir_stdlib/src/array.nr b/noir_stdlib/src/array.nr index ad9c7093d07..27bab1dc827 100644 --- a/noir_stdlib/src/array.nr +++ b/noir_stdlib/src/array.nr @@ -1,7 +1,7 @@ use crate::cmp::Ord; +use crate::option::Option; +use crate::convert::From; -// TODO: Once we fully move to the new SSA pass this module can be removed and replaced -// by the methods in the `slice` module impl [T; N] { #[builtin(array_len)] pub fn len(self) -> u32 {} @@ -108,6 +108,13 @@ impl [T; N] { } } +impl [u8; N] { + /// Convert a UTF-8 encoded sequence of bytes into a string. + /// Replaces any invalid UTF-8 characters with a placeholder character. + #[builtin(array_to_str_lossy)] + pub fn as_str_lossy(self) -> str {} +} + // helper function used to look up the position of a value in an array of Field // Note that function returns 0 if the value is not found unconstrained fn find_index(a: [u32; N], find: u32) -> u32 { @@ -119,3 +126,9 @@ unconstrained fn find_index(a: [u32; N], find: u32) -> u32 { } result } + +impl From> for [u8; N] { + fn from(s: str) -> Self { + s.as_bytes() + } +} diff --git a/noir_stdlib/src/string.nr b/noir_stdlib/src/string.nr index 5f8f3de775d..895c87fff1b 100644 --- a/noir_stdlib/src/string.nr +++ b/noir_stdlib/src/string.nr @@ -1,4 +1,6 @@ use crate::collections::vec::Vec; +use crate::convert::From; + impl str { /// Converts the given string into a byte array #[builtin(str_as_bytes)] @@ -9,3 +11,9 @@ impl str { Vec::from_slice(self.as_bytes().as_slice()) } } + +impl From<[u8; N]> for str { + fn from(bytes: [u8; N]) -> Self { + bytes.as_str_lossy() + } +} From 5bff363da437d6b6ced00d8900be05f0e7b00641 Mon Sep 17 00:00:00 2001 From: Jake Fecher Date: Fri, 26 Jul 2024 11:09:16 -0500 Subject: [PATCH 2/9] Add docs --- docs/docs/noir/concepts/data_types/arrays.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/docs/docs/noir/concepts/data_types/arrays.md b/docs/docs/noir/concepts/data_types/arrays.md index 9a4ab5d3c1f..d6c19544b95 100644 --- a/docs/docs/noir/concepts/data_types/arrays.md +++ b/docs/docs/noir/concepts/data_types/arrays.md @@ -251,3 +251,23 @@ fn main() { } ``` + +### as_str_lossy + +Converts a byte array of type `[u8; N]` to a UTF-8 encoded string. Any invalid UTF-8 character +is replaced with a placeholder character in the returned string. + +```rust +impl [u8; N] { + pub fn as_str_lossy(self) -> str +} +``` + +example: + +```rust +fn main() { + let hi = [104, 105].as_str_lossy(); + assert_eq(hi, "hi"); +} +``` From 4dfdd4616e7c7761226ae855537823c6fa8ee928 Mon Sep 17 00:00:00 2001 From: Jake Fecher Date: Fri, 26 Jul 2024 11:09:28 -0500 Subject: [PATCH 3/9] Add docs --- compiler/noirc_evaluator/src/ssa/acir_gen/mod.rs | 16 ++++++++++++---- .../src/ssa/ir/instruction/call.rs | 14 ++++++-------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/compiler/noirc_evaluator/src/ssa/acir_gen/mod.rs b/compiler/noirc_evaluator/src/ssa/acir_gen/mod.rs index bfb68ff92bc..7c343fdf117 100644 --- a/compiler/noirc_evaluator/src/ssa/acir_gen/mod.rs +++ b/compiler/noirc_evaluator/src/ssa/acir_gen/mod.rs @@ -2715,13 +2715,21 @@ impl<'a> Context<'a> { .map(|val| self.convert_vars_to_values(vec![val], dfg, result_ids))?) } Intrinsic::ArrayToStrLossy => todo!("non-constant array_to_str_lossy"), - Intrinsic::AssertConstant => unreachable!("Expected assert_constant to be removed by this point"), - Intrinsic::StaticAssert => unreachable!("Expected static_assert to be removed by this point"), + Intrinsic::AssertConstant => { + unreachable!("Expected assert_constant to be removed by this point") + } + Intrinsic::StaticAssert => { + unreachable!("Expected static_assert to be removed by this point") + } Intrinsic::StrAsBytes => unreachable!("Expected as_bytes to be removed by this point"), Intrinsic::FromField => unreachable!("Expected from_field to be removed by this point"), Intrinsic::AsField => unreachable!("Expected as_field to be removed by this point"), - Intrinsic::IsUnconstrained => unreachable!("Expected is_unconstrained to be removed by this point"), - Intrinsic::DerivePedersenGenerators => unreachable!("DerivePedersenGenerators can only be called with constants"), + Intrinsic::IsUnconstrained => { + unreachable!("Expected is_unconstrained to be removed by this point") + } + Intrinsic::DerivePedersenGenerators => { + unreachable!("DerivePedersenGenerators can only be called with constants") + } } } diff --git a/compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs b/compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs index d4709388ad4..a7697f3e22d 100644 --- a/compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs +++ b/compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs @@ -1,5 +1,5 @@ use fxhash::FxHashMap as HashMap; -use std::{collections::VecDeque, rc::Rc, borrow::Cow}; +use std::{borrow::Cow, collections::VecDeque, rc::Rc}; use acvm::{acir::AcirField, acir::BlackBoxFunc, BlackBoxResolutionError, FieldElement}; use bn254_blackbox_solver::derive_generators; @@ -85,12 +85,10 @@ pub(super) fn simplify_call( SimplifyResult::None } } - Intrinsic::ArrayToStrLossy => { - match simplify_array_to_str_lossy(dfg, arguments[0]) { - Some(string) => SimplifyResult::SimplifiedTo(string), - None => SimplifyResult::None, - } - } + Intrinsic::ArrayToStrLossy => match simplify_array_to_str_lossy(dfg, arguments[0]) { + Some(string) => SimplifyResult::SimplifiedTo(string), + None => SimplifyResult::None, + }, Intrinsic::AsSlice => { let array = dfg.get_array_constant(arguments[0]); if let Some((array, array_type)) = array { @@ -349,7 +347,7 @@ fn simplify_array_to_str_lossy(dfg: &mut DataFlowGraph, value: ValueId) -> Optio } let typ = Type::str(new_array.len()); Some(dfg.make_array(new_array, typ)) - }, + } } } From e1afc4925a797fa87bb49331e80fa2320c789ce2 Mon Sep 17 00:00:00 2001 From: Jake Fecher Date: Mon, 29 Jul 2024 11:44:26 -0500 Subject: [PATCH 4/9] Stop validating UTF-8 and add interpreter impl --- .../noirc_evaluator/src/ssa/acir_gen/mod.rs | 2 +- .../check_for_underconstrained_values.rs | 2 +- .../noirc_evaluator/src/ssa/ir/instruction.rs | 8 +++---- .../src/ssa/ir/instruction/call.rs | 2 +- .../src/ssa/opt/remove_enable_side_effects.rs | 2 +- .../src/ssa/opt/remove_if_else.rs | 2 +- .../src/hir/comptime/interpreter/builtin.rs | 24 +++++++++++++++++++ noir_stdlib/src/array.nr | 8 +++---- 8 files changed, 37 insertions(+), 13 deletions(-) diff --git a/compiler/noirc_evaluator/src/ssa/acir_gen/mod.rs b/compiler/noirc_evaluator/src/ssa/acir_gen/mod.rs index 7c343fdf117..9dc58ae1a05 100644 --- a/compiler/noirc_evaluator/src/ssa/acir_gen/mod.rs +++ b/compiler/noirc_evaluator/src/ssa/acir_gen/mod.rs @@ -2714,7 +2714,7 @@ impl<'a> Context<'a> { .get_or_create_witness_var(input) .map(|val| self.convert_vars_to_values(vec![val], dfg, result_ids))?) } - Intrinsic::ArrayToStrLossy => todo!("non-constant array_to_str_lossy"), + Intrinsic::ArrayAsStr => Ok(vec![self.convert_value(arguments[0], dfg)]), Intrinsic::AssertConstant => { unreachable!("Expected assert_constant to be removed by this point") } diff --git a/compiler/noirc_evaluator/src/ssa/checks/check_for_underconstrained_values.rs b/compiler/noirc_evaluator/src/ssa/checks/check_for_underconstrained_values.rs index 2e2841a0d84..37f27c056f2 100644 --- a/compiler/noirc_evaluator/src/ssa/checks/check_for_underconstrained_values.rs +++ b/compiler/noirc_evaluator/src/ssa/checks/check_for_underconstrained_values.rs @@ -202,7 +202,7 @@ impl Context { | Intrinsic::AsWitness | Intrinsic::IsUnconstrained => {} Intrinsic::ArrayLen - | Intrinsic::ArrayToStrLossy + | Intrinsic::ArrayAsStr | Intrinsic::AsField | Intrinsic::AsSlice | Intrinsic::BlackBox(..) diff --git a/compiler/noirc_evaluator/src/ssa/ir/instruction.rs b/compiler/noirc_evaluator/src/ssa/ir/instruction.rs index 43314e941c0..bef6a739265 100644 --- a/compiler/noirc_evaluator/src/ssa/ir/instruction.rs +++ b/compiler/noirc_evaluator/src/ssa/ir/instruction.rs @@ -50,7 +50,7 @@ pub(crate) type InstructionId = Id; #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub(crate) enum Intrinsic { ArrayLen, - ArrayToStrLossy, + ArrayAsStr, AsSlice, AssertConstant, StaticAssert, @@ -76,7 +76,7 @@ impl std::fmt::Display for Intrinsic { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Intrinsic::ArrayLen => write!(f, "array_len"), - Intrinsic::ArrayToStrLossy => write!(f, "array_to_str"), + Intrinsic::ArrayAsStr => write!(f, "array_as_str"), Intrinsic::AsSlice => write!(f, "as_slice"), Intrinsic::AssertConstant => write!(f, "assert_constant"), Intrinsic::StaticAssert => write!(f, "static_assert"), @@ -117,7 +117,7 @@ impl Intrinsic { Intrinsic::ToBits(_) | Intrinsic::ToRadix(_) => true, Intrinsic::ArrayLen - | Intrinsic::ArrayToStrLossy + | Intrinsic::ArrayAsStr | Intrinsic::AsSlice | Intrinsic::SlicePushBack | Intrinsic::SlicePushFront @@ -146,7 +146,7 @@ impl Intrinsic { pub(crate) fn lookup(name: &str) -> Option { match name { "array_len" => Some(Intrinsic::ArrayLen), - "array_to_str_lossy" => Some(Intrinsic::ArrayToStrLossy), + "array_as_str" => Some(Intrinsic::ArrayAsStr), "as_slice" => Some(Intrinsic::AsSlice), "assert_constant" => Some(Intrinsic::AssertConstant), "static_assert" => Some(Intrinsic::StaticAssert), diff --git a/compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs b/compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs index a7697f3e22d..2a65cef02df 100644 --- a/compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs +++ b/compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs @@ -85,7 +85,7 @@ pub(super) fn simplify_call( SimplifyResult::None } } - Intrinsic::ArrayToStrLossy => match simplify_array_to_str_lossy(dfg, arguments[0]) { + Intrinsic::ArrayAsStr => match simplify_array_to_str_lossy(dfg, arguments[0]) { Some(string) => SimplifyResult::SimplifiedTo(string), None => SimplifyResult::None, }, diff --git a/compiler/noirc_evaluator/src/ssa/opt/remove_enable_side_effects.rs b/compiler/noirc_evaluator/src/ssa/opt/remove_enable_side_effects.rs index f7b630132dd..98f8c31351a 100644 --- a/compiler/noirc_evaluator/src/ssa/opt/remove_enable_side_effects.rs +++ b/compiler/noirc_evaluator/src/ssa/opt/remove_enable_side_effects.rs @@ -158,7 +158,7 @@ impl Context { | Intrinsic::SliceRemove => true, Intrinsic::ArrayLen - | Intrinsic::ArrayToStrLossy + | Intrinsic::ArrayAsStr | Intrinsic::AssertConstant | Intrinsic::StaticAssert | Intrinsic::ApplyRangeConstraint diff --git a/compiler/noirc_evaluator/src/ssa/opt/remove_if_else.rs b/compiler/noirc_evaluator/src/ssa/opt/remove_if_else.rs index b1abfaeee45..a0e8e0ba2d8 100644 --- a/compiler/noirc_evaluator/src/ssa/opt/remove_if_else.rs +++ b/compiler/noirc_evaluator/src/ssa/opt/remove_if_else.rs @@ -229,7 +229,7 @@ fn slice_capacity_change( | Intrinsic::StaticAssert | Intrinsic::ApplyRangeConstraint | Intrinsic::ArrayLen - | Intrinsic::ArrayToStrLossy + | Intrinsic::ArrayAsStr | Intrinsic::StrAsBytes | Intrinsic::BlackBox(_) | Intrinsic::FromField diff --git a/compiler/noirc_frontend/src/hir/comptime/interpreter/builtin.rs b/compiler/noirc_frontend/src/hir/comptime/interpreter/builtin.rs index 92890cb66b8..5f39af983da 100644 --- a/compiler/noirc_frontend/src/hir/comptime/interpreter/builtin.rs +++ b/compiler/noirc_frontend/src/hir/comptime/interpreter/builtin.rs @@ -32,6 +32,7 @@ impl<'local, 'context> Interpreter<'local, 'context> { let interner = &mut self.elaborator.interner; match name { "array_len" => array_len(interner, arguments, location), + "array_as_str" => array_as_str(interner, arguments, location), "as_slice" => as_slice(interner, arguments, location), "is_unconstrained" => Ok(Value::Bool(true)), "modulus_be_bits" => modulus_be_bits(interner, arguments, location), @@ -123,6 +124,16 @@ pub(super) fn get_field(value: Value, location: Location) -> IResult IResult { + match value { + Value::U8(value) => Ok(value), + value => { + let expected = Type::Integer(Signedness::Unsigned, IntegerBitSize::Eight); + Err(InterpreterError::TypeMismatch { expected, value, location }) + } + } +} + pub(super) fn get_u32(value: Value, location: Location) -> IResult { match value { Value::U32(value) => Ok(value), @@ -180,6 +191,19 @@ fn array_len( } } +fn array_as_str( + interner: &NodeInterner, + mut arguments: Vec<(Value, Location)>, + location: Location, +) -> IResult { + check_argument_count(1, &arguments, location)?; + + let array = get_array(interner, arguments.pop().unwrap().0, location)?.0; + let string_bytes = try_vecmap(array, |byte| get_u8(byte, location))?; + let string = String::from_utf8_lossy(&string_bytes).into_owned(); + Ok(Value::String(Rc::new(string))) +} + fn as_slice( interner: &NodeInterner, mut arguments: Vec<(Value, Location)>, diff --git a/noir_stdlib/src/array.nr b/noir_stdlib/src/array.nr index 27bab1dc827..d5146df894a 100644 --- a/noir_stdlib/src/array.nr +++ b/noir_stdlib/src/array.nr @@ -109,10 +109,10 @@ impl [T; N] { } impl [u8; N] { - /// Convert a UTF-8 encoded sequence of bytes into a string. - /// Replaces any invalid UTF-8 characters with a placeholder character. - #[builtin(array_to_str_lossy)] - pub fn as_str_lossy(self) -> str {} + /// Convert a sequence of bytes as-is into a string. + /// This function performs no UTF-8 validation or similar. + #[builtin(array_as_str)] + pub fn as_str(self) -> str {} } // helper function used to look up the position of a value in an array of Field From d41d2094e0a27c79b477fe254ca5bba8be35f832 Mon Sep 17 00:00:00 2001 From: Jake Fecher Date: Mon, 29 Jul 2024 11:46:57 -0500 Subject: [PATCH 5/9] Remove SSA simplification fn --- .../src/ssa/ir/instruction/call.rs | 34 ++----------------- 1 file changed, 3 insertions(+), 31 deletions(-) diff --git a/compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs b/compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs index 2a65cef02df..edc759bc866 100644 --- a/compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs +++ b/compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs @@ -1,5 +1,5 @@ use fxhash::FxHashMap as HashMap; -use std::{borrow::Cow, collections::VecDeque, rc::Rc}; +use std::{collections::VecDeque, rc::Rc}; use acvm::{acir::AcirField, acir::BlackBoxFunc, BlackBoxResolutionError, FieldElement}; use bn254_blackbox_solver::derive_generators; @@ -85,10 +85,8 @@ pub(super) fn simplify_call( SimplifyResult::None } } - Intrinsic::ArrayAsStr => match simplify_array_to_str_lossy(dfg, arguments[0]) { - Some(string) => SimplifyResult::SimplifiedTo(string), - None => SimplifyResult::None, - }, + // Strings are already arrays of bytes in SSA + Intrinsic::ArrayAsStr => SimplifyResult::SimplifiedTo(arguments[0]), Intrinsic::AsSlice => { let array = dfg.get_array_constant(arguments[0]); if let Some((array, array_type)) = array { @@ -325,32 +323,6 @@ pub(super) fn simplify_call( } } -fn simplify_array_to_str_lossy(dfg: &mut DataFlowGraph, value: ValueId) -> Option { - let array = dfg.get_array_constant(value)?.0; - let mut bytes = Vec::with_capacity(array.len()); - - for value in array { - let value = dfg.get_numeric_constant(value)?; - let char: u8 = value.try_to_u32().and_then(|x| x.try_into().ok())?; - bytes.push(char); - } - - // Convert the string to UTF-8. If the string converted as-is with no changes then - // we can reuse the original value id since strings are represented as byte arrays in SSA. - match String::from_utf8_lossy(&bytes) { - Cow::Borrowed(_) => Some(value), - Cow::Owned(new_bytes) => { - let mut new_array = im::Vector::new(); - for byte in new_bytes.bytes() { - let constant = (byte as u128).into(); - new_array.push_back(dfg.make_constant(constant, Type::char())); - } - let typ = Type::str(new_array.len()); - Some(dfg.make_array(new_array, typ)) - } - } -} - /// Slices have a tuple structure (slice length, slice contents) to enable logic /// that uses dynamic slice lengths (such as with merging slices in the flattening pass). /// This method codegens an update to the slice length. From e9ddada04c2ce2fc90c582171183c10d07de768e Mon Sep 17 00:00:00 2001 From: Jake Fecher Date: Mon, 29 Jul 2024 11:50:31 -0500 Subject: [PATCH 6/9] Update docs --- docs/docs/noir/concepts/data_types/arrays.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/docs/noir/concepts/data_types/arrays.md b/docs/docs/noir/concepts/data_types/arrays.md index d6c19544b95..09542c6c940 100644 --- a/docs/docs/noir/concepts/data_types/arrays.md +++ b/docs/docs/noir/concepts/data_types/arrays.md @@ -252,14 +252,14 @@ fn main() { ``` -### as_str_lossy +### as_str -Converts a byte array of type `[u8; N]` to a UTF-8 encoded string. Any invalid UTF-8 character -is replaced with a placeholder character in the returned string. +Converts a byte array of type `[u8; N]` to a string. Note that this performs no UTF-8 validation - +the given array is interpreted as-is as a string. ```rust impl [u8; N] { - pub fn as_str_lossy(self) -> str + pub fn as_str(self) -> str } ``` @@ -267,7 +267,7 @@ example: ```rust fn main() { - let hi = [104, 105].as_str_lossy(); + let hi = [104, 105].as_str(); assert_eq(hi, "hi"); } ``` From 9b81e69e5f9be9ee9203c9c2bc67e9c24e54e4d4 Mon Sep 17 00:00:00 2001 From: Jake Fecher Date: Mon, 5 Aug 2024 15:34:40 -0500 Subject: [PATCH 7/9] as_str -> as_str_unchecked --- compiler/noirc_evaluator/src/ssa/acir_gen/mod.rs | 2 +- .../src/ssa/checks/check_for_underconstrained_values.rs | 2 +- compiler/noirc_evaluator/src/ssa/ir/instruction.rs | 8 ++++---- compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs | 2 +- .../src/ssa/opt/remove_enable_side_effects.rs | 2 +- compiler/noirc_evaluator/src/ssa/opt/remove_if_else.rs | 2 +- .../src/hir/comptime/interpreter/builtin.rs | 4 ++-- noir_stdlib/src/array.nr | 4 ++-- noir_stdlib/src/string.nr | 2 +- 9 files changed, 14 insertions(+), 14 deletions(-) diff --git a/compiler/noirc_evaluator/src/ssa/acir_gen/mod.rs b/compiler/noirc_evaluator/src/ssa/acir_gen/mod.rs index 9dc58ae1a05..0fd8e528c00 100644 --- a/compiler/noirc_evaluator/src/ssa/acir_gen/mod.rs +++ b/compiler/noirc_evaluator/src/ssa/acir_gen/mod.rs @@ -2714,7 +2714,7 @@ impl<'a> Context<'a> { .get_or_create_witness_var(input) .map(|val| self.convert_vars_to_values(vec![val], dfg, result_ids))?) } - Intrinsic::ArrayAsStr => Ok(vec![self.convert_value(arguments[0], dfg)]), + Intrinsic::ArrayAsStrUnchecked => Ok(vec![self.convert_value(arguments[0], dfg)]), Intrinsic::AssertConstant => { unreachable!("Expected assert_constant to be removed by this point") } diff --git a/compiler/noirc_evaluator/src/ssa/checks/check_for_underconstrained_values.rs b/compiler/noirc_evaluator/src/ssa/checks/check_for_underconstrained_values.rs index 37f27c056f2..24fcb8f61df 100644 --- a/compiler/noirc_evaluator/src/ssa/checks/check_for_underconstrained_values.rs +++ b/compiler/noirc_evaluator/src/ssa/checks/check_for_underconstrained_values.rs @@ -202,7 +202,7 @@ impl Context { | Intrinsic::AsWitness | Intrinsic::IsUnconstrained => {} Intrinsic::ArrayLen - | Intrinsic::ArrayAsStr + | Intrinsic::ArrayAsStrUnchecked | Intrinsic::AsField | Intrinsic::AsSlice | Intrinsic::BlackBox(..) diff --git a/compiler/noirc_evaluator/src/ssa/ir/instruction.rs b/compiler/noirc_evaluator/src/ssa/ir/instruction.rs index bef6a739265..a1830f8ea1d 100644 --- a/compiler/noirc_evaluator/src/ssa/ir/instruction.rs +++ b/compiler/noirc_evaluator/src/ssa/ir/instruction.rs @@ -50,7 +50,7 @@ pub(crate) type InstructionId = Id; #[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] pub(crate) enum Intrinsic { ArrayLen, - ArrayAsStr, + ArrayAsStrUnchecked, AsSlice, AssertConstant, StaticAssert, @@ -76,7 +76,7 @@ impl std::fmt::Display for Intrinsic { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Intrinsic::ArrayLen => write!(f, "array_len"), - Intrinsic::ArrayAsStr => write!(f, "array_as_str"), + Intrinsic::ArrayAsStrUnchecked => write!(f, "array_as_str_unchecked"), Intrinsic::AsSlice => write!(f, "as_slice"), Intrinsic::AssertConstant => write!(f, "assert_constant"), Intrinsic::StaticAssert => write!(f, "static_assert"), @@ -117,7 +117,7 @@ impl Intrinsic { Intrinsic::ToBits(_) | Intrinsic::ToRadix(_) => true, Intrinsic::ArrayLen - | Intrinsic::ArrayAsStr + | Intrinsic::ArrayAsStrUnchecked | Intrinsic::AsSlice | Intrinsic::SlicePushBack | Intrinsic::SlicePushFront @@ -146,7 +146,7 @@ impl Intrinsic { pub(crate) fn lookup(name: &str) -> Option { match name { "array_len" => Some(Intrinsic::ArrayLen), - "array_as_str" => Some(Intrinsic::ArrayAsStr), + "array_as_str_unchecked" => Some(Intrinsic::ArrayAsStrUnchecked), "as_slice" => Some(Intrinsic::AsSlice), "assert_constant" => Some(Intrinsic::AssertConstant), "static_assert" => Some(Intrinsic::StaticAssert), diff --git a/compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs b/compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs index edc759bc866..08f145f598f 100644 --- a/compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs +++ b/compiler/noirc_evaluator/src/ssa/ir/instruction/call.rs @@ -86,7 +86,7 @@ pub(super) fn simplify_call( } } // Strings are already arrays of bytes in SSA - Intrinsic::ArrayAsStr => SimplifyResult::SimplifiedTo(arguments[0]), + Intrinsic::ArrayAsStrUnchecked => SimplifyResult::SimplifiedTo(arguments[0]), Intrinsic::AsSlice => { let array = dfg.get_array_constant(arguments[0]); if let Some((array, array_type)) = array { diff --git a/compiler/noirc_evaluator/src/ssa/opt/remove_enable_side_effects.rs b/compiler/noirc_evaluator/src/ssa/opt/remove_enable_side_effects.rs index 98f8c31351a..224060e131f 100644 --- a/compiler/noirc_evaluator/src/ssa/opt/remove_enable_side_effects.rs +++ b/compiler/noirc_evaluator/src/ssa/opt/remove_enable_side_effects.rs @@ -158,7 +158,7 @@ impl Context { | Intrinsic::SliceRemove => true, Intrinsic::ArrayLen - | Intrinsic::ArrayAsStr + | Intrinsic::ArrayAsStrUnchecked | Intrinsic::AssertConstant | Intrinsic::StaticAssert | Intrinsic::ApplyRangeConstraint diff --git a/compiler/noirc_evaluator/src/ssa/opt/remove_if_else.rs b/compiler/noirc_evaluator/src/ssa/opt/remove_if_else.rs index a0e8e0ba2d8..c70760de85f 100644 --- a/compiler/noirc_evaluator/src/ssa/opt/remove_if_else.rs +++ b/compiler/noirc_evaluator/src/ssa/opt/remove_if_else.rs @@ -229,7 +229,7 @@ fn slice_capacity_change( | Intrinsic::StaticAssert | Intrinsic::ApplyRangeConstraint | Intrinsic::ArrayLen - | Intrinsic::ArrayAsStr + | Intrinsic::ArrayAsStrUnchecked | Intrinsic::StrAsBytes | Intrinsic::BlackBox(_) | Intrinsic::FromField diff --git a/compiler/noirc_frontend/src/hir/comptime/interpreter/builtin.rs b/compiler/noirc_frontend/src/hir/comptime/interpreter/builtin.rs index 5f39af983da..64501bd1558 100644 --- a/compiler/noirc_frontend/src/hir/comptime/interpreter/builtin.rs +++ b/compiler/noirc_frontend/src/hir/comptime/interpreter/builtin.rs @@ -32,7 +32,7 @@ impl<'local, 'context> Interpreter<'local, 'context> { let interner = &mut self.elaborator.interner; match name { "array_len" => array_len(interner, arguments, location), - "array_as_str" => array_as_str(interner, arguments, location), + "array_as_str_unchecked" => array_as_str_unchecked(interner, arguments, location), "as_slice" => as_slice(interner, arguments, location), "is_unconstrained" => Ok(Value::Bool(true)), "modulus_be_bits" => modulus_be_bits(interner, arguments, location), @@ -191,7 +191,7 @@ fn array_len( } } -fn array_as_str( +fn array_as_str_unchecked( interner: &NodeInterner, mut arguments: Vec<(Value, Location)>, location: Location, diff --git a/noir_stdlib/src/array.nr b/noir_stdlib/src/array.nr index d5146df894a..af2bea12c60 100644 --- a/noir_stdlib/src/array.nr +++ b/noir_stdlib/src/array.nr @@ -111,8 +111,8 @@ impl [T; N] { impl [u8; N] { /// Convert a sequence of bytes as-is into a string. /// This function performs no UTF-8 validation or similar. - #[builtin(array_as_str)] - pub fn as_str(self) -> str {} + #[builtin(array_as_str_unchecked)] + pub fn as_str_unchecked(self) -> str {} } // helper function used to look up the position of a value in an array of Field diff --git a/noir_stdlib/src/string.nr b/noir_stdlib/src/string.nr index 895c87fff1b..18fb449626a 100644 --- a/noir_stdlib/src/string.nr +++ b/noir_stdlib/src/string.nr @@ -14,6 +14,6 @@ impl str { impl From<[u8; N]> for str { fn from(bytes: [u8; N]) -> Self { - bytes.as_str_lossy() + bytes.as_str_unchecked() } } From 5bc251e579f37292cd0b1b5174bf094e12315699 Mon Sep 17 00:00:00 2001 From: Jake Fecher Date: Mon, 5 Aug 2024 15:35:53 -0500 Subject: [PATCH 8/9] Update docs --- docs/docs/noir/concepts/data_types/arrays.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/docs/noir/concepts/data_types/arrays.md b/docs/docs/noir/concepts/data_types/arrays.md index 09542c6c940..e5e9f5a1d3b 100644 --- a/docs/docs/noir/concepts/data_types/arrays.md +++ b/docs/docs/noir/concepts/data_types/arrays.md @@ -252,14 +252,14 @@ fn main() { ``` -### as_str +### as_str_unchecked Converts a byte array of type `[u8; N]` to a string. Note that this performs no UTF-8 validation - the given array is interpreted as-is as a string. ```rust impl [u8; N] { - pub fn as_str(self) -> str + pub fn as_str_unchecked(self) -> str } ``` @@ -267,7 +267,7 @@ example: ```rust fn main() { - let hi = [104, 105].as_str(); + let hi = [104, 105].as_str_unchecked(); assert_eq(hi, "hi"); } ``` From ad134e0409f6dbcf3d6eec9eab10f3a78cdedd84 Mon Sep 17 00:00:00 2001 From: Jake Fecher Date: Mon, 5 Aug 2024 15:38:23 -0500 Subject: [PATCH 9/9] Fix merge difference --- .../noirc_frontend/src/hir/comptime/interpreter/builtin.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/compiler/noirc_frontend/src/hir/comptime/interpreter/builtin.rs b/compiler/noirc_frontend/src/hir/comptime/interpreter/builtin.rs index 5088d660d4c..108e78dc1e5 100644 --- a/compiler/noirc_frontend/src/hir/comptime/interpreter/builtin.rs +++ b/compiler/noirc_frontend/src/hir/comptime/interpreter/builtin.rs @@ -31,8 +31,8 @@ impl<'local, 'context> Interpreter<'local, 'context> { ) -> IResult { let interner = &mut self.elaborator.interner; match name { - "array_len" => array_len(interner, arguments, location), "array_as_str_unchecked" => array_as_str_unchecked(interner, arguments, location), + "array_len" => array_len(interner, arguments, location), "as_slice" => as_slice(interner, arguments, location), "is_unconstrained" => Ok(Value::Bool(true)), "modulus_be_bits" => modulus_be_bits(interner, arguments, location), @@ -175,7 +175,8 @@ fn get_u8(value: Value, location: Location) -> IResult { Value::U8(value) => Ok(value), value => { let expected = Type::Integer(Signedness::Unsigned, IntegerBitSize::Eight); - Err(InterpreterError::TypeMismatch { expected, value, location }) + let actual = value.get_type().into_owned(); + Err(InterpreterError::TypeMismatch { expected, actual, location }) } } }