diff --git a/Cargo.toml b/Cargo.toml index 96c1b0fa..974daa24 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,11 +16,12 @@ members = [ ] [workspace.dependencies] +rustpython-ast = { path = "ast", version = "0.2.0" } +rustpython-parser-core = { path = "core", version = "0.2.0" } +rustpython-literal = { path = "literal", version = "0.2.0" } + ahash = "0.7.6" anyhow = "1.0.45" -ascii = "1.0" -bitflags = "1.3.2" -bstr = "0.2.17" cfg-if = "1.0" insta = "1.14.0" itertools = "0.10.3" @@ -32,7 +33,7 @@ rand = "0.8.5" serde = "1.0" static_assertions = "1.1" unicode_names2 = { version = "0.6.0", git = "https://github.com/youknowone/unicode_names2.git", rev = "4ce16aa85cbcdd9cc830410f1a72ef9a235f2fde" } -ruff_python_ast = { git = "https://github.com/youknowone/ruff.git", rev = "583df5c1fa43b2732896219f8ab425116c140c80" } +ruff_python_ast = { git = "https://github.com/youknowone/ruff.git", rev = "088958e8fda2f74f1ebf315c75db13c232409b13" } # ruff_python_ast = { path = "../ruff/crates/ruff_python_ast" } [profile.dev.package."*"] diff --git a/ast/Cargo.toml b/ast/Cargo.toml index 85f3b135..a7fb168c 100644 --- a/ast/Cargo.toml +++ b/ast/Cargo.toml @@ -8,14 +8,14 @@ repository = "https://github.com/RustPython/RustPython" license = "MIT" [features] -default = ["constant-optimization", "fold", "location"] +default = ["constant-optimization", "fold", "source-code"] constant-optimization = ["fold"] -location = [] +source-code = ["fold"] fold = [] unparse = ["rustpython-literal"] [dependencies] -rustpython-compiler-core = { path = "../core", version = "0.2.0" } -rustpython-literal = { path = "../literal", version = "0.2.0", optional = true } +rustpython-parser-core = { workspace = true } +rustpython-literal = { workspace = true, optional = true } num-bigint = { workspace = true } diff --git a/ast/asdl_rs.py b/ast/asdl_rs.py index 1da5d4e5..7f9bc216 100755 --- a/ast/asdl_rs.py +++ b/ast/asdl_rs.py @@ -8,7 +8,6 @@ from argparse import ArgumentParser from pathlib import Path from typing import Optional, Dict -from attr import dataclass import asdl @@ -18,7 +17,7 @@ builtin_type_mapping = { "identifier": "Ident", "string": "String", - "int": "usize", + "int": "u32", "constant": "Constant", } assert builtin_type_mapping.keys() == asdl.builtin_types @@ -391,7 +390,18 @@ def visitModule(self, mod, depth): depth + 1, ) self.emit( - "fn map_located(&mut self, located: Attributed) -> Result, Self::Error> { let custom = self.map_user(located.custom)?; Ok(Attributed { range: located.range, custom, node: located.node }) }", + """ + fn map_located(&mut self, located: Attributed) -> Result, Self::Error> { + let custom = self.map_user(located.custom)?; + Ok(Attributed { range: located.range, custom, node: located.node }) + }""", + depth + 1, + ) + self.emit( + """ + fn fold>(&mut self, node: X) -> Result { + node.fold(self) + }""", depth + 1, ) for dfn in mod.dfns: @@ -761,11 +771,15 @@ def gen_construction(self, cons_path, cons, name, depth): def extract_location(self, typename, depth): row = self.decode_field(asdl.Field("int", "lineno"), typename) column = self.decode_field(asdl.Field("int", "col_offset"), typename) - self.emit(f"""let _location = {{ - let row = try_location_field({row}, _vm)?; - let column = try_location_field({column}, _vm)?; - SourceLocation {{ row, column }} - }};""", depth) + self.emit( + f""" + let _location = {{ + let row = {row}; + let column = {column}; + try_location(row, column, _vm)? + }};""", + depth, + ) def decode_field(self, field, typename): name = json.dumps(field.name) @@ -805,7 +819,7 @@ def write_located_def(typeinfo, f): f.write( textwrap.dedent( """ - use crate::location::SourceRange; + use rustpython_parser_core::source_code::SourceRange; pub type Located = super::generic::Attributed; """ diff --git a/ast/src/attributed.rs b/ast/src/attributed.rs index 9bf2fbc6..234b965d 100644 --- a/ast/src/attributed.rs +++ b/ast/src/attributed.rs @@ -1,5 +1,7 @@ -use crate::location::{SourceLocation, SourceRange}; -use rustpython_compiler_core::text_size::{TextRange, TextSize}; +use rustpython_parser_core::{ + source_code::{SourceLocation, SourceRange}, + text_size::{TextRange, TextSize}, +}; #[derive(Clone, Debug, PartialEq)] pub struct Attributed { diff --git a/ast/src/constant.rs b/ast/src/constant.rs index aad36526..d6bacbc2 100644 --- a/ast/src/constant.rs +++ b/ast/src/constant.rs @@ -1,5 +1,4 @@ use num_bigint::BigInt; -pub use rustpython_compiler_core::ConversionFlag; #[derive(Clone, Debug, PartialEq)] pub enum Constant { @@ -137,7 +136,7 @@ impl crate::fold::Fold for ConstantOptimizer { #[cfg(test)] mod tests { use super::*; - use rustpython_compiler_core::text_size::TextRange; + use rustpython_parser_core::text_size::TextRange; #[cfg(feature = "constant-optimization")] #[test] diff --git a/ast/src/fold_helpers.rs b/ast/src/fold_helpers.rs index 83c7ae40..773d5c84 100644 --- a/ast/src/fold_helpers.rs +++ b/ast/src/fold_helpers.rs @@ -62,4 +62,4 @@ macro_rules! simple_fold { }; } -simple_fold!(usize, String, bool, constant::Constant); +simple_fold!(u32, String, bool, constant::Constant); diff --git a/ast/src/gen/generic.rs b/ast/src/gen/generic.rs index 1b183245..4e497eb4 100644 --- a/ast/src/gen/generic.rs +++ b/ast/src/gen/generic.rs @@ -158,7 +158,7 @@ pub struct StmtAnnAssign { pub target: Box>, pub annotation: Box>, pub value: Option>>, - pub simple: usize, + pub simple: u32, } impl From> for StmtKind { @@ -328,7 +328,7 @@ impl From> for StmtKind { pub struct StmtImportFrom { pub module: Option, pub names: Vec>, - pub level: Option, + pub level: Option, } impl From> for StmtKind { @@ -610,7 +610,7 @@ impl From> for ExprKind { #[derive(Clone, Debug, PartialEq)] pub struct ExprFormattedValue { pub value: Box>, - pub conversion: usize, + pub conversion: u32, pub format_spec: Option>>, } @@ -819,7 +819,7 @@ pub struct Comprehension { pub target: Expr, pub iter: Expr, pub ifs: Vec>, - pub is_async: usize, + pub is_async: u32, } #[derive(Clone, Debug, PartialEq)] @@ -996,7 +996,7 @@ pub type Pattern = Attributed, U>; #[derive(Clone, Debug, PartialEq)] pub struct TypeIgnoreTypeIgnore { - pub lineno: usize, + pub lineno: u32, pub tag: String, } @@ -1019,6 +1019,7 @@ pub mod fold { type TargetU; type Error; fn map_user(&mut self, user: U) -> Result; + fn map_located( &mut self, located: Attributed, @@ -1030,6 +1031,13 @@ pub mod fold { node: located.node, }) } + + fn fold>( + &mut self, + node: X, + ) -> Result { + node.fold(self) + } fn fold_mod(&mut self, node: Mod) -> Result, Self::Error> { fold_mod(self, node) } diff --git a/ast/src/gen/located.rs b/ast/src/gen/located.rs index 84ef9596..0e71a3c8 100644 --- a/ast/src/gen/located.rs +++ b/ast/src/gen/located.rs @@ -1,6 +1,6 @@ // File automatically generated by ast/asdl_rs.py. -use crate::location::SourceRange; +use rustpython_parser_core::source_code::SourceRange; pub type Located = super::generic::Attributed; pub type Mod = super::generic::Mod; diff --git a/ast/src/lib.rs b/ast/src/lib.rs index 56c62639..683698c0 100644 --- a/ast/src/lib.rs +++ b/ast/src/lib.rs @@ -7,53 +7,21 @@ mod generic { include!("gen/generic.rs"); } mod impls; -#[cfg(feature = "location")] -pub mod located { - include!("gen/located.rs"); -} -#[cfg(feature = "location")] -mod locator; -#[cfg(feature = "location")] -pub use crate::locator::locate; -#[cfg(feature = "location")] -pub use rustpython_compiler_core::SourceLocator; - +#[cfg(feature = "source-code")] +mod source_locator; #[cfg(feature = "unparse")] mod unparse; pub use attributed::Attributed; -pub use constant::{Constant, ConversionFlag}; +pub use constant::Constant; pub use generic::*; +pub use rustpython_parser_core::{text_size, ConversionFlag}; pub type Suite = Vec>; -pub mod location { - pub use rustpython_compiler_core::source_code::{OneIndexed, SourceLocation}; - - #[derive(Debug)] - pub struct SourceRange { - pub start: SourceLocation, - pub end: Option, - } - - impl SourceRange { - pub fn new(start: SourceLocation, end: SourceLocation) -> Self { - Self { - start, - end: Some(end), - } - } - pub fn unwrap_end(&self) -> SourceLocation { - self.end.unwrap() - } - } - - impl From> for SourceRange { - fn from(value: std::ops::Range) -> Self { - Self { - start: value.start, - end: Some(value.end), - } - } - } +#[cfg(feature = "source-code")] +pub mod located { + include!("gen/located.rs"); } + +pub use rustpython_parser_core::source_code; diff --git a/ast/src/locator.rs b/ast/src/source_locator.rs similarity index 73% rename from ast/src/locator.rs rename to ast/src/source_locator.rs index 888161ca..bc7055d7 100644 --- a/ast/src/locator.rs +++ b/ast/src/source_locator.rs @@ -1,11 +1,5 @@ use crate::attributed::Attributed; -use crate::fold_helpers::Foldable; -use crate::location::SourceRange; -use rustpython_compiler_core::SourceLocator; - -pub fn locate>(locator: &mut SourceLocator, ast: X) -> X::Mapped { - ast.fold(locator).unwrap() -} +use rustpython_parser_core::source_code::{SourceLocator, SourceRange}; impl crate::fold::Fold<()> for SourceLocator<'_> { type TargetU = SourceRange; diff --git a/ast/src/unparse.rs b/ast/src/unparse.rs index 807b0f16..575cf40d 100644 --- a/ast/src/unparse.rs +++ b/ast/src/unparse.rs @@ -1,7 +1,5 @@ -use crate::{ - Arg, Arguments, Boolop, Cmpop, Comprehension, Constant, ConversionFlag, Expr, ExprKind, - Operator, -}; +use crate::ConversionFlag; +use crate::{Arg, Arguments, Boolop, Cmpop, Comprehension, Constant, Expr, ExprKind, Operator}; use std::fmt; mod precedence { @@ -452,7 +450,7 @@ impl<'a> Unparser<'a> { fn unparse_formatted( &mut self, val: &Expr, - conversion: usize, + conversion: u32, spec: Option<&Expr>, ) -> fmt::Result { let buffered = to_string_fmt(|f| Unparser::new(f).unparse_expr(val, precedence::TEST + 1)); @@ -466,7 +464,7 @@ impl<'a> Unparser<'a> { self.p(&buffered)?; drop(buffered); - if conversion != ConversionFlag::None as usize { + if conversion != ConversionFlag::None as u32 { self.p("!")?; let buf = &[conversion as u8]; let c = std::str::from_utf8(buf).unwrap(); diff --git a/core/Cargo.toml b/core/Cargo.toml index c8add56a..d4c8043c 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -1,6 +1,6 @@ [package] -name = "rustpython-compiler-core" -description = "RustPython specific bytecode." +name = "rustpython-parser-core" +description = "RustPython parser data types." version = "0.2.0" authors = ["RustPython Team"] edition = "2021" @@ -18,3 +18,6 @@ ruff_python_ast = { workspace = true } lz4_flex = "0.9.2" +[features] +default = ["source-code"] +source-code = [] diff --git a/core/src/bytecode.rs b/core/src/bytecode.rs deleted file mode 100644 index 842d4992..00000000 --- a/core/src/bytecode.rs +++ /dev/null @@ -1,1613 +0,0 @@ -//! Implement python as a virtual machine with bytecode. This module -//! implements bytecode structure. - -use crate::{ - marshal, - source_code::{OneIndexed, SourceLocation}, -}; -use bitflags::bitflags; -use itertools::Itertools; -use num_bigint::BigInt; -use num_complex::Complex64; -use std::marker::PhantomData; -use std::{collections::BTreeSet, fmt, hash, mem}; - -pub trait Constant: Sized { - type Name: AsRef; - - /// Transforms the given Constant to a BorrowedConstant - fn borrow_constant(&self) -> BorrowedConstant; -} - -impl Constant for ConstantData { - type Name = String; - fn borrow_constant(&self) -> BorrowedConstant { - use BorrowedConstant::*; - match self { - ConstantData::Integer { value } => Integer { value }, - ConstantData::Float { value } => Float { value: *value }, - ConstantData::Complex { value } => Complex { value: *value }, - ConstantData::Boolean { value } => Boolean { value: *value }, - ConstantData::Str { value } => Str { value }, - ConstantData::Bytes { value } => Bytes { value }, - ConstantData::Code { code } => Code { code }, - ConstantData::Tuple { elements } => Tuple { elements }, - ConstantData::None => None, - ConstantData::Ellipsis => Ellipsis, - } - } -} - -/// A Constant Bag -pub trait ConstantBag: Sized + Copy { - type Constant: Constant; - fn make_constant(&self, constant: BorrowedConstant) -> Self::Constant; - fn make_int(&self, value: BigInt) -> Self::Constant; - fn make_tuple(&self, elements: impl Iterator) -> Self::Constant; - fn make_code(&self, code: CodeObject) -> Self::Constant; - fn make_name(&self, name: &str) -> ::Name; -} - -pub trait AsBag { - type Bag: ConstantBag; - #[allow(clippy::wrong_self_convention)] - fn as_bag(self) -> Self::Bag; -} - -impl AsBag for Bag { - type Bag = Self; - fn as_bag(self) -> Self { - self - } -} - -#[derive(Clone, Copy)] -pub struct BasicBag; - -impl ConstantBag for BasicBag { - type Constant = ConstantData; - fn make_constant(&self, constant: BorrowedConstant) -> Self::Constant { - constant.to_owned() - } - fn make_int(&self, value: BigInt) -> Self::Constant { - ConstantData::Integer { value } - } - fn make_tuple(&self, elements: impl Iterator) -> Self::Constant { - ConstantData::Tuple { - elements: elements.collect(), - } - } - fn make_code(&self, code: CodeObject) -> Self::Constant { - ConstantData::Code { - code: Box::new(code), - } - } - fn make_name(&self, name: &str) -> ::Name { - name.to_owned() - } -} - -/// Primary container of a single code object. Each python function has -/// a code object. Also a module has a code object. -#[derive(Clone)] -pub struct CodeObject { - pub instructions: Box<[CodeUnit]>, - pub locations: Box<[SourceLocation]>, - pub flags: CodeFlags, - pub posonlyarg_count: u32, - // Number of positional-only arguments - pub arg_count: u32, - pub kwonlyarg_count: u32, - pub source_path: C::Name, - pub first_line_number: OneIndexed, - pub max_stackdepth: u32, - pub obj_name: C::Name, - // Name of the object that created this code object - pub cell2arg: Option>, - pub constants: Box<[C]>, - pub names: Box<[C::Name]>, - pub varnames: Box<[C::Name]>, - pub cellvars: Box<[C::Name]>, - pub freevars: Box<[C::Name]>, -} - -bitflags! { - pub struct CodeFlags: u16 { - const NEW_LOCALS = 0x01; - const IS_GENERATOR = 0x02; - const IS_COROUTINE = 0x04; - const HAS_VARARGS = 0x08; - const HAS_VARKEYWORDS = 0x10; - const IS_OPTIMIZED = 0x20; - } -} - -impl CodeFlags { - pub const NAME_MAPPING: &'static [(&'static str, CodeFlags)] = &[ - ("GENERATOR", CodeFlags::IS_GENERATOR), - ("COROUTINE", CodeFlags::IS_COROUTINE), - ( - "ASYNC_GENERATOR", - Self::from_bits_truncate(Self::IS_GENERATOR.bits | Self::IS_COROUTINE.bits), - ), - ("VARARGS", CodeFlags::HAS_VARARGS), - ("VARKEYWORDS", CodeFlags::HAS_VARKEYWORDS), - ]; -} - -/// an opcode argument that may be extended by a prior ExtendedArg -#[derive(Copy, Clone, PartialEq, Eq)] -#[repr(transparent)] -pub struct OpArgByte(pub u8); -impl OpArgByte { - pub const fn null() -> Self { - OpArgByte(0) - } -} -impl fmt::Debug for OpArgByte { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - self.0.fmt(f) - } -} - -/// a full 32-bit op_arg, including any possible ExtendedArg extension -#[derive(Copy, Clone, Debug)] -#[repr(transparent)] -pub struct OpArg(pub u32); -impl OpArg { - pub const fn null() -> Self { - OpArg(0) - } - - /// Returns how many CodeUnits a instruction with this op_arg will be encoded as - #[inline] - pub fn instr_size(self) -> usize { - (self.0 > 0xff) as usize + (self.0 > 0xff_ff) as usize + (self.0 > 0xff_ff_ff) as usize + 1 - } - - /// returns the arg split into any necessary ExtendedArg components (in big-endian order) and - /// the arg for the real opcode itself - #[inline(always)] - pub fn split(self) -> (impl ExactSizeIterator, OpArgByte) { - let mut it = self - .0 - .to_le_bytes() - .map(OpArgByte) - .into_iter() - .take(self.instr_size()); - let lo = it.next().unwrap(); - (it.rev(), lo) - } -} - -#[derive(Default, Copy, Clone)] -#[repr(transparent)] -pub struct OpArgState { - state: u32, -} - -impl OpArgState { - #[inline(always)] - pub fn get(&mut self, ins: CodeUnit) -> (Instruction, OpArg) { - let arg = self.extend(ins.arg); - if ins.op != Instruction::ExtendedArg { - self.reset(); - } - (ins.op, arg) - } - #[inline(always)] - pub fn extend(&mut self, arg: OpArgByte) -> OpArg { - self.state = self.state << 8 | u32::from(arg.0); - OpArg(self.state) - } - #[inline(always)] - pub fn reset(&mut self) { - self.state = 0 - } -} - -pub trait OpArgType: Copy { - fn from_op_arg(x: u32) -> Option; - fn to_op_arg(self) -> u32; -} - -impl OpArgType for u32 { - #[inline(always)] - fn from_op_arg(x: u32) -> Option { - Some(x) - } - #[inline(always)] - fn to_op_arg(self) -> u32 { - self - } -} - -impl OpArgType for bool { - #[inline(always)] - fn from_op_arg(x: u32) -> Option { - Some(x != 0) - } - #[inline(always)] - fn to_op_arg(self) -> u32 { - self as u32 - } -} - -macro_rules! op_arg_enum { - ($(#[$attr:meta])* $vis:vis enum $name:ident { $($(#[$var_attr:meta])* $var:ident = $value:literal,)* }) => { - $(#[$attr])* - $vis enum $name { - $($(#[$var_attr])* $var = $value,)* - } - - impl OpArgType for $name { - fn to_op_arg(self) -> u32 { - self as u32 - } - fn from_op_arg(x: u32) -> Option { - Some(match u8::try_from(x).ok()? { - $($value => Self::$var,)* - _ => return None, - }) - } - } - }; -} - -#[derive(Copy, Clone)] -pub struct Arg(PhantomData); - -impl Arg { - #[inline] - pub fn marker() -> Self { - Arg(PhantomData) - } - #[inline] - pub fn new(arg: T) -> (Self, OpArg) { - (Self(PhantomData), OpArg(arg.to_op_arg())) - } - #[inline] - pub fn new_single(arg: T) -> (Self, OpArgByte) - where - T: Into, - { - (Self(PhantomData), OpArgByte(arg.into())) - } - #[inline(always)] - pub fn get(self, arg: OpArg) -> T { - self.try_get(arg).unwrap() - } - #[inline(always)] - pub fn try_get(self, arg: OpArg) -> Option { - T::from_op_arg(arg.0) - } - #[inline(always)] - /// # Safety - /// T::from_op_arg(self) must succeed - pub unsafe fn get_unchecked(self, arg: OpArg) -> T { - match T::from_op_arg(arg.0) { - Some(t) => t, - None => std::hint::unreachable_unchecked(), - } - } -} - -impl PartialEq for Arg { - fn eq(&self, _: &Self) -> bool { - true - } -} -impl Eq for Arg {} - -impl fmt::Debug for Arg { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "Arg<{}>", std::any::type_name::()) - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Ord, PartialOrd)] -#[repr(transparent)] -// XXX: if you add a new instruction that stores a Label, make sure to add it in -// Instruction::label_arg -pub struct Label(pub u32); - -impl OpArgType for Label { - #[inline(always)] - fn from_op_arg(x: u32) -> Option { - Some(Label(x)) - } - #[inline(always)] - fn to_op_arg(self) -> u32 { - self.0 - } -} - -impl fmt::Display for Label { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - self.0.fmt(f) - } -} - -op_arg_enum!( - /// Transforms a value prior to formatting it. - #[derive(Copy, Clone, Debug, PartialEq, Eq)] - #[repr(u8)] - pub enum ConversionFlag { - /// No conversion - None = 0, // CPython uses -1 but not pleasure for us - /// Converts by calling `str()`. - Str = b's', - /// Converts by calling `ascii()`. - Ascii = b'a', - /// Converts by calling `repr()`. - Repr = b'r', - } -); - -impl TryFrom for ConversionFlag { - type Error = usize; - fn try_from(b: usize) -> Result { - u32::try_from(b).ok().and_then(Self::from_op_arg).ok_or(b) - } -} - -op_arg_enum!( - /// The kind of Raise that occurred. - #[derive(Copy, Clone, Debug, PartialEq, Eq)] - #[repr(u8)] - pub enum RaiseKind { - Reraise = 0, - Raise = 1, - RaiseCause = 2, - } -); - -pub type NameIdx = u32; - -/// A Single bytecode instruction. -#[derive(Debug, Copy, Clone, PartialEq, Eq)] -#[repr(u8)] -pub enum Instruction { - /// Importing by name - ImportName { - idx: Arg, - }, - /// Importing without name - ImportNameless, - /// Import * - ImportStar, - /// from ... import ... - ImportFrom { - idx: Arg, - }, - LoadFast(Arg), - LoadNameAny(Arg), - LoadGlobal(Arg), - LoadDeref(Arg), - LoadClassDeref(Arg), - StoreFast(Arg), - StoreLocal(Arg), - StoreGlobal(Arg), - StoreDeref(Arg), - DeleteFast(Arg), - DeleteLocal(Arg), - DeleteGlobal(Arg), - DeleteDeref(Arg), - LoadClosure(Arg), - Subscript, - StoreSubscript, - DeleteSubscript, - StoreAttr { - idx: Arg, - }, - DeleteAttr { - idx: Arg, - }, - LoadConst { - /// index into constants vec - idx: Arg, - }, - UnaryOperation { - op: Arg, - }, - BinaryOperation { - op: Arg, - }, - BinaryOperationInplace { - op: Arg, - }, - LoadAttr { - idx: Arg, - }, - TestOperation { - op: Arg, - }, - CompareOperation { - op: Arg, - }, - Pop, - Rotate2, - Rotate3, - Duplicate, - Duplicate2, - GetIter, - Continue { - target: Arg