diff --git a/.rustfmt.toml b/.rustfmt.toml new file mode 100644 index 0000000..b477b82 --- /dev/null +++ b/.rustfmt.toml @@ -0,0 +1,5 @@ +indent_style="Block" +imports_indent="Block" +use_try_shorthand=true +use_field_init_shorthand=true +merge_imports=true diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..3cfcef2 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,15 @@ +language: rust + +rust: + - stable + - nightly + +before_script: + - "[ \"$TRAVIS_RUST_VERSION\" == 'nightly' ] || rustup component add rustfmt" + +script: + - cargo build --tests + - cargo test + - "[ \"$TRAVIS_RUST_VERSION\" == 'nightly' ] || cargo fmt --all -- --check" + +cache: cargo diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..a74a7e7 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "ykmir" +version = "0.1.0" +authors = ["Edd Barrett "] +edition = "2018" + +[dependencies] +serde = "1.0" +serde_derive = "1.0" +fallible-iterator = "0.1" + +# We are using a git version to work around a bug: +# https://github.com/3Hren/msgpack-rust/issues/183 +[dependencies.rmp-serde] +git = "https://github.com/3Hren/msgpack-rust" +rev = "40b3d480b20961e6eeceb416b32bcd0a3383846a" diff --git a/README.md b/README.md index 8b66cc8..9d70c3a 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,6 @@ -# ykmir -MIR encoder / decoder for Yorick. +# ykpack + +Pack encoder / decoder for Yorick. + +This library allows ykrustc to serialise various compile-time information for +later reading at run-time. diff --git a/src/decode.rs b/src/decode.rs new file mode 100644 index 0000000..6cc654d --- /dev/null +++ b/src/decode.rs @@ -0,0 +1,40 @@ +// Copyright 2019 King's College London. +// Created by the Software Development Team . +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +use crate::Pack; +use fallible_iterator::FallibleIterator; +use rmp_serde::{ + decode::{self, ReadReader}, + Deserializer, +}; +use serde::Deserialize; +use std::io::Read; + +/// The pack decoder. +/// Offers a simple iterator interface to serialised packs. +pub struct Decoder<'a> { + deser: Deserializer<ReadReader<&'a mut dyn Read>>, +} + +impl<'a> Decoder<'a> { + /// Returns a new decoder which will deserialise from `read_from`. + pub fn from(read_from: &'a mut dyn Read) -> Self { + let deser = Deserializer::new(read_from); + Self { deser } + } +} + +impl<'a> FallibleIterator for Decoder<'a> { + type Item = Pack; + type Error = decode::Error; + + /// Deserialises the next `Option<Pack>` from the stream, so a serialised `None` (the + /// sentinel) ends the iteration. + fn next(&mut self) -> Result<Option<Self::Item>, Self::Error> { + Option::<Pack>::deserialize(&mut self.deser) + } +} diff --git a/src/encode.rs b/src/encode.rs new file mode 100644 index 0000000..03ee64f --- /dev/null +++ b/src/encode.rs @@ -0,0 +1,50 @@ +// Copyright 2019 King's College London. +// Created by the Software Development Team . +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use crate::Pack; +use rmp_serde::{encode, Serializer}; +use serde::Serialize; +use std::{io::prelude::*, ops::Drop}; + +/// The pack encoder. +/// +/// Packs are written using the `serialise()` method. Once all of the desired packs are serialised, +/// the consumer must call `done()`. +pub struct Encoder<'a> { + ser: Serializer<&'a mut dyn Write>, + done: bool, +} + +impl<'a> Encoder<'a> { + /// Creates a new encoder which serialises `Pack` into the writable `write_into`. + pub fn from(write_into: &'a mut dyn Write) -> Self { + let ser = Serializer::new(write_into); + Self { ser, done: false } + } + + /// Serialises a pack. + pub fn serialise(&mut self, md: Pack) -> Result<(), encode::Error> { + Some(md).serialize(&mut self.ser) + } + + /// Finalises the serialisation and writes a sentinel. 
+ pub fn done(mut self) -> Result<(), encode::Error> { + None::>.serialize(&mut self.ser)?; + self.done = true; + Ok(()) + } +} + +impl<'a> Drop for Encoder<'a> { + fn drop(&mut self) { + if !self.done { + panic!("Encoder not marked done()"); + } + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..b356bda --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,246 @@ +// Copyright 2019 King's College London. +// Created by the Software Development Team . +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +/// ykpack -- Serialiser and deserialiser for carrying data from compile-time to run-time. +/// +/// This crate allows ykrustc to serialise various compile-time information for later +/// deserialisation by the Yorick runtime. +/// +/// The encoder and decoder API is structured in such a way that each item -- or "Pack" -- can be +/// streamed to/from the serialised format one item at a time. This helps to reduce memory +/// consumption. +/// +/// The MIR data is serialised in the msgpack format in the following form: +/// +/// ----------- +/// pack_0: \ +/// ... - Packs. +/// pack_n / +/// sentinel -- End of packs marker. +/// ----------- +/// +/// Where each pack_i is an instance of `Some(Pack)` and the sentinel is a `None`. +/// +/// The version field is automatically written and checked by the `Encoder` and `Decoder` +/// respectively. + +#[macro_use] +extern crate serde_derive; + +mod decode; +mod encode; + +pub use decode::Decoder; +pub use encode::Encoder; + +pub type CrateHash = u64; +pub type DefIndex = u32; +pub type BasicBlockIndex = u32; + +/// A mirror of the compiler's notion of a "definition ID". 
+#[derive(Serialize, Deserialize, PartialEq, Eq, Debug, Clone)] +pub struct DefId { + crate_hash: CrateHash, + def_idx: DefIndex, +} + +impl DefId { + pub fn new(crate_hash: CrateHash, def_idx: DefIndex) -> Self { + Self { + crate_hash, + def_idx, + } + } +} + +/// A MIR. +#[derive(Serialize, Deserialize, PartialEq, Eq, Debug, Clone)] +pub struct Mir { + def_id: DefId, + blocks: Vec, +} + +impl Mir { + /// Create a new MIR. + pub fn new(def_id: DefId, blocks: Vec) -> Self { + Self { def_id, blocks } + } +} + +/// A MIR block. +#[derive(Serialize, Deserialize, PartialEq, Eq, Debug, Clone)] +pub struct BasicBlock { + stmts: Vec, + term: Terminator, +} + +impl BasicBlock { + /// Create a new MIR block. + pub fn new(stmts: Vec, term: Terminator) -> Self { + Self { stmts, term } + } +} + +/// A MIR statement. +/// FIXME to be populated. +#[derive(Serialize, Deserialize, PartialEq, Eq, Debug, Clone)] +pub enum Statement { + Nop, +} + +/// A call target. +#[derive(Serialize, Deserialize, PartialEq, Eq, Debug, Clone)] +pub enum CallOperand { + /// A statically known function identified by its DefId. + Fn(DefId), + /// An unknown or unhandled callable. + Unknown, // FIXME -- Find out what else. Closures jump to mind. +} + +/// A MIR block terminator. +#[derive(Serialize, Deserialize, PartialEq, Eq, Debug, Clone)] +pub enum Terminator { + Goto { + target_bb: BasicBlockIndex, + }, + SwitchInt { + target_bbs: Vec, + }, + Resume, + Abort, + Return, + Unreachable, + Drop { + target_bb: BasicBlockIndex, + unwind_bb: Option, + }, + DropAndReplace { + target_bb: BasicBlockIndex, + unwind_bb: Option, + }, + Call { + operand: CallOperand, + cleanup_bb: Option, + }, + Assert { + target_bb: BasicBlockIndex, + cleanup_bb: Option, + }, + Yield { + resume_bb: BasicBlockIndex, + drop_bb: Option, + }, + GeneratorDrop, + FalseEdges { + real_target_bb: BasicBlockIndex, + }, + FalseUnwind { + real_target_bb: BasicBlockIndex, + }, +} + +/// The top-level pack type. 
+#[derive(Serialize, Deserialize, PartialEq, Eq, Debug, Clone)] +pub enum Pack { + Mir(Mir), +} + +#[cfg(test)] +mod tests { + use super::{BasicBlock, Decoder, DefId, Encoder, Mir, Pack, Statement, Terminator}; + use fallible_iterator::{self, FallibleIterator}; + use std::io::{Cursor, Seek, SeekFrom}; + + // Get a cursor to serialise to and deserialise from. For real, we'd be reading from a file, + // but for tests we use a vector of bytes. + fn get_curs() -> Cursor<Vec<u8>> { + let buf: Vec<u8> = Vec::new(); + Cursor::new(buf) + } + + // Rewind a cursor to the beginning. + fn rewind_curs(curs: &mut Cursor<Vec<u8>>) { + curs.seek(SeekFrom::Start(0)).unwrap(); + } + + // Makes some sample stuff to round trip test. + fn get_sample_packs() -> Vec<Pack> { + let dummy_term = Terminator::Abort; + + let stmts1_b1 = vec![Statement::Nop; 16]; + let stmts1_b2 = vec![Statement::Nop; 3]; + let blocks1 = vec![ + BasicBlock::new(stmts1_b1, dummy_term.clone()), + BasicBlock::new(stmts1_b2, dummy_term.clone()), + ]; + let mir1 = Pack::Mir(Mir::new(DefId::new(1, 2), blocks1)); + + let stmts2_b1 = vec![Statement::Nop; 7]; + let stmts2_b2 = vec![Statement::Nop; 200]; + let stmts2_b3 = vec![Statement::Nop; 1]; + let blocks2 = vec![ + BasicBlock::new(stmts2_b1, dummy_term.clone()), + BasicBlock::new(stmts2_b2, dummy_term.clone()), + BasicBlock::new(stmts2_b3, dummy_term.clone()), + ]; + let mir2 = Pack::Mir(Mir::new(DefId::new(4, 5), blocks2)); + + vec![mir1, mir2] + } + + // Check serialising and deserialising works for zero packs. + #[test] + fn test_empty() { + let mut curs = get_curs(); + + let enc = Encoder::from(&mut curs); + enc.done().unwrap(); + + rewind_curs(&mut curs); + let mut dec = Decoder::from(&mut curs); + assert!(dec.next().unwrap().is_none()); + } + + // Check a typical serialising and deserialising session. 
+ #[test] + fn test_basic() { + let inputs = get_sample_packs(); + let num_inputs = inputs.len(); + let mut curs = get_curs(); + + let mut enc = Encoder::from(&mut curs); + for md in &inputs { + enc.serialise(md.clone()).unwrap(); + } + enc.done().unwrap(); + + rewind_curs(&mut curs); + let dec = Decoder::from(&mut curs); + + // Obtain two fallible iterators, so we can zip them. + let expect_iter = fallible_iterator::convert(inputs.into_iter().map(Ok)); + + // `zip` stops at the shorter iterator, so count the compared pairs to make sure the + // decoder really yielded every pack we serialised. + let mut num_checked = 0; + let mut itr = dec.zip(expect_iter); + while let Some((got, expect)) = itr.next().unwrap() { + assert_eq!(expect, got); + num_checked += 1; + } + assert_eq!(num_inputs, num_checked); + } + + #[test] + #[should_panic(expected = "not marked done")] + fn test_encode_not_done() { + let inputs = get_sample_packs(); + let mut curs = get_curs(); + + let mut enc = Encoder::from(&mut curs); + for md in &inputs { + enc.serialise(md.clone()).unwrap(); + } + // We expect this to panic, as the encoder wasn't finalised with a call to `enc.done()`. + } +}