From 4a4498026a5ca458933ab52b22944a8ae681d45e Mon Sep 17 00:00:00 2001
From: Edd Barrett
Date: Mon, 4 Feb 2019 14:29:43 +0000
Subject: [PATCH] The first incarnation of the pack serialiser/deserialiser.

This change adds a streamer "pack" encoder and decoder. The idea is that
compile-time info can be serialised by ykrustc at compile-time and
deserialised at run-time. For example the MIR will need to be consulted
at runtime for code generation. We will have ykrustc serialise the MIR
into packs which are stored in the resulting ELF file ready to be used
later at run-time.
---
 .rustfmt.toml |   5 +
 .travis.yml   |  15 +++
 Cargo.toml    |  16 ++++
 README.md     |   8 +-
 src/decode.rs |  40 ++++++++
 src/encode.rs |  50 ++++++++++
 src/lib.rs    | 246 ++++++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 378 insertions(+), 2 deletions(-)
 create mode 100644 .rustfmt.toml
 create mode 100644 .travis.yml
 create mode 100644 Cargo.toml
 create mode 100644 src/decode.rs
 create mode 100644 src/encode.rs
 create mode 100644 src/lib.rs

diff --git a/.rustfmt.toml b/.rustfmt.toml
new file mode 100644
index 0000000..b477b82
--- /dev/null
+++ b/.rustfmt.toml
@@ -0,0 +1,5 @@
+indent_style="Block"
+imports_indent="Block"
+use_try_shorthand=true
+use_field_init_shorthand=true
+merge_imports=true
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..3cfcef2
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,15 @@
+language: rust
+
+rust:
+  - stable
+  - nightly
+
+before_script:
+  - "[ \"$TRAVIS_RUST_VERSION\" == 'nightly' ] || rustup component add rustfmt"
+
+script:
+  - cargo build --tests
+  - cargo test
+  - "[ \"$TRAVIS_RUST_VERSION\" == 'nightly' ] || cargo fmt --all -- --check"
+
+cache: cargo
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..a74a7e7
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "ykmir"
+version = "0.1.0"
+authors = ["Edd Barrett "]
+edition = "2018"
+
+[dependencies]
+serde = "1.0"
+serde_derive = "1.0"
+fallible-iterator = "0.1"
+
+# We are using a git version to work around a bug:
+# https://github.com/3Hren/msgpack-rust/issues/183
+[dependencies.rmp-serde]
+git = "https://github.com/3Hren/msgpack-rust"
+rev = "40b3d480b20961e6eeceb416b32bcd0a3383846a"
diff --git a/README.md b/README.md
index 8b66cc8..9d70c3a 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,6 @@
-# ykmir
-MIR encoder / decoder for Yorick.
+# ykpack
+
+Pack encoder / decoder for Yorick.
+
+This library allows ykrustc to serialise various compile-time information for
+later reading at run-time.
diff --git a/src/decode.rs b/src/decode.rs
new file mode 100644
index 0000000..6cc654d
--- /dev/null
+++ b/src/decode.rs
@@ -0,0 +1,40 @@
+// Copyright 2019 King's College London.
+// Created by the Software Development Team <http://soft-dev.org/>.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use crate::Pack;
+use fallible_iterator::FallibleIterator;
+use rmp_serde::{
+    decode::{self, ReadReader},
+    Deserializer,
+};
+use serde::Deserialize;
+use std::io::Read;
+
+/// The pack decoder.
+/// Offers a fallible iterator over serialised packs; `next()` yields `Ok(None)` at the sentinel.
+pub struct Decoder<'a> {
+    deser: Deserializer<ReadReader<&'a mut dyn Read>>,
+}
+
+impl<'a> Decoder<'a> {
+    /// Returns a new decoder which will deserialise from `read_from`.
+    pub fn from(read_from: &'a mut dyn Read) -> Self {
+        let deser = Deserializer::new(read_from);
+        Self { deser }
+    }
+}
+
+impl<'a> FallibleIterator for Decoder<'a> {
+    type Item = Pack;
+    type Error = decode::Error;
+
+    fn next(&mut self) -> Result<Option<Self::Item>, Self::Error> {
+        Option::<Pack>::deserialize(&mut self.deser)
+    }
+}
diff --git a/src/encode.rs b/src/encode.rs
new file mode 100644
index 0000000..03ee64f
--- /dev/null
+++ b/src/encode.rs
@@ -0,0 +1,50 @@
+// Copyright 2019 King's College London.
+// Created by the Software Development Team <http://soft-dev.org/>.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+use crate::Pack;
+use rmp_serde::{encode, Serializer};
+use serde::Serialize;
+use std::{io::prelude::*, ops::Drop};
+
+/// The pack encoder.
+///
+/// Packs are written using `serialise()`. Once all packs have been serialised, the consumer must
+/// call `done()`: dropping an unfinished encoder panics (unless the thread is already panicking).
+pub struct Encoder<'a> {
+    ser: Serializer<&'a mut dyn Write>,
+    done: bool,
+}
+
+impl<'a> Encoder<'a> {
+    /// Creates a new encoder which serialises `Pack` into the writable `write_into`.
+    pub fn from(write_into: &'a mut dyn Write) -> Self {
+        let ser = Serializer::new(write_into);
+        Self { ser, done: false }
+    }
+
+    /// Serialises a pack (written as `Some(pack)`), returning any error from the serialiser.
+    pub fn serialise(&mut self, md: Pack) -> Result<(), encode::Error> {
+        Some(md).serialize(&mut self.ser)
+    }
+
+    /// Finalises the serialisation by writing the end-of-packs sentinel (a `None`).
+    pub fn done(mut self) -> Result<(), encode::Error> {
+        // Mark done first so that a failed write returns `Err` rather than panicking in `drop()`.
+        self.done = true;
+        None::<Option<Pack>>.serialize(&mut self.ser)
+    }
+}
+
+impl<'a> Drop for Encoder<'a> {
+    fn drop(&mut self) {
+        if !self.done && !std::thread::panicking() {
+            panic!("Encoder not marked done()");
+        }
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..b356bda
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,246 @@
+// Copyright 2019 King's College London.
+// Created by the Software Development Team <http://soft-dev.org/>.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+/// ykpack -- Serialiser and deserialiser for carrying data from compile-time to run-time.
+///
+/// This crate allows ykrustc to serialise various compile-time information for later
+/// deserialisation by the Yorick runtime.
+///
+/// The encoder and decoder API is structured in such a way that each item -- or "Pack" -- can be
+/// streamed to/from the serialised format one item at a time. This helps to reduce memory
+/// consumption.
+///
+/// The MIR data is serialised in the msgpack format in the following form:
+///
+/// ```text
+/// pack_0:             \
+/// ...                  - Packs.
+/// pack_n              /
+/// sentinel           -- End of packs marker.
+/// ```
+///
+/// Where each pack_i is an instance of `Some(Pack)` and the sentinel is a `None`.
+///
+/// The sentinel is written by `Encoder::done()`; on reading it, `Decoder::next()` returns
+/// `Ok(None)`.
+
+#[macro_use]
+extern crate serde_derive;
+
+mod decode;
+mod encode;
+
+pub use decode::Decoder;
+pub use encode::Encoder;
+
+pub type CrateHash = u64;
+pub type DefIndex = u32;
+pub type BasicBlockIndex = u32;
+
+/// A mirror of the compiler's notion of a "definition ID".
+#[derive(Serialize, Deserialize, PartialEq, Eq, Debug, Clone)]
+pub struct DefId {
+    crate_hash: CrateHash,
+    def_idx: DefIndex,
+}
+
+impl DefId {
+    pub fn new(crate_hash: CrateHash, def_idx: DefIndex) -> Self {
+        Self {
+            crate_hash,
+            def_idx,
+        }
+    }
+}
+
+/// A MIR: the basic blocks belonging to one definition, identified by its `DefId`.
+#[derive(Serialize, Deserialize, PartialEq, Eq, Debug, Clone)]
+pub struct Mir {
+    def_id: DefId,
+    blocks: Vec<BasicBlock>,
+}
+
+impl Mir {
+    /// Create a new MIR.
+    pub fn new(def_id: DefId, blocks: Vec<BasicBlock>) -> Self {
+        Self { def_id, blocks }
+    }
+}
+
+/// A MIR block: a sequence of statements followed by a single terminator.
+#[derive(Serialize, Deserialize, PartialEq, Eq, Debug, Clone)]
+pub struct BasicBlock {
+    stmts: Vec<Statement>,
+    term: Terminator,
+}
+
+impl BasicBlock {
+    /// Create a new MIR block.
+    pub fn new(stmts: Vec<Statement>, term: Terminator) -> Self {
+        Self { stmts, term }
+    }
+}
+
+/// A MIR statement.
+/// FIXME to be populated.
+#[derive(Serialize, Deserialize, PartialEq, Eq, Debug, Clone)]
+pub enum Statement {
+    Nop,
+}
+
+/// A call target.
+#[derive(Serialize, Deserialize, PartialEq, Eq, Debug, Clone)]
+pub enum CallOperand {
+    /// A statically known function identified by its DefId.
+    Fn(DefId),
+    /// An unknown or unhandled callable.
+    Unknown, // FIXME -- Find out what else. Closures jump to mind.
+}
+
+/// A MIR block terminator.
+#[derive(Serialize, Deserialize, PartialEq, Eq, Debug, Clone)]
+pub enum Terminator {
+    Goto {
+        target_bb: BasicBlockIndex,
+    },
+    SwitchInt {
+        target_bbs: Vec<BasicBlockIndex>,
+    },
+    Resume,
+    Abort,
+    Return,
+    Unreachable,
+    Drop {
+        target_bb: BasicBlockIndex,
+        unwind_bb: Option<BasicBlockIndex>,
+    },
+    DropAndReplace {
+        target_bb: BasicBlockIndex,
+        unwind_bb: Option<BasicBlockIndex>,
+    },
+    Call {
+        operand: CallOperand,
+        cleanup_bb: Option<BasicBlockIndex>,
+    },
+    Assert {
+        target_bb: BasicBlockIndex,
+        cleanup_bb: Option<BasicBlockIndex>,
+    },
+    Yield {
+        resume_bb: BasicBlockIndex,
+        drop_bb: Option<BasicBlockIndex>,
+    },
+    GeneratorDrop,
+    FalseEdges {
+        real_target_bb: BasicBlockIndex,
+    },
+    FalseUnwind {
+        real_target_bb: BasicBlockIndex,
+    },
+}
+
+/// The top-level pack type.
+#[derive(Serialize, Deserialize, PartialEq, Eq, Debug, Clone)]
+pub enum Pack {
+    Mir(Mir),
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{BasicBlock, Decoder, DefId, Encoder, Mir, Pack, Statement, Terminator};
+    use fallible_iterator::{self, FallibleIterator};
+    use std::io::{Cursor, Seek, SeekFrom};
+
+    // Get a cursor to serialise to and deserialise from. For real, we'd be reading from a file,
+    // but for tests we use a vector of bytes.
+    fn get_curs() -> Cursor<Vec<u8>> {
+        let buf: Vec<u8> = Vec::new();
+        Cursor::new(buf)
+    }
+
+    // Rewind a cursor to the beginning.
+    fn rewind_curs(curs: &mut Cursor<Vec<u8>>) {
+        curs.seek(SeekFrom::Start(0)).unwrap();
+    }
+
+    // Makes some sample stuff to round trip test.
+    fn get_sample_packs() -> Vec<Pack> {
+        let dummy_term = Terminator::Abort;
+
+        let stmts1_b1 = vec![Statement::Nop; 16];
+        let stmts1_b2 = vec![Statement::Nop; 3];
+        let blocks1 = vec![
+            BasicBlock::new(stmts1_b1, dummy_term.clone()),
+            BasicBlock::new(stmts1_b2, dummy_term.clone()),
+        ];
+        let mir1 = Pack::Mir(Mir::new(DefId::new(1, 2), blocks1));
+
+        let stmts2_b1 = vec![Statement::Nop; 7];
+        let stmts2_b2 = vec![Statement::Nop; 200];
+        let stmts2_b3 = vec![Statement::Nop; 1];
+        let blocks2 = vec![
+            BasicBlock::new(stmts2_b1, dummy_term.clone()),
+            BasicBlock::new(stmts2_b2, dummy_term.clone()),
+            BasicBlock::new(stmts2_b3, dummy_term.clone()),
+        ];
+        let mir2 = Pack::Mir(Mir::new(DefId::new(4, 5), blocks2));
+
+        vec![mir1, mir2]
+    }
+
+    // Check serialising and deserialising works for zero packs.
+    #[test]
+    fn test_empty() {
+        let mut curs = get_curs();
+
+        let enc = Encoder::from(&mut curs);
+        enc.done().unwrap();
+
+        rewind_curs(&mut curs);
+        let mut dec = Decoder::from(&mut curs);
+        assert!(dec.next().unwrap().is_none());
+    }
+
+    // Check a typical serialising and deserialising session.
+    #[test]
+    fn test_basic() {
+        let inputs = get_sample_packs();
+        let mut curs = get_curs();
+
+        let mut enc = Encoder::from(&mut curs);
+        for md in &inputs {
+            enc.serialise(md.clone()).unwrap();
+        }
+        enc.done().unwrap();
+
+        rewind_curs(&mut curs);
+        let dec = Decoder::from(&mut curs);
+        // Zip decoded with expected packs, counting pairs so that a truncated stream fails.
+        let mut num_left = inputs.len();
+        let mut itr = dec.zip(fallible_iterator::convert(inputs.into_iter().map(Ok)));
+        while let Some((got, expect)) = itr.next().unwrap() {
+            assert_eq!(expect, got);
+            num_left -= 1;
+        }
+        assert_eq!(num_left, 0);
+    }
+
+    #[test]
+    #[should_panic(expected = "not marked done")]
+    fn test_encode_not_done() {
+        let inputs = get_sample_packs();
+        let mut curs = get_curs();
+
+        let mut enc = Encoder::from(&mut curs);
+        for md in &inputs {
+            enc.serialise(md.clone()).unwrap();
+        }
+        // We expect this to panic, as the encoder wasn't finalised with a call to `enc.done()`.
+    }
+}