Skip to content
This repository has been archived by the owner on Oct 8, 2019. It is now read-only.

Commit

Permalink
The first incarnation of the pack serialiser/deserialiser.
Browse files Browse the repository at this point in the history
This change adds a streaming "pack" encoder and decoder. The idea is that
compile-time info can be serialised by ykrustc at compile-time and
deserialised at run-time.

For example the MIR will need to be consulted at runtime for code
generation. We will have ykrustc serialise the MIR into packs which are
stored in the resulting ELF file ready to be used later at run-time.
  • Loading branch information
vext01 committed Feb 14, 2019
1 parent 3839aca commit 4a44980
Show file tree
Hide file tree
Showing 7 changed files with 378 additions and 2 deletions.
5 changes: 5 additions & 0 deletions .rustfmt.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
indent_style="Block"
imports_indent="Block"
use_try_shorthand=true
use_field_init_shorthand=true
merge_imports=true
15 changes: 15 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
language: rust

rust:
- stable
- nightly

before_script:
- "[ \"$TRAVIS_RUST_VERSION\" == 'nightly' ] || rustup component add rustfmt"

script:
- cargo build --tests
- cargo test
- "[ \"$TRAVIS_RUST_VERSION\" == 'nightly' ] || cargo fmt --all -- --check"

cache: cargo
16 changes: 16 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
[package]
name = "ykmir"
version = "0.1.0"
authors = ["Edd Barrett <[email protected]>"]
edition = "2018"

[dependencies]
serde = "1.0"
serde_derive = "1.0"
fallible-iterator = "0.1"

# We are using a git version to work around a bug:
# https://github.com/3Hren/msgpack-rust/issues/183
[dependencies.rmp-serde]
git = "https://github.com/3Hren/msgpack-rust"
rev = "40b3d480b20961e6eeceb416b32bcd0a3383846a"
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
# ykmir
MIR encoder / decoder for Yorick.
# ykpack

Pack encoder / decoder for Yorick.

This library allows ykrustc to serialise various compile-time information for
later reading at run-time.
40 changes: 40 additions & 0 deletions src/decode.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Copyright 2019 King's College London.
// Created by the Software Development Team <http://soft-dev.org/>.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use crate::Pack;
use fallible_iterator::FallibleIterator;
use rmp_serde::{
decode::{self, ReadReader},
Deserializer,
};
use serde::Deserialize;
use std::io::Read;

/// The pack decoder.
///
/// Offers a simple fallible-iterator interface to serialised packs: each call to `next()`
/// deserialises one `Option<Pack>` from the underlying reader. A `None` marks the end of the
/// packs, after which the decoder never touches the reader again.
pub struct Decoder<'a> {
    /// The msgpack deserialiser wrapping the caller-supplied reader.
    deser: Deserializer<ReadReader<&'a mut dyn Read>>,
    /// Set once the end-of-packs sentinel has been read.
    finished: bool,
}

impl<'a> Decoder<'a> {
    /// Returns a new decoder which will deserialise from `read_from`.
    ///
    /// Nothing is read until the first call to `next()`.
    pub fn from(read_from: &'a mut dyn Read) -> Self {
        Self {
            deser: Deserializer::new(read_from),
            finished: false,
        }
    }
}

impl<'a> FallibleIterator for Decoder<'a> {
    type Item = Pack;
    type Error = decode::Error;

    /// Deserialises the next pack.
    ///
    /// Returns `Ok(Some(pack))` for each pack, and `Ok(None)` once the sentinel has been
    /// reached. Further calls after the sentinel keep returning `Ok(None)` instead of reading
    /// whatever bytes happen to follow the sentinel in the underlying reader.
    ///
    /// # Errors
    /// Propagates any `decode::Error` raised while deserialising.
    fn next(&mut self) -> Result<Option<Self::Item>, Self::Error> {
        if self.finished {
            return Ok(None);
        }

        let item = Option::<Pack>::deserialize(&mut self.deser)?;
        if item.is_none() {
            // Sentinel seen: remember it so that we never read beyond the end of the packs.
            self.finished = true;
        }
        Ok(item)
    }
}
50 changes: 50 additions & 0 deletions src/encode.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Copyright 2019 King's College London.
// Created by the Software Development Team <http://soft-dev.org/>.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use crate::Pack;
use rmp_serde::{encode, Serializer};
use serde::Serialize;
use std::{io::prelude::*, ops::Drop};

/// The pack encoder.
///
/// Packs are written using the `serialise()` method. Once all of the desired packs are
/// serialised, the consumer must call `done()`, which writes the end-of-packs sentinel.
/// The `Drop` implementation checks that `done()` was called.
pub struct Encoder<'a> {
// The msgpack serialiser wrapping the caller-supplied writer.
ser: Serializer<&'a mut dyn Write>,
// Initially `false`; set by `done()` and inspected when the encoder is dropped.
done: bool,
}

impl<'a> Encoder<'a> {
    /// Creates a new encoder which serialises `Pack`s into the writable `write_into`.
    ///
    /// Nothing is written until `serialise()` or `done()` is called.
    pub fn from(write_into: &'a mut dyn Write) -> Self {
        let ser = Serializer::new(write_into);
        Self { ser, done: false }
    }

    /// Serialises a pack.
    ///
    /// The pack is written as `Some(md)` so that the decoder can tell it apart from the
    /// end-of-packs sentinel.
    ///
    /// # Errors
    /// Returns the `encode::Error` reported by the underlying serialiser.
    pub fn serialise(&mut self, md: Pack) -> Result<(), encode::Error> {
        Some(md).serialize(&mut self.ser)
    }

    /// Finalises the serialisation and writes a sentinel (a `None`).
    ///
    /// # Errors
    /// Returns the `encode::Error` reported by the underlying serialiser if the sentinel could
    /// not be written.
    pub fn done(mut self) -> Result<(), encode::Error> {
        // `self` is dropped on every exit path of this function, including the error path. Mark
        // the encoder as done *before* writing, so that a failed write is reported to the caller
        // as an `Err` rather than being turned into a panic by the `Drop` implementation.
        self.done = true;
        None::<Pack>.serialize(&mut self.ser)
    }
}

impl<'a> Drop for Encoder<'a> {
    /// Checks that the encoder was finalised.
    ///
    /// # Panics
    /// Panics if the encoder is dropped without having been marked done, unless the thread is
    /// already unwinding from another panic.
    fn drop(&mut self) {
        // Panicking while the thread is already unwinding would abort the whole process and hide
        // the original panic message, so only complain when we are not already panicking.
        if !self.done && !std::thread::panicking() {
            panic!("Encoder not marked done()");
        }
    }
}
246 changes: 246 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,246 @@
// Copyright 2019 King's College London.
// Created by the Software Development Team <http://soft-dev.org/>.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! ykpack -- Serialiser and deserialiser for carrying data from compile-time to run-time.
//!
//! This crate allows ykrustc to serialise various compile-time information for later
//! deserialisation by the Yorick runtime.
//!
//! The encoder and decoder API is structured in such a way that each item -- or "Pack" -- can be
//! streamed to/from the serialised format one item at a time. This helps to reduce memory
//! consumption.
//!
//! The MIR data is serialised in the msgpack format in the following form:
//!
//! ```text
//! -----------
//! pack_0:    \
//! ...         - Packs.
//! pack_n     /
//! sentinel   -- End of packs marker.
//! -----------
//! ```
//!
//! Where each pack_i is an instance of `Some(Pack)` and the sentinel is a `None`.
//!
//! NOTE: the serialised form does not currently carry a version field; neither the `Encoder`
//! nor the `Decoder` writes or checks one.
#[macro_use]
extern crate serde_derive;

mod decode;
mod encode;

pub use decode::Decoder;
pub use encode::Encoder;

pub type CrateHash = u64;
pub type DefIndex = u32;
pub type BasicBlockIndex = u32;

/// A mirror of the compiler's notion of a "definition ID".
///
/// A definition is identified by the pair (`crate_hash`, `def_idx`). Instances are built with
/// `DefId::new()`.
#[derive(Serialize, Deserialize, PartialEq, Eq, Debug, Clone)]
pub struct DefId {
// Presumably a hash identifying the crate the definition lives in -- TODO confirm with ykrustc.
crate_hash: CrateHash,
// Presumably the index of the definition within that crate -- TODO confirm with ykrustc.
def_idx: DefIndex,
}

impl DefId {
pub fn new(crate_hash: CrateHash, def_idx: DefIndex) -> Self {
Self {
crate_hash,
def_idx,
}
}
}

/// A MIR: the identifier of a definition together with its basic blocks.
///
/// Instances are built with `Mir::new()`.
#[derive(Serialize, Deserialize, PartialEq, Eq, Debug, Clone)]
pub struct Mir {
// The definition this MIR belongs to.
def_id: DefId,
// The basic blocks of the MIR, in the order supplied to `Mir::new()`.
blocks: Vec<BasicBlock>,
}

impl Mir {
/// Create a new MIR.
pub fn new(def_id: DefId, blocks: Vec<BasicBlock>) -> Self {
Self { def_id, blocks }
}
}

/// A MIR block: a sequence of statements followed by exactly one terminator.
///
/// Instances are built with `BasicBlock::new()`.
#[derive(Serialize, Deserialize, PartialEq, Eq, Debug, Clone)]
pub struct BasicBlock {
// The statements of the block, in the order supplied to `BasicBlock::new()`.
stmts: Vec<Statement>,
// The terminator which ends the block.
term: Terminator,
}

impl BasicBlock {
/// Create a new MIR block.
pub fn new(stmts: Vec<Statement>, term: Terminator) -> Self {
Self { stmts, term }
}
}

/// A MIR statement.
///
/// FIXME to be populated. For now the only statement is the placeholder `Nop`.
#[derive(Serialize, Deserialize, PartialEq, Eq, Debug, Clone)]
pub enum Statement {
/// Placeholder statement carrying no data.
Nop,
}

/// A call target, as carried by `Terminator::Call`.
#[derive(Serialize, Deserialize, PartialEq, Eq, Debug, Clone)]
pub enum CallOperand {
/// A statically known function identified by its DefId.
Fn(DefId),
/// An unknown or unhandled callable.
Unknown, // FIXME -- Find out what else. Closures jump to mind.
}

/// A MIR block terminator.
///
/// Every `*_bb` field is a `BasicBlockIndex` naming another basic block; optional blocks are
/// `Option<BasicBlockIndex>`. Variants without fields carry no block information at all.
///
/// NOTE(review): the variant names look like they mirror rustc's MIR terminator kinds -- confirm
/// against ykrustc before relying on the per-variant semantics.
/// NOTE(review): the variant order may be part of the serialised format -- confirm before
/// reordering or inserting variants.
#[derive(Serialize, Deserialize, PartialEq, Eq, Debug, Clone)]
pub enum Terminator {
/// Carries a single target block.
Goto {
target_bb: BasicBlockIndex,
},
/// Carries a list of candidate target blocks.
SwitchInt {
target_bbs: Vec<BasicBlockIndex>,
},
Resume,
Abort,
Return,
Unreachable,
/// Carries a target block and an optional unwind block.
Drop {
target_bb: BasicBlockIndex,
unwind_bb: Option<BasicBlockIndex>,
},
/// Carries a target block and an optional unwind block.
DropAndReplace {
target_bb: BasicBlockIndex,
unwind_bb: Option<BasicBlockIndex>,
},
/// Carries the call target and an optional cleanup block.
Call {
operand: CallOperand,
cleanup_bb: Option<BasicBlockIndex>,
},
/// Carries a target block and an optional cleanup block.
Assert {
target_bb: BasicBlockIndex,
cleanup_bb: Option<BasicBlockIndex>,
},
/// Carries a resume block and an optional drop block.
Yield {
resume_bb: BasicBlockIndex,
drop_bb: Option<BasicBlockIndex>,
},
GeneratorDrop,
/// Carries only the real target block.
FalseEdges {
real_target_bb: BasicBlockIndex,
},
/// Carries only the real target block.
FalseUnwind {
real_target_bb: BasicBlockIndex,
},
}

/// The top-level pack type.
///
/// This is the unit of data handed to `Encoder::serialise()` and yielded by the `Decoder`.
#[derive(Serialize, Deserialize, PartialEq, Eq, Debug, Clone)]
pub enum Pack {
/// A pack carrying a `Mir`.
Mir(Mir),
}

#[cfg(test)]
mod tests {
    use super::{BasicBlock, Decoder, DefId, Encoder, Mir, Pack, Statement, Terminator};
    use fallible_iterator::FallibleIterator;
    use std::io::{Cursor, Seek, SeekFrom};

    // Get a cursor to serialise to and deserialise from. For real, we'd be reading from a file,
    // but for tests we use a vector of bytes.
    fn get_curs() -> Cursor<Vec<u8>> {
        Cursor::new(Vec::new())
    }

    // Rewind a cursor to the beginning.
    fn rewind_curs(curs: &mut Cursor<Vec<u8>>) {
        curs.seek(SeekFrom::Start(0)).unwrap();
    }

    // Makes some sample stuff to round trip test.
    fn get_sample_packs() -> Vec<Pack> {
        let dummy_term = Terminator::Abort;

        let stmts1_b1 = vec![Statement::Nop; 16];
        let stmts1_b2 = vec![Statement::Nop; 3];
        let blocks1 = vec![
            BasicBlock::new(stmts1_b1, dummy_term.clone()),
            BasicBlock::new(stmts1_b2, dummy_term.clone()),
        ];
        let mir1 = Pack::Mir(Mir::new(DefId::new(1, 2), blocks1));

        let stmts2_b1 = vec![Statement::Nop; 7];
        let stmts2_b2 = vec![Statement::Nop; 200];
        let stmts2_b3 = vec![Statement::Nop; 1];
        let blocks2 = vec![
            BasicBlock::new(stmts2_b1, dummy_term.clone()),
            BasicBlock::new(stmts2_b2, dummy_term.clone()),
            BasicBlock::new(stmts2_b3, dummy_term),
        ];
        let mir2 = Pack::Mir(Mir::new(DefId::new(4, 5), blocks2));

        vec![mir1, mir2]
    }

    // Check serialising and deserialising works for zero packs.
    #[test]
    fn test_empty() {
        let mut curs = get_curs();

        let enc = Encoder::from(&mut curs);
        enc.done().unwrap();

        rewind_curs(&mut curs);
        let mut dec = Decoder::from(&mut curs);
        assert!(dec.next().unwrap().is_none());
    }

    // Check a typical serialising and deserialising session.
    #[test]
    fn test_basic() {
        let inputs = get_sample_packs();
        let mut curs = get_curs();

        let mut enc = Encoder::from(&mut curs);
        for md in &inputs {
            enc.serialise(md.clone()).unwrap();
        }
        enc.done().unwrap();

        rewind_curs(&mut curs);
        let mut dec = Decoder::from(&mut curs);

        // Drain the decoder and compare everything at once. Zipping the decoder with the
        // expected packs would stop at the shorter of the two, so a decoder which yields too few
        // packs (or none at all) would go unnoticed.
        let mut got = Vec::new();
        while let Some(pack) = dec.next().unwrap() {
            got.push(pack);
        }
        assert_eq!(inputs, got);
    }

    // Check that dropping an encoder which was never finalised panics.
    #[test]
    #[should_panic(expected = "not marked done")]
    fn test_encode_not_done() {
        let inputs = get_sample_packs();
        let mut curs = get_curs();

        let mut enc = Encoder::from(&mut curs);
        for md in &inputs {
            enc.serialise(md.clone()).unwrap();
        }
        // We expect this to panic, as the encoder wasn't finalised with a call to `enc.done()`.
    }
}

0 comments on commit 4a44980

Please sign in to comment.