Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: test dead instruction eliminator improvement #7806

Closed
wants to merge 5 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
324 changes: 314 additions & 10 deletions noir/noir-repo/compiler/noirc_evaluator/src/ssa/opt/die.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,20 @@
//! which the results are unused.
use std::collections::HashSet;

use im::Vector;
use noirc_errors::Location;

use crate::ssa::{
ir::{
basic_block::{BasicBlock, BasicBlockId},
dfg::DataFlowGraph,
function::Function,
instruction::{Instruction, InstructionId, Intrinsic},
instruction::{Binary, BinaryOp, Instruction, InstructionId, Intrinsic},
post_order::PostOrder,
types::Type,
value::{Value, ValueId},
},
ssa_gen::Ssa,
ssa_gen::{Ssa, SSA_WORD_SIZE},
};

impl Ssa {
Expand All @@ -20,7 +24,7 @@ impl Ssa {
#[tracing::instrument(level = "trace", skip(self))]
pub(crate) fn dead_instruction_elimination(mut self) -> Ssa {
for function in self.functions.values_mut() {
dead_instruction_elimination(function);
dead_instruction_elimination(function, true);
}
self
}
Expand All @@ -32,16 +36,29 @@ impl Ssa {
/// instructions that reference results from an instruction in another block are evaluated first.
/// If we did not iterate blocks in this order we could not safely say whether or not the results
/// of its instructions are needed elsewhere.
fn dead_instruction_elimination(function: &mut Function) {
fn dead_instruction_elimination(function: &mut Function, insert_out_of_bounds_checks: bool) {
let mut context = Context::default();
for call_data in &function.dfg.data_bus.call_data {
context.mark_used_instruction_results(&function.dfg, call_data.array_id);
}

let blocks = PostOrder::with_function(function);
let mut inserted_out_of_bounds_checks = false;

let blocks = PostOrder::with_function(function);
for block in blocks.as_slice() {
context.remove_unused_instructions_in_block(function, *block);
inserted_out_of_bounds_checks |= context.remove_unused_instructions_in_block(
function,
*block,
insert_out_of_bounds_checks,
);
}

// If we inserted out of bounds check, let's run the pass again with those new
// instructions (we don't want to remove those checks, or instructions that are
// dependencies of those checks)
if inserted_out_of_bounds_checks {
dead_instruction_elimination(function, false);
return;
}

context.remove_rc_instructions(&mut function.dfg);
Expand Down Expand Up @@ -71,20 +88,40 @@ impl Context {
/// values set. This allows DIE to identify whole chains of unused instructions. (If the
/// values referenced by an unused instruction were considered to be used, only the head of
/// such chains would be removed.)
///
/// If `insert_out_of_bounds_checks` is true and there are unused ArrayGet/ArraySet that
/// might be out of bounds, this method will insert out of bounds checks instead of
/// removing unused instructions and return `true`. The idea then is to later call this
/// function again with `insert_out_of_bounds_checks` set to false to effectively remove
/// unused instructions but leave the out of bounds checks.
fn remove_unused_instructions_in_block(
&mut self,
function: &mut Function,
block_id: BasicBlockId,
) {
insert_out_of_bounds_checks: bool,
) -> bool {
let block = &function.dfg[block_id];
self.mark_terminator_values_as_used(function, block);

for instruction_id in block.instructions().iter().rev() {
let instructions_len = block.instructions().len();

// Indexes of instructions that might be out of bounds.
// We'll remove those, but before that we'll insert bounds checks for them.
let mut possible_index_out_of_bounds_indexes = Vec::new();

for (instruction_index, instruction_id) in block.instructions().iter().rev().enumerate() {
let instruction = &function.dfg[*instruction_id];

if self.is_unused(*instruction_id, function) {
self.instructions_to_remove.insert(*instruction_id);
} else {
let instruction = &function.dfg[*instruction_id];

if insert_out_of_bounds_checks
&& instruction_might_result_in_out_of_bounds(function, instruction)
{
possible_index_out_of_bounds_indexes
.push(instructions_len - instruction_index - 1);
}
} else {
use Instruction::*;
if matches!(instruction, IncrementRc { .. } | DecrementRc { .. }) {
self.rc_instructions.push((*instruction_id, block_id));
Expand All @@ -96,9 +133,204 @@ impl Context {
}
}

// If there are some instructions that might trigger an out of bounds error,
// first add constrain checks. Then run the DIE pass again, which will remove those
// but leave the constrains (any any value needed by those constrains)
if !possible_index_out_of_bounds_indexes.is_empty() {
self.insert_out_of_bounds_checks(
function,
block_id,
&mut possible_index_out_of_bounds_indexes,
);
return true;
}

function.dfg[block_id]
.instructions_mut()
.retain(|instruction| !self.instructions_to_remove.contains(instruction));

false
}

fn insert_out_of_bounds_checks(
&mut self,
function: &mut Function,
block_id: BasicBlockId,
possible_index_out_of_bounds_indexes: &mut Vec<usize>,
) {
// Keep track of the current side effects condition
let mut side_effects_condition = None;

// Keep track of the next index we need to handle
let mut next_out_of_bounds_index = possible_index_out_of_bounds_indexes.pop();

let instructions = function.dfg[block_id].take_instructions();
for (index, instruction_id) in instructions.iter().enumerate() {
let instruction_id = *instruction_id;
let instruction = &function.dfg[instruction_id];

if let Instruction::EnableSideEffects { condition } = instruction {
side_effects_condition = Some(*condition);

// We still need to keep the EnableSideEffects instruction
function.dfg[block_id].instructions_mut().push(instruction_id);
continue;
};

// If it's an ArrayGet we'll deal with groups of it in case the array type is a composite type.
if let Instruction::ArrayGet { array, .. } = instruction {
if let Some(array_length) = function.dfg.try_get_array_length(*array) {
let flattened_size = function.dfg.type_of_value(*array).flattened_size();
let element_size = flattened_size / array_length;
if element_size > 1 {
// It's a composite type.
// When doing ArrayGet on a composite type, this **always** results in instructions like these
// (assuming element_size == 3):
//
// 1. v27 = array_get v1, index v26
// 2. v28 = add v26, u32 1
// 3. v29 = array_get v1, index v28
// 4. v30 = add v26, u32 2
// 5. v31 = array_get v1, index v30
//
// That means that after this instructions, (element_size - 1) instructions will be
// part of this composite array get, and they'll be two instructions apart.
//
// Now three things can happen:
// a) none of the array_get instructions are unused: in this case they won't be in
// `possible_index_out_of_bounds_indexes` and they won't be removed, nothing to do here
// b) all of the array_get instructions are unused: in this case we can replace **all**
// of them with just one constrain: no need to do one per array_get
// c) some of the array_get instructions are unused, but not all: in this case
// we don't need to insert any constrain, because on a later stage array bound checks
// will be performed anyway. We'll let DIE remove the unused ones, without replacing
// them with bounds checks, and leave the used ones.
//
// To check in which scenario we are we can get from `possible_index_out_of_bounds_indexes`
// (starting from `next_out_of_bounds_index`) while we are in the group ranges
// (1..=5 in the example above)

if let Some(out_of_bounds_index) = next_out_of_bounds_index {
if index == out_of_bounds_index {
// What's the last instruction that's part of the group? (5 in the example above)
let last_instruction_index = index + 2 * (element_size - 1);
// How many unused instructions are in this group?
let mut unused_count = 1;
loop {
next_out_of_bounds_index =
possible_index_out_of_bounds_indexes.pop();
if let Some(out_of_bounds_index) = next_out_of_bounds_index {
if out_of_bounds_index <= last_instruction_index {
unused_count += 1;
if unused_count == element_size {
// We are in case b): we need to insert just one constrain.
// Since we popped all of the group indexes, and given that we
// are analyzing the first instruction in the group, we can
// set `next_out_of_bounds_index` to the current index:
// then a check will be inserted, and no other checkk will be
// inserted for the rest of the group.
next_out_of_bounds_index = Some(index);
break;
} else {
continue;
}
}
}

// We are in case c): some of the instructions are unused.
// We don't need to insert any checks, and given that we already popped
// all of the indexes in the group, there's nothing else to do here.
break;
}
}
}

// else (for any of the if's above): the next index is not the one for the current instructions,
// so we are in case a), and nothing needs to be done here.
}
}
}

let Some(out_of_bounds_index) = next_out_of_bounds_index else {
// No more out of bounds instructions to insert, just push the current instruction
function.dfg[block_id].instructions_mut().push(instruction_id);
continue;
};

if index != out_of_bounds_index {
// This instruction is not out of bounds: let's just push it
function.dfg[block_id].instructions_mut().push(instruction_id);
continue;
}

// This is an instruction that might be out of bounds: let's add a constrain.
let call_stack = function.dfg.get_call_stack(instruction_id);

let (array, index) = match instruction {
Instruction::ArrayGet { array, index }
| Instruction::ArraySet { array, index, .. } => (array, index),
_ => panic!("Expected an ArrayGet or ArraySet instruction here"),
};

let (lhs, rhs) = if function.dfg.get_numeric_constant(*index).is_some() {
// If we are here it means the index is known but out of bounds. That's always an error!
let false_const = function.dfg.make_constant(false.into(), Type::bool());
let true_const = function.dfg.make_constant(true.into(), Type::bool());
(false_const, true_const)
} else {
// `index` will be relative to the flattened array length, so we need to take that into account
let array_length = function.dfg.type_of_value(*array).flattened_size();

// If we are here it means the index is dynamic, so let's add a check that it's less than length
let index = function
.dfg
.insert_instruction_and_results(
Instruction::Cast(*index, Type::unsigned(SSA_WORD_SIZE)),
block_id,
None,
call_stack.clone(),
)
.first();

let array_length = function
.dfg
.make_constant((array_length as u128).into(), Type::unsigned(SSA_WORD_SIZE));
let is_index_out_of_bounds = function
.dfg
.insert_instruction_and_results(
Instruction::Binary(Binary {
operator: BinaryOp::Lt,
lhs: index,
rhs: array_length,
}),
block_id,
None,
call_stack.clone(),
)
.first();
let true_const = function.dfg.make_constant(true.into(), Type::bool());
(is_index_out_of_bounds, true_const)
};

let (lhs, rhs) = apply_side_effects(
side_effects_condition,
lhs,
rhs,
function,
block_id,
call_stack.clone(),
);

let message = Some("Index out of bounds".to_owned().into());
function.dfg.insert_instruction_and_results(
Instruction::Constrain(lhs, rhs, message),
block_id,
None,
call_stack,
);

next_out_of_bounds_index = possible_index_out_of_bounds_indexes.pop();
}
}

/// Returns true if an instruction can be removed.
Expand Down Expand Up @@ -168,6 +400,78 @@ impl Context {
}
}

fn instruction_might_result_in_out_of_bounds(
function: &Function,
instruction: &Instruction,
) -> bool {
use Instruction::*;
match instruction {
ArrayGet { array, index } | ArraySet { array, index, .. } => {
if function.dfg.try_get_array_length(*array).is_some() {
if let Some(known_index) = function.dfg.get_numeric_constant(*index) {
// `index` will be relative to the flattened array length, so we need to take that into account
let typ = function.dfg.type_of_value(*array);
let array_length = typ.flattened_size();
known_index >= array_length.into()
} else {
// A dynamic index might always be out of bounds
true
}
} else {
// Slice operations might be out of bounds, but there's no way we
// can insert a check because we don't know a slice's length
false
}
}
_ => false,
}
}

fn apply_side_effects(
side_effects_condition: Option<ValueId>,
lhs: ValueId,
rhs: ValueId,
function: &mut Function,
block_id: BasicBlockId,
call_stack: Vector<Location>,
) -> (ValueId, ValueId) {
// See if there's an active "enable side effects" condition
let Some(condition) = side_effects_condition else {
return (lhs, rhs);
};

// Condition needs to be cast to argument type in order to multiply them together.
// In our case, lhs is always a boolean.
let casted_condition = function
.dfg
.insert_instruction_and_results(
Instruction::Cast(condition, Type::bool()),
block_id,
None,
call_stack.clone(),
)
.first();
let lhs = function
.dfg
.insert_instruction_and_results(
Instruction::binary(BinaryOp::Mul, lhs, casted_condition),
block_id,
None,
call_stack.clone(),
)
.first();
let rhs = function
.dfg
.insert_instruction_and_results(
Instruction::binary(BinaryOp::Mul, rhs, casted_condition),
block_id,
None,
call_stack,
)
.first();
(lhs, rhs)
}

#[cfg(test)]
mod test {
use crate::ssa::{
Expand Down
Loading