From bc3183b8a08f12b9c2f9b97af2bcc0f0138dbb7a Mon Sep 17 00:00:00 2001 From: Sonya <60201678+sschriner@users.noreply.github.com> Date: Wed, 24 Feb 2021 14:40:42 -0500 Subject: [PATCH] New control flow support (#98) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Added support for remill kCategoryConditionalFunctionReturn, kCategoryConditionalIndirectJump, kCategoryConditionalDirectFunctionCall, kCategoryConditionalIndirectFunctionCall * Updated case statement * Remove Python2.7 support (#104) * Removes any mention of python2 * Removes more mentions of python2 * IDA: Add a simple action to generate spec files (#94) * IDA: Add a simple action to generate spec files * docs: Update the example instructions * Do not lift functions that are not in the JSON spec (#102) * Modifies lifting to ignore functions that do not have mapped bytes in the spec * Moves byte existence and executability check to LiftFunction() and adds comments * Handling of Named Type references (#99) * Handling of named references Fix the handling of type cache and read bytes from memory * review changes * Add vector type lifting * add remill compat header for vector type Co-authored-by: AkshayK * Refactor the CMake project (#101) * CMake: Refactor * CMake: Update the copyright and license headers * CMake: Refactor * CMake: Refactor * Misc: Remove unused remill_commit_id file * CMake: Refactor * CMake: Refactor * CMake: Refactor * CMake: Refactor * docs: Update the dependencies in the README * CI: Update the GitHub Actions workflow * CI: Update the GitHub Actions workflow * CI: Update the GitHub Actions workflow * Packaging: Add DEB/RPM/TGZ for Linux, TGZ for macOS * CI: Automatically create a release when pushing a tag * CI: Include tags when obtaining version information * CI: Automatically abort stale workflows * CMake: Refactor * CI: Disable shallow clone to fix version detection * CI: Fix Python packaging * CMake: Refactor * CI: Update the release 
generator * CMake: Only install to system packages if not doing a dev install (#109) * Update build.yml (#112) Limit MacOS to LLVM 11 since we have a limited number of MacOS runners. * Add TypeCache for bn type lookup (#108) Adding assert to convert type function * CMake: Update default settings, fix packaging issue (#111) - Enables the tests and the install target in the default configuration - Fixes an issue with packaging, which didn't work correctly due to how DESTDIR was handled * Fix ce replace bug (#110) * Fixes a use of replaceAllUsesOf * Move the binja_var_none_type test to should-be passing. Also, make all stack frames packed, as the way the structure types are constructed assumes every element is adjacent in memory, with i8s explicitly filling gaps * Give __anvill_reg_XXX variables a default initializer to make compiling bitcode possible. Get rid of overly eager, evil optimization that tries to load constants from memory into allocas. Add instcombine to the set of optimizations for folding goodness * Adds a --print_registers_before_instuctions option to inject printfs into the bitcode to dump all address-sized integer registers to stdout before each instruction * Move binja_var_non_type back into failing tests for now Co-authored-by: Carson Harmon * Fix crash array size, unsupported reg, missing data var (#117) * fix crash due to array size and unsupported reg * Fixed assertion failure triggered in ret0.json Co-authored-by: Peter Goodman * Fix bytesequence and copypasta (#116) * Fix bytesequence and copypasta issues * Do variable references again * Update Program.h Useless change to force CI :-P Co-authored-by: Peter Goodman * Formats files and sets internal linkage to `__anvill_reg` globals (#118) * CI: Update asset names when handling tags (#115) * CI: Switch to the more reliable macOS 10.15 workers (#120) * CI: Use a single job to publish releases (#122) * CI: Automatically generate the release changelog (#123) * Updated 
VisitConditionalDirectFunctionCall and VisitConditionalIndirectJump * Delay slot fixes Co-authored-by: Marek Surovič Co-authored-by: Alessandro Gario <5714290+alessandrogario@users.noreply.github.com> Co-authored-by: kumarak Co-authored-by: AkshayK Co-authored-by: Artem Dinaburg Co-authored-by: Peter Goodman Co-authored-by: Carson Harmon Co-authored-by: Peter Goodman --- anvill/include/anvill/MCToIRLifter.h | 17 ++++++ anvill/src/MCToIRLifter.cpp | 91 ++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+) diff --git a/anvill/include/anvill/MCToIRLifter.h b/anvill/include/anvill/MCToIRLifter.h index bb7f6865f..1b67b7289 100644 --- a/anvill/include/anvill/MCToIRLifter.h +++ b/anvill/include/anvill/MCToIRLifter.h @@ -113,10 +113,18 @@ class MCToIRLifter { void VisitIndirectJump(const remill::Instruction &inst, remill::Instruction *delayed_inst, llvm::BasicBlock *block); + void VisitConditionalIndirectJump(const remill::Instruction &inst, + remill::Instruction *delayed_inst, + llvm::BasicBlock *block); + void VisitFunctionReturn(const remill::Instruction &inst, remill::Instruction *delayed_inst, llvm::BasicBlock *block); + void VisitConditionalFunctionReturn(const remill::Instruction &inst, + remill::Instruction *delayed_inst, + llvm::BasicBlock *block); + std::pair LoadFunctionReturnAddress(const remill::Instruction &inst, llvm::BasicBlock *block); @@ -124,10 +132,19 @@ class MCToIRLifter { void VisitDirectFunctionCall(const remill::Instruction &inst, remill::Instruction *delayed_inst, llvm::BasicBlock *block); + + void VisitConditionalDirectFunctionCall(const remill::Instruction &inst, + remill::Instruction *delayed_inst, + llvm::BasicBlock *block); + void VisitIndirectFunctionCall(const remill::Instruction &inst, remill::Instruction *delayed_inst, llvm::BasicBlock *block); + void VisitConditionalIndirectFunctionCall(const remill::Instruction &inst, + remill::Instruction *delayed_inst, + llvm::BasicBlock *block); + void VisitAfterFunctionCall(const 
remill::Instruction &inst, llvm::BasicBlock *block); diff --git a/anvill/src/MCToIRLifter.cpp b/anvill/src/MCToIRLifter.cpp index c72546837..21b64e63c 100644 --- a/anvill/src/MCToIRLifter.cpp +++ b/anvill/src/MCToIRLifter.cpp @@ -129,6 +129,22 @@ void MCToIRLifter::VisitIndirectJump(const remill::Instruction &inst, remill::AddTerminatingTailCall(block, intrinsics.jump); } +void MCToIRLifter::VisitConditionalIndirectJump(const remill::Instruction &inst, + remill::Instruction *delayed_inst, + llvm::BasicBlock *block) { + const auto lifted_func = block->getParent(); + const auto cond = remill::LoadBranchTaken(block); + const auto taken_block = llvm::BasicBlock::Create(ctx, "", lifted_func); + const auto not_taken_block = llvm::BasicBlock::Create(ctx, "", lifted_func); + llvm::BranchInst::Create(taken_block, not_taken_block, cond, block); + VisitDelayedInstruction(inst, delayed_inst, taken_block, true); + remill::AddTerminatingTailCall(taken_block, intrinsics.jump); + VisitDelayedInstruction(inst, delayed_inst, not_taken_block, false); + llvm::BranchInst::Create(GetOrCreateBlock(inst.branch_not_taken_pc), + not_taken_block); +} + + void MCToIRLifter::VisitFunctionReturn(const remill::Instruction &inst, remill::Instruction *delayed_inst, llvm::BasicBlock *block) { @@ -136,6 +152,21 @@ void MCToIRLifter::VisitFunctionReturn(const remill::Instruction &inst, llvm::ReturnInst::Create(ctx, remill::LoadMemoryPointer(block), block); } +void MCToIRLifter::VisitConditionalFunctionReturn(const remill::Instruction &inst, + remill::Instruction *delayed_inst, + llvm::BasicBlock *block) { + const auto lifted_func = block->getParent(); + const auto cond = remill::LoadBranchTaken(block); + const auto taken_block = llvm::BasicBlock::Create(ctx, "", lifted_func); + const auto not_taken_block = llvm::BasicBlock::Create(ctx, "", lifted_func); + llvm::BranchInst::Create(taken_block, not_taken_block, cond, block); + VisitDelayedInstruction(inst, delayed_inst, taken_block, true); + 
remill::AddTerminatingTailCall(taken_block, intrinsics.function_return); + VisitDelayedInstruction(inst, delayed_inst, not_taken_block, false); + llvm::BranchInst::Create(GetOrCreateBlock(inst.branch_not_taken_pc), + not_taken_block); +} + // Figure out the fall-through return address for a function call. There are // annoying SPARC-isms to deal with due to their awful ABI choices. std::pair @@ -223,6 +254,35 @@ void MCToIRLifter::VisitDirectFunctionCall(const remill::Instruction &inst, VisitAfterFunctionCall(inst, block); } +void MCToIRLifter::VisitConditionalDirectFunctionCall( + const remill::Instruction &inst, + remill::Instruction *delayed_inst, + llvm::BasicBlock *block) { + const auto lifted_func = block->getParent(); + auto do_cond_call = llvm::BasicBlock::Create(ctx, "", lifted_func); + auto next_block = llvm::BasicBlock::Create(ctx, "", lifted_func); + llvm::BranchInst::Create(do_cond_call, next_block, + remill::LoadBranchTaken(block), block); + + VisitDelayedInstruction(inst, delayed_inst, do_cond_call, true); + if (auto decl = program.FindFunction(inst.branch_taken_pc); decl) { + const auto entry = GetOrDeclareFunction(*decl); + remill::AddCall(do_cond_call, entry.lifted_to_native); + } else { + LOG(ERROR) << "Missing declaration for function at " << std::hex + << inst.branch_taken_pc << " called at " << inst.pc << std::dec; + + // If we do not have a function declaration, treat this as a call to an unknown address. 
+ remill::AddCall(do_cond_call, intrinsics.function_call); + } + VisitAfterFunctionCall(inst, do_cond_call); + + VisitDelayedInstruction(inst, delayed_inst, next_block, false); + llvm::BranchInst::Create(GetOrCreateBlock(inst.branch_not_taken_pc), + next_block); +} + + void MCToIRLifter::VisitIndirectFunctionCall(const remill::Instruction &inst, remill::Instruction *delayed_inst, llvm::BasicBlock *block) { @@ -232,6 +292,25 @@ void MCToIRLifter::VisitIndirectFunctionCall(const remill::Instruction &inst, VisitAfterFunctionCall(inst, block); } +void MCToIRLifter::VisitConditionalIndirectFunctionCall( + const remill::Instruction &inst, + remill::Instruction *delayed_inst, + llvm::BasicBlock *block) { + const auto lifted_func = block->getParent(); + auto do_cond_call = llvm::BasicBlock::Create(ctx, "", lifted_func); + auto next_block = llvm::BasicBlock::Create(ctx, "", lifted_func); + llvm::BranchInst::Create(do_cond_call, next_block, + remill::LoadBranchTaken(block), block); + + VisitDelayedInstruction(inst, delayed_inst, do_cond_call, true); + remill::AddCall(do_cond_call, intrinsics.function_call); + VisitAfterFunctionCall(inst, do_cond_call); + + VisitDelayedInstruction(inst, delayed_inst, next_block, false); + llvm::BranchInst::Create(GetOrCreateBlock(inst.branch_not_taken_pc), + next_block); +} + void MCToIRLifter::VisitAfterFunctionCall(const remill::Instruction &inst, llvm::BasicBlock *block) { auto [ret_pc, ret_pc_val] = LoadFunctionReturnAddress(inst, block); @@ -451,15 +530,27 @@ void MCToIRLifter::VisitInstruction( case remill::Instruction::kCategoryIndirectJump: VisitIndirectJump(inst, delayed_inst, block); break; + case remill::Instruction::kCategoryConditionalIndirectJump: + VisitConditionalIndirectJump(inst, delayed_inst, block); + break; case remill::Instruction::kCategoryFunctionReturn: VisitFunctionReturn(inst, delayed_inst, block); break; + case remill::Instruction::kCategoryConditionalFunctionReturn: + VisitConditionalFunctionReturn(inst, 
delayed_inst, block); + break; case remill::Instruction::kCategoryDirectFunctionCall: VisitDirectFunctionCall(inst, delayed_inst, block); break; + case remill::Instruction::kCategoryConditionalDirectFunctionCall: + VisitConditionalDirectFunctionCall(inst, delayed_inst, block); + break; case remill::Instruction::kCategoryIndirectFunctionCall: VisitIndirectFunctionCall(inst, delayed_inst, block); break; + case remill::Instruction::kCategoryConditionalIndirectFunctionCall: + VisitConditionalIndirectFunctionCall(inst, delayed_inst, block); + break; case remill::Instruction::kCategoryConditionalBranch: VisitConditionalBranch(inst, delayed_inst, block); break;