Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix circular references and some typos #3

Merged
merged 1 commit into from
Jun 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion binexport/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
from .function import FunctionBinExport
from .basic_block import BasicBlockBinExport
from .instruction import InstructionBinExport
from.operand import OperandBinExport
from .operand import OperandBinExport
from .expression import ExpressionBinExport
7 changes: 3 additions & 4 deletions binexport/basic_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class BasicBlockBinExport(OrderedDict):
methods to access instructions.
"""

def __init__(self, program: "ProgramBinExport", function: "FunctionBinExport", pb_bb: "BinExport2.BasicBlock"):
def __init__(self, program: weakref.ref["ProgramBinExport"], function: weakref.ref["FunctionBinExport"], pb_bb: "BinExport2.BasicBlock"):
"""
:param program: Weak reference to the program
:param function: Weak reference to the function
Expand All @@ -29,16 +29,15 @@ def __init__(self, program: "ProgramBinExport", function: "FunctionBinExport", p
self.bytes = b"" #: bytes of the basic block

# Ranges are in fact the true basic blocks but BinExport
# don't have the same basic block semantic and merge multiple basic blocks into one.
# doesn't have the same basic block semantics and merges multiple basic blocks into one.
# For example: BB_1 -- unconditional_jmp --> BB_2
# might be merged into a single basic block so lose the edge
# might be merged into a single basic block so the edge gets lost.
for rng in pb_bb.instruction_index:
for idx in instruction_index_range(rng):
pb_inst = self.program.proto.instruction[idx]
inst_addr = get_instruction_address(self.program.proto, idx)

# The first instruction determines the basic block address
# Save the first instruction to guess the instruction set
if self.addr is None:
self.addr = inst_addr

Expand Down
47 changes: 21 additions & 26 deletions binexport/expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,18 +67,14 @@ def __init__(self,
self.is_addr: bool = False #: whether the value is referring to an address
self.is_data: bool = False #: whether the value is a reference to data

# Expression object in the protobuf structure
self.pb_expr = program.proto.expression[self._idx]

self._parse_protobuf(program, function, instruction)

def __hash__(self) -> int:
return hash(self._idx)

@property
def pb_expr(self) -> BinExport2.Expression:
"""
Returns the expression object in the protobuf structure
"""
return self.program.proto.expression[self._idx]

@property
def type(self) -> ExpressionType:
"""
Expand Down Expand Up @@ -112,46 +108,45 @@ def _parse_protobuf(self,
"""
Low-level expression parser. It populates self._type and self._value
"""
pb_expr = program.proto.expression[self._idx]
if pb_expr.type == BinExport2.Expression.SYMBOL:
self._value = pb_expr.symbol
if self.pb_expr.type == BinExport2.Expression.SYMBOL:
self._value = self.pb_expr.symbol

if pb_expr.symbol in program.fun_names: # It is a function name
if self.pb_expr.symbol in program.fun_names: # It is a function name
self._type = ExpressionType.FUNC_NAME
else: # It is a local symbol (ex: var_, arg_)
self._type = ExpressionType.VAR_NAME

elif pb_expr.type == BinExport2.Expression.IMMEDIATE_INT:
elif self.pb_expr.type == BinExport2.Expression.IMMEDIATE_INT:
self._type = ExpressionType.IMMEDIATE_INT
self._value = to_signed(pb_expr.immediate, program.mask)
self._value = to_signed(self.pb_expr.immediate, program.mask)

if pb_expr.immediate in instruction.data_refs: # Data
if self.pb_expr.immediate in instruction.data_refs: # Data
self.is_addr = True
self.is_data = True
elif (
pb_expr.immediate in program or pb_expr.immediate in function
self.pb_expr.immediate in program or self.pb_expr.immediate in function
): # Address
self.is_addr = True

elif pb_expr.type == BinExport2.Expression.IMMEDIATE_FLOAT:
elif self.pb_expr.type == BinExport2.Expression.IMMEDIATE_FLOAT:
self._type = ExpressionType.IMMEDIATE_FLOAT
self._value = pb_expr.immediate # Cast it to float
self._value = self.pb_expr.immediate # Cast it to float

elif pb_expr.type == BinExport2.Expression.OPERATOR:
elif self.pb_expr.type == BinExport2.Expression.OPERATOR:
self._type = ExpressionType.SYMBOL
self._value = pb_expr.symbol
self._value = self.pb_expr.symbol

elif pb_expr.type == BinExport2.Expression.REGISTER:
elif self.pb_expr.type == BinExport2.Expression.REGISTER:
self._type = ExpressionType.REGISTER
self._value = pb_expr.symbol
self._value = self.pb_expr.symbol

elif pb_expr.type == BinExport2.Expression.SIZE_PREFIX:
elif self.pb_expr.type == BinExport2.Expression.SIZE_PREFIX:
self._type = ExpressionType.SIZE
self._value = self.__sz_lookup[pb_expr.symbol]
self._value = self.__sz_lookup[self.pb_expr.symbol]

elif pb_expr.type == BinExport2.Expression.DEREFERENCE:
elif self.pb_expr.type == BinExport2.Expression.DEREFERENCE:
self._type = ExpressionType.SYMBOL
self._value = pb_expr.symbol
self._value = self.pb_expr.symbol

else:
logging.error(f"Malformed protobuf message. Invalid expression type {pb_expr.type}")
logging.error(f"Malformed protobuf message. Invalid expression type {self.pb_expr.type}")
6 changes: 4 additions & 2 deletions binexport/instruction.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import weakref
from functools import cached_property

from binexport.operand import OperandBinExport
from binexport.types import Addr
Expand All @@ -10,7 +11,7 @@ class InstructionBinExport:
Instruction class. It represents an instruction with its operands.
"""

def __init__(self, program: "ProgramBinExport", function: "FunctionBinExport", addr: Addr, i_idx: int):
def __init__(self, program: weakref.ref["ProgramBinExport"], function: weakref.ref["FunctionBinExport"], addr: Addr, i_idx: int):
"""
:param program: Weak reference to the program
:param function: Weak reference to the function
Expand Down Expand Up @@ -54,10 +55,11 @@ def mnemonic(self) -> str:
"""
return self.program.proto.mnemonic[self.pb_instr.mnemonic_index].name

@property
@cached_property
def operands(self) -> List[OperandBinExport]:
"""
Returns a list of the operands, instantiated dynamically on demand.
The list is cached by default; to erase the cache, delete the attribute.
"""
return [
OperandBinExport(self._program, self._function, weakref.ref(self), op_idx)
Expand Down
1 change: 1 addition & 0 deletions binexport/operand.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ def expressions(self) -> List[ExpressionBinExport]:
"""
Iterates over all the operand expressions in a pre-order manner
(binary operator first).
The list is cached by default; to erase the cache, delete the attribute.
"""

expr_dict = {} # {expression protobuf idx : ExpressionBinExport}
Expand Down