From 30482bb60ebb29670bc5be77d069663ca55cc778 Mon Sep 17 00:00:00 2001 From: Riccardo Mori Date: Wed, 30 Aug 2023 17:47:52 +0200 Subject: [PATCH 1/6] Update gitignore --- .gitignore | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.gitignore b/.gitignore index c1f321c..f58a61f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,9 @@ +### Python template +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + build/ *.egg-info/ /venv/ From b142eabcc638bd59f0838e6397bdf64cb33d4005 Mon Sep 17 00:00:00 2001 From: Riccardo Mori Date: Wed, 30 Aug 2023 17:48:44 +0200 Subject: [PATCH 2/6] Move to pyproject.toml and add python-magic-bin for windows --- pyproject.toml | 32 +++++++++++++++++++ {binexport => src/binexport}/__init__.py | 0 {binexport => src/binexport}/basic_block.py | 0 {binexport => src/binexport}/binexport2.proto | 0 .../binexport}/binexport2_pb2.py | 0 {binexport => src/binexport}/expression.py | 0 {binexport => src/binexport}/function.py | 0 {binexport => src/binexport}/instruction.py | 0 {binexport => src/binexport}/operand.py | 0 {binexport => src/binexport}/program.py | 0 {binexport => src/binexport}/types.py | 0 {binexport => src/binexport}/utils.py | 0 12 files changed, 32 insertions(+) rename {binexport => src/binexport}/__init__.py (100%) rename {binexport => src/binexport}/basic_block.py (100%) rename {binexport => src/binexport}/binexport2.proto (100%) rename {binexport => src/binexport}/binexport2_pb2.py (100%) rename {binexport => src/binexport}/expression.py (100%) rename {binexport => src/binexport}/function.py (100%) rename {binexport => src/binexport}/instruction.py (100%) rename {binexport => src/binexport}/operand.py (100%) rename {binexport => src/binexport}/program.py (100%) rename {binexport => src/binexport}/types.py (100%) rename {binexport => src/binexport}/utils.py (100%) diff --git a/pyproject.toml b/pyproject.toml index e31cd35..1a1f622 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,6 +2,38 @@ 
requires = ["setuptools"] build-backend = "setuptools.build_meta" +[project] +name = "python-binexport" +version = "0.1.0" +description = "Python wrapper to manipulate binexport files (protobuf)" +readme = { file = "README.md", content-type = "text/markdown" } +authors = [{ name = "Quarkslab", email = "diffing@quarkslab.com" }] +dependencies = [ + "python-magic", + "python-magic-bin; os_name=='nt'", + "click", + "protobuf", + "networkx", + "enum_tools", +] +classifiers = [ + 'Topic :: Security', + 'Environment :: Console', + 'Operating System :: OS Independent', +] + +[project.urls] +Homepage = "https://github.com/quarkslab/python-binexport" +Repository = "https://github.com/quarkslab/python-binexport" +Documentation = "https://quarkslab.github.io/diffing-portal/exporter/binexport.html#python-binexport" +"Bug Tracker" = "https://github.com/quarkslab/python-binexport/issues" + +[project.optional-dependencies] +idascript = ["idascript"] + +[tools.setuptools] +script-files = ["bin/binexporter"] + [tool.black] line-length = 100 target-version = ['py310'] diff --git a/binexport/__init__.py b/src/binexport/__init__.py similarity index 100% rename from binexport/__init__.py rename to src/binexport/__init__.py diff --git a/binexport/basic_block.py b/src/binexport/basic_block.py similarity index 100% rename from binexport/basic_block.py rename to src/binexport/basic_block.py diff --git a/binexport/binexport2.proto b/src/binexport/binexport2.proto similarity index 100% rename from binexport/binexport2.proto rename to src/binexport/binexport2.proto diff --git a/binexport/binexport2_pb2.py b/src/binexport/binexport2_pb2.py similarity index 100% rename from binexport/binexport2_pb2.py rename to src/binexport/binexport2_pb2.py diff --git a/binexport/expression.py b/src/binexport/expression.py similarity index 100% rename from binexport/expression.py rename to src/binexport/expression.py diff --git a/binexport/function.py b/src/binexport/function.py similarity index 100% rename 
from binexport/function.py rename to src/binexport/function.py diff --git a/binexport/instruction.py b/src/binexport/instruction.py similarity index 100% rename from binexport/instruction.py rename to src/binexport/instruction.py diff --git a/binexport/operand.py b/src/binexport/operand.py similarity index 100% rename from binexport/operand.py rename to src/binexport/operand.py diff --git a/binexport/program.py b/src/binexport/program.py similarity index 100% rename from binexport/program.py rename to src/binexport/program.py diff --git a/binexport/types.py b/src/binexport/types.py similarity index 100% rename from binexport/types.py rename to src/binexport/types.py diff --git a/binexport/utils.py b/src/binexport/utils.py similarity index 100% rename from binexport/utils.py rename to src/binexport/utils.py From ffde431f4be72a5e865e7a0429f3a55f62c40a7e Mon Sep 17 00:00:00 2001 From: Riccardo Mori Date: Fri, 17 Nov 2023 14:46:37 +0100 Subject: [PATCH 3/6] Make idascript a required dependency --- pyproject.toml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1a1f622..264f976 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "python-binexport" -version = "0.1.0" +version = "0.2.1" description = "Python wrapper to manipulate binexport files (protobuf)" readme = { file = "README.md", content-type = "text/markdown" } authors = [{ name = "Quarkslab", email = "diffing@quarkslab.com" }] @@ -15,6 +15,7 @@ dependencies = [ "protobuf", "networkx", "enum_tools", + "idascript", ] classifiers = [ 'Topic :: Security', @@ -28,9 +29,6 @@ Repository = "https://github.com/quarkslab/python-binexport" Documentation = "https://quarkslab.github.io/diffing-portal/exporter/binexport.html#python-binexport" "Bug Tracker" = "https://github.com/quarkslab/python-binexport/issues" -[project.optional-dependencies] -idascript = ["idascript"] - [tools.setuptools] 
script-files = ["bin/binexporter"] From 37bbb3ec638000df90e7bb5df415265095ee08c1 Mon Sep 17 00:00:00 2001 From: Riccardo Mori Date: Fri, 17 Nov 2023 15:12:45 +0100 Subject: [PATCH 4/6] Fix the CI for building sdist --- .github/workflows/release.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 34d9b16..b5fccaf 100755 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -31,7 +31,9 @@ jobs: with: python-version: '3.10' - name: Build sdist - run: python setup.py sdist + run: | + python -m pip install -U pip build + python -m build --sdist - uses: actions/upload-artifact@v3 with: name: artifact From 1ce07f01364cf2cfc5e928f94037ba5c75cb2897 Mon Sep 17 00:00:00 2001 From: Riccardo Mori Date: Fri, 17 Nov 2023 15:05:25 +0100 Subject: [PATCH 5/6] Cache objects below basic blocks. Functions can preload blocks --- src/binexport/basic_block.py | 21 +++---------- src/binexport/function.py | 60 +++++++++++++++++++++++++++--------- src/binexport/instruction.py | 20 +++++------- src/binexport/operand.py | 20 +++--------- 4 files changed, 61 insertions(+), 60 deletions(-) diff --git a/src/binexport/basic_block.py b/src/binexport/basic_block.py index 9e8bd5d..6cdae85 100644 --- a/src/binexport/basic_block.py +++ b/src/binexport/basic_block.py @@ -11,6 +11,7 @@ from .function import FunctionBinExport from .binexport2_pb2 import BinExport2 + class BasicBlockBinExport: """ Basic block class. @@ -81,14 +82,12 @@ def function(self) -> "FunctionBinExport": """ return self._function() - @property - def uncached_instructions(self) -> dict[Addr, InstructionBinExport]: + @cached_property + def instructions(self) -> dict[Addr, InstructionBinExport]: """ Returns a dict which is used to reference all the instructions in this basic block by their address. - The object returned is not cached, calling this function multiple times will - create the same object multiple times. 
If you want to cache the object you - should use `BasicBlockBinExport.instructions`. + The object returned is by default cached, to erase the cache delete the attribute. :return: dictionary of addresses to instructions """ @@ -108,15 +107,3 @@ def uncached_instructions(self) -> dict[Addr, InstructionBinExport]: ) return instructions - - @cached_property - def instructions(self) -> dict[Addr, InstructionBinExport]: - """ - Returns a dict which is used to reference all the instructions in this basic - block by their address. - The object returned is by default cached, to erase the cache delete the attribute. - - :return: dictionary of addresses to instructions - """ - - return self.uncached_instructions diff --git a/src/binexport/function.py b/src/binexport/function.py index 667e868..7907d2a 100644 --- a/src/binexport/function.py +++ b/src/binexport/function.py @@ -49,6 +49,8 @@ def __init__( self._name = None # Set by the Program constructor self._program = program self._pb_fun = pb_fun + self._enable_unloading = False + self._basic_blocks = None if is_import: if self.addr is None: @@ -70,6 +72,29 @@ def __hash__(self) -> int: def __repr__(self) -> str: return "<%s: 0x%x>" % (type(self).__name__, self.addr) + def __enter__(self) -> None: + """Preload basic blocks and don't deallocate them until __exit__ is called""" + + self._enable_unloading = False + self.preload() + + def __exit__(self, exc_type, exc_value, traceback) -> None: + """Deallocate all the basic blocks""" + + self._enable_unloading = True + self.unload() + + def preload(self) -> None: + """Load in memory all the basic blocks""" + + self._basic_blocks = self.blocks + + def unload(self) -> None: + """Unload from memory all the basic blocks""" + + if self._enable_unloading: + self._basic_blocks = None + def items(self) -> abc.ItemsView[Addr, "BasicBlockBinExport"]: """ Each function is associated to a dictionary with key-value @@ -117,17 +142,34 @@ def program(self) -> "ProgramBinExport": return 
self._program() @property - def uncached_blocks(self) -> dict[Addr, BasicBlockBinExport]: + def blocks(self) -> Dict[Addr, BasicBlockBinExport]: """ Returns a dict which is used to reference all basic blocks by their address. Calling this function will also load the CFG. - The object returned is not cached, calling this function multiple times will + By default the object returned is not cached, calling this function multiple times will create the same object multiple times. If you want to cache the object you - should use `FunctionBinExport.blocks`. + should use the context manager of the function or call `FunctionBinExport.preload`. + Ex: + + .. code-block:: python + :linenos: + + # func: FunctionBinExport + with func: # Loading all the basic blocks + for bb_addr, bb in func.blocks.items(): # Blocks are already loaded + pass + # The blocks are still loaded + for bb_addr, bb in func.blocks.items(): + pass + # here the blocks have been unloaded :return: dictionary of addresses to basic blocks """ + # Check if the blocks are already loaded + if self._basic_blocks is not None: + return self._basic_blocks + # Fast return if it is a imported function if self.is_import(): if self._graph is None: @@ -177,18 +219,6 @@ def uncached_blocks(self) -> dict[Addr, BasicBlockBinExport]: return bblocks - @cached_property - def blocks(self) -> Dict[Addr, BasicBlockBinExport]: - """ - Returns a dict which is used to reference all basic blocks by their address. - Calling this function will also load the CFG. - The dict is by default cached, to erase the cache delete the attribute. 
- - :return: dictionary of addresses to basic blocks - """ - - return self.uncached_blocks - @property def graph(self) -> networkx.DiGraph: """ diff --git a/src/binexport/instruction.py b/src/binexport/instruction.py index c9214f6..9f10361 100644 --- a/src/binexport/instruction.py +++ b/src/binexport/instruction.py @@ -10,6 +10,7 @@ from .function import FunctionBinExport from .binexport2_pb2 import BinExport2 + class InstructionBinExport: """ Instruction class. It represents an instruction with its operands. @@ -65,23 +66,16 @@ def mnemonic(self) -> str: """ return self.program.proto.mnemonic[self.pb_instr.mnemonic_index].name - @property - def uncached_operands(self) -> list[OperandBinExport]: + @cached_property + def operands(self) -> List[OperandBinExport]: """ Returns a list of the operands instanciated dynamically on-demand. - The object returned is not cached, calling this function multiple times will - create the same object multiple times. If you want to cache the object you - should use `InstructionBinExport.operands`. + The list is cached by default, to erase the cache delete the attribute. + + :return: list of operands """ + return [ OperandBinExport(self._program, self._function, weakref.ref(self), op_idx) for op_idx in self.pb_instr.operand_index ] - - @cached_property - def operands(self) -> List[OperandBinExport]: - """ - Returns a list of the operands instanciated dynamically on-demand. - The list is cached by default, to erase the cache delete the attribute. 
- """ - return self.uncached_operands diff --git a/src/binexport/operand.py b/src/binexport/operand.py index 9e4dcd3..b35a6b5 100644 --- a/src/binexport/operand.py +++ b/src/binexport/operand.py @@ -114,14 +114,14 @@ def pb_operand(self) -> "BinExport2.Operand": """ return self.program.proto.operand[self._idx] - @property - def uncached_expressions(self) -> List[ExpressionBinExport]: + @cached_property + def expressions(self) -> List[ExpressionBinExport]: """ Iterates over all the operand expression in a pre-order manner (binary operator first). - The object returned is not cached, calling this function multiple times will - create the same object multiple times. If you want to cache the object you - should use `OperandBinExport.expressions`. + The list is cached by default, to erase the cache delete the attribute + + :return: list of expressions """ expr_dict = {} # {expression protobuf idx : ExpressionBinExport} @@ -133,13 +133,3 @@ def uncached_expressions(self) -> List[ExpressionBinExport]: self.program, self.function, self.instruction, exp_idx, parent ) return list(expr_dict.values()) - - @cached_property - def expressions(self) -> List[ExpressionBinExport]: - """ - Iterates over all the operand expression in a pre-order manner - (binary operator first). 
- The list is cached by default, to erase the cache delete the attribute - """ - - return self.uncached_expressions From 689c2e6d2612f2bdc2f4756d201d56a08cac2bc5 Mon Sep 17 00:00:00 2001 From: Riccardo Mori Date: Fri, 17 Nov 2023 15:18:20 +0100 Subject: [PATCH 6/6] Update README to reflect new API --- README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 6f1eb13..b93e87e 100644 --- a/README.md +++ b/README.md @@ -46,11 +46,12 @@ from binexport import ProgramBinExport p = ProgramBinExport("myprogram.BinExport") for fun_addr, fun in p.items(): - for bb_addr, bb in fun.items(): - for inst_addr, inst in bb.items(): - for operand in inst.operands: - for exp in operand.expressions: - pass # Do whatever at such deep level + with fun: # Preload all the basic blocks + for bb_addr, bb in fun.items(): + for inst_addr, inst in bb.instructions.items(): + for operand in inst.operands: + for exp in operand.expressions: + pass # Do whatever at such deep level ``` Obviously ``ProgramBinExport``, ``FunctionBinExport``, ``InstructionBinExport`` and ``OperandBinExport``