Skip to content

Commit

Permalink
bump lief to most recent main (#8)
Browse files Browse the repository at this point in the history
* bump lief to most recent main

* bump lief to most recent main
  (3414ded8cdcbd9705f7871c66c212b15cd74ea69)
* created new nix directory and moved overlay there
* had to copy the lief derivation from nixpkgs for now since lief changed how
  it is built.

* Type annotation fixes

Lot of changes to the types in the recent lief.
* removed name from Binary
* no name for section/symbol of type str
  (lief-project/LIEF#965)
* created a proxy class Binary
* added pyright to check tests folder
  • Loading branch information
fzakaria authored Sep 11, 2023
1 parent 96f0c82 commit c5532dd
Show file tree
Hide file tree
Showing 15 changed files with 154 additions and 59 deletions.
2 changes: 1 addition & 1 deletion flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
let
pkgs = import nixpkgs {
inherit system;
overlays = [ poetry2nix.overlay (import ./overlay.nix) ];
overlays = [ poetry2nix.overlay (import ./nix/overlay.nix) ];
};
in
{
Expand Down
2 changes: 1 addition & 1 deletion derivation.nix → nix/derivation.nix
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{ poetry2nix, poetryOverrides }:
poetry2nix.mkPoetryApplication {
projectDir = ./.;
projectDir = ../.;
overrides = poetry2nix.overrides.withDefaults poetryOverrides;
}
44 changes: 44 additions & 0 deletions nix/lief.nix
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# This is an unreleased version of LIEF that fixes a bug when generating GNU notes
# https://github.com/lief-project/LIEF/commit/3414ded8cdcbd9705f7871c66c212b15cd74ea69
# Nixpkgs derivation was updated to change how lief was built since it no longer has setup.py
# in the root of the directory.
# For now, we copy the derivation until it's merged into nixpkgs we are tracking.
# https://github.com/NixOS/nixpkgs/pull/251414
# Builds LIEF at a pinned upstream commit with CMake/Ninja, then builds and
# installs the Python bindings found under api/python into a separate `py`
# output.
#
# Arguments:
#   fetchFromGitHub - fetcher used to obtain the pinned LIEF source
#   python          - interpreter the bindings are built with
#   stdenv          - provides mkDerivation
#   cmake, ninja    - native build tools
{ fetchFromGitHub, python, stdenv, cmake, ninja }:
let
  # Interpreter environment used only to drive api/python/setup.py.
  pyEnv = python.withPackages (ps: [ ps.setuptools ps.tomli ps.pip ]);
in
stdenv.mkDerivation rec {
  pname = "lief";
  version = "0.14.0-3414ded";
  src = fetchFromGitHub {
    owner = "lief-project";
    repo = "LIEF";
    rev = "3414ded8cdcbd9705f7871c66c212b15cd74ea69";
    sha256 = "sha256-GJTj4w8HhAiC2bQAjEIqPw9feaOHL4fmAfLACioW0Q0=";
  };
  outputs = [ "out" "py" ];

  nativeBuildInputs = [
    cmake
    ninja
  ];

  # Not a propagatedBuildInput because only the $py output needs it; $out is
  # just the library itself (e.g. C/C++ headers).
  buildInputs = [
    python
  ];

  # The Python bindings live in api/python of the unpacked source. Locate the
  # source tree through $NIX_BUILD_TOP/$sourceRoot instead of a hard-coded
  # /build/source, which only exists when the build directory is /build.
  postBuild = ''
    pushd "$NIX_BUILD_TOP/$sourceRoot/api/python"
    ${pyEnv.interpreter} setup.py build --parallel=$NIX_BUILD_CORES
    popd
  '';

  # Install the already-built bindings into the dedicated $py output.
  postInstall = ''
    pushd "$NIX_BUILD_TOP/$sourceRoot/api/python"
    ${pyEnv.interpreter} setup.py install --skip-build --root=/ --prefix=$py
    popd
  '';
}
8 changes: 7 additions & 1 deletion overlay.nix → nix/overlay.nix
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ self: super: {
sqlelf = self.callPackage ./derivation.nix { };

sqlelf-env = self.poetry2nix.mkPoetryEnv {
projectDir = ./.;
projectDir = ../.;
overrides = self.poetry2nix.overrides.withDefaults self.poetryOverrides;
# This file lives in nix/, so the package sources are one directory up
# (same reason projectDir points at the parent directory).
editablePackageSources = { sqlelf = ../sqlelf; };
};
Expand All @@ -16,10 +16,16 @@ self: super: {
};
});


lief-3414ded = self.callPackage ./lief.nix { python = self.python3; };

poetryOverrides = self: super: {
lief = super.toPythonModule super.pkgs.lief-3414ded.py;

sh = super.sh.overridePythonAttrs (old: {
buildInputs = (old.buildInputs or [ ]) ++ [ super.poetry ];
});

apsw = super.apsw.overridePythonAttrs (old: rec {
version = "3.43.0.0";
src = super.pkgs.fetchFromGitHub {
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ license = "LICENSE"
[tool.poetry.dependencies]
python = ">=3.10,<4.0"
capstone = "^5.0.1"
lief = "^0.13.2"
lief = ">=0.13.2"
apsw = "^3.43.0.0"
# TODO(fzakaria): Would love to specify this as an exact version
# but I was getting weird failures with `nix build`
Expand Down Expand Up @@ -40,7 +40,7 @@ profile = "black"
addopts = ""

[tool.pyright]
include = ["sqlelf"]
include = ["sqlelf", "tests"]
exclude = ["**/__pycache__"]

reportMissingImports = true
Expand Down
13 changes: 7 additions & 6 deletions sqlelf/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@
import apsw
import apsw.bestpractice
import apsw.shell
import lief

from sqlelf import ldd

from .elf import dynamic, header, instruction, section, strings, symbol
from sqlelf.elf import dynamic, header, instruction, section, strings, symbol
from sqlelf.elf.binary import Binary


def start(args=sys.argv[1:], stdin=sys.stdin):
Expand Down Expand Up @@ -55,13 +54,15 @@ def start(args=sys.argv[1:], stdin=sys.stdin):
),
)
# Filter the list of filenames to those that are ELF files only
filenames = list(filter(lambda f: os.path.isfile(f) and lief.is_elf(f), filenames))
filenames = list(
filter(lambda f: os.path.isfile(f) and Binary.is_elf(f), filenames)
)

# If none of the inputs are valid files, simply return
if len(filenames) == 0:
return

binaries: list[lief.Binary] = [lief.parse(filename) for filename in filenames]
binaries: list[Binary] = [Binary(filename) for filename in filenames]

# If the recursive option is specified, load the shared libraries
# the binary would load as well.
Expand All @@ -76,7 +77,7 @@ def start(args=sys.argv[1:], stdin=sys.stdin):
for library in sub_list
]
)
binaries = binaries + [lief.parse(library) for library in shared_libraries]
binaries = binaries + [Binary(library) for library in shared_libraries]

# forward sqlite logs to logging module
apsw.bestpractice.apply(apsw.bestpractice.recommended)
Expand Down
33 changes: 33 additions & 0 deletions sqlelf/elf/binary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# pyright: strict
from typing import TYPE_CHECKING, Any

import lief

# Let's make sure type checking works for this proxy class
# https://stackoverflow.com/questions/71365594/how-to-make-a-proxy-object-with-typing-as-underlying-object-in-python
if TYPE_CHECKING:
base = lief.ELF.Binary
else:
base = object


class Binary(base):
    """Proxy the lief.Binary object to add a path attribute.

    As of https://github.com/lief-project/LIEF/issues/839 the name
    attribute in lief.Binary was removed. Rather than passing around
    a tuple, this proxy stores the path it was given and forwards every
    other attribute lookup to the object returned by ``lief.parse``.

    Args:
        path: Filesystem path handed to ``lief.parse``.
    """

    def __init__(self, path: str) -> None:
        # Path the binary was loaded from; exposed in place of the old `name`.
        self.path = path
        # Stored under the name-mangled attribute `_Binary__binary`.
        self.__binary = lief.parse(path)

    if not TYPE_CHECKING:

        def __getattr__(self, attr: str) -> Any:
            """Forward lookups that failed on the proxy to the parsed binary.

            Raises:
                AttributeError: if the wrapped binary has not been set yet or
                    does not provide ``attr``.
            """
            # __getattr__ is only invoked after normal lookup fails. If the
            # wrapped object itself is missing (e.g. copy/pickle probing an
            # instance created without __init__), reading self.__binary below
            # would re-enter this method forever, so fail fast instead.
            if attr == "_Binary__binary":
                raise AttributeError(attr)
            return getattr(self.__binary, attr)

    @staticmethod
    def is_elf(path: str) -> bool:
        """Return whether ``lief.is_elf`` reports ``path`` as an ELF file."""
        return lief.is_elf(path)
17 changes: 11 additions & 6 deletions sqlelf/elf/dynamic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,29 @@

import apsw
import apsw.ext
import lief

from sqlelf.elf.binary import Binary


# This is effectively the .dynamic section but it is elevated as a table here
# since it is widely used and can benefit from simpler table access.
def elf_dynamic_entries(binaries: list[lief.Binary]):
def elf_dynamic_entries(binaries: list[Binary]):
def generator() -> Iterator[dict[str, Any]]:
for binary in binaries:
# super important that these accessors are pulled out of the tight loop
# as they can be costly
binary_name = binary.name
for entry in binary.dynamic_entries: # pyright: ignore
yield {"path": binary_name, "tag": entry.tag.name, "value": entry.value}
binary_path = binary.path
for entry in binary.dynamic_entries:
yield {
"path": binary_path,
"tag": entry.tag.__name__,
"value": entry.value,
}

return generator


def register(connection: apsw.Connection, binaries: list[lief.Binary]):
def register(connection: apsw.Connection, binaries: list[Binary]):
generator = elf_dynamic_entries(binaries)
# setup columns and access by providing an example of the first entry returned
generator.columns, generator.column_access = apsw.ext.get_column_names(
Expand Down
18 changes: 8 additions & 10 deletions sqlelf/elf/header.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,26 @@
# Without this Python was complaining
from __future__ import annotations

from typing import Any, Iterator

import apsw
import apsw.ext
import lief

from sqlelf.elf.binary import Binary


def elf_headers(binaries: list[lief.Binary]):
def elf_headers(binaries: list[Binary]):
def generator() -> Iterator[dict[str, Any]]:
for binary in binaries:
yield {
"path": binary.name,
"type": binary.header.file_type.name,
"machine": binary.header.machine_type.name,
"version": binary.header.identity_version.name,
"path": binary.path,
"type": binary.header.file_type.__name__,
"machine": binary.header.machine_type.__name__,
"version": binary.header.identity_version.__name__,
"entry": binary.header.entrypoint,
}

return generator


def register(connection: apsw.Connection, binaries: list[lief.Binary]):
def register(connection: apsw.Connection, binaries: list[Binary]):
generator = elf_headers(binaries)
# setup columns and access by providing an example of the first entry returned
generator.columns, generator.column_access = apsw.ext.get_column_names(
Expand Down
18 changes: 10 additions & 8 deletions sqlelf/elf/instruction.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,15 @@
import capstone # pyright: ignore
import lief

from sqlelf.elf.binary import Binary

def elf_instructions(binaries: list[lief.Binary]):

def elf_instructions(binaries: list[Binary]):
def generator() -> Iterator[dict[str, Any]]:
for binary in binaries:
# super important that these accessors are pulled out of the tight loop
# as they can be costly
binary_name = binary.name
binary_path = binary.path

for section in binary.sections:
if section.has(lief.ELF.SECTION_FLAGS.EXECINSTR):
Expand All @@ -34,7 +36,7 @@ def generator() -> Iterator[dict[str, Any]]:
data, section.virtual_address
):
yield {
"path": binary_name,
"path": binary_path,
"section": section_name,
"mnemonic": mnemonic,
"address": address,
Expand All @@ -44,19 +46,19 @@ def generator() -> Iterator[dict[str, Any]]:
return generator


def mode(binary: lief.Binary) -> int:
def mode(binary: Binary) -> int:
if binary.header.identity_class == lief.ELF.ELF_CLASS.CLASS64:
return capstone.CS_MODE_64
raise Exception(f"Unknown mode for {binary.name}")
raise Exception(f"Unknown mode for {binary.path}")


def arch(binary: lief.Binary) -> int:
def arch(binary: Binary) -> int:
if binary.header.machine_type == lief.ELF.ARCH.x86_64:
return capstone.CS_ARCH_X86
raise Exception(f"Unknown machine type for {binary.name}")
raise Exception(f"Unknown machine type for {binary.path}")


def register(connection: apsw.Connection, binaries: list[lief.Binary]):
def register(connection: apsw.Connection, binaries: list[Binary]):
generator = elf_instructions(binaries)
# setup columns and access by providing an example of the first entry returned
generator.columns, generator.column_access = apsw.ext.get_column_names(
Expand Down
13 changes: 7 additions & 6 deletions sqlelf/elf/section.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,23 @@

import apsw
import apsw.ext
import lief

from sqlelf.elf.binary import Binary

def elf_sections(binaries: list[lief.Binary]):

def elf_sections(binaries: list[Binary]):
def generator() -> Iterator[dict[str, Any]]:
for binary in binaries:
# super important that these accessors are pulled out of the tight loop
# as they can be costly
binary_name = binary.name
binary_path = binary.path
for section in binary.sections:
yield {
"path": binary_name,
"path": binary_path,
"name": section.name,
"offset": section.offset,
"size": section.size,
"type": section.type.name,
"type": section.type.__name__,
"content": bytes(section.content),
}

Expand All @@ -33,7 +34,7 @@ def section_name(name: str | None) -> str | None:
return name


def register(connection: apsw.Connection, binaries: list[lief.Binary]):
def register(connection: apsw.Connection, binaries: list[Binary]):
generator = elf_sections(binaries)
# setup columns and access by providing an example of the first entry returned
generator.columns, generator.column_access = apsw.ext.get_column_names(
Expand Down
10 changes: 6 additions & 4 deletions sqlelf/elf/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
import apsw.ext
import lief

from sqlelf.elf.binary import Binary

def elf_strings(binaries: list[lief.Binary]):

def elf_strings(binaries: list[Binary]):
def generator() -> Iterator[dict[str, Any]]:
for binary in binaries:
strtabs = [
Expand All @@ -18,19 +20,19 @@ def generator() -> Iterator[dict[str, Any]]:
]
# super important that these accessors are pulled out of the tight loop
# as they can be costly
binary_name = binary.name
binary_path = binary.path
for strtab in strtabs:
# The first byte is always the null byte in the STRTAB
# Python also treats the final null in the string by creating
# an empty item so we chop it off.
# https://stackoverflow.com/a/18970869
for string in str(strtab.content[1:-1], "utf-8").split("\x00"):
yield {"path": binary_name, "section": strtab.name, "value": string}
yield {"path": binary_path, "section": strtab.name, "value": string}

return generator


def register(connection: apsw.Connection, binaries: list[lief.Binary]):
def register(connection: apsw.Connection, binaries: list[Binary]):
generator = elf_strings(binaries)
# setup columns and access by providing an example of the first entry returned
generator.columns, generator.column_access = apsw.ext.get_column_names(
Expand Down
Loading

0 comments on commit c5532dd

Please sign in to comment.