From 53ced812ceef27fe7d12338bc3dc72f582fc7066 Mon Sep 17 00:00:00 2001 From: Farid Zakaria Date: Fri, 1 Sep 2023 19:10:49 -0700 Subject: [PATCH] Add support for `--recursive` (#7) Add support for recursively loading all shared libraries needed by a library. This is useful, for instance, if you want to inspect symbol resolution. * add pytest * added some simple unit tests as scaffolding * updated the README --- README.md | 52 ++++++++++++++++++++++++++++++++++++++- overlay.nix | 3 +++ poetry.lock | 16 ++++++++++-- pyproject.toml | 3 ++- sqlelf/cli.py | 22 ++++++++++++++++- sqlelf/elf/instruction.py | 3 +-- sqlelf/ldd.py | 23 +++++++++++++++++ tests/test_cli.py | 3 +++ tests/test_ldd.py | 8 ++++++ 9 files changed, 126 insertions(+), 7 deletions(-) create mode 100644 sqlelf/ldd.py create mode 100644 tests/test_ldd.py diff --git a/README.md b/README.md index bce94b0..b9969c9 100644 --- a/README.md +++ b/README.md @@ -95,7 +95,10 @@ positional arguments: FILE The ELF file to analyze options: - -h, --help show this help message and exit + -h, --help show this help message and exit + -s SQL, --sql SQL Potential SQL to execute. Omitting this enters the REPL. + --recursive, --no-recursive + Load all shared libraries needed by each file using ldd ``` Note: You may provide directories for `FILE`. Avoid giving too many binaries though since they must all be parsed at startup. 
@@ -139,6 +142,53 @@ path|num_sections /usr/bin/ruby|28 ``` +### Queries + +#### List all symbol resolutions + +```console +❯ sqlelf /usr/bin/ruby --sql "SELECT caller.path as 'caller.path', + callee.path as 'callee.path', + caller.name, + caller.demangled_name +FROM ELF_SYMBOLS caller +INNER JOIN ELF_SYMBOLS callee +ON +caller.name = callee.name AND +caller.path != callee.path AND +caller.imported = TRUE AND +callee.exported = TRUE +LIMIT 25;" +┌──────────────────────────────────────────┬──────────────────────────────────────────┬──────────────────────┬──────────────────────┐ +│ caller.path │ callee.path │ name │ demangled_name │ +│ /usr/bin/ruby │ /lib/x86_64-linux-gnu/libruby-3.1.so.3.1 │ ruby_run_node │ ruby_run_node │ +│ /usr/bin/ruby │ /lib/x86_64-linux-gnu/libruby-3.1.so.3.1 │ ruby_init │ ruby_init │ +│ /usr/bin/ruby │ /lib/x86_64-linux-gnu/libruby-3.1.so.3.1 │ ruby_options │ ruby_options │ +│ /usr/bin/ruby │ /lib/x86_64-linux-gnu/libruby-3.1.so.3.1 │ ruby_sysinit │ ruby_sysinit │ +│ /usr/bin/ruby │ /lib/x86_64-linux-gnu/libc.so.6 │ __stack_chk_fail │ __stack_chk_fail │ +│ /usr/bin/ruby │ /lib/x86_64-linux-gnu/libruby-3.1.so.3.1 │ ruby_init_stack │ ruby_init_stack │ +│ /usr/bin/ruby │ /lib/x86_64-linux-gnu/libc.so.6 │ setlocale │ setlocale │ +│ /usr/bin/ruby │ /lib/x86_64-linux-gnu/libc.so.6 │ __libc_start_main │ __libc_start_main │ +│ /usr/bin/ruby │ /lib/x86_64-linux-gnu/libc.so.6 │ __libc_start_main │ __libc_start_main │ +│ /usr/bin/ruby │ /lib/x86_64-linux-gnu/libc.so.6 │ __cxa_finalize │ __cxa_finalize │ +│ /lib/x86_64-linux-gnu/libruby-3.1.so.3.1 │ /lib/x86_64-linux-gnu/libc.so.6 │ initgroups │ initgroups │ +│ /lib/x86_64-linux-gnu/libruby-3.1.so.3.1 │ /lib/x86_64-linux-gnu/libm.so.6 │ log10 │ log10 │ +│ /lib/x86_64-linux-gnu/libruby-3.1.so.3.1 │ /lib/x86_64-linux-gnu/libc.so.6 │ chmod │ chmod │ +│ /lib/x86_64-linux-gnu/libruby-3.1.so.3.1 │ /lib/x86_64-linux-gnu/libgmp.so.10 │ __gmpz_mul │ __gmpz_mul │ +│ /lib/x86_64-linux-gnu/libruby-3.1.so.3.1 │ 
/lib/x86_64-linux-gnu/libm.so.6 │ lgamma_r │ lgamma_r │ +│ /lib/x86_64-linux-gnu/libruby-3.1.so.3.1 │ /lib/x86_64-linux-gnu/libc.so.6 │ symlink │ symlink │ +│ /lib/x86_64-linux-gnu/libruby-3.1.so.3.1 │ /lib/x86_64-linux-gnu/libc.so.6 │ mprotect │ mprotect │ +│ /lib/x86_64-linux-gnu/libruby-3.1.so.3.1 │ /lib/x86_64-linux-gnu/libc.so.6 │ pipe2 │ pipe2 │ +│ /lib/x86_64-linux-gnu/libruby-3.1.so.3.1 │ /lib/x86_64-linux-gnu/libc.so.6 │ seteuid │ seteuid │ +│ /lib/x86_64-linux-gnu/libruby-3.1.so.3.1 │ /lib/x86_64-linux-gnu/libc.so.6 │ chdir │ chdir │ +│ /lib/x86_64-linux-gnu/libruby-3.1.so.3.1 │ /lib/x86_64-linux-gnu/libc.so.6 │ fileno │ fileno │ +│ /lib/x86_64-linux-gnu/libruby-3.1.so.3.1 │ /lib/x86_64-linux-gnu/libc.so.6 │ dup2 │ dup2 │ +│ /lib/x86_64-linux-gnu/libruby-3.1.so.3.1 │ /lib/x86_64-linux-gnu/libc.so.6 │ pthread_cond_destroy │ pthread_cond_destroy │ +│ /lib/x86_64-linux-gnu/libruby-3.1.so.3.1 │ /lib/x86_64-linux-gnu/libc.so.6 │ pthread_cond_destroy │ pthread_cond_destroy │ +│ /lib/x86_64-linux-gnu/libruby-3.1.so.3.1 │ /lib/x86_64-linux-gnu/libm.so.6 │ atan2 │ atan2 │ +└──────────────────────────────────────────┴──────────────────────────────────────────┴──────────────────────┴──────────────────────┘ +``` + ## Development You must have [Nix](https://nixos.org) installed for development. 
diff --git a/overlay.nix b/overlay.nix index 093b5c8..3a103be 100644 --- a/overlay.nix +++ b/overlay.nix @@ -17,6 +17,9 @@ self: super: { }); poetryOverrides = self: super: { + sh = super.sh.overridePythonAttrs (old: { + buildInputs = (old.buildInputs or [ ]) ++ [ super.poetry ]; + }); apsw = super.apsw.overridePythonAttrs (old: rec { version = "3.43.0.0"; src = super.pkgs.fetchFromGitHub { diff --git a/poetry.lock b/poetry.lock index 169ae17..8a488f7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -440,6 +440,18 @@ docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff", "pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] +[[package]] +name = "sh" +version = "2.0.6" +description = "Python subprocess replacement" +category = "main" +optional = false +python-versions = ">=3.8.1,<4.0" +files = [ + {file = "sh-2.0.6-py3-none-any.whl", hash = "sha256:ced8f2e081a858b66a46ace3703dec243779abbd5a1887ba7e3c34f34da70cd2"}, + {file = "sh-2.0.6.tar.gz", hash = "sha256:9b2998f313f201c777e2c0061f0b1367497097ef13388595be147e2a00bf7ba1"}, +] + [[package]] name = "tomli" version = "2.0.1" @@ -454,5 +466,5 @@ files = [ [metadata] lock-version = "2.0" -python-versions = ">=3.10" -content-hash = "89fac12455f97e1a220b2f47c76549829e90082685010ad7e8af9e33f052a2ea" +python-versions = ">=3.10,<4.0" +content-hash = "9db545297dad604636c6b2de79f3b5a8bc5072ee8a2df2a75aa731ed5de9fb87" diff --git 
a/pyproject.toml b/pyproject.toml index 135166a..580ad23 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ description = "Explore ELF objects through the power of SQL" license = "LICENSE" [tool.poetry.dependencies] -python = ">=3.10" +python = ">=3.10,<4.0" capstone = "^5.0.1" lief = "^0.13.2" apsw = "^3.43.0.0" @@ -16,6 +16,7 @@ apsw = "^3.43.0.0" # ERROR: Could not find a version that satisfies the requirement setuptools<61.0.0,>=60.0.0 (from sqlelf) (from versions: none) # ERROR: No matching distribution found for setuptools<61.0.0,>=60.0.0 setuptools = "*" +sh = "^2.0.6" [tool.poetry.group.dev.dependencies] black = "^23.7.0" diff --git a/sqlelf/cli.py b/sqlelf/cli.py index 8ca9e19..172c38a 100644 --- a/sqlelf/cli.py +++ b/sqlelf/cli.py @@ -1,5 +1,6 @@ import argparse import os +import os.path import sys from functools import reduce @@ -8,6 +9,8 @@ import apsw.shell import lief +from sqlelf import ldd + from .elf import dynamic, header, instruction, section, strings, symbol @@ -30,6 +33,11 @@ def start(args=sys.argv[1:], stdin=sys.stdin): parser.add_argument( "-s", "--sql", help="Potential SQL to execute. Omitting this enters the REPL." ) + parser.add_argument( + "--recursive", + action=argparse.BooleanOptionalAction, + help="Load all shared libraries needed by each file using ldd", + ) args = parser.parse_args(args) @@ -44,10 +52,22 @@ def start(args=sys.argv[1:], stdin=sys.stdin): ), ) # Filter the list of filenames to those that are ELF files only - filenames = list(filter(lambda f: lief.is_elf(f), filenames)) + filenames = list(filter(lambda f: os.path.isfile(f) and lief.is_elf(f), filenames)) + + # If none of the inputs are valid files, simply return + if len(filenames) == 0: + return binaries: list[lief.Binary] = [lief.parse(filename) for filename in filenames] + # If the recursive option is specified, load the shared libraries + # the binary would load as well. 
+ if args.recursive: + shared_libraries = [ldd.libraries(binary).values() for binary in binaries] + binaries = binaries + [ + lief.parse(library) for sub_list in shared_libraries for library in sub_list + ] + # forward sqlite logs to logging module apsw.bestpractice.apply(apsw.bestpractice.recommended) diff --git a/sqlelf/elf/instruction.py b/sqlelf/elf/instruction.py index caa3183..7637e17 100644 --- a/sqlelf/elf/instruction.py +++ b/sqlelf/elf/instruction.py @@ -7,8 +7,7 @@ import apsw.ext # TODO(fzkakaria): https://github.com/capstone-engine/capstone/issues/1993 -# pyright: reportMissingTypeStubs=false -import capstone +import capstone # pyright: ignore import lief diff --git a/sqlelf/ldd.py b/sqlelf/ldd.py new file mode 100644 index 0000000..ebc2077 --- /dev/null +++ b/sqlelf/ldd.py @@ -0,0 +1,23 @@ +import re +from collections import OrderedDict +from typing import Dict + +import lief +from sh import Command # pyright: ignore + + +def libraries(binary: lief.Binary) -> Dict[str, str]: + """Use the interpreter in a binary to determine the path of each linked library""" + interpreter = Command(binary.interpreter) # pyright: ignore + resolution = interpreter("--list", binary.name) + result = OrderedDict() + # TODO: Figure out why `--list` and `ldd` produce different outcomes + # specifically for the interpreter. 
+ # https://gist.github.com/fzakaria/3dc42a039401598d8e0fdbc57f5e7eae + for line in resolution.splitlines(): # pyright: ignore + m = re.match(r"\s*([^ ]+) => ([^ ]+)", line) + if not m: + continue + soname, lib = m.group(1), m.group(2) + result[soname] = lib + return result diff --git a/tests/test_cli.py b/tests/test_cli.py index 1a08a21..1f18d77 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -14,6 +14,9 @@ def test_cli_single_file_arguments(): stdin = StringIO("") cli.start(["/bin/ls"], stdin) +def test_cli_single_non_existent_file_arguments(): + cli.start(["does_not_exist"]) + def test_cli_prompt_single_file_arguments(): stdin = StringIO(".exit 56\n") with pytest.raises(SystemExit) as err: diff --git a/tests/test_ldd.py b/tests/test_ldd.py new file mode 100644 index 0000000..0255719 --- /dev/null +++ b/tests/test_ldd.py @@ -0,0 +1,8 @@ +from sqlelf import ldd +import lief + +def test_simple_binary(): + binary = lief.parse("/bin/ls") + result = ldd.libraries(binary) + print(result) + assert len(result) > 0 \ No newline at end of file