Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add pylint support #16

Merged
merged 6 commits into from
Jan 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
# TODO:
# * Semmle (lgtm.com)
# * Pylint

name: CI
on:
push:
Expand Down Expand Up @@ -41,8 +37,10 @@ jobs:
run: |
python3 -m pip install pipenv
pipenv install --dev
- name: Linting
- name: flake8
run: pipenv run flake8
- name: pylint
run: pipenv run pylint src
Coverage:
needs: [Lint, Baseline]
runs-on: ubuntu-latest
Expand Down
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ ipdb = "*"
flake8 = "*"
pytest = "*"
pytest-cov = "*"
pylint = "*"
11 changes: 11 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,14 @@ where = src

[flake8]
max-line-length = 120
exclude = build

[pylint.]
max-line-length = 120
check-quote-consistency = yes
logging-format-style = new
expected-line-ending-format = LF
include-naming-hint = yes
ignored-modules = gdb # Ignore because of the GDB integration
notes= # disable warnings for TODO, FIXME etc.
disable=missing-function-docstring
4 changes: 4 additions & 0 deletions src/asm2cfg/__main__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""
Allow this module to be executed from the command line with python -m src.asm2cfg
from the root of the project.
"""

from . import command_line

Expand Down
64 changes: 37 additions & 27 deletions src/asm2cfg/asm2cfg.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,27 @@
"""
Module containing main building blocks to parse assembly and draw CFGs.
"""

import re
import sys
import tempfile

from graphviz import Digraph


def escape(instruction):
    """
    Return the given instruction with every character that the dot graph
    uses for its own purposes (<, >, |, { and }) prefixed by a backslash,
    so the instruction text is displayed as written.
    """
    special_characters = '<>|{}'
    # One translation pass; each special character maps to '\' + itself.
    escape_table = str.maketrans({char: '\\' + char for char in special_characters})
    return instruction.translate(escape_table)


class BasicBlock:
"""
Class to represent a node in CFG with straight lines of code without jump
Expand All @@ -20,7 +38,7 @@ def add_instruction(self, instruction):
"""
Add instruction to this block.
"""
self.instructions.append(self._escape(instruction))
self.instructions.append(escape(instruction))

def add_jump_edge(self, basic_block_key):
"""
Expand Down Expand Up @@ -61,14 +79,6 @@ def __str__(self):
def __repr__(self):
return '\n'.join(self.instructions)

def _escape(self, instruction):
instruction = instruction.replace('<', r'\<')
instruction = instruction.replace('>', r'\>')
instruction = instruction.replace('|', r'\|')
instruction = instruction.replace('{', r'\{')
instruction = instruction.replace('}', r'\}')
return instruction


def print_assembly(basic_blocks):
"""
Expand All @@ -79,15 +89,16 @@ def print_assembly(basic_blocks):


def read_lines(file_path):
""" Read lines from the file and return them as a list. """
lines = []
with open(file_path, 'r') as asm_file:
with open(file_path, 'r', encoding='utf8') as asm_file:
lines = asm_file.readlines()
return lines


# Common regexes
hex_pattern = r'[0-9a-fA-F]+'
hex_long_pattern = r'(?:0x0*)?' + hex_pattern
HEX_PATTERN = r'[0-9a-fA-F]+'
HEX_LONG_PATTERN = r'(?:0x0*)?' + HEX_PATTERN


def get_stripped_and_function_name(line):
Expand All @@ -102,12 +113,12 @@ def get_stripped_and_function_name(line):
'Dump of assembler code from 0x555555555faf to 0x555555557008:'
"""
function_name_pattern = re.compile(r'function (\w+):$')
memory_range_pattern = re.compile(fr'from ({hex_long_pattern}) to ({hex_long_pattern}):$')
memory_range_pattern = re.compile(fr'from ({HEX_LONG_PATTERN}) to ({HEX_LONG_PATTERN}):$')
function_name = function_name_pattern.search(line)
memory_range = memory_range_pattern.search(line)
if function_name is None and memory_range is None:
print('First line of the file does not contain a function name or valid memory range')
exit(1)
sys.exit(1)
if function_name is None:
return [True, f'{memory_range[1]}-{memory_range[2]}']
return [False, function_name[1]]
Expand All @@ -126,8 +137,8 @@ def get_call_pattern(stripped):
0x000055555557259c <+11340>: addr32 call 0x55555558add0 <_Z19exportDebugifyStats>
"""
if stripped:
return re.compile(fr'0x0*({hex_pattern}):.*callq?\s*(.*{hex_pattern}.*)$')
return re.compile(fr'<[+-](\d+)>:.*callq?\s*(.*{hex_pattern}.*)$')
return re.compile(fr'0x0*({HEX_PATTERN}):.*callq?\s*(.*{HEX_PATTERN}.*)$')
return re.compile(fr'<[+-](\d+)>:.*callq?\s*(.*{HEX_PATTERN}.*)$')


def get_jump_pattern(stripped, function_name):
Expand All @@ -141,11 +152,11 @@ def get_jump_pattern(stripped, function_name):
'0x000055555555600f: jmp 0x55555555603d'
"""
if stripped:
return re.compile(fr'0x0*({hex_pattern}):\W+\w+\W+0x0*({hex_pattern})$')
return re.compile(fr'0x0*({HEX_PATTERN}):\W+\w+\W+0x0*({HEX_PATTERN})$')
return re.compile(fr'<[+-](\d+)>:.+<{function_name}[+-](\d+)>')


def get_unconditional_branch_pattern(stripped):
def get_unconditional_branch_pattern():
"""
Return regexp pattern used to identify unconditional jumps.
"""
Expand All @@ -163,11 +174,11 @@ def get_assembly_line_pattern(stripped):
'0x000055555555602a: mov rax,QWORD PTR [rip+0x311f] # 0x555555559150'
"""
if stripped:
return re.compile(fr'0x0*({hex_pattern}):\W+(.+)$')
return re.compile(fr'0x0*({HEX_PATTERN}):\W+(.+)$')
return re.compile(r'<[+-](\d+)>:\W+(.+)$')


def parse_lines(lines, skip_calls):
def parse_lines(lines, skip_calls): # pylint: disable=too-many-locals,too-many-branches,too-many-statements
stripped, function_name = get_stripped_and_function_name(lines[0])
# Dict key contains address where the jump begins and value which address
# to jump to. This also includes calls.
Expand All @@ -176,15 +187,14 @@ def parse_lines(lines, skip_calls):
jump_destinations = set()
call_pattern = get_call_pattern(stripped)
jump_pattern = get_jump_pattern(stripped, function_name)
uncond_jump_pattern = get_unconditional_branch_pattern(stripped)
uncond_jump_pattern = get_unconditional_branch_pattern()

# Iterate over the lines and collect jump targets and branching points.
for line in lines[1:-1]:
match = call_pattern.search(line)
if match is not None and skip_calls:
continue
else:
match = jump_pattern.search(line)
match = jump_pattern.search(line)
if match is not None:
branch_point = match[1]
jump_point = match[2]
Expand Down Expand Up @@ -242,7 +252,7 @@ def parse_lines(lines, skip_calls):
continue
else:
print(f'unsupported line: {line}')
exit(1)
sys.exit(1)

if current_basic_block is not None:
# Add the last basic block from end of the function.
Expand Down Expand Up @@ -271,9 +281,9 @@ def draw_cfg(function_name, basic_blocks, view):
dot.edge(basic_block.key, basic_block.no_jump_edge)
if view:
dot.format = 'gv'
filename = tempfile.NamedTemporaryFile(mode='w+b', prefix=function_name)
dot.view(filename.name)
print(f'Opening a file {filename.name}.{dot.format} with default viewer. Don\'t forget to delete it later.')
with tempfile.NamedTemporaryFile(mode='w+b', prefix=function_name) as filename:
dot.view(filename.name)
print(f'Opening a file {filename.name}.{dot.format} with default viewer. Don\'t forget to delete it later.')
else:
dot.format = 'pdf'
dot.render(filename=function_name, cleanup=True)
Expand Down
5 changes: 5 additions & 0 deletions src/asm2cfg/command_line.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
"""
Command-line usage support.
"""

import argparse
from . import asm2cfg


def main():
""" Command-line entry point to the program. """
parser = argparse.ArgumentParser(
description='Program to draw dot control-flow graph from GDB disassembly for a function.',
epilog='If function CFG rendering takes too long, try to skip function calls with -c flag.'
Expand Down
47 changes: 26 additions & 21 deletions src/gdb_asm2cfg.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,32 @@
# This file includes the GDB extension for asm2cfg. Commands and settings are
# exposed to GDB by extending the certain classes.
# For further information see
# https://sourceware.org/gdb/current/onlinedocs/gdb/Python.html#Python.
"""
This file includes the GDB extension for asm2cfg. Commands and settings are
exposed to GDB by extending certain classes.
For further information see
https://sourceware.org/gdb/current/onlinedocs/gdb/Python.html#Python.
"""


import gdb
import traceback

import gdb

from asm2cfg import asm2cfg


class SkipCalls(gdb.Parameter):
class SkipCalls(gdb.Parameter): # pylint: disable=too-few-public-methods
"""
Set \'on\' to prevent function calls from splitting assembly to further
blocks. This will provide speedup when rendering CFG if function is
big. Current value:"""

def __init__(self):
super(SkipCalls, self).__init__('skipcalls', gdb.COMMAND_DATA, gdb.PARAM_BOOLEAN)
super().__init__('skipcalls', gdb.COMMAND_DATA, gdb.PARAM_BOOLEAN)
self.value = False
self.set_doc = SkipCalls.__doc__
self.show_doc = SkipCalls.__doc__


class ViewCfg(gdb.Command):
class ViewCfg(gdb.Command): # pylint: disable=too-few-public-methods
"""
Draw an assembly control-flow graph (CFG) of the currently executed
function. If function is big and CFG rendering takes too long, try to
Expand All @@ -32,42 +35,44 @@ class ViewCfg(gdb.Command):
"""

def __init__(self):
super(ViewCfg, self).__init__('viewcfg', gdb.COMMAND_USER)
super().__init__('viewcfg', gdb.COMMAND_USER)

def invoke(self, arg, from_tty):
def invoke(self, _arg, _from_tty): # pylint: disable=no-self-use
""" Called by GDB when viewcfg command is invoked """
try:
assembly_lines = gdb.execute('disassemble', from_tty=False, to_string=True).split('\n')
[function_name, basic_blocks] = asm2cfg.parse_lines(assembly_lines, gdb.parameter('skipcalls'))
asm2cfg.draw_cfg(function_name, basic_blocks, view=True)
# Catch error coming from GDB side before other errors.
except gdb.error as e:
raise gdb.GdbError(e)
except Exception as e:
except gdb.error as ex:
raise gdb.GdbError(ex)
except Exception as ex:
traceback.print_exc()
raise gdb.GdbError(e)
raise gdb.GdbError(ex)


class SaveCfg(gdb.Command):
class SaveCfg(gdb.Command): # pylint: disable=too-few-public-methods
"""
Save an assembly control-flow graph (CFG) of the currently executed
function. If function is big and CFG rendering takes too long, try to
skip function calls from splitting the code with 'set skipcalls on'.
"""

def __init__(self):
super(SaveCfg, self).__init__('savecfg', gdb.COMMAND_USER)
super().__init__('savecfg', gdb.COMMAND_USER)

def invoke(self, arg, from_tty):
def invoke(self, _arg, _from_tty): # pylint: disable=no-self-use
""" Called by GDB when savecfg command is invoked """
try:
assembly_lines = gdb.execute('disassemble', from_tty=False, to_string=True).split('\n')
[function_name, basic_blocks] = asm2cfg.parse_lines(assembly_lines, gdb.parameter('skipcalls'))
asm2cfg.draw_cfg(function_name, basic_blocks, view=False)
# Catch error coming from GDB side before other errors.
except gdb.error as e:
raise gdb.GdbError(e)
except Exception as e:
except gdb.error as ex:
raise gdb.GdbError(ex)
except Exception as ex:
traceback.print_exc()
raise gdb.GdbError(e)
raise gdb.GdbError(ex)


# Instantiate the settings and commands.
Expand Down