Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update pypcode to use version >=2.0.0 #27

Merged
merged 3 commits into from
Mar 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
162 changes: 32 additions & 130 deletions bindings/python/quokka/backends/pypcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def get_pypcode_context(

Arguments:
arch: Quokka program architecture
endian: Architecture endianness

Raises:
PypcodeError: if the conversion for arch is not found
Expand All @@ -74,7 +75,7 @@ def get_pypcode_context(
quokka.analysis.ArchARM64: "AARCH64:LE:64:v8A",
quokka.analysis.ArchARMThumb: "ARM:LE:32:v8T",
quokka.analysis.ArchMIPS: "MIPS:LE:32:default",
quokka.analysis.ArchMIPS: "MIPS:LE:64:default",
quokka.analysis.ArchMIPS64: "MIPS:LE:64:default",
quokka.analysis.ArchPPC: "PowerPC:LE:32:default",
quokka.analysis.ArchPPC64: "PowerPC:LE:64:default",
}
Expand All @@ -93,105 +94,6 @@ def get_pypcode_context(
return pypcode.Context(pcode_arch)


def equality(self: pypcode.ContextObj, other: Any) -> bool:
    """Tell whether two pypcode objects hold the same field values

    This function is meant to be attached to pypcode classes through monkey
    patching. The fields to compare are read from `__slots__`; the `cobj` slot
    is left out of the comparison.

    Arguments:
        self: First object
        other: Value to compare against

    Returns:
        True when other is an instance of self's class and every compared slot
        is equal, False otherwise
    """
    # Objects of an unrelated type are never equal
    if not isinstance(other, self.__class__):
        return False

    for slot in self.__slots__:
        if slot == "cobj":
            continue
        if not (getattr(other, slot) == getattr(self, slot)):
            return False

    return True


def object_hash(obj: pypcode.ContextObj) -> int:
    """Create a hash value for a pypcode object

    This allows pypcode objects to be stored in sets or used as dictionary
    keys. The hash is built from the same fields that `equality` compares
    (every entry of `__slots__` except `cobj`), so two objects that compare
    equal also hash equal.

    Arguments:
        obj: Object to hash

    Raises:
        TypeError: if obj is not a pypcode.ContextObj

    Returns:
        An integer for the hash
    """
    # Explicit check instead of `assert`, which is removed when Python runs
    # with -O and would let unrelated objects through silently.
    if not isinstance(obj, pypcode.ContextObj):
        raise TypeError(
            f"object_hash expects a pypcode.ContextObj, got {type(obj).__name__}"
        )

    # Hash the fields as a tuple: unlike a plain sum of the individual hashes,
    # this is sensitive to which slot holds which value.
    return hash(
        tuple(getattr(obj, attr) for attr in obj.__slots__ if attr != "cobj")
    )


# Monkey patch the pypcode classes below so that they use the field-based
# `equality` and `object_hash` helpers for `==` and `hash()`.
pypcode.Varnode.__eq__ = equality
pypcode.Varnode.__hash__ = object_hash

pypcode.AddrSpace.__eq__ = equality
pypcode.AddrSpace.__hash__ = object_hash

pypcode.PcodeOp.__eq__ = equality
pypcode.PcodeOp.__hash__ = object_hash


def combine_instructions(
    block: quokka.Block, translated_instructions: Sequence[pypcode.Translation]
) -> List[pypcode.PcodeOp]:
    """Match the Pypcode translations with the Quokka instructions of a block

    IDA and Ghidra do not always split instructions the same way. Prefixes
    (such as LOCK), for example, are decoded as 2 instructions by Ghidra but as
    a single one by IDA. For every Quokka instruction, translations are
    consumed until their cumulated length covers the instruction size; their
    operations are stored on the instruction (`_pcode_insts`) and appended to
    the returned list.

    Arguments:
        block: Quokka block
        translated_instructions: Translated instructions by Pypcode

    Raises:
        PypcodeError: if the translations run out before an instruction is
            covered, or if they overshoot the size of an instruction

    Returns:
        A list of Pypcode statements
    """
    translations = iter(translated_instructions)
    combined_ops: List[pypcode.PcodeOp] = []

    for instruction in block.instructions:
        instruction._pcode_insts = []
        uncovered: int = instruction.size

        # Keep consuming translations until the whole instruction is covered
        while uncovered > 0:
            try:
                chunk: pypcode.Translation = next(translations)
            except StopIteration as exc:
                logger.error(
                    "Disassembly discrepancy between Pypcode / IDA: missing inst"
                )
                raise quokka.PypcodeError(
                    f"Decoding error for block at 0x{block.start:x}"
                ) from exc

            uncovered -= chunk.length
            instruction._pcode_insts.extend(chunk.ops)

            # A negative remainder means Pypcode decoded past the end of the
            # instruction as seen by Quokka
            if uncovered < 0:
                logger.error(
                    "Disassembly discrepancy between Pypcode / IDA: sizes mismatch"
                )
                raise quokka.PypcodeError(
                    f"Decoding error for block at 0x{block.start:x}"
                )

            combined_ops += list(chunk.ops)

    return combined_ops


def update_pypcode_context(program: quokka.Program, is_thumb: bool) -> pypcode.Context:
"""Return an appropriate pypcode context for the decoding

Expand Down Expand Up @@ -246,19 +148,22 @@ def pypcode_decode_block(block: quokka.Block) -> List[pypcode.PcodeOp]:
block.program, first_instruction.thumb
)

# Translate
translation = context.translate(
code=block.bytes,
base=block.start,
max_inst=0,
)

if translation.error:
logger.error(translation.error.explain)
raise quokka.PypcodeError(f"Decoding error for block at 0x{block.start:x}")
try:
# Translate
translation = context.translate(
block.bytes, # buf
block.start, # base_address
0, # max_bytes
0, # max_instructions
)
return translation.ops

pcode_instructions = combine_instructions(block, translation.instructions)
return pcode_instructions
except pypcode.BadDataError as e:
logger.error(e)
raise quokka.PypcodeError(f"Decoding error for block at 0x{block.start:x} (BadDataError)")
except pypcode.UnimplError as e:
logger.error(e)
raise quokka.PypcodeError(f"Decoding error for block at 0x{block.start:x} (UnimplError)")


def pypcode_decode_instruction(
Expand All @@ -268,7 +173,7 @@ def pypcode_decode_instruction(

This will return the list of Pcode operations done for the instruction.
Note that a (binary) instruction is expected to have several pcode instructions
associated.
associated. When decoding a single instruction IMARK instructions are excluded!

Arguments:
inst: Instruction to translate
Expand All @@ -281,22 +186,19 @@ def pypcode_decode_instruction(
"""

context: pypcode.Context = update_pypcode_context(inst.program, inst.thumb)
translation = context.translate(
code=inst.bytes,
base=inst.address,
max_inst=1,
)

if not translation.error:

instructions = translation.instructions
if len(instructions) > 1:
logger.warning("Mismatch of instruction size IDA/Pypcode")

instructions = list(
itertools.chain.from_iterable(inst.ops for inst in instructions)
try:
translation = context.translate(
inst.bytes, # buf
inst.address, # base_address
0, # max_bytes
1, # max_instructions
)
return instructions

logger.error(translation.error.explain)
raise quokka.PypcodeError("Unable to decode instruction")
return [x for x in translation.ops if x.opcode != pypcode.OpCode.IMARK]

except pypcode.BadDataError as e:
logger.error(e)
raise quokka.PypcodeError(f"Unable to decode instruction (BadDataError)")
except pypcode.UnimplError as e:
logger.error(e)
raise quokka.PypcodeError(f"Unable to decode instruction (UnimplError)")
2 changes: 1 addition & 1 deletion bindings/python/quokka/block.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ def bytes(self) -> bytes:

return block_bytes

@property
@cached_property
def pcode_insts(self) -> List[pypcode.PcodeOp]:
"""Generate PCode instructions for the block

Expand Down
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ test = [
"pytest-mock",
"pytest-cov",
"coverage[toml]",
"pypcode>=1.1.1",
"pypcode>=2.0.0",
]
pypcode = ["pypcode>=1.1.1"]
pypcode = ["pypcode>=2.0.0"]
doc = [
"mkdocs",
"mkdocs-material",
Expand All @@ -45,7 +45,7 @@ dev = [
"mypy",
"mypy-protobuf",
"nox",
"pypcode>=1.1.1",
"pypcode>=2.0.0",
]

[tool.setuptools]
Expand Down
8 changes: 4 additions & 4 deletions tests/python/tests/backends/test_pypcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,16 @@
def test_pypcode_context():

context = pypcode_backend.get_pypcode_context(quokka.analysis.ArchX86)
assert context.lang.id == "x86:LE:32:default"
assert context.language.id == "x86:LE:32:default"

context = pypcode_backend.get_pypcode_context(quokka.analysis.ArchX64)
assert context.lang.id == "x86:LE:64:default"
assert context.language.id == "x86:LE:64:default"

context = pypcode_backend.get_pypcode_context(quokka.analysis.ArchARM64)
assert context.lang.id == "AARCH64:LE:64:v8A"
assert context.language.id == "AARCH64:LE:64:v8A"

context = pypcode_backend.get_pypcode_context(quokka.analysis.ArchARM)
assert context.lang.id == "ARM:LE:32:v8"
assert context.language.id == "ARM:LE:32:v8"

with pytest.raises(quokka.PypcodeError):
pypcode_backend.get_pypcode_context(quokka.analysis.QuokkaArch)
Expand Down
Loading