Skip to content

Commit

Permalink
Check for jumps to preludes by default. (#4651)
Browse files Browse the repository at this point in the history
* Check for jumps to preludes by default and follow tail calls.

* Fix test.exe tests. They were text book tail calls.

┌ root.0040103b();
│           0x0040103b      cmp   ecx, dword [0x40d004]
│       ┌─< 0x00401041      jne   0x401045
│       │   0x00401043      repz  ret
└      ┌└─> 0x00401045      jmp   root.00401335

* Add a tail call test for x86

* Document rz_analysis_op_is_eob

* Attempt to remove seemingly irrelevant code.

* Make code more consice.

* Fix and simplify checks for op.type

* OR the TAIL property into the op.type if applicable.

* Fix test: Blocks were incorrectly assigned to function at 0x401000.

* Fix test: Function doesn't access a var at stack - 0x60

* Fix test: The match_suffix() doesn't have a saved prelude.

* Fix test: New name is the actual symbol name.

* Fix tests: Main functions get indeed called via a tail call.

* Fix test: Accept less string found to not increase analysis depth.

Because tail jumps are now correctly marked as end of a function,
they are not always followed deeper with the default analysis depth of 1.
Hence, less strings are found.

* Revert removal of options bound to version.

* Fix tests because they were wrong:

rz_analysis_op_is_eob() did not applied the mask onto op.type,
so it checked against the wrong flags (especially for conditional jumps).
Because the eob check was faulty the execution flow was different.

* Add tail call test for AArch64
  • Loading branch information
Rot127 authored Sep 29, 2024
1 parent 3fcfdd5 commit 0988723
Show file tree
Hide file tree
Showing 19 changed files with 214 additions and 162 deletions.
9 changes: 7 additions & 2 deletions librz/arch/analysis.c
Original file line number Diff line number Diff line change
Expand Up @@ -468,11 +468,16 @@ RZ_API RzAnalysisOp *rz_analysis_op_hexstr(RzAnalysis *analysis, ut64 addr, cons
return op;
}

RZ_API bool rz_analysis_op_is_eob(RzAnalysisOp *op) {
/**
* \brief Checks \p op->type and \p op->eob if it marks the end of a block.
*
* \return True, if it is the end of a block. False otherwise.
*/
RZ_API bool rz_analysis_op_is_eob(const RzAnalysisOp *op) {
if (op->eob) {
return true;
}
switch (op->type) {
switch (op->type & RZ_ANALYSIS_OP_TYPE_MASK) {
case RZ_ANALYSIS_OP_TYPE_JMP:
case RZ_ANALYSIS_OP_TYPE_UJMP:
case RZ_ANALYSIS_OP_TYPE_RJMP:
Expand Down
51 changes: 30 additions & 21 deletions librz/arch/fcn.c
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,23 @@ static inline void set_bb_branches(RZ_OUT RzAnalysisBlock *bb, const ut64 jump,
bb->fail = fail;
}

/**
* \brief Peaks into the memory at the jump address.
* If it finds a function prelude, at it it returns true.
* False otherwise.
*/
static inline bool jumps_to_prelude(RzAnalysis *analysis, ut64 jmp_addr) {
ut8 buf[32] = { 0 };
(void)analysis->iob.read_at(analysis->iob.io, jmp_addr, (ut8 *)buf, sizeof(buf));
return rz_analysis_is_prelude(analysis, buf, sizeof(buf));
}

static inline bool jump_leaves_mapped_mem(RzAnalysis *analysis, ut64 insn_addr, ut64 jump_target) {
rz_return_val_if_fail(analysis, false);
RzIOMap *map = analysis->iob.map_get(analysis->iob.io, insn_addr);
return (jump_target < map->itv.addr || jump_target >= map->itv.addr + map->itv.size);
}

/**
* \brief Analyses the given task item \p item for branches.
*
Expand Down Expand Up @@ -1071,12 +1088,7 @@ static RzAnalysisBBEndCause run_basic_block_analysis(RzAnalysisTaskItem *item, R
gotoBeach(RZ_ANALYSIS_RET_END);
}
{
bool must_eob = true;
RzIOMap *map = analysis->iob.map_get(analysis->iob.io, addr);
if (map) {
must_eob = (op.jump < map->itv.addr || op.jump >= map->itv.addr + map->itv.size);
}
if (must_eob) {
if (jump_leaves_mapped_mem(analysis, addr, op.jump)) {
if (continue_after_jump && is_hexagon) {
rz_analysis_task_item_new(analysis, tasks, fcn, NULL, op.jump, sp);
rz_analysis_task_item_new(analysis, tasks, fcn, NULL, op.addr + op.size, sp);
Expand All @@ -1092,26 +1104,23 @@ static RzAnalysisBBEndCause run_basic_block_analysis(RzAnalysisTaskItem *item, R
if (!overlapped) {
set_bb_branches(bb, op.jump, UT64_MAX);
}
if (jumps_to_prelude(analysis, op.jump) || op.type & RZ_ANALYSIS_OP_TYPE_TAIL) {
// Most archs don't set this flag. So we update it here.
op.type |= RZ_ANALYSIS_OP_TYPE_TAIL;
rz_analysis_xrefs_set(analysis, op.addr, op.jump, RZ_ANALYSIS_XREF_TYPE_CALL);
if (is_hexagon) {
// After the jump should always follow a dealloc instruction.
// It is not included in the block, if we do RET_END here.
break;
}
gotoBeach(RZ_ANALYSIS_RET_END);
}

rz_analysis_task_item_new(analysis, tasks, fcn, NULL, op.jump, sp);
if (continue_after_jump && (is_hexagon || (is_dalvik && op.cond == RZ_TYPE_COND_EXCEPTION))) {
rz_analysis_task_item_new(analysis, tasks, fcn, NULL, op.addr + op.size, sp);
gotoBeach(RZ_ANALYSIS_RET_BRANCH);
}
int tc = analysis->opt.tailcall;
if (tc) {
int diff = op.jump - op.addr;
if (tc < 0) {
ut8 buf[32];
(void)analysis->iob.read_at(analysis->iob.io, op.jump, (ut8 *)buf, sizeof(buf));
if (rz_analysis_is_prelude(analysis, buf, sizeof(buf))) {
rz_analysis_task_item_new(analysis, tasks, fcn, NULL, op.jump, sp);
}
} else if (RZ_ABS(diff) > tc) {
(void)rz_analysis_xrefs_set(analysis, op.addr, op.jump, RZ_ANALYSIS_XREF_TYPE_CALL);
rz_analysis_task_item_new(analysis, tasks, fcn, NULL, op.jump, sp);
gotoBeach(RZ_ANALYSIS_RET_END);
}
}
goto beach;
break;
case RZ_ANALYSIS_OP_TYPE_SUB:
Expand Down
47 changes: 47 additions & 0 deletions librz/arch/isa/hexagon/hexagon_arch.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
// Do not edit. Repository of code generator:
// https://github.com/rizinorg/rz-hexagon

#include <rz_list.h>
#include <rz_util/rz_assert.h>
#include <rz_asm.h>
#include <rz_analysis.h>
#include <rz_util.h>
Expand Down Expand Up @@ -1074,6 +1076,51 @@ static void copy_asm_ana_ops(const HexState *state, RZ_BORROW HexReversedOpcode
}
}

/**
* \brief Checks if the packet \p pkt has a jump and deallocframe instructions.
* This indicates it is a tail call.
* It sets the relevant flags accordingly.
*
* \param pkt The instruction packet to check.
*/
RZ_IPI void hexagon_pkt_mark_tail_calls(HexPkt *pkt) {
rz_return_if_fail(pkt);
ut32 n = rz_list_length(pkt->bin);
if (!pkt->last_instr_present || n < 2) {
return;
}
HexInsnContainer *hic = rz_list_get_n(pkt->bin, 0);
HexInsnContainer *hic1 = rz_list_get_n(pkt->bin, 1);
if (hic->identifier != HEX_INS_L2_DEALLOCFRAME && hic1->identifier != HEX_INS_L2_DEALLOCFRAME) {
// deallocframe is a store/load instruction and can only inhabit slot 0 and 1.
return;
}
bool is_tail_call = false;
for (size_t i = 0; i < n; ++i) {
hic = rz_list_get_n(pkt->bin, i);
if (hic->identifier == HEX_INS_J2_JUMP) {
is_tail_call = true;
break;
}
}
if (!is_tail_call) {
return;
}
for (size_t i = 0; i < n; ++i) {
hic = rz_list_get_n(pkt->bin, i);
hic->ana_op.type |= RZ_ANALYSIS_OP_TYPE_TAIL;
}
hic = rz_list_get_n(pkt->bin, n - 1);
hic->ana_op.eob = true;
// This is nonesense. And we can just hope it doesn't
// break anything. The instruction is no return instruction.
// But we just don't have any other way currently to signal the
// block analysis, that the function ends here.
// eob (end of block) is ignored.
// So until RzArch is not done, there is no other way.
hic->ana_op.type = RZ_ANALYSIS_OP_TYPE_TAIL | RZ_ANALYSIS_OP_TYPE_RET;
}

/**
* \brief Reverses a given opcode and copies the result into one of the rizin structs in rz_reverse
* if \p copy_result is set.
Expand Down
3 changes: 2 additions & 1 deletion librz/arch/isa/hexagon/hexagon_arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,4 +79,5 @@ RZ_API void hex_move_insn_container(RZ_OUT HexInsnContainer *dest, const HexInsn
RZ_API HexPkt *hex_get_pkt(RZ_BORROW HexState *state, const ut32 addr);
RZ_API HexInsnContainer *hex_get_hic_at_addr(HexState *state, const ut32 addr);
RZ_API const HexOp hex_nreg_to_op(const HexInsnPktBundle *bundle, const char isa_id);
#endif
RZ_IPI void hexagon_pkt_mark_tail_calls(HexPkt *pkt);
#endif
1 change: 1 addition & 0 deletions librz/arch/isa/hexagon/hexagon_disas.c
Original file line number Diff line number Diff line change
Expand Up @@ -34293,5 +34293,6 @@ int hexagon_disasm_instruction(HexState *state, const ut32 hi_u32, RZ_INOUT HexI
snprintf(hic->bin.insn->text_infix, sizeof(hic->bin.insn->text_infix), "invalid");
}
hex_set_hic_text(hic);
hexagon_pkt_mark_tail_calls(pkt);
return 4;
}
2 changes: 1 addition & 1 deletion librz/arch/op.c
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ RZ_API char *rz_analysis_op_to_string(RzAnalysis *analysis, RzAnalysisOp *op) {
a1 = rz_str_dup("?");
}

switch (op->type) {
switch (op->type & RZ_ANALYSIS_OP_TYPE_MASK) {
case RZ_ANALYSIS_OP_TYPE_MOV:
snprintf(ret, sizeof(ret), "%s = %s", r0, a0);
break;
Expand Down
30 changes: 5 additions & 25 deletions librz/core/canalysis.c
Original file line number Diff line number Diff line change
Expand Up @@ -1853,32 +1853,12 @@ static int core_analysis_followptr(RzCore *core, int type, ut64 at, ut64 ptr, ut
}

static bool opiscall(RzCore *core, RzAnalysisOp *aop, ut64 addr, const ut8 *buf, int len, int arch) {
switch (arch) {
case RZ_ARCH_ARM64:
aop->size = 4;
// addr should be aligned by 4 in aarch64
if (addr % 4) {
char diff = addr % 4;
addr = addr - diff;
buf = buf - diff;
}
// if is not bl do not analyze
if (buf[3] == 0x94) {
if (rz_analysis_op(core->analysis, aop, addr, buf, len, RZ_ANALYSIS_OP_MASK_BASIC) > 0) {
return true;
}
}
break;
default:
aop->size = 1;
if (rz_analysis_op(core->analysis, aop, addr, buf, len, RZ_ANALYSIS_OP_MASK_BASIC) > 0) {
switch (aop->type & RZ_ANALYSIS_OP_TYPE_MASK) {
case RZ_ANALYSIS_OP_TYPE_CALL:
case RZ_ANALYSIS_OP_TYPE_CCALL:
return true;
}
if (rz_analysis_op(core->analysis, aop, addr, buf, len, RZ_ANALYSIS_OP_MASK_BASIC) > 0) {
switch (aop->type & RZ_ANALYSIS_OP_TYPE_MASK) {
case RZ_ANALYSIS_OP_TYPE_CALL:
case RZ_ANALYSIS_OP_TYPE_CCALL:
return true;
}
break;
}
return false;
}
Expand Down
7 changes: 0 additions & 7 deletions librz/core/cconfig.c
Original file line number Diff line number Diff line change
Expand Up @@ -139,12 +139,6 @@ static bool cb_analysis_jmpretpoline(void *user, void *data) {
core->analysis->opt.retpoline = node->i_value;
return true;
}
static bool cb_analysis_jmptailcall(void *user, void *data) {
RzCore *core = (RzCore *)user;
RzConfigNode *node = (RzConfigNode *)data;
core->analysis->opt.tailcall = node->i_value;
return true;
}

static bool cb_analysis_armthumb(void *user, void *data) {
RzCore *core = (RzCore *)user;
Expand Down Expand Up @@ -3003,7 +2997,6 @@ RZ_API int rz_core_config_init(RzCore *core) {
NULL);
SETI("analysis.timeout", 0, "Stop analyzing after a couple of seconds");
SETCB("analysis.jmp.retpoline", "true", &cb_analysis_jmpretpoline, "Analyze retpolines, may be slower if not needed");
SETICB("analysis.jmp.tailcall", 0, &cb_analysis_jmptailcall, "Consume a branch as a call if delta is big");

SETCB("analysis.armthumb", "false", &cb_analysis_armthumb, "aae computes arm/thumb changes (lot of false positives ahead)");
SETCB("analysis.jmp.after", "true", &cb_analysis_afterjmp, "Continue analysis after jmp/ujmp");
Expand Down
11 changes: 1 addition & 10 deletions librz/core/disasm.c
Original file line number Diff line number Diff line change
Expand Up @@ -6435,16 +6435,7 @@ RZ_API int rz_core_disasm_pde(RzCore *core, int nb_opcodes, RzCmdStateOutput *st
rz_analysis_op_init(&op);
int ret = rz_analysis_op(core->analysis, &op, op_addr, buf + block_sz, read_len, RZ_ANALYSIS_OP_MASK_ESIL);
const bool invalid_instr = ret < 1 || op.size < 1 || op.type == RZ_ANALYSIS_OP_TYPE_ILL;
bool end_of_block = false;
switch (op.type & RZ_ANALYSIS_OP_TYPE_MASK & ~RZ_ANALYSIS_OP_HINT_MASK) {
case RZ_ANALYSIS_OP_TYPE_JMP:
case RZ_ANALYSIS_OP_TYPE_UJMP:
case RZ_ANALYSIS_OP_TYPE_CALL:
case RZ_ANALYSIS_OP_TYPE_UCALL:
case RZ_ANALYSIS_OP_TYPE_RET:
end_of_block = true;
break;
}
bool end_of_block = rz_analysis_op_is_eob(&op);
if (RZ_STR_ISNOTEMPTY(strip) && strstr(strip, rz_analysis_optype_to_string(op.type))) {
i--;
} else {
Expand Down
6 changes: 3 additions & 3 deletions librz/include/rz_analysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ typedef enum {

// XXX: this definition is plain wrong. use enum or empower bits
#define RZ_ANALYSIS_OP_TYPE_MASK 0x8000ffff
#define RZ_ANALYSIS_OP_HINT_MASK 0xf0000000
#define RZ_ANALYSIS_OP_HINT_MASK 0xff000000
typedef enum {
RZ_ANALYSIS_OP_TYPE_COND = 0x80000000, // TODO must be moved to prefix?
// TODO: MOVE TO PREFIX .. it is used by analysis_java.. must be updated
Expand All @@ -266,6 +266,7 @@ typedef enum {
RZ_ANALYSIS_OP_TYPE_REG = 0x10000000, // operand is a register
RZ_ANALYSIS_OP_TYPE_IND = 0x08000000, // operand is indirect
RZ_ANALYSIS_OP_TYPE_SIMD = 0x04000000, // SIMD
RZ_ANALYSIS_OP_TYPE_TAIL = 0x02000000, ///< Part of a tail call. This effectively marks the end of a sub-routine.
RZ_ANALYSIS_OP_TYPE_NULL = 0,
RZ_ANALYSIS_OP_TYPE_JMP = 1, /* mandatory jump */
RZ_ANALYSIS_OP_TYPE_UJMP = 2, /* unknown jump (register or so) */
Expand Down Expand Up @@ -432,7 +433,6 @@ typedef struct rz_analysis_options_t {
bool pushret; // analyze push+ret as jmp
bool armthumb; //
bool delay;
int tailcall;
bool retpoline;
} RzAnalysisOptions;

Expand Down Expand Up @@ -1613,7 +1613,7 @@ RZ_API void rz_analysis_op_free(void *op);
RZ_API void rz_analysis_op_init(RzAnalysisOp *op);
RZ_API bool rz_analysis_op_fini(RzAnalysisOp *op);
RZ_API int rz_analysis_op_reg_delta(RzAnalysis *analysis, ut64 addr, const char *name);
RZ_API bool rz_analysis_op_is_eob(RzAnalysisOp *op);
RZ_API bool rz_analysis_op_is_eob(const RzAnalysisOp *op);
RZ_API RzList /*<RzAnalysisOp *>*/ *rz_analysis_op_list_new(void);
RZ_API int rz_analysis_op(RZ_NONNULL RzAnalysis *analysis, RZ_OUT RzAnalysisOp *op, ut64 addr, const ut8 *data, ut64 len, RzAnalysisOpMask mask);
RZ_API RzAnalysisOp *rz_analysis_op_hexstr(RzAnalysis *analysis, ut64 addr, const char *hexstr);
Expand Down
20 changes: 20 additions & 0 deletions test/db/analysis/arm64
Original file line number Diff line number Diff line change
Expand Up @@ -272,3 +272,23 @@ EXPECT=<<EOF
\ 0x00000010 retab
EOF
RUN

NAME=tail call
FILE=bins/elf/static-glibc-2.27
CMDS=<<EOF
s 0x00401ae0
af
pdf
EOF
EXPECT=<<EOF
/ fcn.00401ae0(int64_t arg1, int64_t arg2, int64_t arg3);
| ; arg int64_t arg1 @ rdi
| ; arg int64_t arg2 @ rsi
| ; arg int64_t arg3 @ rdx
| 0x00401ae0 mov r9d, edx ; arg3
| 0x00401ae3 xor r8d, r8d
| 0x00401ae6 xor ecx, ecx
| 0x00401ae8 xor edx, edx
\ ,=< 0x00401aea jmp fcn.00402f40
EOF
RUN
4 changes: 2 additions & 2 deletions test/db/analysis/golang
Original file line number Diff line number Diff line change
Expand Up @@ -959,7 +959,7 @@ EXPECT=<<EOF
0x00490692 str.name:
0x0049071e str.tail:
0x00490780 str.expected__foo__or__bar__subcommands
20633
20629
EOF
RUN

Expand Down Expand Up @@ -989,6 +989,6 @@ EXPECT=<<EOF
0x00487a12 str.name:
0x00487a9e str.tail:
0x00487b00 str.expected__foo__or__bar__subcommands
3616
3612
EOF
RUN
26 changes: 26 additions & 0 deletions test/db/analysis/hexagon
Original file line number Diff line number Diff line change
Expand Up @@ -1434,3 +1434,29 @@ offset - 0 1 2 3 4 5
3400 0008 3400 0000 3400 0000 7cff ffff
EOF
RUN

NAME=hexagon tail calls
FILE=bins/elf/analysis/hexagon-hello-loop
CMDS=<<EOF
s 0x00008f04
af
pdf
EOF
EXPECT=<<EOF
/ sym._Mbtowc();
| 0x00008f04 ? R17:16 = combine(R2,R3)
| 0x00008f08 ? memd(R29+#-0x10) = R17:16 ; allocframe(#0x10)
| 0x00008f0c [ R4 = memw(GP+##0x18)
| 0x00008f10 / R19:18 = combine(R0,R1)
| 0x00008f14 \ memd(SP+##0x0) = R19:18
| ,=< 0x00008f18 [ P0 = cmp.eq(R4,#0x0); if (P0.new) jump:nt 0x8f20
| | 0x00008f1c [ callr R4
| `-> 0x00008f20 / R1:0 = combine(R18,R19)
| 0x00008f24 | R3:2 = combine(R16,R17)
| 0x00008f28 | immext(##0xd980)
| 0x00008f2c \ R4 = ##obj._Mbstate
| 0x00008f30 [ R17:16 = memd(R29+#0x8) ; R19:18 = memd(R29+#0x0)
| 0x00008f34 / jump sym._Mbtowcx
\ 0x00008f38 \ LR:FP = deallocframe(FP):raw
EOF
RUN
Loading

0 comments on commit 0988723

Please sign in to comment.