Skip to content

Commit

Permalink
Merge pull request #23 from byeongkeunahn/short-4
Browse files Browse the repository at this point in the history
Shorten the generated code in short mode
  • Loading branch information
byeongkeunahn authored Nov 30, 2023
2 parents e9225fb + 5dff351 commit f435c65
Show file tree
Hide file tree
Showing 5 changed files with 168 additions and 31 deletions.
40 changes: 34 additions & 6 deletions scripts/base91.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,29 @@
def encode(x):
def encode(x_in, use_rle=False):
sharp_insertion_points = []
if use_rle:
current_bits, current_bytes, i = 0, 0, 0
x = bytearray()
while i < len(x_in):
current_bits += 13
while current_bytes < current_bits // 8:
if i >= len(x_in):
break
x.append(x_in[i])
current_bytes += 1
i += 1
if len(x) > 0 and x[-1] == 0:
zeros_cnt = 1
while i - 1 + zeros_cnt < len(x_in) and zeros_cnt < 255 and x_in[i - 1 + zeros_cnt] == 0:
zeros_cnt += 1
if zeros_cnt >= 2:
x.pop()
x.append(zeros_cnt)
sharp_insertion_points.append((current_bits // 13 * 2) + len(sharp_insertion_points))
i += zeros_cnt - 1
sharp_insertion_points = list(reversed(sharp_insertion_points))
else:
x = x_in

out = []
i = 0
cnt5, stack5 = 0, []
Expand All @@ -21,11 +46,14 @@ def encode(x):
if bits < 13:
stack5.append((pos, bits))
cnt5 -= 8
ret = bytearray(2 * len(out) + 1)
for i in range(len(out)):
ret[2 * i + 0] = 0x24 + (out[i] % 91)
ret[2 * i + 1] = 0x24 + (out[i] // 91)
ret[-1] = ord('!')
ret = bytearray()
for pack in out:
ret.append(0x24 + (pack % 91))
ret.append(0x24 + (pack // 91))
if len(sharp_insertion_points) > 0 and len(ret) == sharp_insertion_points[-1]:
ret.append(ord(b'#'))
sharp_insertion_points.pop()
ret.append(ord(b'!'))
return bytes(ret)

if __name__ == '__main__':
Expand Down
65 changes: 40 additions & 25 deletions scripts/static-pie-gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@
sol[-1] = sol[-1].rstrip()
sol = "".join(sol)

# binary (raw)
code_raw_b91 = base91.encode(memory_bin[:-4], use_rle=True).decode('ascii')
code_raw_b91_len = len(code_raw_b91)
code_raw_b91 = '"' + code_raw_b91 + '"'

# binary
with open(compressed_binary_path, "rb") as f:
code = f.read()
Expand Down Expand Up @@ -115,29 +120,39 @@
stub_b85 = '"' + stub_b85 + '"'

# template
with open(template_path, encoding='utf8') as f:
template = f.read()
template = template.replace("\ufeff", "")

# putting it all together
# reference: https://stackoverflow.com/a/15448887
def multiple_replace(string, rep_dict):
pattern = re.compile("|".join([re.escape(k) for k in sorted(rep_dict,key=len,reverse=True)]), flags=re.DOTALL)
return pattern.sub(lambda x: rep_dict[x.group(0)], string)

out = multiple_replace(template, {
"$$$$solution_src$$$$": sol,
"$$$$stub_raw$$$$": stub_raw,
"$$$$stub_base85$$$$": stub_b85,
"$$$$stub_len$$$$": str(len(stub)),
"$$$$stub_base85_len$$$$": str(stub_b85_len),
"$$$$stub_base91$$$$": stub_b91,
"$$$$stub_base91_len$$$$": str(stub_b91_len),
"$$$$binary_base85$$$$": r,
"$$$$binary_base85_len$$$$": str(len(code_b85)),
"$$$$binary_base91$$$$": code_b91,
"$$$$binary_base91_len$$$$": str(code_b91_len),
"$$$$min_len_4096$$$$": str(min(len(code_b85)+1, 4096)),
"$$$$entrypoint_offset$$$$": str(loader_fdict['entrypoint_offset']),
})
# Candidate templates: always the requested one; for Rust on x86_64 in "short"
# mode also try the "shorter" variant, and keep whichever output is smaller.
template_candidates = [template_path]
if lang_name == "Rust" and "x86_64" in target_name and "short" in template_path:
    template_candidates.append(template_path.replace("short", "shorter"))


# putting it all together
# reference: https://stackoverflow.com/a/15448887
def multiple_replace(string, rep_dict):
    """Replace every key of rep_dict found in string with its value, in one pass.

    Keys are tried longest-first so that a key which is a prefix of another
    key cannot shadow the longer one.
    """
    pattern = re.compile(
        "|".join(re.escape(k) for k in sorted(rep_dict, key=len, reverse=True)),
        flags=re.DOTALL,
    )
    return pattern.sub(lambda m: rep_dict[m.group(0)], string)


# The substitutions do not depend on which template is being rendered, so the
# table is built once instead of once per candidate.
replacements = {
    "$$$$solution_src$$$$": sol,
    "$$$$stub_raw$$$$": stub_raw,
    "$$$$stub_base85$$$$": stub_b85,
    "$$$$stub_len$$$$": str(len(stub)),
    "$$$$stub_base85_len$$$$": str(stub_b85_len),
    "$$$$stub_base91$$$$": stub_b91,
    "$$$$stub_base91_len$$$$": str(stub_b91_len),
    "$$$$binary_base85$$$$": r,
    "$$$$binary_base85_len$$$$": str(len(code_b85)),
    "$$$$binary_base91$$$$": code_b91,
    "$$$$binary_base91_len$$$$": str(code_b91_len),
    "$$$$binary_raw_base91$$$$": code_raw_b91,
    "$$$$binary_raw_base91_len$$$$": str(code_raw_b91_len),
    "$$$$min_len_4096$$$$": str(min(len(code_b85)+1, 4096)),
    "$$$$entrypoint_offset$$$$": str(loader_fdict['entrypoint_offset']),
}

out = None
for each_template_path in template_candidates:
    with open(each_template_path, encoding='utf8') as f:
        template = f.read()
    # Drop any byte-order mark so it does not end up in the generated source.
    template = template.replace("\ufeff", "")

    out_candidate = multiple_replace(template, replacements)
    # Strict '<' keeps the earlier candidate on a tie.
    if out is None or len(out_candidate) < len(out):
        out = out_candidate
print(out)
86 changes: 86 additions & 0 deletions scripts/static-pie-prestub-amd64-shorter.asm
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
; -*- tab-width: 4 -*-
;
; The prestub for amd64-rust target
; (prestub: the code that runs before the stub and sets the stage)
;
; build: nasm -f bin -O9 static-pie-prestub-amd64-shorter.asm -o static-pie-prestub-amd64-shorter.bin
; note: after building with the above command, run static-pie-prestub-amd64-print.py static-pie-prestub-amd64-shorter.bin
; to obtain the form that can be embedded in Rust as inline assembly.

BITS 64
ORG 0
section .text

; Align stack to 16 byte boundary
; [rsp+ 32, rsp+120): PLATFORM_DATA
; [rsp+ 0, rsp+ 32): (shadow space for win64 calling convention)
; NOTE(review): the offsets above do not obviously match the 56 bytes reserved
; here plus the four 8-byte pushes at _t -- confirm against the consumer.
enter 56, 0 ; nesting level 0: push rbp / mov rbp, rsp / sub rsp, 56

; svc_alloc_rwx for Linux
; Performs mmap(addr=0, length=1, prot=7, flags=0x22, fd=-1, offset=0) with a
; raw syscall. On return rax holds the syscall result (the new mapping) and
; rdx still holds 7. rsi (pointer to the encoded payload) is saved and
; restored around the call because it doubles as the length argument.
; The push/pop pairs load small constants into 64-bit registers.
_svc_alloc_rwx:
push 9
pop rax ; syscall id of x64 mmap
cdq ; rdx=0 (sign-extension of eax=9)
xor r9d, r9d ; offset
push rsi ; save rsi
xor edi, edi ; rdi=0 (addr: let the kernel choose)
push 1
pop rsi ; size (NOTE(review): only 1 byte is requested; confirm the decoded image fits in what the kernel actually maps)
mov dl, 7 ; protect = read|write|exec (safe since we have ensured rdx=0)
push 0x22
pop r10 ; flags = MAP_PRIVATE|MAP_ANONYMOUS
push -1
pop r8 ; fd
syscall
pop rsi ; restore rsi

; PLATFORM_DATA
; Built on the stack with four pushes; since the stack grows downward the
; fields are pushed from the highest offset to the lowest.
; The two win_* slots simply receive whatever rdx and rax hold at this point
; (the mmap protection value and the mmap result) -- presumably ignored when
; env_id says Linux; verify against the loader.
_t: ; PLATFORM_DATA[32..39] = ptr_alloc_rwx (NOTE(review): nothing is pushed for this slot in this file -- confirm)
push rdx ; PLATFORM_DATA[24..31] = win_GetProcAddress
push rax ; PLATFORM_DATA[16..23] = win_kernel32
push 1 ; PLATFORM_DATA[ 8..15] = env_flags (0=None, 1=ENV_FLAGS_LINUX_STYLE_CHKSTK)
push 2 ; PLATFORM_DATA[ 0.. 7] = env_id (1=Windows, 2=Linux)

; Current state: rax = new buffer
push rax ; keep the buffer base on the stack; popped into rcx at _jump_to_entrypoint
xchg rax, rdi ; rdi = new buffer (output cursor for stosb); rax = previous rdi

; Base91 decoder
; Input: rsi = encoded text, rdi = output cursor.
; Each pair of characters c0, c1 (both >= 0x24) contributes
; (c0-0x24) + 91*(c1-0x24) to the accumulator in eax, which is then drained
; byte by byte with stosb.
; A '#' (0x23) in the position of c0 means "the byte just written is a count:
; replace it with that many zero bytes". Any character below 0x23 ends the
; stream.
_decode:
mov al, 0x1f ; rax came from rdi via the xchg above, and rdi was zeroed before the syscall (assuming the syscall left rdi untouched), so eax=0x1f
_decode_loop:
shl eax, 13 ; make room below the carried-over bits for the next pair
_decode_loop_2:
lodsb ; al = next input character
sub al, 0x23 ; CF=1 if char < '#', ZF=1 if char == '#'
cdq ; edx=0 as long as bit 31 of eax is clear; does not touch the flags from sub
jc _jump_to_entrypoint ; terminator reached
jz _decode_zeros ; '#': expand a run of zeros
dec al ; al = c0 - 0x24
xchg eax, edx ; edx = accumulator incl. first digit, eax = 0
lodsb ; al = second character of the pair
sub al, 0x24
imul eax, eax, 91
add eax, edx ; eax = accumulator + c0' + 91*c1'
_decode_output:
stosb ; emit the low byte
shr eax, 8
test ah, 16 ; bit 12 of eax
jnz _decode_output ; keep emitting while bit 12 is set (NOTE(review): the 0x1f seed appears to act as the pending-bits marker; confirm the exact invariant)
jmp _decode_loop
_decode_zeros:
xchg eax, edx ; park the accumulator in edx; eax = 0, so al is the fill byte
movzx ecx, byte [rdi-1] ; run length = the byte written last
dec rdi ; step back so the run overwrites the length byte
rep stosb ; write ecx zero bytes
xchg eax, edx ; restore the accumulator (edx becomes 0 again)
jmp _decode_loop_2 ; continue with the next character without shifting again

; Jump to entrypoint
; The last four decoded bytes are read as a 32-bit offset and added to the
; buffer base to form the call target.
_jump_to_entrypoint:
mov eax, dword [rdi-4] ; last 4 bytes written by the decoder (upper half of rax is cleared)
pop rcx ; buffer base saved by the push before the decoder
add rax, rcx ; rax = buffer base + offset
push rsp
pop rcx ; rcx = rsp, which now points at the env_id slot, i.e. the start of PLATFORM_DATA
call rax
Binary file added scripts/static-pie-prestub-amd64-shorter.bin
Binary file not shown.
8 changes: 8 additions & 0 deletions scripts/static-pie-template-amd64-shorter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Generated with https://github.com/kiwiyou/basm-rs
// Learn rust (https://doc.rust-lang.org/book/) and get high performance out of the box!
// SOLUTION BEGIN
#![crate_type="cdylib"]#![no_std]#[cfg(any())]mod x{
$$$$solution_src$$$$
}
// SOLUTION END
#[no_link]extern crate std;#[no_mangle]unsafe fn _start(){std::arch::asm!(".quad 9958096a000038c8h,16aff3156c93145h,6a5a41226a07b25eh,50525e050f5841ffh,0b0974850026a016ah,99232cac0de0c11fh,0ac92c8fe16742572h,0aad0015bc06b242ch,0f77510c4f608e8c1h,48ff4fb60f92dfebh,8bd5eb92aaf3cfffh,5954c8014859fc47h,53503",in("rsi")r$$$$binary_raw_base91$$$$.as_ptr())}

0 comments on commit f435c65

Please sign in to comment.