From 5f7dd2527fb39ce24feb24a4b75323cf64729d57 Mon Sep 17 00:00:00 2001 From: greatroar <61184462+greatroar@users.noreply.github.com> Date: Thu, 30 May 2024 09:57:43 +0200 Subject: [PATCH] flate, zstd: Shave some bytes off amd64 matchLen (#963) XORQ sets the flags, so there is no need for TESTQ. Use a 32-bit shift after TZCNT/BSF. --- flate/matchlen_amd64.s | 10 ++++------ zstd/matchlen_amd64.s | 10 ++++------ 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/flate/matchlen_amd64.s b/flate/matchlen_amd64.s index 9a7655c0f7..0782b86e3d 100644 --- a/flate/matchlen_amd64.s +++ b/flate/matchlen_amd64.s @@ -5,7 +5,6 @@ #include "textflag.h" // func matchLen(a []byte, b []byte) int -// Requires: BMI TEXT ·matchLen(SB), NOSPLIT, $0-56 MOVQ a_base+0(FP), AX MOVQ b_base+24(FP), CX @@ -17,17 +16,16 @@ TEXT ·matchLen(SB), NOSPLIT, $0-56 JB matchlen_match4_standalone matchlen_loopback_standalone: - MOVQ (AX)(SI*1), BX - XORQ (CX)(SI*1), BX - TESTQ BX, BX - JZ matchlen_loop_standalone + MOVQ (AX)(SI*1), BX + XORQ (CX)(SI*1), BX + JZ matchlen_loop_standalone #ifdef GOAMD64_v3 TZCNTQ BX, BX #else BSFQ BX, BX #endif - SARQ $0x03, BX + SHRL $0x03, BX LEAL (SI)(BX*1), SI JMP gen_match_len_end diff --git a/zstd/matchlen_amd64.s b/zstd/matchlen_amd64.s index 9a7655c0f7..0782b86e3d 100644 --- a/zstd/matchlen_amd64.s +++ b/zstd/matchlen_amd64.s @@ -5,7 +5,6 @@ #include "textflag.h" // func matchLen(a []byte, b []byte) int -// Requires: BMI TEXT ·matchLen(SB), NOSPLIT, $0-56 MOVQ a_base+0(FP), AX MOVQ b_base+24(FP), CX @@ -17,17 +16,16 @@ TEXT ·matchLen(SB), NOSPLIT, $0-56 JB matchlen_match4_standalone matchlen_loopback_standalone: - MOVQ (AX)(SI*1), BX - XORQ (CX)(SI*1), BX - TESTQ BX, BX - JZ matchlen_loop_standalone + MOVQ (AX)(SI*1), BX + XORQ (CX)(SI*1), BX + JZ matchlen_loop_standalone #ifdef GOAMD64_v3 TZCNTQ BX, BX #else BSFQ BX, BX #endif - SARQ $0x03, BX + SHRL $0x03, BX LEAL (SI)(BX*1), SI JMP gen_match_len_end