From f5853f6925d8e3394e58ae4b1c4888f6f389ff05 Mon Sep 17 00:00:00 2001 From: Mads Ynddal Date: Thu, 11 Jul 2024 22:42:27 -0700 Subject: [PATCH] deduplicate scanline code with inlined helpers --- pyboy/core/lcd.pxd | 7 ++- pyboy/core/lcd.py | 149 ++++++++++++++++++--------------------------- 2 files changed, 63 insertions(+), 93 deletions(-) diff --git a/pyboy/core/lcd.pxd b/pyboy/core/lcd.pxd index 4c4278eb1..f63141c1e 100644 --- a/pyboy/core/lcd.pxd +++ b/pyboy/core/lcd.pxd @@ -161,13 +161,14 @@ cdef class Renderer: ) cdef void scanline(self, LCD, int) noexcept nogil + @cython.locals(tile_addr=uint64_t, tile=int) + cdef inline (int, int, uint16_t) _get_tile(self, uint8_t, uint8_t, uint16_t, LCD) noexcept nogil + cdef inline (int, int, uint8_t, bint, uint32_t, bint) _get_tile_cgb(self, uint8_t, uint8_t, uint16_t, LCD) noexcept nogil @cython.locals(col0=uint8_t) + cdef inline void _pixel(self, uint8_t[:,:], uint32_t, int, int, int, int, uint32_t) noexcept nogil cdef int scanline_background(self, int, int, int, int, int, LCD) noexcept nogil - @cython.locals(col0=uint8_t) cdef int scanline_window(self, int, int, int, int, int, LCD) noexcept nogil - @cython.locals(col0=uint8_t) cdef int scanline_background_cgb(self, int, int, int, int, int, LCD) noexcept nogil - @cython.locals(col0=uint8_t) cdef int scanline_window_cgb(self, int, int, int, int, int, LCD) noexcept nogil cdef int scanline_blank(self, int, int, int, LCD) noexcept nogil diff --git a/pyboy/core/lcd.py b/pyboy/core/lcd.py index 21b76a77a..52dd65043 100644 --- a/pyboy/core/lcd.py +++ b/pyboy/core/lcd.py @@ -500,72 +500,73 @@ def scanline(self, lcd, y): # Reset at the end of a frame. We set it to -1, so it will be 0 after the first increment self.ly_window = -1 + def _get_tile(self, y, x, offset, lcd): + tile_addr = offset + y//8*32%0x400 + x//8%32 + tile = lcd.VRAM0[tile_addr] + + # If using signed tile indices, modify index + if not lcd._LCDC.tiledata_select: + # (x ^ 0x80 - 128) to convert to signed, then + # add 256 for offset (reduces to + 128) + tile = (tile ^ 0x80) + 128 + + yy = 8*tile + y%8 + return tile, yy, tile_addr + + def _get_tile_cgb(self, y, x, offset, lcd): + tile, yy, tile_addr = self._get_tile(y, x, offset, lcd) + + palette, vbank, horiflip, vertflip, bg_priority = self._cgb_get_background_map_attributes(lcd, tile_addr) + + bg_priority_apply = 0 + if bg_priority: + # We hide extra rendering information in the lower 8 bits (A) of the 32-bit RGBA format + bg_priority_apply = BG_PRIORITY_FLAG + + if vertflip: + yy = (8*tile + (7 - (y) % 8)) + + return tile, yy, palette, horiflip, bg_priority_apply, vbank + + def _pixel(self, tilecache, pixel, x, y, xx, yy, bg_priority_apply): + col0 = (tilecache[yy, xx] == 0) & 1 + self._screenbuffer[y, x] = pixel + # COL0_FLAG is 1 + self._screenbuffer_attributes[y, x] = bg_priority_apply | col0 + def scanline_window(self, y, _x, wx, wy, cols, lcd): for x in range(_x, _x + cols): xx = (x-wx) % 8 if xx == 0 or x == _x: - tile_addr = lcd._LCDC.windowmap_offset + (self.ly_window) // 8 * 32 % 0x400 + (x-wx) // 8 % 32 - wt = lcd.VRAM0[tile_addr] - - # If using signed tile indices, modify index - if not lcd._LCDC.tiledata_select: - # (x ^ 0x80 - 128) to convert to signed, then - # add 256 for offset (reduces to + 128) - wt = (wt ^ 0x80) + 128 - - yy = 8*wt + (self.ly_window) % 8 + wt, yy, _ = self._get_tile(self.ly_window, x - wx, lcd._LCDC.windowmap_offset, lcd) self.update_tilecache0(lcd, wt, 0) # TODO: Dynamic direct mapping? pixel = lcd.BGP.palette_mem_rgb[lcd.BGP.lookup[self._tilecache0[yy, xx]]] - - col0 = (self._tilecache0[yy, xx] == 0) & 1 - self._screenbuffer[y, x] = pixel - # COL0_FLAG is 1 - self._screenbuffer_attributes[y, x] = col0 + self._pixel(self._tilecache0, pixel, x, y, xx, yy, 0) return cols def scanline_window_cgb(self, y, _x, wx, wy, cols, lcd): + bg_priority_apply = 0 for x in range(_x, _x + cols): xx = (x-wx) % 8 if xx == 0 or x == _x: - tile_addr = lcd._LCDC.windowmap_offset + (self.ly_window) // 8 * 32 % 0x400 + (x-wx) // 8 % 32 - wt = lcd.VRAM0[tile_addr] - - # If using signed tile indices, modify index - if not lcd._LCDC.tiledata_select: - # (x ^ 0x80 - 128) to convert to signed, then - # add 256 for offset (reduces to + 128) - wt = (wt ^ 0x80) + 128 - - w_palette, w_vbank, w_horiflip, w_vertflip, w_bg_priority = self._cgb_get_background_map_attributes( - lcd, tile_addr + wt, yy, w_palette, w_horiflip, bg_priority_apply, vbank = self._get_tile_cgb( + self.ly_window, x - wx, lcd._LCDC.windowmap_offset, lcd ) - if w_vbank: - self.update_tilecache1(lcd, wt, w_vbank) - w_tilecache = self._tilecache1 - else: - self.update_tilecache0(lcd, wt, w_vbank) - w_tilecache = self._tilecache0 - - if w_vertflip: - yy = (8*wt + (7 - (self.ly_window) % 8)) + # NOTE: Not allowed to return memoryview in Cython tuple + if vbank: + self.update_tilecache1(lcd, wt, vbank) + tilecache = self._tilecache1 else: - yy = 8*wt + (self.ly_window) % 8 + self.update_tilecache0(lcd, wt, vbank) + tilecache = self._tilecache0 if w_horiflip: xx = 7 - xx - pixel = lcd.bcpd.palette_mem_rgb[w_palette*4 + w_tilecache[yy, xx]] - bg_priority_apply = 0 - if w_bg_priority: - # We hide extra rendering information in the lower 8 bits (A) of the 32-bit RGBA format - bg_priority_apply = BG_PRIORITY_FLAG - - col0 = (w_tilecache[yy, xx] == 0) & 1 - self._screenbuffer[y, x] = pixel - # COL0_FLAG is 1 - self._screenbuffer_attributes[y, x] = bg_priority_apply | col0 + pixel = lcd.bcpd.palette_mem_rgb[w_palette*4 + tilecache[yy, xx]] + self._pixel(tilecache, pixel, x, y, xx, yy, bg_priority_apply) return cols def scanline_background(self, y, _x, bx, by, cols, lcd): @@ -573,16 +574,7 @@ def scanline_background(self, y, _x, bx, by, cols, lcd): # bx mask used for the half tile at the left side when scrolling b_xx = (x + (bx & 0b111)) % 8 if b_xx == 0 or x == 0: - tile_addr = lcd._LCDC.backgroundmap_offset + (y+by) // 8 * 32 % 0x400 + (x+bx) // 8 % 32 - bt = lcd.VRAM0[tile_addr] - - # If using signed tile indices, modify index - if not lcd._LCDC.tiledata_select: - # (x ^ 0x80 - 128) to convert to signed, then - # add 256 for offset (reduces to + 128) - bt = (bt ^ 0x80) + 128 - - b_yy = 8*bt + (y+by) % 8 + bt, b_yy, _ = self._get_tile(y + by, x + bx, lcd._LCDC.backgroundmap_offset, lcd) self.update_tilecache0(lcd, bt, 0) xx = b_xx @@ -590,10 +582,7 @@ def scanline_background(self, y, _x, bx, by, cols, lcd): # TODO: Dynamic direct mapping? pixel = lcd.BGP.palette_mem_rgb[lcd.BGP.lookup[self._tilecache0[yy, xx]]] - - col0 = (self._tilecache0[yy, xx] == 0) & 1 - self._screenbuffer[y, x] = pixel - self._screenbuffer_attributes[y, x] = col0 + self._pixel(self._tilecache0, pixel, x, y, xx, yy, 0) return cols def scanline_background_cgb(self, y, _x, bx, by, cols, lcd): @@ -601,42 +590,22 @@ def scanline_background_cgb(self, y, _x, bx, by, cols, lcd): # bx mask used for the half tile at the left side when scrolling xx = (x + (bx & 0b111)) % 8 if xx == 0 or x == 0: - tile_addr = lcd._LCDC.backgroundmap_offset + (y+by) // 8 * 32 % 0x400 + (x+bx) // 8 % 32 - bt = lcd.VRAM0[tile_addr] - - # If using signed tile indices, modify index - if not lcd._LCDC.tiledata_select: - # (x ^ 0x80 - 128) to convert to signed, then - # add 256 for offset (reduces to + 128) - bt = (bt ^ 0x80) + 128 - - b_palette, b_vbank, b_horiflip, b_vertflip, b_bg_priority = self._cgb_get_background_map_attributes( - lcd, tile_addr + bt, yy, b_palette, b_horiflip, bg_priority_apply, vbank = self._get_tile_cgb( + y + by, x + bx, lcd._LCDC.backgroundmap_offset, lcd ) - if b_vbank: - self.update_tilecache1(lcd, bt, b_vbank) - b_tilecache = self._tilecache1 + # NOTE: Not allowed to return memoryview in Cython tuple + if vbank: + self.update_tilecache1(lcd, bt, vbank) + tilecache = self._tilecache1 else: - self.update_tilecache0(lcd, bt, b_vbank) - b_tilecache = self._tilecache0 - - if b_vertflip: - yy = (8*bt + (7 - (y+by) % 8)) - else: - yy = 8*bt + (y+by) % 8 + self.update_tilecache0(lcd, bt, vbank) + tilecache = self._tilecache0 if b_horiflip: xx = 7 - xx - pixel = lcd.bcpd.palette_mem_rgb[b_palette*4 + b_tilecache[yy, xx]] - bg_priority_apply = 0 - if b_bg_priority: - # We hide extra rendering information in the lower 8 bits (A) of the 32-bit RGBA format - bg_priority_apply = BG_PRIORITY_FLAG - - col0 = (b_tilecache[yy, xx] == 0) & 1 - self._screenbuffer[y, x] = pixel - self._screenbuffer_attributes[y, x] = bg_priority_apply | col0 + pixel = lcd.bcpd.palette_mem_rgb[b_palette*4 + tilecache[yy, xx]] + self._pixel(tilecache, pixel, x, y, xx, yy, bg_priority_apply) return cols def scanline_blank(self, y, _x, cols, lcd):