Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cache computed planes used for BBOX culling #17810

Merged
merged 3 commits into from
Jul 30, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 64 additions & 61 deletions GPU/Common/DrawEngineCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
#include <cfloat>

#include "Common/Data/Convert/ColorConv.h"
#include "Common/Math/lin/matrix4x4.h"
#include "Common/Profiler/Profiler.h"
#include "Common/LogReporting.h"
#include "Common/Math/lin/matrix4x4.h"
#include "Core/Config.h"
#include "GPU/Common/DrawEngineCommon.h"
#include "GPU/Common/SplineCommon.h"
Expand Down Expand Up @@ -136,21 +136,6 @@ std::string DrawEngineCommon::DebugGetVertexLoaderString(std::string id, DebugSh
return dec ? dec->GetString(stringType) : "N/A";
}

struct Plane {
float x, y, z, w;
void Set(float _x, float _y, float _z, float _w) { x = _x; y = _y; z = _z; w = _w; }
float Test(const float f[3]) const { return x * f[0] + y * f[1] + z * f[2] + w; }
};

static void PlanesFromMatrix(const float mtx[16], Plane planes[6]) {
planes[0].Set(mtx[3]-mtx[0], mtx[7]-mtx[4], mtx[11]-mtx[8], mtx[15]-mtx[12]); // Right
planes[1].Set(mtx[3]+mtx[0], mtx[7]+mtx[4], mtx[11]+mtx[8], mtx[15]+mtx[12]); // Left
planes[2].Set(mtx[3]+mtx[1], mtx[7]+mtx[5], mtx[11]+mtx[9], mtx[15]+mtx[13]); // Bottom
planes[3].Set(mtx[3]-mtx[1], mtx[7]-mtx[5], mtx[11]-mtx[9], mtx[15]-mtx[13]); // Top
planes[4].Set(mtx[3]+mtx[2], mtx[7]+mtx[6], mtx[11]+mtx[10], mtx[15]+mtx[14]); // Near
planes[5].Set(mtx[3]-mtx[2], mtx[7]-mtx[6], mtx[11]-mtx[10], mtx[15]-mtx[14]); // Far
}

static Vec3f ClipToScreen(const Vec4f& coords) {
float xScale = gstate.getViewportXScale();
float xCenter = gstate.getViewportXCenter();
Expand Down Expand Up @@ -250,6 +235,52 @@ void DrawEngineCommon::DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex
}
}

// Gated by DIRTY_CULL_PLANES
void DrawEngineCommon::UpdatePlanes() {
float world[16];
float view[16];
float worldview[16];
float worldviewproj[16];
ConvertMatrix4x3To4x4(world, gstate.worldMatrix);
ConvertMatrix4x3To4x4(view, gstate.viewMatrix);
// TODO: Create a Matrix4x3ByMatrix4x3, and Matrix4x4ByMatrix4x3?
Matrix4ByMatrix4(worldview, world, view);
Matrix4ByMatrix4(worldviewproj, worldview, gstate.projMatrix);

// Next, we need to apply viewport, scissor, region, and even offset - but only for X/Y.
// Note that the PSP does not clip against the viewport.
const Vec2f baseOffset = Vec2f(gstate.getOffsetX(), gstate.getOffsetY());
// Region1 (rate) is used as an X1/Y1 here, matching PSP behavior.
minOffset_ = baseOffset + Vec2f(std::max(gstate.getRegionRateX() - 0x100, gstate.getScissorX1()), std::max(gstate.getRegionRateY() - 0x100, gstate.getScissorY1())) - Vec2f(1.0f, 1.0f);
maxOffset_ = baseOffset + Vec2f(std::min(gstate.getRegionX2(), gstate.getScissorX2()), std::min(gstate.getRegionY2(), gstate.getScissorY2())) + Vec2f(1.0f, 1.0f);

// Now let's apply the viewport to our scissor/region + offset range.
Vec2f inverseViewportScale = Vec2f(1.0f / gstate.getViewportXScale(), 1.0f / gstate.getViewportYScale());
Vec2f minViewport = (minOffset_ - Vec2f(gstate.getViewportXCenter(), gstate.getViewportYCenter())) * inverseViewportScale;
Vec2f maxViewport = (maxOffset_ - Vec2f(gstate.getViewportXCenter(), gstate.getViewportYCenter())) * inverseViewportScale;

Lin::Matrix4x4 applyViewport;
applyViewport.empty();
// Scale to the viewport's size.
applyViewport.xx = 2.0f / (maxViewport.x - minViewport.x);
applyViewport.yy = 2.0f / (maxViewport.y - minViewport.y);
applyViewport.zz = 1.0f;
applyViewport.ww = 1.0f;
// And offset to the viewport's centers.
applyViewport.wx = -(maxViewport.x + minViewport.x) / (maxViewport.x - minViewport.x);
applyViewport.wy = -(maxViewport.y + minViewport.y) / (maxViewport.y - minViewport.y);

float mtx[16];
Matrix4ByMatrix4(mtx, worldviewproj, applyViewport.m);

planes_[0].Set(mtx[3] - mtx[0], mtx[7] - mtx[4], mtx[11] - mtx[8], mtx[15] - mtx[12]); // Right
planes_[1].Set(mtx[3] + mtx[0], mtx[7] + mtx[4], mtx[11] + mtx[8], mtx[15] + mtx[12]); // Left
planes_[2].Set(mtx[3] + mtx[1], mtx[7] + mtx[5], mtx[11] + mtx[9], mtx[15] + mtx[13]); // Bottom
planes_[3].Set(mtx[3] - mtx[1], mtx[7] - mtx[5], mtx[11] - mtx[9], mtx[15] - mtx[13]); // Top
planes_[4].Set(mtx[3] + mtx[2], mtx[7] + mtx[6], mtx[11] + mtx[10], mtx[15] + mtx[14]); // Near
planes_[5].Set(mtx[3] - mtx[2], mtx[7] - mtx[6], mtx[11] - mtx[10], mtx[15] - mtx[14]); // Far
}

// This code has plenty of potential for optimization.
//
// It does the simplest and safest test possible: If all points of a bbox is outside a single of
Expand All @@ -273,7 +304,7 @@ bool DrawEngineCommon::TestBoundingBox(const void *control_points, const void *i
verts[i] = vtx[i] * (1.0f / 128.0f);
}
} else if ((vertType & 0xFFFFFF) == GE_VTYPE_POS_16BIT && !inds) {
const s16 *vtx = (const s16*)control_points;
const s16 *vtx = (const s16 *)control_points;
for (int i = 0; i < vertexCount * 3; i++) {
verts[i] = vtx[i] * (1.0f / 32768.0f);
}
Expand Down Expand Up @@ -302,70 +333,42 @@ bool DrawEngineCommon::TestBoundingBox(const void *control_points, const void *i
}
}

Plane planes[6];

float world[16];
float view[16];
float worldview[16];
float worldviewproj[16];
ConvertMatrix4x3To4x4(world, gstate.worldMatrix);
ConvertMatrix4x3To4x4(view, gstate.viewMatrix);
// TODO: Create a Matrix4x3ByMatrix4x3, and Matrix4x4ByMatrix4x3?
Matrix4ByMatrix4(worldview, world, view);
Matrix4ByMatrix4(worldviewproj, worldview, gstate.projMatrix);

// Next, we need to apply viewport, scissor, region, and even offset - but only for X/Y.
// Note that the PSP does not clip against the viewport.
const Vec2f baseOffset = Vec2f(gstate.getOffsetX(), gstate.getOffsetY());
// Region1 (rate) is used as an X1/Y1 here, matching PSP behavior.
Vec2f minOffset = baseOffset + Vec2f(std::max(gstate.getRegionRateX() - 0x100, gstate.getScissorX1()), std::max(gstate.getRegionRateY() - 0x100, gstate.getScissorY1())) - Vec2f(1.0f, 1.0f);
Vec2f maxOffset = baseOffset + Vec2f(std::min(gstate.getRegionX2(), gstate.getScissorX2()), std::min(gstate.getRegionY2(), gstate.getScissorY2())) + Vec2f(1.0f, 1.0f);

// Now let's apply the viewport to our scissor/region + offset range.
Vec2f inverseViewportScale = Vec2f(1.0f / gstate.getViewportXScale(), 1.0f / gstate.getViewportYScale());
Vec2f minViewport = (minOffset - Vec2f(gstate.getViewportXCenter(), gstate.getViewportYCenter())) * inverseViewportScale;
Vec2f maxViewport = (maxOffset - Vec2f(gstate.getViewportXCenter(), gstate.getViewportYCenter())) * inverseViewportScale;

Lin::Matrix4x4 applyViewport;
applyViewport.empty();
// Scale to the viewport's size.
applyViewport.xx = 2.0f / (maxViewport.x - minViewport.x);
applyViewport.yy = 2.0f / (maxViewport.y - minViewport.y);
applyViewport.zz = 1.0f;
applyViewport.ww = 1.0f;
// And offset to the viewport's centers.
applyViewport.wx = -(maxViewport.x + minViewport.x) / (maxViewport.x - minViewport.x);
applyViewport.wy = -(maxViewport.y + minViewport.y) / (maxViewport.y - minViewport.y);

float screenBounds[16];
Matrix4ByMatrix4(screenBounds, worldviewproj, applyViewport.m);
// Due to world matrix updates per "thing", this isn't quite as effective as it could be if we did world transform
// in here as well. Though, it still does cut down on a lot of updates in Tekken 6.
if (gstate_c.IsDirty(DIRTY_CULL_PLANES)) {
UpdatePlanes();
gpuStats.numPlaneUpdates++;
gstate_c.Clean(DIRTY_CULL_PLANES);
}

PlanesFromMatrix(screenBounds, planes);
// Note: near/far are not checked without clamp/clip enabled, so we skip those planes.
int totalPlanes = gstate.isDepthClampEnabled() ? 6 : 4;
for (int plane = 0; plane < totalPlanes; plane++) {
int inside = 0;
int out = 0;
for (int i = 0; i < vertexCount; i++) {
// Here we can test against the frustum planes!
float value = planes[plane].Test(verts + i * 3);
// Test against the frustum planes, and count.
// TODO: We should test 4 vertices at a time using SIMD.
// I guess could also test one vertex against 4 planes at a time, though a lot of waste at the common case of 6.
float value = planes_[plane].Test(verts + i * 3);
if (value <= -FLT_EPSILON)
out++;
else
inside++;
}

// No vertices inside this one plane? Don't need to draw.
if (inside == 0) {
// All out - but check for X and Y if the offset was near the cullbox edge.
bool outsideEdge = false;
if (plane == 1)
outsideEdge = minOffset.x < 1.0f;
outsideEdge = minOffset_.x < 1.0f;
if (plane == 2)
outsideEdge = minOffset.y < 1.0f;
outsideEdge = minOffset_.y < 1.0f;
else if (plane == 0)
outsideEdge = maxOffset.x >= 4096.0f;
outsideEdge = maxOffset_.x >= 4096.0f;
else if (plane == 3)
outsideEdge = maxOffset.y >= 4096.0f;
outsideEdge = maxOffset_.y >= 4096.0f;

// Only consider this outside if offset + scissor/region is fully inside the cullbox.
if (!outsideEdge)
Expand Down
14 changes: 14 additions & 0 deletions GPU/Common/DrawEngineCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "Common/CommonTypes.h"
#include "Common/Data/Collections/Hashmaps.h"

#include "GPU/Math3D.h"
#include "GPU/GPUState.h"
#include "GPU/Common/GPUStateUtils.h"
#include "GPU/Common/GPUDebugInterface.h"
Expand Down Expand Up @@ -68,6 +69,13 @@ class TessellationDataTransfer {
virtual void SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) = 0;
};

// Culling plane.
struct Plane {
float x, y, z, w;
void Set(float _x, float _y, float _z, float _w) { x = _x; y = _y; z = _z; w = _w; }
float Test(const float f[3]) const { return x * f[0] + y * f[1] + z * f[2] + w; }
};

class DrawEngineCommon {
public:
DrawEngineCommon();
Expand Down Expand Up @@ -131,6 +139,7 @@ class DrawEngineCommon {

protected:
virtual bool UpdateUseHWTessellation(bool enabled) const { return enabled; }
void UpdatePlanes();

int ComputeNumVertsToDecode() const;
void DecodeVerts(u8 *dest);
Expand Down Expand Up @@ -236,4 +245,9 @@ class DrawEngineCommon {

// Hardware tessellation
TessellationDataTransfer *tessDataTransfer;

// Culling
Plane planes_[6];
Vec2f minOffset_;
Vec2f maxOffset_;
};
3 changes: 2 additions & 1 deletion GPU/Common/ShaderCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,15 @@ enum : uint64_t {
DIRTY_LIGHT_CONTROL = 1ULL << 38,
DIRTY_TEX_ALPHA_MUL = 1ULL << 39,

// Bits 40-43 are free for new uniforms. Then we're really out and need to start merging.
// Bits 40-42 are free for new uniforms. Then we're really out and need to start merging.
// Don't forget to update DIRTY_ALL_UNIFORMS when you start using them.

DIRTY_BONE_UNIFORMS = 0xFF000000ULL,

DIRTY_ALL_UNIFORMS = 0x0FFFFFFFFFFULL,

// Other dirty elements that aren't uniforms
DIRTY_CULL_PLANES = 1ULL << 43,
DIRTY_FRAMEBUF = 1ULL << 44,
DIRTY_TEXTURE_IMAGE = 1ULL << 45, // Means that the definition of the texture image has changed (address, stride etc), and we need to look up again.
DIRTY_TEXTURE_PARAMS = 1ULL << 46,
Expand Down
9 changes: 5 additions & 4 deletions GPU/GPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ inline unsigned int toFloat24(float f) {
return i >> 8;
}

// The ToString function lives in GPUCommonHW.cpp.
struct GPUStatistics {
void Reset() {
ResetFrame();
Expand All @@ -84,10 +85,10 @@ struct GPUStatistics {
numTextureInvalidations = 0;
numTextureInvalidationsByFramebuffer = 0;
numTexturesHashed = 0;
numTextureSwitches = 0;
numTextureDataBytesHashed = 0;
numShaderSwitches = 0;
numFlushes = 0;
numBBOXJumps = 0;
numPlaneUpdates = 0;
numTexturesDecoded = 0;
numFramebufferEvaluations = 0;
numBlockingReadbacks = 0;
Expand All @@ -114,6 +115,8 @@ struct GPUStatistics {
int numListSyncs;
int numCachedDrawCalls;
int numFlushes;
int numBBOXJumps;
int numPlaneUpdates;
int numVertsSubmitted;
int numCachedVertsDrawn;
int numUncachedVertsDrawn;
Expand All @@ -122,8 +125,6 @@ struct GPUStatistics {
int numTextureInvalidationsByFramebuffer;
int numTexturesHashed;
int numTextureDataBytesHashed;
int numTextureSwitches;
int numShaderSwitches;
int numTexturesDecoded;
int numFramebufferEvaluations;
int numBlockingReadbacks;
Expand Down
1 change: 1 addition & 0 deletions GPU/GPUCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -931,6 +931,7 @@ void GPUCommon::Execute_BJump(u32 op, u32 diff) {
if (!currentList->bboxResult) {
// bounding box jump.
const u32 target = gstate_c.getRelativeAddress(op & 0x00FFFFFC);
gpuStats.numBBOXJumps++;
if (Memory::IsValidAddress(target)) {
UpdatePC(currentList->pc, target - 4);
currentList->pc = target - 4; // pc will be increased after we return, counteract that
Expand Down
38 changes: 20 additions & 18 deletions GPU/GPUCommonHW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,12 +191,12 @@ const CommonCommandTableEntry commonCommandTable[] = {
{ GE_CMD_ANTIALIASENABLE, FLAG_FLUSHBEFOREONCHANGE },

// Viewport.
{ GE_CMD_OFFSETX, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE },
{ GE_CMD_OFFSETY, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE },
{ GE_CMD_VIEWPORTXSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_VIEWPORTYSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_VIEWPORTXCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_VIEWPORTYCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_OFFSETX, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_CULL_PLANES },
{ GE_CMD_OFFSETY, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_CULL_PLANES },
{ GE_CMD_VIEWPORTXSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULL_PLANES },
{ GE_CMD_VIEWPORTYSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULL_PLANES },
{ GE_CMD_VIEWPORTXCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULL_PLANES },
{ GE_CMD_VIEWPORTYCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULL_PLANES },
{ GE_CMD_VIEWPORTZSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_DEPTHRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_VIEWPORTZCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_DEPTHRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_DEPTHCLAMPENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_RASTER_STATE },
Expand All @@ -206,12 +206,12 @@ const CommonCommandTableEntry commonCommandTable[] = {
{ GE_CMD_MAXZ, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHRANGE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE },

// Region
{ GE_CMD_REGION1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE },
{ GE_CMD_REGION2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE },
{ GE_CMD_REGION1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_CULL_PLANES },
{ GE_CMD_REGION2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_CULL_PLANES },

// Scissor
{ GE_CMD_SCISSOR1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE },
{ GE_CMD_SCISSOR2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE },
{ GE_CMD_SCISSOR1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_CULL_PLANES },
{ GE_CMD_SCISSOR2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_CULL_PLANES },

// Lighting base colors
{ GE_CMD_AMBIENTCOLOR, FLAG_FLUSHBEFOREONCHANGE, DIRTY_AMBIENT },
Expand Down Expand Up @@ -1368,7 +1368,7 @@ void GPUCommonHW::Execute_WorldMtxNum(u32 op, u32 diff) {
if (dst[i] != newVal) {
Flush();
dst[i] = newVal;
gstate_c.Dirty(DIRTY_WORLDMATRIX);
gstate_c.Dirty(DIRTY_WORLDMATRIX | DIRTY_CULL_PLANES);
}
if (++i >= end) {
break;
Expand All @@ -1391,7 +1391,7 @@ void GPUCommonHW::Execute_WorldMtxData(u32 op, u32 diff) {
if (num < 12 && newVal != ((const u32 *)gstate.worldMatrix)[num]) {
Flush();
((u32 *)gstate.worldMatrix)[num] = newVal;
gstate_c.Dirty(DIRTY_WORLDMATRIX);
gstate_c.Dirty(DIRTY_WORLDMATRIX | DIRTY_CULL_PLANES);
}
num++;
gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
Expand Down Expand Up @@ -1421,7 +1421,7 @@ void GPUCommonHW::Execute_ViewMtxNum(u32 op, u32 diff) {
if (dst[i] != newVal) {
Flush();
dst[i] = newVal;
gstate_c.Dirty(DIRTY_VIEWMATRIX);
gstate_c.Dirty(DIRTY_VIEWMATRIX | DIRTY_CULL_PLANES);
}
if (++i >= end) {
break;
Expand All @@ -1444,7 +1444,7 @@ void GPUCommonHW::Execute_ViewMtxData(u32 op, u32 diff) {
if (num < 12 && newVal != ((const u32 *)gstate.viewMatrix)[num]) {
Flush();
((u32 *)gstate.viewMatrix)[num] = newVal;
gstate_c.Dirty(DIRTY_VIEWMATRIX);
gstate_c.Dirty(DIRTY_VIEWMATRIX | DIRTY_CULL_PLANES);
}
num++;
gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
Expand Down Expand Up @@ -1474,7 +1474,7 @@ void GPUCommonHW::Execute_ProjMtxNum(u32 op, u32 diff) {
if (dst[i] != newVal) {
Flush();
dst[i] = newVal;
gstate_c.Dirty(DIRTY_PROJMATRIX);
gstate_c.Dirty(DIRTY_PROJMATRIX | DIRTY_CULL_PLANES);
}
if (++i >= end) {
break;
Expand All @@ -1497,7 +1497,7 @@ void GPUCommonHW::Execute_ProjMtxData(u32 op, u32 diff) {
if (num < 16 && newVal != ((const u32 *)gstate.projMatrix)[num]) {
Flush();
((u32 *)gstate.projMatrix)[num] = newVal;
gstate_c.Dirty(DIRTY_PROJMATRIX);
gstate_c.Dirty(DIRTY_PROJMATRIX | DIRTY_CULL_PLANES);
}
num++;
if (num <= 16)
Expand Down Expand Up @@ -1647,8 +1647,8 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) {
float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f;
return snprintf(buffer, size,
"DL processing time: %0.2f ms, %d drawsync, %d listsync\n"
"Draw calls: %d, flushes %d, clears %d (cached: %d)\n"
"Num Tracked Vertex Arrays: %d\n"
"Draw calls: %d, flushes %d, clears %d, bbox jumps %d (%d updates)\n"
"Cached draws: %d (tracked: %d)\n"
"Vertices: %d cached: %d uncached: %d\n"
"FBOs active: %d (evaluations: %d)\n"
"Textures: %d, dec: %d, invalidated: %d, hashed: %d kB\n"
Expand All @@ -1662,6 +1662,8 @@ size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) {
gpuStats.numDrawCalls,
gpuStats.numFlushes,
gpuStats.numClears,
gpuStats.numBBOXJumps,
gpuStats.numPlaneUpdates,
gpuStats.numCachedDrawCalls,
gpuStats.numTrackedVertexArrays,
gpuStats.numVertsSubmitted,
Expand Down