From a508f7e3459bf463b88fcd45003d1d0f9c38b2f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 11 Jan 2023 17:45:55 +0100 Subject: [PATCH 1/5] Shave a few instructions from lighting shaders by prenormalizing the light dir used for spotlights --- GPU/Common/ShaderUniforms.cpp | 33 ++++++++++++++++++---------- GPU/Common/VertexShaderGenerator.cpp | 4 ++-- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/GPU/Common/ShaderUniforms.cpp b/GPU/Common/ShaderUniforms.cpp index 923a96c31516..bac4bffab7fd 100644 --- a/GPU/Common/ShaderUniforms.cpp +++ b/GPU/Common/ShaderUniforms.cpp @@ -305,6 +305,25 @@ uint32_t PackLightControlBits() { return lightControl; } +// Note: If length is 0.0, it's gonna be left as 0.0 instead of normalized. +void ExpandFloat24x3ToFloat4AndNormalize(float dest[4], const uint32_t src[3]) { + float temp[4]; + ExpandFloat24x3ToFloat4(temp, src); + // TODO: Reuse code from NormalizedOr001 and optimize + float x = temp[0]; + float y = temp[1]; + float z = temp[2]; + float len = sqrtf(x * x + y * y + z * z); + if (len == 0.0f) + return; + + len = 1.0f / len; + dest[0] = x * len; + dest[1] = y * len; + dest[2] = z * len; + dest[3] = 0.0f; +} + void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms) { // Lighting if (dirtyUniforms & DIRTY_AMBIENT) { @@ -327,20 +346,12 @@ void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms) { if (dirtyUniforms & (DIRTY_LIGHT0 << i)) { if (gstate.isDirectionalLight(i)) { // Prenormalize - float x = getFloat24(gstate.lpos[i * 3 + 0]); - float y = getFloat24(gstate.lpos[i * 3 + 1]); - float z = getFloat24(gstate.lpos[i * 3 + 2]); - float len = sqrtf(x*x + y*y + z*z); - if (len == 0.0f) - len = 1.0f; - else - len = 1.0f / len; - float vec[3] = { x * len, y * len, z * len }; - CopyFloat3To4(ub->lpos[i], vec); + ExpandFloat24x3ToFloat4AndNormalize(ub->lpos[i], &gstate.lpos[i * 3]); } else { ExpandFloat24x3ToFloat4(ub->lpos[i], &gstate.lpos[i * 3]); } - ExpandFloat24x3ToFloat4(ub->ldir[i], &gstate.ldir[i * 3]); + // ldir is only used for spotlights. Prenormalize it. + ExpandFloat24x3ToFloat4AndNormalize(ub->ldir[i], &gstate.ldir[i * 3]); ExpandFloat24x3ToFloat4(ub->latt[i], &gstate.latt[i * 3]); float lightAngle_spotCoef[2] = { getFloat24(gstate.lcutoff[i]), getFloat24(gstate.lconv[i]) }; CopyFloat2To4(ub->lightAngle_SpotCoef[i], lightAngle_spotCoef); diff --git a/GPU/Common/VertexShaderGenerator.cpp b/GPU/Common/VertexShaderGenerator.cpp index f051ec03e0b4..9019cbd94bc1 100644 --- a/GPU/Common/VertexShaderGenerator.cpp +++ b/GPU/Common/VertexShaderGenerator.cpp @@ -1057,7 +1057,7 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag p.F(" lightScale = clamp(1.0 / dot(u_lightatt%s, vec3(1.0, distance, distance*distance)), 0.0, 1.0);\n", iStr); p.C(" break;\n"); p.C(" case 2:\n"); // GE_LIGHTTYPE_SPOT - p.F(" angle = length(u_lightdir%s) == 0.0 ? 0.0 : dot(normalize(u_lightdir%s), toLight);\n", iStr, iStr); + p.F(" angle = dot(u_lightdir%s, toLight);\n", iStr, iStr); p.F(" if (angle >= u_lightangle_spotCoef%s.x) {\n", iStr); p.F(" lightScale = clamp(1.0 / dot(u_lightatt%s, vec3(1.0, distance, distance*distance)), 0.0, 1.0) * (u_lightangle_spotCoef%s.y <= 0.0 ? 1.0 : pow(angle, u_lightangle_spotCoef%s.y));\n", iStr, iStr, iStr); p.C(" } else {\n"); @@ -1133,7 +1133,7 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag break; case GE_LIGHTTYPE_SPOT: case GE_LIGHTTYPE_UNKNOWN: - p.F(" angle = length(u_lightdir%s) == 0.0 ? 0.0 : dot(normalize(u_lightdir%s), toLight);\n", iStr, iStr); + p.F(" angle = dot(u_lightdir%s, toLight);\n", iStr, iStr); p.F(" if (angle >= u_lightangle_spotCoef%s.x) {\n", iStr); p.F(" lightScale = clamp(1.0 / dot(u_lightatt%s, vec3(1.0, distance, distance*distance)), 0.0, 1.0) * (u_lightangle_spotCoef%s.y <= 0.0 ? 1.0 : pow(angle, u_lightangle_spotCoef%s.y));\n", iStr, iStr, iStr); p.C(" } else {\n"); From 4c2a41cc8376bb58f5f127c89adaef6aad9f2cb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 11 Jan 2023 18:07:06 +0100 Subject: [PATCH 2/5] Break out the attenuation term, too --- GPU/Common/ShaderUniforms.cpp | 1 + GPU/Common/VertexShaderGenerator.cpp | 13 +++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/GPU/Common/ShaderUniforms.cpp b/GPU/Common/ShaderUniforms.cpp index bac4bffab7fd..57b68d1f8572 100644 --- a/GPU/Common/ShaderUniforms.cpp +++ b/GPU/Common/ShaderUniforms.cpp @@ -294,6 +294,7 @@ uint32_t PackLightControlBits() { u32 computation = (u32)gstate.getLightComputation(i); // 2 bits u32 type = (u32)gstate.getLightType(i); // 2 bits + if (type == 3) { type = 0; } // Don't want to handle this degenerate case in the shader. lightControl |= computation << (4 + i * 4); lightControl |= type << (4 + i * 4 + 2); } diff --git a/GPU/Common/VertexShaderGenerator.cpp b/GPU/Common/VertexShaderGenerator.cpp index 9019cbd94bc1..95070de2e085 100644 --- a/GPU/Common/VertexShaderGenerator.cpp +++ b/GPU/Common/VertexShaderGenerator.cpp @@ -1019,7 +1019,7 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag if (lightUberShader) { // We generate generic code that can calculate any combination of lights specified // in u_lightControl. u_lightControl is computed in PackLightControlBits(). - p.C(" uint comp; uint type;\n"); + p.C(" uint comp; uint type; float attenuation;\n"); if (useIndexing) { p.C(" for (uint i = 0; i < 4; i++) {\n"); } @@ -1043,6 +1043,7 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag p.F(" toLight -= worldpos;\n", iStr); p.F(" distance = length(toLight);\n"); p.F(" toLight /= distance;\n"); + p.F(" attenuation = clamp(1.0 / dot(u_lightatt%s, vec3(1.0, distance, distance*distance)), 0.0, 1.0);\n", iStr); p.C(" }\n"); p.C(" ldot = dot(toLight, worldnormal);\n"); p.C(" if (comp == 0x2u) {\n"); // GE_LIGHTCOMP_ONLYPOWDIFFUSE @@ -1054,12 +1055,12 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag p.C(" }\n"); p.C(" switch (int(type)) {\n"); // Attenuation p.C(" case 1:\n"); // GE_LIGHTTYPE_POINT - p.F(" lightScale = clamp(1.0 / dot(u_lightatt%s, vec3(1.0, distance, distance*distance)), 0.0, 1.0);\n", iStr); + p.C(" lightScale = attenuation;\n"); p.C(" break;\n"); p.C(" case 2:\n"); // GE_LIGHTTYPE_SPOT - p.F(" angle = dot(u_lightdir%s, toLight);\n", iStr, iStr); + p.F(" angle = dot(u_lightdir%s, toLight);\n", iStr); p.F(" if (angle >= u_lightangle_spotCoef%s.x) {\n", iStr); - p.F(" lightScale = clamp(1.0 / dot(u_lightatt%s, vec3(1.0, distance, distance*distance)), 0.0, 1.0) * (u_lightangle_spotCoef%s.y <= 0.0 ? 1.0 : pow(angle, u_lightangle_spotCoef%s.y));\n", iStr, iStr, iStr); + p.F(" lightScale = attenuation * (u_lightangle_spotCoef%s.y <= 0.0 ? 1.0 : pow(angle, u_lightangle_spotCoef%s.y));\n", iStr, iStr, iStr); p.C(" } else {\n"); p.C(" lightScale = 0.0;\n"); p.C(" }\n"); @@ -1170,10 +1171,10 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag } else { if (lightUberShader) { p.C(" bool lmode = (u_lightControl & (0x1u << 0x17u)) != 0x0u;\n"); - p.C(" if (lmode) {"); + p.C(" if (lmode) {\n"); p.F(" %sv_color0 = lightSum0;\n", compat.vsOutPrefix); p.F(" %sv_color1 = clamp(lightSum1, 0.0, 1.0);\n", compat.vsOutPrefix); - p.C(" } else {"); + p.C(" } else {\n"); p.F(" %sv_color0 = clamp(lightSum0 + vec4(lightSum1, 0.0), 0.0, 1.0);\n", compat.vsOutPrefix); p.F(" %sv_color1 = splat3(0.0);\n", compat.vsOutPrefix); p.C(" }"); From 1c8e456c97ed6db11332b05c397656a39b697a75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 11 Jan 2023 18:21:07 +0100 Subject: [PATCH 3/5] Also apply this to OpenGL and D3D9 --- Common/Data/Convert/SmallDataConvert.h | 18 ++++++++++++++++++ GPU/Common/ShaderUniforms.cpp | 19 ------------------- GPU/Directx9/ShaderManagerDX9.cpp | 20 ++++++++------------ GPU/Directx9/ShaderManagerDX9.h | 1 + GPU/GLES/ShaderManagerGLES.cpp | 21 +++++++++------------ 5 files changed, 36 insertions(+), 43 deletions(-) diff --git a/Common/Data/Convert/SmallDataConvert.h b/Common/Data/Convert/SmallDataConvert.h index 03c60fa88e38..90a9f1f96357 100644 --- a/Common/Data/Convert/SmallDataConvert.h +++ b/Common/Data/Convert/SmallDataConvert.h @@ -2,6 +2,7 @@ #include #include +#include #include "Common/Common.h" #include "ppsspp_config.h" @@ -227,6 +228,23 @@ inline void ExpandFloat24x3ToFloat4(float dest[4], const uint32_t src[3]) { #endif } +// Note: If length is 0.0, it's gonna be left as 0.0 instead of trying to normalize. This is important. +inline void ExpandFloat24x3ToFloat4AndNormalize(float dest[4], const uint32_t src[3]) { + float temp[4]; + ExpandFloat24x3ToFloat4(temp, src); + // TODO: Reuse code from NormalizedOr001 and optimize + float x = temp[0]; + float y = temp[1]; + float z = temp[2]; + float len = sqrtf(x * x + y * y + z * z); + if (len != 0.0f) + len = 1.0f / len; + dest[0] = x * len; + dest[1] = y * len; + dest[2] = z * len; + dest[3] = 0.0f; +} + inline uint32_t BytesToUint32(uint8_t a, uint8_t b, uint8_t c, uint8_t d) { return (a) | (b << 8) | (c << 16) | (d << 24); } diff --git a/GPU/Common/ShaderUniforms.cpp b/GPU/Common/ShaderUniforms.cpp index 57b68d1f8572..2d5211fc27d4 100644 --- a/GPU/Common/ShaderUniforms.cpp +++ b/GPU/Common/ShaderUniforms.cpp @@ -306,25 +306,6 @@ uint32_t PackLightControlBits() { return lightControl; } -// Note: If length is 0.0, it's gonna be left as 0.0 instead of normalized. -void ExpandFloat24x3ToFloat4AndNormalize(float dest[4], const uint32_t src[3]) { - float temp[4]; - ExpandFloat24x3ToFloat4(temp, src); - // TODO: Reuse code from NormalizedOr001 and optimize - float x = temp[0]; - float y = temp[1]; - float z = temp[2]; - float len = sqrtf(x * x + y * y + z * z); - if (len == 0.0f) - return; - - len = 1.0f / len; - dest[0] = x * len; - dest[1] = y * len; - dest[2] = z * len; - dest[3] = 0.0f; -} - void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms) { // Lighting if (dirtyUniforms & DIRTY_AMBIENT) { diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp index 64dbb4454407..7f6039301ac9 100644 --- a/GPU/Directx9/ShaderManagerDX9.cpp +++ b/GPU/Directx9/ShaderManagerDX9.cpp @@ -207,6 +207,12 @@ void ShaderManagerDX9::VSSetFloat24Uniform3(int creg, const u32 data[3]) { device_->SetVertexShaderConstantF(creg, f, 1); } +void ShaderManagerDX9::VSSetFloat24Uniform3Normalized(int creg, const u32 data[3]) { + float f[4]; + ExpandFloat24x3ToFloat4AndNormalize(f, data); + device_->SetVertexShaderConstantF(creg, f, 1); +} + void ShaderManagerDX9::VSSetColorUniform3Alpha(int creg, u32 color, u8 alpha) { float f[4]; Uint8x3ToFloat4_AlphaUint8(f, color, alpha); @@ -495,21 +501,11 @@ void ShaderManagerDX9::VSUpdateUniforms(u64 dirtyUniforms) { for (int i = 0; i < 4; i++) { if (dirtyUniforms & (DIRTY_LIGHT0 << i)) { if (gstate.isDirectionalLight(i)) { - // Prenormalize - float x = getFloat24(gstate.lpos[i * 3 + 0]); - float y = getFloat24(gstate.lpos[i * 3 + 1]); - float z = getFloat24(gstate.lpos[i * 3 + 2]); - float len = sqrtf(x*x + y*y + z*z); - if (len == 0.0f) - len = 1.0f; - else - len = 1.0f / len; - float vec[3] = { x * len, y * len, z * len }; - VSSetFloatArray(CONST_VS_LIGHTPOS + i, vec, 3); + VSSetFloat24Uniform3Normalized(CONST_VS_LIGHTPOS + i, &gstate.lpos[i * 3]); } else { VSSetFloat24Uniform3(CONST_VS_LIGHTPOS + i, &gstate.lpos[i * 3]); } - VSSetFloat24Uniform3(CONST_VS_LIGHTDIR + i, &gstate.ldir[i * 3]); + VSSetFloat24Uniform3Normalized(CONST_VS_LIGHTDIR + i, &gstate.ldir[i * 3]); VSSetFloat24Uniform3(CONST_VS_LIGHTATT + i, &gstate.latt[i * 3]); float angle_spotCoef[4] = { getFloat24(gstate.lcutoff[i]), getFloat24(gstate.lconv[i]) }; VSSetFloatUniform4(CONST_VS_LIGHTANGLE_SPOTCOEF + i, angle_spotCoef); diff --git a/GPU/Directx9/ShaderManagerDX9.h b/GPU/Directx9/ShaderManagerDX9.h index 372fbf9659af..299eb677d2f3 100644 --- a/GPU/Directx9/ShaderManagerDX9.h +++ b/GPU/Directx9/ShaderManagerDX9.h @@ -104,6 +104,7 @@ class ShaderManagerDX9 : public ShaderManagerCommon { void VSSetFloat(int creg, float value); void VSSetFloatArray(int creg, const float *value, int count); void VSSetFloat24Uniform3(int creg, const u32 data[3]); + void VSSetFloat24Uniform3Normalized(int creg, const u32 data[3]); void VSSetFloatUniform4(int creg, const float data[4]); void Clear(); diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index 479d4f5669a8..c7a078582cd0 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -293,6 +293,12 @@ static void SetFloat24Uniform3(GLRenderManager *render, GLint *uniform, const ui render->SetUniformF(uniform, 3, f); } +static void SetFloat24Uniform3Normalized(GLRenderManager *render, GLint *uniform, const uint32_t data[3]) { + float f[4]; + ExpandFloat24x3ToFloat4AndNormalize(f, data); + render->SetUniformF(uniform, 3, f); +} + static void SetFloatUniform4(GLRenderManager *render, GLint *uniform, float data[4]) { render->SetUniformF(uniform, 4, data); } @@ -650,21 +656,12 @@ void LinkedShader::UpdateUniforms(const ShaderID &vsid, bool useBufferedRenderin for (int i = 0; i < 4; i++) { if (dirty & (DIRTY_LIGHT0 << i)) { if (gstate.isDirectionalLight(i)) { - // Prenormalize - float x = getFloat24(gstate.lpos[i * 3 + 0]); - float y = getFloat24(gstate.lpos[i * 3 + 1]); - float z = getFloat24(gstate.lpos[i * 3 + 2]); - float len = sqrtf(x*x + y*y + z*z); - if (len == 0.0f) - len = 1.0f; - else - len = 1.0f / len; - float vec[3] = { x * len, y * len, z * len }; - render_->SetUniformF(&u_lightpos[i], 3, vec); + // Prenormalize for cheaper calculations in shader + SetFloat24Uniform3Normalized(render_, &u_lightpos[i], &gstate.lpos[i * 3]); } else { SetFloat24Uniform3(render_, &u_lightpos[i], &gstate.lpos[i * 3]); } - if (u_lightdir[i] != -1) SetFloat24Uniform3(render_, &u_lightdir[i], &gstate.ldir[i * 3]); + if (u_lightdir[i] != -1) SetFloat24Uniform3Normalized(render_, &u_lightdir[i], &gstate.ldir[i * 3]); if (u_lightatt[i] != -1) SetFloat24Uniform3(render_, &u_lightatt[i], &gstate.latt[i * 3]); if (u_lightangle_spotCoef[i] != -1) { float lightangle_spotCoef[2] = { getFloat24(gstate.lcutoff[i]), getFloat24(gstate.lconv[i]) }; From 965007d65bded4b6dff35a17f3204c7028c0d1db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 11 Jan 2023 18:54:51 +0100 Subject: [PATCH 4/5] Tighten up the lighting shader code a bit, hopefully eliminating a branch or two. --- GPU/Common/VertexShaderGenerator.cpp | 52 +++++++++++++--------------- 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/GPU/Common/VertexShaderGenerator.cpp b/GPU/Common/VertexShaderGenerator.cpp index 95070de2e085..1e99e0834c9a 100644 --- a/GPU/Common/VertexShaderGenerator.cpp +++ b/GPU/Common/VertexShaderGenerator.cpp @@ -1040,43 +1040,39 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag } p.F(" toLight = u_lightpos%s;\n", iStr); p.C(" if (type != 0x0u) {\n"); // GE_LIGHTTYPE_DIRECTIONAL - p.F(" toLight -= worldpos;\n", iStr); + p.F(" toLight -= worldpos;\n"); p.F(" distance = length(toLight);\n"); p.F(" toLight /= distance;\n"); p.F(" attenuation = clamp(1.0 / dot(u_lightatt%s, vec3(1.0, distance, distance*distance)), 0.0, 1.0);\n", iStr); + p.C(" if (type == 0x01u) {\n"); // GE_LIGHTTYPE_POINT + p.C(" lightScale = attenuation;\n"); + p.C(" } else {\n"); // type must be 0x02 - GE_LIGHTTYPE_SPOT + p.F(" angle = dot(u_lightdir%s, toLight);\n", iStr); + p.F(" if (angle >= u_lightangle_spotCoef%s.x) {\n", iStr); + p.F(" lightScale = attenuation * (u_lightangle_spotCoef%s.y <= 0.0 ? 1.0 : pow(angle, u_lightangle_spotCoef%s.y));\n", iStr, iStr, iStr); + p.C(" } else {\n"); + p.C(" lightScale = 0.0;\n"); + p.C(" }\n"); + p.C(" }\n"); + p.C(" } else {\n"); + p.C(" lightScale = 1.0;\n"); // GE_LIGHTTYPE_DIRECTIONAL p.C(" }\n"); p.C(" ldot = dot(toLight, worldnormal);\n"); p.C(" if (comp == 0x2u) {\n"); // GE_LIGHTCOMP_ONLYPOWDIFFUSE - p.C(" if (u_matspecular.a <= 0.0) {\n"); - p.C(" ldot = 1.0;\n"); - p.C(" } else {\n"); + p.C(" if (u_matspecular.a > 0.0) {\n"); p.C(" ldot = pow(max(ldot, 0.0), u_matspecular.a);\n"); - p.C(" }\n"); - p.C(" }\n"); - p.C(" switch (int(type)) {\n"); // Attenuation - p.C(" case 1:\n"); // GE_LIGHTTYPE_POINT - p.C(" lightScale = attenuation;\n"); - p.C(" break;\n"); - p.C(" case 2:\n"); // GE_LIGHTTYPE_SPOT - p.F(" angle = dot(u_lightdir%s, toLight);\n", iStr); - p.F(" if (angle >= u_lightangle_spotCoef%s.x) {\n", iStr); - p.F(" lightScale = attenuation * (u_lightangle_spotCoef%s.y <= 0.0 ? 1.0 : pow(angle, u_lightangle_spotCoef%s.y));\n", iStr, iStr, iStr); p.C(" } else {\n"); - p.C(" lightScale = 0.0;\n"); + p.C(" ldot = 1.0;\n"); p.C(" }\n"); - p.C(" break;\n"); - p.C(" default:\n"); // GE_LIGHTTYPE_DIRECTIONAL - p.C(" lightScale = 1.0;\n"); - p.C(" break;\n"); p.C(" }\n"); p.F(" diffuse = (u_lightdiffuse%s * diffuseColor) * max(ldot, 0.0);\n", iStr); p.C(" if (comp == 0x1u) {\n"); // do specular p.C(" if (ldot >= 0.0) {\n"); p.C(" ldot = dot(normalize(toLight + vec3(0.0, 0.0, 1.0)), worldnormal);\n"); - p.C(" if (u_matspecular.a <= 0.0) {\n"); - p.C(" ldot = 1.0;\n"); - p.C(" } else {\n"); + p.C(" if (u_matspecular.a > 0.0) {\n"); p.C(" ldot = pow(max(ldot, 0.0), u_matspecular.a);\n"); + p.C(" } else {\n"); + p.C(" ldot = 1.0;\n"); p.C(" }\n"); p.C(" if (ldot > 0.0)\n"); p.F(" lightSum1 += u_lightspecular%s * specularColor * ldot * lightScale;\n", iStr); @@ -1115,10 +1111,10 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag if (poweredDiffuse) { // pow(0.0, 0.0) may be undefined, but the PSP seems to treat it as 1.0. // Seen in Tales of the World: Radiant Mythology (#2424.) - p.C(" if (u_matspecular.a <= 0.0) {\n"); - p.C(" ldot = 1.0;\n"); - p.C(" } else {\n"); + p.C(" if (u_matspecular.a > 0.0) {\n"); p.C(" ldot = pow(max(ldot, 0.0), u_matspecular.a);\n"); + p.C(" } else {\n"); + p.C(" ldot = 1.0;\n"); p.C(" }\n"); } @@ -1150,10 +1146,10 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag if (doSpecular) { p.C(" if (ldot >= 0.0) {\n"); p.C(" ldot = dot(normalize(toLight + vec3(0.0, 0.0, 1.0)), worldnormal);\n"); - p.C(" if (u_matspecular.a <= 0.0) {\n"); - p.C(" ldot = 1.0;\n"); - p.C(" } else {\n"); + p.C(" if (u_matspecular.a > 0.0) {\n"); p.C(" ldot = pow(max(ldot, 0.0), u_matspecular.a);\n"); + p.C(" } else {\n"); + p.C(" ldot = 1.0;\n"); p.C(" }\n"); p.C(" if (ldot > 0.0)\n"); p.F(" lightSum1 += u_lightspecular%s * specularColor * ldot %s;\n", iStr, timesLightScale); From ca63bb1e5b8301670f79153130ea2f20ff81e5c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 11 Jan 2023 19:34:58 +0100 Subject: [PATCH 5/5] Even more optimization/cleanup. Don't need to check ldot that we already know is >= 0.0. --- GPU/Common/VertexShaderGenerator.cpp | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/GPU/Common/VertexShaderGenerator.cpp b/GPU/Common/VertexShaderGenerator.cpp index 1e99e0834c9a..8d86c472e8ab 100644 --- a/GPU/Common/VertexShaderGenerator.cpp +++ b/GPU/Common/VertexShaderGenerator.cpp @@ -1059,25 +1059,19 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag p.C(" }\n"); p.C(" ldot = dot(toLight, worldnormal);\n"); p.C(" if (comp == 0x2u) {\n"); // GE_LIGHTCOMP_ONLYPOWDIFFUSE - p.C(" if (u_matspecular.a > 0.0) {\n"); - p.C(" ldot = pow(max(ldot, 0.0), u_matspecular.a);\n"); - p.C(" } else {\n"); - p.C(" ldot = 1.0;\n"); - p.C(" }\n"); - p.C(" }\n"); - p.F(" diffuse = (u_lightdiffuse%s * diffuseColor) * max(ldot, 0.0);\n", iStr); - p.C(" if (comp == 0x1u) {\n"); // do specular + p.C(" ldot = u_matspecular.a > 0.0 ? pow(max(ldot, 0.0), u_matspecular.a) : 1.0;\n"); + p.C(" } else if (comp == 0x1u) {\n"); // do specular p.C(" if (ldot >= 0.0) {\n"); - p.C(" ldot = dot(normalize(toLight + vec3(0.0, 0.0, 1.0)), worldnormal);\n"); p.C(" if (u_matspecular.a > 0.0) {\n"); + p.C(" ldot = dot(normalize(toLight + vec3(0.0, 0.0, 1.0)), worldnormal);\n"); p.C(" ldot = pow(max(ldot, 0.0), u_matspecular.a);\n"); p.C(" } else {\n"); p.C(" ldot = 1.0;\n"); p.C(" }\n"); - p.C(" if (ldot > 0.0)\n"); - p.F(" lightSum1 += u_lightspecular%s * specularColor * ldot * lightScale;\n", iStr); + p.F(" lightSum1 += u_lightspecular%s * specularColor * ldot * lightScale;\n", iStr); p.C(" }\n"); p.C(" }\n"); + p.F(" diffuse = (u_lightdiffuse%s * diffuseColor) * max(ldot, 0.0);\n", iStr); p.F(" lightSum0.rgb += (u_lightambient%s * ambientColor.rgb + diffuse) * lightScale;\n", iStr); p.C(" }\n"); }