Skip to content

Commit

Permalink
Merge pull request #16791 from hrydgard/lighting-code-cleanup
Browse files Browse the repository at this point in the history
Lighting code cleanup and optimization
  • Loading branch information
hrydgard authored Jan 11, 2023
2 parents 8d3a328 + ca63bb1 commit 30586be
Show file tree
Hide file tree
Showing 6 changed files with 72 additions and 76 deletions.
18 changes: 18 additions & 0 deletions Common/Data/Convert/SmallDataConvert.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include <cstdint>
#include <cstring>
#include <cmath>

#include "Common/Common.h"
#include "ppsspp_config.h"
Expand Down Expand Up @@ -227,6 +228,23 @@ inline void ExpandFloat24x3ToFloat4(float dest[4], const uint32_t src[3]) {
#endif
}

// Decodes the three packed values in src with ExpandFloat24x3ToFloat4, then writes the
// unit-length xyz vector to dest[0..2] and 0.0 to dest[3].
// Important: a zero-length input is NOT normalized - it is deliberately passed through
// as a zero vector.
// TODO: Reuse code from NormalizedOr001 and optimize
inline void ExpandFloat24x3ToFloat4AndNormalize(float dest[4], const uint32_t src[3]) {
	float expanded[4];
	ExpandFloat24x3ToFloat4(expanded, src);

	const float magnitude = sqrtf(expanded[0] * expanded[0] + expanded[1] * expanded[1] + expanded[2] * expanded[2]);
	// For a zero magnitude the scale stays 0.0, so every component comes out as zero
	// and we never divide by zero.
	const float scale = (magnitude != 0.0f) ? (1.0f / magnitude) : magnitude;

	for (int i = 0; i < 3; i++) {
		dest[i] = expanded[i] * scale;
	}
	dest[3] = 0.0f;
}

// Packs four bytes into one 32-bit word: `a` lands in bits 0-7, `b` in bits 8-15,
// `c` in bits 16-23 and `d` in bits 24-31.
// Each byte is widened to uint32_t before shifting. Without that, integer promotion
// turns the operands into signed int, and `d << 24` shifts into the sign bit whenever
// d >= 0x80.
inline uint32_t BytesToUint32(uint8_t a, uint8_t b, uint8_t c, uint8_t d) {
	return static_cast<uint32_t>(a) |
		(static_cast<uint32_t>(b) << 8) |
		(static_cast<uint32_t>(c) << 16) |
		(static_cast<uint32_t>(d) << 24);
}
Expand Down
15 changes: 4 additions & 11 deletions GPU/Common/ShaderUniforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,7 @@ uint32_t PackLightControlBits() {

u32 computation = (u32)gstate.getLightComputation(i); // 2 bits
u32 type = (u32)gstate.getLightType(i); // 2 bits
if (type == 3) { type = 0; } // Don't want to handle this degenerate case in the shader.
lightControl |= computation << (4 + i * 4);
lightControl |= type << (4 + i * 4 + 2);
}
Expand Down Expand Up @@ -327,20 +328,12 @@ void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms) {
if (dirtyUniforms & (DIRTY_LIGHT0 << i)) {
if (gstate.isDirectionalLight(i)) {
// Prenormalize
float x = getFloat24(gstate.lpos[i * 3 + 0]);
float y = getFloat24(gstate.lpos[i * 3 + 1]);
float z = getFloat24(gstate.lpos[i * 3 + 2]);
float len = sqrtf(x*x + y*y + z*z);
if (len == 0.0f)
len = 1.0f;
else
len = 1.0f / len;
float vec[3] = { x * len, y * len, z * len };
CopyFloat3To4(ub->lpos[i], vec);
ExpandFloat24x3ToFloat4AndNormalize(ub->lpos[i], &gstate.lpos[i * 3]);
} else {
ExpandFloat24x3ToFloat4(ub->lpos[i], &gstate.lpos[i * 3]);
}
ExpandFloat24x3ToFloat4(ub->ldir[i], &gstate.ldir[i * 3]);
// ldir is only used for spotlights. Prenormalize it.
ExpandFloat24x3ToFloat4AndNormalize(ub->ldir[i], &gstate.ldir[i * 3]);
ExpandFloat24x3ToFloat4(ub->latt[i], &gstate.latt[i * 3]);
float lightAngle_spotCoef[2] = { getFloat24(gstate.lcutoff[i]), getFloat24(gstate.lconv[i]) };
CopyFloat2To4(ub->lightAngle_SpotCoef[i], lightAngle_spotCoef);
Expand Down
73 changes: 32 additions & 41 deletions GPU/Common/VertexShaderGenerator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1019,7 +1019,7 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
if (lightUberShader) {
// We generate generic code that can calculate any combination of lights specified
// in u_lightControl. u_lightControl is computed in PackLightControlBits().
p.C(" uint comp; uint type;\n");
p.C(" uint comp; uint type; float attenuation;\n");
if (useIndexing) {
p.C(" for (uint i = 0; i < 4; i++) {\n");
}
Expand All @@ -1040,47 +1040,38 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
}
p.F(" toLight = u_lightpos%s;\n", iStr);
p.C(" if (type != 0x0u) {\n"); // GE_LIGHTTYPE_DIRECTIONAL
p.F(" toLight -= worldpos;\n", iStr);
p.F(" toLight -= worldpos;\n");
p.F(" distance = length(toLight);\n");
p.F(" toLight /= distance;\n");
p.F(" attenuation = clamp(1.0 / dot(u_lightatt%s, vec3(1.0, distance, distance*distance)), 0.0, 1.0);\n", iStr);
p.C(" if (type == 0x01u) {\n"); // GE_LIGHTTYPE_POINT
p.C(" lightScale = attenuation;\n");
p.C(" } else {\n"); // type must be 0x02 - GE_LIGHTTYPE_SPOT
p.F(" angle = dot(u_lightdir%s, toLight);\n", iStr);
p.F(" if (angle >= u_lightangle_spotCoef%s.x) {\n", iStr);
p.F(" lightScale = attenuation * (u_lightangle_spotCoef%s.y <= 0.0 ? 1.0 : pow(angle, u_lightangle_spotCoef%s.y));\n", iStr, iStr, iStr);
p.C(" } else {\n");
p.C(" lightScale = 0.0;\n");
p.C(" }\n");
p.C(" }\n");
p.C(" } else {\n");
p.C(" lightScale = 1.0;\n"); // GE_LIGHTTYPE_DIRECTIONAL
p.C(" }\n");
p.C(" ldot = dot(toLight, worldnormal);\n");
p.C(" if (comp == 0x2u) {\n"); // GE_LIGHTCOMP_ONLYPOWDIFFUSE
p.C(" if (u_matspecular.a <= 0.0) {\n");
p.C(" ldot = 1.0;\n");
p.C(" } else {\n");
p.C(" ldot = pow(max(ldot, 0.0), u_matspecular.a);\n");
p.C(" }\n");
p.C(" }\n");
p.C(" switch (int(type)) {\n"); // Attenuation
p.C(" case 1:\n"); // GE_LIGHTTYPE_POINT
p.F(" lightScale = clamp(1.0 / dot(u_lightatt%s, vec3(1.0, distance, distance*distance)), 0.0, 1.0);\n", iStr);
p.C(" break;\n");
p.C(" case 2:\n"); // GE_LIGHTTYPE_SPOT
p.F(" angle = length(u_lightdir%s) == 0.0 ? 0.0 : dot(normalize(u_lightdir%s), toLight);\n", iStr, iStr);
p.F(" if (angle >= u_lightangle_spotCoef%s.x) {\n", iStr);
p.F(" lightScale = clamp(1.0 / dot(u_lightatt%s, vec3(1.0, distance, distance*distance)), 0.0, 1.0) * (u_lightangle_spotCoef%s.y <= 0.0 ? 1.0 : pow(angle, u_lightangle_spotCoef%s.y));\n", iStr, iStr, iStr);
p.C(" } else {\n");
p.C(" lightScale = 0.0;\n");
p.C(" }\n");
p.C(" break;\n");
p.C(" default:\n"); // GE_LIGHTTYPE_DIRECTIONAL
p.C(" lightScale = 1.0;\n");
p.C(" break;\n");
p.C(" }\n");
p.F(" diffuse = (u_lightdiffuse%s * diffuseColor) * max(ldot, 0.0);\n", iStr);
p.C(" if (comp == 0x1u) {\n"); // do specular
p.C(" ldot = u_matspecular.a > 0.0 ? pow(max(ldot, 0.0), u_matspecular.a) : 1.0;\n");
p.C(" } else if (comp == 0x1u) {\n"); // do specular
p.C(" if (ldot >= 0.0) {\n");
p.C(" ldot = dot(normalize(toLight + vec3(0.0, 0.0, 1.0)), worldnormal);\n");
p.C(" if (u_matspecular.a <= 0.0) {\n");
p.C(" ldot = 1.0;\n");
p.C(" } else {\n");
p.C(" if (u_matspecular.a > 0.0) {\n");
p.C(" ldot = dot(normalize(toLight + vec3(0.0, 0.0, 1.0)), worldnormal);\n");
p.C(" ldot = pow(max(ldot, 0.0), u_matspecular.a);\n");
p.C(" } else {\n");
p.C(" ldot = 1.0;\n");
p.C(" }\n");
p.C(" if (ldot > 0.0)\n");
p.F(" lightSum1 += u_lightspecular%s * specularColor * ldot * lightScale;\n", iStr);
p.F(" lightSum1 += u_lightspecular%s * specularColor * ldot * lightScale;\n", iStr);
p.C(" }\n");
p.C(" }\n");
p.F(" diffuse = (u_lightdiffuse%s * diffuseColor) * max(ldot, 0.0);\n", iStr);
p.F(" lightSum0.rgb += (u_lightambient%s * ambientColor.rgb + diffuse) * lightScale;\n", iStr);
p.C(" }\n");
}
Expand Down Expand Up @@ -1114,10 +1105,10 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
if (poweredDiffuse) {
// pow(0.0, 0.0) may be undefined, but the PSP seems to treat it as 1.0.
// Seen in Tales of the World: Radiant Mythology (#2424.)
p.C(" if (u_matspecular.a <= 0.0) {\n");
p.C(" ldot = 1.0;\n");
p.C(" } else {\n");
p.C(" if (u_matspecular.a > 0.0) {\n");
p.C(" ldot = pow(max(ldot, 0.0), u_matspecular.a);\n");
p.C(" } else {\n");
p.C(" ldot = 1.0;\n");
p.C(" }\n");
}

Expand All @@ -1133,7 +1124,7 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
break;
case GE_LIGHTTYPE_SPOT:
case GE_LIGHTTYPE_UNKNOWN:
p.F(" angle = length(u_lightdir%s) == 0.0 ? 0.0 : dot(normalize(u_lightdir%s), toLight);\n", iStr, iStr);
p.F(" angle = dot(u_lightdir%s, toLight);\n", iStr, iStr);
p.F(" if (angle >= u_lightangle_spotCoef%s.x) {\n", iStr);
p.F(" lightScale = clamp(1.0 / dot(u_lightatt%s, vec3(1.0, distance, distance*distance)), 0.0, 1.0) * (u_lightangle_spotCoef%s.y <= 0.0 ? 1.0 : pow(angle, u_lightangle_spotCoef%s.y));\n", iStr, iStr, iStr);
p.C(" } else {\n");
Expand All @@ -1149,10 +1140,10 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
if (doSpecular) {
p.C(" if (ldot >= 0.0) {\n");
p.C(" ldot = dot(normalize(toLight + vec3(0.0, 0.0, 1.0)), worldnormal);\n");
p.C(" if (u_matspecular.a <= 0.0) {\n");
p.C(" ldot = 1.0;\n");
p.C(" } else {\n");
p.C(" if (u_matspecular.a > 0.0) {\n");
p.C(" ldot = pow(max(ldot, 0.0), u_matspecular.a);\n");
p.C(" } else {\n");
p.C(" ldot = 1.0;\n");
p.C(" }\n");
p.C(" if (ldot > 0.0)\n");
p.F(" lightSum1 += u_lightspecular%s * specularColor * ldot %s;\n", iStr, timesLightScale);
Expand All @@ -1170,10 +1161,10 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
} else {
if (lightUberShader) {
p.C(" bool lmode = (u_lightControl & (0x1u << 0x17u)) != 0x0u;\n");
p.C(" if (lmode) {");
p.C(" if (lmode) {\n");
p.F(" %sv_color0 = lightSum0;\n", compat.vsOutPrefix);
p.F(" %sv_color1 = clamp(lightSum1, 0.0, 1.0);\n", compat.vsOutPrefix);
p.C(" } else {");
p.C(" } else {\n");
p.F(" %sv_color0 = clamp(lightSum0 + vec4(lightSum1, 0.0), 0.0, 1.0);\n", compat.vsOutPrefix);
p.F(" %sv_color1 = splat3(0.0);\n", compat.vsOutPrefix);
p.C(" }");
Expand Down
20 changes: 8 additions & 12 deletions GPU/Directx9/ShaderManagerDX9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,12 @@ void ShaderManagerDX9::VSSetFloat24Uniform3(int creg, const u32 data[3]) {
device_->SetVertexShaderConstantF(creg, f, 1);
}

// Runs `data` through ExpandFloat24x3ToFloat4AndNormalize and uploads the resulting
// four floats to the vertex shader constant starting at register `creg`.
void ShaderManagerDX9::VSSetFloat24Uniform3Normalized(int creg, const u32 data[3]) {
	float normalized[4];
	ExpandFloat24x3ToFloat4AndNormalize(normalized, data);

	// The last argument is a count of 1, matching the single 4-float buffer above.
	const int constantCount = 1;
	device_->SetVertexShaderConstantF(creg, normalized, constantCount);
}

void ShaderManagerDX9::VSSetColorUniform3Alpha(int creg, u32 color, u8 alpha) {
float f[4];
Uint8x3ToFloat4_AlphaUint8(f, color, alpha);
Expand Down Expand Up @@ -495,21 +501,11 @@ void ShaderManagerDX9::VSUpdateUniforms(u64 dirtyUniforms) {
for (int i = 0; i < 4; i++) {
if (dirtyUniforms & (DIRTY_LIGHT0 << i)) {
if (gstate.isDirectionalLight(i)) {
// Prenormalize
float x = getFloat24(gstate.lpos[i * 3 + 0]);
float y = getFloat24(gstate.lpos[i * 3 + 1]);
float z = getFloat24(gstate.lpos[i * 3 + 2]);
float len = sqrtf(x*x + y*y + z*z);
if (len == 0.0f)
len = 1.0f;
else
len = 1.0f / len;
float vec[3] = { x * len, y * len, z * len };
VSSetFloatArray(CONST_VS_LIGHTPOS + i, vec, 3);
VSSetFloat24Uniform3Normalized(CONST_VS_LIGHTPOS + i, &gstate.lpos[i * 3]);
} else {
VSSetFloat24Uniform3(CONST_VS_LIGHTPOS + i, &gstate.lpos[i * 3]);
}
VSSetFloat24Uniform3(CONST_VS_LIGHTDIR + i, &gstate.ldir[i * 3]);
VSSetFloat24Uniform3Normalized(CONST_VS_LIGHTDIR + i, &gstate.ldir[i * 3]);
VSSetFloat24Uniform3(CONST_VS_LIGHTATT + i, &gstate.latt[i * 3]);
float angle_spotCoef[4] = { getFloat24(gstate.lcutoff[i]), getFloat24(gstate.lconv[i]) };
VSSetFloatUniform4(CONST_VS_LIGHTANGLE_SPOTCOEF + i, angle_spotCoef);
Expand Down
1 change: 1 addition & 0 deletions GPU/Directx9/ShaderManagerDX9.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ class ShaderManagerDX9 : public ShaderManagerCommon {
void VSSetFloat(int creg, float value);
void VSSetFloatArray(int creg, const float *value, int count);
void VSSetFloat24Uniform3(int creg, const u32 data[3]);
void VSSetFloat24Uniform3Normalized(int creg, const u32 data[3]);
void VSSetFloatUniform4(int creg, const float data[4]);

void Clear();
Expand Down
21 changes: 9 additions & 12 deletions GPU/GLES/ShaderManagerGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,12 @@ static void SetFloat24Uniform3(GLRenderManager *render, GLint *uniform, const ui
render->SetUniformF(uniform, 3, f);
}

// Runs `data` through ExpandFloat24x3ToFloat4AndNormalize and hands the result to
// render->SetUniformF for `uniform` with a count of 3. The fourth float of the
// scratch buffer is filled in by the helper but not included in that count.
static void SetFloat24Uniform3Normalized(GLRenderManager *render, GLint *uniform, const uint32_t data[3]) {
	float normalized[4];
	ExpandFloat24x3ToFloat4AndNormalize(normalized, data);

	const int componentCount = 3;
	render->SetUniformF(uniform, componentCount, normalized);
}

// Forwards the four floats in `data` unchanged to render->SetUniformF for `uniform`.
static void SetFloatUniform4(GLRenderManager *render, GLint *uniform, float data[4]) {
	const int componentCount = 4;
	render->SetUniformF(uniform, componentCount, data);
}
Expand Down Expand Up @@ -650,21 +656,12 @@ void LinkedShader::UpdateUniforms(const ShaderID &vsid, bool useBufferedRenderin
for (int i = 0; i < 4; i++) {
if (dirty & (DIRTY_LIGHT0 << i)) {
if (gstate.isDirectionalLight(i)) {
// Prenormalize
float x = getFloat24(gstate.lpos[i * 3 + 0]);
float y = getFloat24(gstate.lpos[i * 3 + 1]);
float z = getFloat24(gstate.lpos[i * 3 + 2]);
float len = sqrtf(x*x + y*y + z*z);
if (len == 0.0f)
len = 1.0f;
else
len = 1.0f / len;
float vec[3] = { x * len, y * len, z * len };
render_->SetUniformF(&u_lightpos[i], 3, vec);
// Prenormalize for cheaper calculations in shader
SetFloat24Uniform3Normalized(render_, &u_lightpos[i], &gstate.lpos[i * 3]);
} else {
SetFloat24Uniform3(render_, &u_lightpos[i], &gstate.lpos[i * 3]);
}
if (u_lightdir[i] != -1) SetFloat24Uniform3(render_, &u_lightdir[i], &gstate.ldir[i * 3]);
if (u_lightdir[i] != -1) SetFloat24Uniform3Normalized(render_, &u_lightdir[i], &gstate.ldir[i * 3]);
if (u_lightatt[i] != -1) SetFloat24Uniform3(render_, &u_lightatt[i], &gstate.latt[i * 3]);
if (u_lightangle_spotCoef[i] != -1) {
float lightangle_spotCoef[2] = { getFloat24(gstate.lcutoff[i]), getFloat24(gstate.lconv[i]) };
Expand Down

0 comments on commit 30586be

Please sign in to comment.