summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jelle Raaijmakers <jelle@gmta.nl> 2023-02-01 23:06:48 +0100
committer: Andreas Kling <kling@serenityos.org> 2023-02-02 14:38:26 +0100
commit: 62f4486190d3ffdcd562c226e7cb9c2343242a70 (patch)
tree: 24936328affc35fd2a7d372ee1a380a5e0cbba6d
parent: dcf33f9b8f0e0a84954acc85330f0634a6e62efb (diff)
download: serenity-62f4486190d3ffdcd562c226e7cb9c2343242a70.zip
LibSoftGPU: Only enable texture stages if required
Copying over every texel (4x`f32x4`) for every texture unit is relatively expensive. By checking if we even need to remember these texel values, we reduce the time spent in `rasterize_triangle` by around 2% as measured in Quake III.
-rw-r--r-- Userland/Libraries/LibSoftGPU/Device.cpp 11
-rw-r--r-- Userland/Libraries/LibSoftGPU/Device.h 1
2 files changed, 11 insertions, 1 deletions
diff --git a/Userland/Libraries/LibSoftGPU/Device.cpp b/Userland/Libraries/LibSoftGPU/Device.cpp
index 9e667309c8..ece3abebc1 100644
--- a/Userland/Libraries/LibSoftGPU/Device.cpp
+++ b/Userland/Libraries/LibSoftGPU/Device.cpp
@@ -1162,8 +1162,9 @@ ALWAYS_INLINE void Device::shade_fragments(PixelQuad& quad)
// OpenGL 2.0 ¶ 3.5.1 states (in a roundabout way) that texture coordinates must be divided by Q
auto homogeneous_texture_coordinate = quad.get_input_vector4(SHADER_INPUT_FIRST_TEXCOORD + i * 4);
auto texel = sampler.sample_2d(homogeneous_texture_coordinate.xy() / homogeneous_texture_coordinate.w());
- texture_stage_texel[i] = texel;
INCREASE_STATISTICS_COUNTER(g_num_sampler_calls, 1);
+ if (m_samplers_need_texture_staging)
+ texture_stage_texel[i] = texel;
// FIXME: implement support for GL_ALPHA, GL_LUMINANCE, GL_LUMINANCE_ALPHA, GL_INTENSITY and GL_RGB internal formats
auto& fixed_function_env = sampler.config().fixed_function_texture_environment;
@@ -1584,6 +1585,14 @@ void Device::set_sampler_config(unsigned sampler, GPU::SamplerConfig const& conf
VERIFY(config.bound_image.is_null() || config.bound_image->ownership_token() == this);
m_samplers[sampler].set_config(config);
+
+ m_samplers_need_texture_staging = any_of(m_samplers, [](auto const& sampler) {
+ auto const& fixed_function_env = sampler.config().fixed_function_texture_environment;
+ if (fixed_function_env.env_mode != GPU::TextureEnvMode::Combine)
+ return false;
+ return any_of(fixed_function_env.alpha_source, [](auto texture_source) { return texture_source == GPU::TextureSource::TextureStage; })
+ || any_of(fixed_function_env.rgb_source, [](auto texture_source) { return texture_source == GPU::TextureSource::TextureStage; });
+ });
}
void Device::set_light_state(unsigned int light_id, GPU::Light const& light)
diff --git a/Userland/Libraries/LibSoftGPU/Device.h b/Userland/Libraries/LibSoftGPU/Device.h
index fdbfb297f7..ced6abac16 100644
--- a/Userland/Libraries/LibSoftGPU/Device.h
+++ b/Userland/Libraries/LibSoftGPU/Device.h
@@ -111,6 +111,7 @@ private:
Vector<Triangle> m_processed_triangles;
Vector<GPU::Vertex> m_clipped_vertices;
Array<Sampler, GPU::NUM_TEXTURE_UNITS> m_samplers;
+ bool m_samplers_need_texture_staging { false };
Array<GPU::Light, NUM_LIGHTS> m_lights;
Array<GPU::Material, 2u> m_materials;
GPU::RasterPosition m_raster_position;