diff options
author | Jelle Raaijmakers <jelle@gmta.nl> | 2023-02-01 21:18:53 +0100 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2023-02-02 14:38:26 +0100 |
commit | 69b94e4235c1869ea3963144a8898b87d61c0b8e (patch) | |
tree | 487560b9d2c15e8b9d2bb5b7c8f86e2694c77ad3 | |
parent | f0f9d8f1e08689cb0d902bbbe79873803b46c0c2 (diff) | |
download | serenity-69b94e4235c1869ea3963144a8898b87d61c0b8e.zip |
LibSoftGPU: Make blending simpler and more efficient
Previously, we would precalculate "alpha blend factors" on every
configuration update and then calculate the source and destination
blending factors in one go using all these factors. The idea here was
probably that we would get better performance by avoiding branching.
However, by measuring blending performance in Quake III, it seems that
this simpler version that only calculates the required factors reduces
the CPU time spent in `rasterize_triangle` by 3%.
As a bonus, `GL_SRC_ALPHA_SATURATE` is now also implemented.
-rw-r--r-- | Userland/Libraries/LibSoftGPU/AlphaBlendFactors.h | 27 | ||||
-rw-r--r-- | Userland/Libraries/LibSoftGPU/Device.cpp | 155 | ||||
-rw-r--r-- | Userland/Libraries/LibSoftGPU/Device.h | 5 | ||||
-rw-r--r-- | Userland/Libraries/LibSoftGPU/SIMD.h | 6 |
4 files changed, 63 insertions, 130 deletions
diff --git a/Userland/Libraries/LibSoftGPU/AlphaBlendFactors.h b/Userland/Libraries/LibSoftGPU/AlphaBlendFactors.h deleted file mode 100644 index 623f3db33b..0000000000 --- a/Userland/Libraries/LibSoftGPU/AlphaBlendFactors.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org> - * - * SPDX-License-Identifier: BSD-2-Clause - */ - -#pragma once - -#include <LibGfx/Vector4.h> - -namespace SoftGPU { - -struct AlphaBlendFactors final { - FloatVector4 src_constant {}; - float src_factor_src_alpha = 0; - float src_factor_dst_alpha = 0; - float src_factor_src_color = 0; - float src_factor_dst_color = 0; - - FloatVector4 dst_constant {}; - float dst_factor_src_alpha = 0; - float dst_factor_dst_alpha = 0; - float dst_factor_src_color = 0; - float dst_factor_dst_color = 0; -}; - -} diff --git a/Userland/Libraries/LibSoftGPU/Device.cpp b/Userland/Libraries/LibSoftGPU/Device.cpp index 0ec2d2b54d..9e667309c8 100644 --- a/Userland/Libraries/LibSoftGPU/Device.cpp +++ b/Userland/Libraries/LibSoftGPU/Device.cpp @@ -1,7 +1,7 @@ /* * Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org> * Copyright (c) 2021, Jesse Buhagiar <jooster669@gmail.com> - * Copyright (c) 2022, Jelle Raaijmakers <jelle@gmta.nl> + * Copyright (c) 2022-2023, Jelle Raaijmakers <jelle@gmta.nl> * * SPDX-License-Identifier: BSD-2-Clause */ @@ -103,89 +103,6 @@ static Vector4<f32x4> to_vec4(u32x4 bgra) }; } -void Device::setup_blend_factors() -{ - m_alpha_blend_factors = {}; - - switch (m_options.blend_source_factor) { - case GPU::BlendFactor::Zero: - break; - case GPU::BlendFactor::One: - m_alpha_blend_factors.src_constant = { 1.0f, 1.0f, 1.0f, 1.0f }; - break; - case GPU::BlendFactor::SrcColor: - m_alpha_blend_factors.src_factor_src_color = 1; - break; - case GPU::BlendFactor::OneMinusSrcColor: - m_alpha_blend_factors.src_constant = { 1.0f, 1.0f, 1.0f, 1.0f }; - m_alpha_blend_factors.src_factor_src_color = -1; - break; - case 
GPU::BlendFactor::SrcAlpha: - m_alpha_blend_factors.src_factor_src_alpha = 1; - break; - case GPU::BlendFactor::OneMinusSrcAlpha: - m_alpha_blend_factors.src_constant = { 1.0f, 1.0f, 1.0f, 1.0f }; - m_alpha_blend_factors.src_factor_src_alpha = -1; - break; - case GPU::BlendFactor::DstAlpha: - m_alpha_blend_factors.src_factor_dst_alpha = 1; - break; - case GPU::BlendFactor::OneMinusDstAlpha: - m_alpha_blend_factors.src_constant = { 1.0f, 1.0f, 1.0f, 1.0f }; - m_alpha_blend_factors.src_factor_dst_alpha = -1; - break; - case GPU::BlendFactor::DstColor: - m_alpha_blend_factors.src_factor_dst_color = 1; - break; - case GPU::BlendFactor::OneMinusDstColor: - m_alpha_blend_factors.src_constant = { 1.0f, 1.0f, 1.0f, 1.0f }; - m_alpha_blend_factors.src_factor_dst_color = -1; - break; - case GPU::BlendFactor::SrcAlphaSaturate: - default: - VERIFY_NOT_REACHED(); - } - - switch (m_options.blend_destination_factor) { - case GPU::BlendFactor::Zero: - break; - case GPU::BlendFactor::One: - m_alpha_blend_factors.dst_constant = { 1.0f, 1.0f, 1.0f, 1.0f }; - break; - case GPU::BlendFactor::SrcColor: - m_alpha_blend_factors.dst_factor_src_color = 1; - break; - case GPU::BlendFactor::OneMinusSrcColor: - m_alpha_blend_factors.dst_constant = { 1.0f, 1.0f, 1.0f, 1.0f }; - m_alpha_blend_factors.dst_factor_src_color = -1; - break; - case GPU::BlendFactor::SrcAlpha: - m_alpha_blend_factors.dst_factor_src_alpha = 1; - break; - case GPU::BlendFactor::OneMinusSrcAlpha: - m_alpha_blend_factors.dst_constant = { 1.0f, 1.0f, 1.0f, 1.0f }; - m_alpha_blend_factors.dst_factor_src_alpha = -1; - break; - case GPU::BlendFactor::DstAlpha: - m_alpha_blend_factors.dst_factor_dst_alpha = 1; - break; - case GPU::BlendFactor::OneMinusDstAlpha: - m_alpha_blend_factors.dst_constant = { 1.0f, 1.0f, 1.0f, 1.0f }; - m_alpha_blend_factors.dst_factor_dst_alpha = -1; - break; - case GPU::BlendFactor::DstColor: - m_alpha_blend_factors.dst_factor_dst_color = 1; - break; - case GPU::BlendFactor::OneMinusDstColor: - 
m_alpha_blend_factors.dst_constant = { 1.0f, 1.0f, 1.0f, 1.0f }; - m_alpha_blend_factors.dst_factor_dst_color = -1; - break; - case GPU::BlendFactor::SrcAlphaSaturate: - default: - VERIFY_NOT_REACHED(); - } -} - ALWAYS_INLINE static void test_alpha(PixelQuad& quad, GPU::AlphaTestFunction alpha_test_function, f32x4 const& reference_value) { auto const alpha = quad.get_output_float(SHADER_OUTPUT_FIRST_COLOR + 3); @@ -218,6 +135,44 @@ ALWAYS_INLINE static void test_alpha(PixelQuad& quad, GPU::AlphaTestFunction alp } } +ALWAYS_INLINE static bool is_blend_factor_constant(GPU::BlendFactor blend_factor) +{ + return (blend_factor == GPU::BlendFactor::One || blend_factor == GPU::BlendFactor::Zero); +} + +// OpenGL 1.5 § 4.1.8, table 4.1 +ALWAYS_INLINE static Vector4<f32x4> get_blend_factor(GPU::BlendFactor blend_factor, Vector4<f32x4> const& source_color, Vector4<f32x4> const& destination_color) +{ + switch (blend_factor) { + case GPU::BlendFactor::DstAlpha: + return to_vec4(destination_color.w()); + case GPU::BlendFactor::DstColor: + return destination_color; + case GPU::BlendFactor::One: + return to_vec4(expand4(1.f)); + case GPU::BlendFactor::OneMinusDstAlpha: + return to_vec4(1.f - destination_color.w()); + case GPU::BlendFactor::OneMinusDstColor: + return to_vec4(expand4(1.f)) - destination_color; + case GPU::BlendFactor::OneMinusSrcAlpha: + return to_vec4(1.f - source_color.w()); + case GPU::BlendFactor::OneMinusSrcColor: + return to_vec4(expand4(1.f)) - source_color; + case GPU::BlendFactor::SrcAlpha: + return to_vec4(source_color.w()); + case GPU::BlendFactor::SrcAlphaSaturate: { + auto saturated = min(source_color.w(), 1.f - destination_color.w()); + return { saturated, saturated, saturated, expand4(1.f) }; + } + case GPU::BlendFactor::SrcColor: + return source_color; + case GPU::BlendFactor::Zero: + return to_vec4(expand4(0.f)); + default: + VERIFY_NOT_REACHED(); + } +} + template<typename CB1, typename CB2, typename CB3> ALWAYS_INLINE void 
Device::rasterize(Gfx::IntRect& render_bounds, CB1 set_coverage_mask, CB2 set_quad_depth, CB3 set_quad_attributes) { @@ -284,6 +239,18 @@ ALWAYS_INLINE void Device::rasterize(Gfx::IntRect& render_bounds, CB1 set_covera auto const qy0 = render_bounds_top & ~1; auto const qy1 = render_bounds_bottom & ~1; + // Blend factors + Vector4<f32x4> src_factor; + Vector4<f32x4> dst_factor; + auto const src_factor_is_constant = is_blend_factor_constant(m_options.blend_source_factor); + auto const dst_factor_is_constant = is_blend_factor_constant(m_options.blend_destination_factor); + if (m_options.enable_blending) { + if (src_factor_is_constant) + src_factor = get_blend_factor(m_options.blend_source_factor, {}, {}); + if (dst_factor_is_constant) + dst_factor = get_blend_factor(m_options.blend_destination_factor, {}, {}); + } + // Rasterize all quads // FIXME: this could be embarrassingly parallel for (int qy = qy0; qy <= qy1; qy += 2) { @@ -474,19 +441,12 @@ ALWAYS_INLINE void Device::rasterize(Gfx::IntRect& render_bounds, CB1 set_covera // Blend color values from pixel_staging into color_buffer auto const& src = out_color; - auto dst = to_vec4(dst_u32); + auto const dst = to_vec4(dst_u32); - auto src_factor = expand4(m_alpha_blend_factors.src_constant) - + src * m_alpha_blend_factors.src_factor_src_color - + Vector4<f32x4> { src.w(), src.w(), src.w(), src.w() } * m_alpha_blend_factors.src_factor_src_alpha - + dst * m_alpha_blend_factors.src_factor_dst_color - + Vector4<f32x4> { dst.w(), dst.w(), dst.w(), dst.w() } * m_alpha_blend_factors.src_factor_dst_alpha; - - auto dst_factor = expand4(m_alpha_blend_factors.dst_constant) - + src * m_alpha_blend_factors.dst_factor_src_color - + Vector4<f32x4> { src.w(), src.w(), src.w(), src.w() } * m_alpha_blend_factors.dst_factor_src_alpha - + dst * m_alpha_blend_factors.dst_factor_dst_color - + Vector4<f32x4> { dst.w(), dst.w(), dst.w(), dst.w() } * m_alpha_blend_factors.dst_factor_dst_alpha; + if (!src_factor_is_constant) + src_factor = 
get_blend_factor(m_options.blend_source_factor, src, dst); + if (!dst_factor_is_constant) + dst_factor = get_blend_factor(m_options.blend_destination_factor, src, dst); out_color = src * src_factor + dst * dst_factor; } @@ -1595,9 +1555,6 @@ void Device::draw_statistics_overlay(Gfx::Bitmap& target) void Device::set_options(GPU::RasterizerOptions const& options) { m_options = options; - - if (m_options.enable_blending) - setup_blend_factors(); } void Device::set_light_model_params(GPU::LightModelParameters const& lighting_model) diff --git a/Userland/Libraries/LibSoftGPU/Device.h b/Userland/Libraries/LibSoftGPU/Device.h index 92a59b056e..fdbfb297f7 100644 --- a/Userland/Libraries/LibSoftGPU/Device.h +++ b/Userland/Libraries/LibSoftGPU/Device.h @@ -1,6 +1,6 @@ /* * Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org> - * Copyright (c) 2022, Jelle Raaijmakers <jelle@gmta.nl> + * Copyright (c) 2022-2023, Jelle Raaijmakers <jelle@gmta.nl> * * SPDX-License-Identifier: BSD-2-Clause */ @@ -29,7 +29,6 @@ #include <LibGfx/Matrix4x4.h> #include <LibGfx/Rect.h> #include <LibGfx/Vector4.h> -#include <LibSoftGPU/AlphaBlendFactors.h> #include <LibSoftGPU/Buffer/FrameBuffer.h> #include <LibSoftGPU/Buffer/Typed2DBuffer.h> #include <LibSoftGPU/Clipper.h> @@ -102,7 +101,6 @@ private: void rasterize_point(GPU::Vertex&); void rasterize_triangle(Triangle&); - void setup_blend_factors(); void shade_fragments(PixelQuad&); RefPtr<FrameBuffer<GPU::ColorType, GPU::DepthType, GPU::StencilType>> m_frame_buffer {}; @@ -113,7 +111,6 @@ private: Vector<Triangle> m_processed_triangles; Vector<GPU::Vertex> m_clipped_vertices; Array<Sampler, GPU::NUM_TEXTURE_UNITS> m_samplers; - AlphaBlendFactors m_alpha_blend_factors; Array<GPU::Light, NUM_LIGHTS> m_lights; Array<GPU::Material, 2u> m_materials; GPU::RasterPosition m_raster_position; diff --git a/Userland/Libraries/LibSoftGPU/SIMD.h b/Userland/Libraries/LibSoftGPU/SIMD.h index c0ab018cde..2da561cf1b 100644 --- 
a/Userland/Libraries/LibSoftGPU/SIMD.h +++ b/Userland/Libraries/LibSoftGPU/SIMD.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org> + * Copyright (c) 2023, Jelle Raaijmakers <jelle@gmta.nl> * * SPDX-License-Identifier: BSD-2-Clause */ @@ -138,4 +139,9 @@ ALWAYS_INLINE static Vector2<AK::SIMD::f32x4> to_vec2_f32x4(Vector2<AK::SIMD::i3 }; } +ALWAYS_INLINE static constexpr Vector4<AK::SIMD::f32x4> to_vec4(AK::SIMD::f32x4 v) +{ + return { v, v, v, v }; +} + } |