summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Jelle Raaijmakers <jelle@gmta.nl> 2023-02-01 21:18:53 +0100
committer Andreas Kling <kling@serenityos.org> 2023-02-02 14:38:26 +0100
commit 69b94e4235c1869ea3963144a8898b87d61c0b8e (patch)
tree 487560b9d2c15e8b9d2bb5b7c8f86e2694c77ad3
parent f0f9d8f1e08689cb0d902bbbe79873803b46c0c2 (diff)
download serenity-69b94e4235c1869ea3963144a8898b87d61c0b8e.zip
LibSoftGPU: Make blending simpler and more efficient
Previously, we would precalculate "alpha blend factors" on every configuration update and then calculate the source and destination blending factors in one go using all these factors. The idea here was probably that we would get better performance by avoiding branching. However, by measuring blending performance in Quake III, it seems that this simpler version that only calculates the required factors reduces the CPU time spent in `rasterize_triangle` by 3%. As a bonus, `GL_SRC_ALPHA_SATURATE` is now also implemented.
-rw-r--r-- Userland/Libraries/LibSoftGPU/AlphaBlendFactors.h 27
-rw-r--r-- Userland/Libraries/LibSoftGPU/Device.cpp 155
-rw-r--r-- Userland/Libraries/LibSoftGPU/Device.h 5
-rw-r--r-- Userland/Libraries/LibSoftGPU/SIMD.h 6
4 files changed, 63 insertions, 130 deletions
diff --git a/Userland/Libraries/LibSoftGPU/AlphaBlendFactors.h b/Userland/Libraries/LibSoftGPU/AlphaBlendFactors.h
deleted file mode 100644
index 623f3db33b..0000000000
--- a/Userland/Libraries/LibSoftGPU/AlphaBlendFactors.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org>
- *
- * SPDX-License-Identifier: BSD-2-Clause
- */
-
-#pragma once
-
-#include <LibGfx/Vector4.h>
-
-namespace SoftGPU {
-
-struct AlphaBlendFactors final {
- FloatVector4 src_constant {};
- float src_factor_src_alpha = 0;
- float src_factor_dst_alpha = 0;
- float src_factor_src_color = 0;
- float src_factor_dst_color = 0;
-
- FloatVector4 dst_constant {};
- float dst_factor_src_alpha = 0;
- float dst_factor_dst_alpha = 0;
- float dst_factor_src_color = 0;
- float dst_factor_dst_color = 0;
-};
-
-}
diff --git a/Userland/Libraries/LibSoftGPU/Device.cpp b/Userland/Libraries/LibSoftGPU/Device.cpp
index 0ec2d2b54d..9e667309c8 100644
--- a/Userland/Libraries/LibSoftGPU/Device.cpp
+++ b/Userland/Libraries/LibSoftGPU/Device.cpp
@@ -1,7 +1,7 @@
/*
* Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org>
* Copyright (c) 2021, Jesse Buhagiar <jooster669@gmail.com>
- * Copyright (c) 2022, Jelle Raaijmakers <jelle@gmta.nl>
+ * Copyright (c) 2022-2023, Jelle Raaijmakers <jelle@gmta.nl>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@@ -103,89 +103,6 @@ static Vector4<f32x4> to_vec4(u32x4 bgra)
};
}
-void Device::setup_blend_factors()
-{
- m_alpha_blend_factors = {};
-
- switch (m_options.blend_source_factor) {
- case GPU::BlendFactor::Zero:
- break;
- case GPU::BlendFactor::One:
- m_alpha_blend_factors.src_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
- break;
- case GPU::BlendFactor::SrcColor:
- m_alpha_blend_factors.src_factor_src_color = 1;
- break;
- case GPU::BlendFactor::OneMinusSrcColor:
- m_alpha_blend_factors.src_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
- m_alpha_blend_factors.src_factor_src_color = -1;
- break;
- case GPU::BlendFactor::SrcAlpha:
- m_alpha_blend_factors.src_factor_src_alpha = 1;
- break;
- case GPU::BlendFactor::OneMinusSrcAlpha:
- m_alpha_blend_factors.src_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
- m_alpha_blend_factors.src_factor_src_alpha = -1;
- break;
- case GPU::BlendFactor::DstAlpha:
- m_alpha_blend_factors.src_factor_dst_alpha = 1;
- break;
- case GPU::BlendFactor::OneMinusDstAlpha:
- m_alpha_blend_factors.src_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
- m_alpha_blend_factors.src_factor_dst_alpha = -1;
- break;
- case GPU::BlendFactor::DstColor:
- m_alpha_blend_factors.src_factor_dst_color = 1;
- break;
- case GPU::BlendFactor::OneMinusDstColor:
- m_alpha_blend_factors.src_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
- m_alpha_blend_factors.src_factor_dst_color = -1;
- break;
- case GPU::BlendFactor::SrcAlphaSaturate:
- default:
- VERIFY_NOT_REACHED();
- }
-
- switch (m_options.blend_destination_factor) {
- case GPU::BlendFactor::Zero:
- break;
- case GPU::BlendFactor::One:
- m_alpha_blend_factors.dst_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
- break;
- case GPU::BlendFactor::SrcColor:
- m_alpha_blend_factors.dst_factor_src_color = 1;
- break;
- case GPU::BlendFactor::OneMinusSrcColor:
- m_alpha_blend_factors.dst_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
- m_alpha_blend_factors.dst_factor_src_color = -1;
- break;
- case GPU::BlendFactor::SrcAlpha:
- m_alpha_blend_factors.dst_factor_src_alpha = 1;
- break;
- case GPU::BlendFactor::OneMinusSrcAlpha:
- m_alpha_blend_factors.dst_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
- m_alpha_blend_factors.dst_factor_src_alpha = -1;
- break;
- case GPU::BlendFactor::DstAlpha:
- m_alpha_blend_factors.dst_factor_dst_alpha = 1;
- break;
- case GPU::BlendFactor::OneMinusDstAlpha:
- m_alpha_blend_factors.dst_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
- m_alpha_blend_factors.dst_factor_dst_alpha = -1;
- break;
- case GPU::BlendFactor::DstColor:
- m_alpha_blend_factors.dst_factor_dst_color = 1;
- break;
- case GPU::BlendFactor::OneMinusDstColor:
- m_alpha_blend_factors.dst_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
- m_alpha_blend_factors.dst_factor_dst_color = -1;
- break;
- case GPU::BlendFactor::SrcAlphaSaturate:
- default:
- VERIFY_NOT_REACHED();
- }
-}
-
ALWAYS_INLINE static void test_alpha(PixelQuad& quad, GPU::AlphaTestFunction alpha_test_function, f32x4 const& reference_value)
{
auto const alpha = quad.get_output_float(SHADER_OUTPUT_FIRST_COLOR + 3);
@@ -218,6 +135,44 @@ ALWAYS_INLINE static void test_alpha(PixelQuad& quad, GPU::AlphaTestFunction alp
}
}
+ALWAYS_INLINE static bool is_blend_factor_constant(GPU::BlendFactor blend_factor)
+{
+ return (blend_factor == GPU::BlendFactor::One || blend_factor == GPU::BlendFactor::Zero);
+}
+
+// OpenGL 1.5 § 4.1.8, table 4.1
+ALWAYS_INLINE static Vector4<f32x4> get_blend_factor(GPU::BlendFactor blend_factor, Vector4<f32x4> const& source_color, Vector4<f32x4> const& destination_color)
+{
+ switch (blend_factor) {
+ case GPU::BlendFactor::DstAlpha:
+ return to_vec4(destination_color.w());
+ case GPU::BlendFactor::DstColor:
+ return destination_color;
+ case GPU::BlendFactor::One:
+ return to_vec4(expand4(1.f));
+ case GPU::BlendFactor::OneMinusDstAlpha:
+ return to_vec4(1.f - destination_color.w());
+ case GPU::BlendFactor::OneMinusDstColor:
+ return to_vec4(expand4(1.f)) - destination_color;
+ case GPU::BlendFactor::OneMinusSrcAlpha:
+ return to_vec4(1.f - source_color.w());
+ case GPU::BlendFactor::OneMinusSrcColor:
+ return to_vec4(expand4(1.f)) - source_color;
+ case GPU::BlendFactor::SrcAlpha:
+ return to_vec4(source_color.w());
+ case GPU::BlendFactor::SrcAlphaSaturate: {
+ auto saturated = min(source_color.w(), 1.f - destination_color.w());
+ return { saturated, saturated, saturated, expand4(1.f) };
+ }
+ case GPU::BlendFactor::SrcColor:
+ return source_color;
+ case GPU::BlendFactor::Zero:
+ return to_vec4(expand4(0.f));
+ default:
+ VERIFY_NOT_REACHED();
+ }
+}
+
template<typename CB1, typename CB2, typename CB3>
ALWAYS_INLINE void Device::rasterize(Gfx::IntRect& render_bounds, CB1 set_coverage_mask, CB2 set_quad_depth, CB3 set_quad_attributes)
{
@@ -284,6 +239,18 @@ ALWAYS_INLINE void Device::rasterize(Gfx::IntRect& render_bounds, CB1 set_covera
auto const qy0 = render_bounds_top & ~1;
auto const qy1 = render_bounds_bottom & ~1;
+ // Blend factors
+ Vector4<f32x4> src_factor;
+ Vector4<f32x4> dst_factor;
+ auto const src_factor_is_constant = is_blend_factor_constant(m_options.blend_source_factor);
+ auto const dst_factor_is_constant = is_blend_factor_constant(m_options.blend_destination_factor);
+ if (m_options.enable_blending) {
+ if (src_factor_is_constant)
+ src_factor = get_blend_factor(m_options.blend_source_factor, {}, {});
+ if (dst_factor_is_constant)
+ dst_factor = get_blend_factor(m_options.blend_destination_factor, {}, {});
+ }
+
// Rasterize all quads
// FIXME: this could be embarrassingly parallel
for (int qy = qy0; qy <= qy1; qy += 2) {
@@ -474,19 +441,12 @@ ALWAYS_INLINE void Device::rasterize(Gfx::IntRect& render_bounds, CB1 set_covera
// Blend color values from pixel_staging into color_buffer
auto const& src = out_color;
- auto dst = to_vec4(dst_u32);
+ auto const dst = to_vec4(dst_u32);
- auto src_factor = expand4(m_alpha_blend_factors.src_constant)
- + src * m_alpha_blend_factors.src_factor_src_color
- + Vector4<f32x4> { src.w(), src.w(), src.w(), src.w() } * m_alpha_blend_factors.src_factor_src_alpha
- + dst * m_alpha_blend_factors.src_factor_dst_color
- + Vector4<f32x4> { dst.w(), dst.w(), dst.w(), dst.w() } * m_alpha_blend_factors.src_factor_dst_alpha;
-
- auto dst_factor = expand4(m_alpha_blend_factors.dst_constant)
- + src * m_alpha_blend_factors.dst_factor_src_color
- + Vector4<f32x4> { src.w(), src.w(), src.w(), src.w() } * m_alpha_blend_factors.dst_factor_src_alpha
- + dst * m_alpha_blend_factors.dst_factor_dst_color
- + Vector4<f32x4> { dst.w(), dst.w(), dst.w(), dst.w() } * m_alpha_blend_factors.dst_factor_dst_alpha;
+ if (!src_factor_is_constant)
+ src_factor = get_blend_factor(m_options.blend_source_factor, src, dst);
+ if (!dst_factor_is_constant)
+ dst_factor = get_blend_factor(m_options.blend_destination_factor, src, dst);
out_color = src * src_factor + dst * dst_factor;
}
@@ -1595,9 +1555,6 @@ void Device::draw_statistics_overlay(Gfx::Bitmap& target)
void Device::set_options(GPU::RasterizerOptions const& options)
{
m_options = options;
-
- if (m_options.enable_blending)
- setup_blend_factors();
}
void Device::set_light_model_params(GPU::LightModelParameters const& lighting_model)
diff --git a/Userland/Libraries/LibSoftGPU/Device.h b/Userland/Libraries/LibSoftGPU/Device.h
index 92a59b056e..fdbfb297f7 100644
--- a/Userland/Libraries/LibSoftGPU/Device.h
+++ b/Userland/Libraries/LibSoftGPU/Device.h
@@ -1,6 +1,6 @@
/*
* Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org>
- * Copyright (c) 2022, Jelle Raaijmakers <jelle@gmta.nl>
+ * Copyright (c) 2022-2023, Jelle Raaijmakers <jelle@gmta.nl>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@@ -29,7 +29,6 @@
#include <LibGfx/Matrix4x4.h>
#include <LibGfx/Rect.h>
#include <LibGfx/Vector4.h>
-#include <LibSoftGPU/AlphaBlendFactors.h>
#include <LibSoftGPU/Buffer/FrameBuffer.h>
#include <LibSoftGPU/Buffer/Typed2DBuffer.h>
#include <LibSoftGPU/Clipper.h>
@@ -102,7 +101,6 @@ private:
void rasterize_point(GPU::Vertex&);
void rasterize_triangle(Triangle&);
- void setup_blend_factors();
void shade_fragments(PixelQuad&);
RefPtr<FrameBuffer<GPU::ColorType, GPU::DepthType, GPU::StencilType>> m_frame_buffer {};
@@ -113,7 +111,6 @@ private:
Vector<Triangle> m_processed_triangles;
Vector<GPU::Vertex> m_clipped_vertices;
Array<Sampler, GPU::NUM_TEXTURE_UNITS> m_samplers;
- AlphaBlendFactors m_alpha_blend_factors;
Array<GPU::Light, NUM_LIGHTS> m_lights;
Array<GPU::Material, 2u> m_materials;
GPU::RasterPosition m_raster_position;
diff --git a/Userland/Libraries/LibSoftGPU/SIMD.h b/Userland/Libraries/LibSoftGPU/SIMD.h
index c0ab018cde..2da561cf1b 100644
--- a/Userland/Libraries/LibSoftGPU/SIMD.h
+++ b/Userland/Libraries/LibSoftGPU/SIMD.h
@@ -1,5 +1,6 @@
/*
* Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org>
+ * Copyright (c) 2023, Jelle Raaijmakers <jelle@gmta.nl>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@@ -138,4 +139,9 @@ ALWAYS_INLINE static Vector2<AK::SIMD::f32x4> to_vec2_f32x4(Vector2<AK::SIMD::i3
};
}
+ALWAYS_INLINE static constexpr Vector4<AK::SIMD::f32x4> to_vec4(AK::SIMD::f32x4 v)
+{
+ return { v, v, v, v };
+}
+
}