diff options
author | Jelle Raaijmakers <jelle@gmta.nl> | 2022-01-16 22:48:46 +0100 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2022-01-17 12:49:00 +0100 |
commit | 11c807ebd1e0677bc93e6dc98123c9ca210b2cff (patch) | |
tree | ac20931082cace861085fcf8f3b0cf63c80020ad /Userland | |
parent | 638667194438416527b267065e1077a747208c35 (diff) | |
download | serenity-11c807ebd1e0677bc93e6dc98123c9ca210b2cff.zip |
LibGL+LibSoftGPU: Implement the stencil buffer
This implements an 8-bit stencil buffer; for now only the front-face
stencil configuration is applied during rasterization. Stencil operations
are SIMD optimized. LibGL changes include:
* New `glStencilMask` and `glStencilMaskSeparate` functions
* New context parameter `GL_STENCIL_CLEAR_VALUE`
Diffstat (limited to 'Userland')
-rw-r--r-- | Userland/Libraries/LibGL/GL/gl.h | 4 | ||||
-rw-r--r-- | Userland/Libraries/LibGL/GLContext.h | 1 | ||||
-rw-r--r-- | Userland/Libraries/LibGL/GLStencil.cpp | 7 | ||||
-rw-r--r-- | Userland/Libraries/LibGL/SoftwareGLContext.cpp | 150 | ||||
-rw-r--r-- | Userland/Libraries/LibGL/SoftwareGLContext.h | 7 | ||||
-rw-r--r-- | Userland/Libraries/LibSoftGPU/CMakeLists.txt | 1 | ||||
-rw-r--r-- | Userland/Libraries/LibSoftGPU/Device.cpp | 209 | ||||
-rw-r--r-- | Userland/Libraries/LibSoftGPU/Device.h | 22 | ||||
-rw-r--r-- | Userland/Libraries/LibSoftGPU/DeviceInfo.h | 1 | ||||
-rw-r--r-- | Userland/Libraries/LibSoftGPU/Enums.h | 22 | ||||
-rw-r--r-- | Userland/Libraries/LibSoftGPU/StencilBuffer.cpp | 41 | ||||
-rw-r--r-- | Userland/Libraries/LibSoftGPU/StencilBuffer.h | 33 |
12 files changed, 420 insertions, 78 deletions
diff --git a/Userland/Libraries/LibGL/GL/gl.h b/Userland/Libraries/LibGL/GL/gl.h index 60bd67261d..af297e3740 100644 --- a/Userland/Libraries/LibGL/GL/gl.h +++ b/Userland/Libraries/LibGL/GL/gl.h @@ -98,6 +98,7 @@ extern "C" { #define GL_COLOR_MATERIAL 0x0B57 #define GL_FOG_START 0x0B63 #define GL_FOG_END 0x0B64 +#define GL_STENCIL_CLEAR_VALUE 0x0B91 #define GL_MATRIX_MODE 0x0BA0 #define GL_NORMALIZE 0x0BA1 #define GL_VIEWPORT 0x0BA2 @@ -605,9 +606,10 @@ GLAPI void glLightModelfv(GLenum pname, GLfloat const* params); GLAPI void glLightModeli(GLenum pname, GLint param); GLAPI void glStencilFunc(GLenum func, GLint ref, GLuint mask); GLAPI void glStencilFuncSeparate(GLenum face, GLenum func, GLint ref, GLuint mask); +GLAPI void glStencilMask(GLuint mask); +GLAPI void glStencilMaskSeparate(GLenum face, GLuint mask); GLAPI void glStencilOp(GLenum sfail, GLenum dpfail, GLenum dppass); GLAPI void glStencilOpSeparate(GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass); -GLAPI void glStencilMask(GLuint mask); GLAPI void glNormal3f(GLfloat nx, GLfloat ny, GLfloat nz); GLAPI void glNormal3fv(GLfloat const* v); GLAPI void glNormalPointer(GLenum type, GLsizei stride, void const* pointer); diff --git a/Userland/Libraries/LibGL/GLContext.h b/Userland/Libraries/LibGL/GLContext.h index bf189197d9..1eecd49830 100644 --- a/Userland/Libraries/LibGL/GLContext.h +++ b/Userland/Libraries/LibGL/GLContext.h @@ -97,6 +97,7 @@ public: virtual void gl_pixel_storei(GLenum pname, GLint param) = 0; virtual void gl_scissor(GLint x, GLint y, GLsizei width, GLsizei height) = 0; virtual void gl_stencil_func_separate(GLenum face, GLenum func, GLint ref, GLuint mask) = 0; + virtual void gl_stencil_mask_separate(GLenum face, GLuint mask) = 0; virtual void gl_stencil_op_separate(GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass) = 0; virtual void gl_normal(GLfloat nx, GLfloat ny, GLfloat nz) = 0; virtual void gl_normal_pointer(GLenum type, GLsizei stride, void const* pointer) = 0; diff --git 
a/Userland/Libraries/LibGL/GLStencil.cpp b/Userland/Libraries/LibGL/GLStencil.cpp index 80ded3ef5d..7e2916ef08 100644 --- a/Userland/Libraries/LibGL/GLStencil.cpp +++ b/Userland/Libraries/LibGL/GLStencil.cpp @@ -36,5 +36,10 @@ void glStencilOpSeparate(GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass void glStencilMask(GLuint mask) { - dbgln("(STUBBED) glStencilMask(0x{:08x})", mask); + g_gl_context->gl_stencil_mask_separate(GL_FRONT_AND_BACK, mask); +} + +void glStencilMaskSeparate(GLenum face, GLuint mask) +{ + g_gl_context->gl_stencil_mask_separate(face, mask); } diff --git a/Userland/Libraries/LibGL/SoftwareGLContext.cpp b/Userland/Libraries/LibGL/SoftwareGLContext.cpp index 225ddc016c..0ca7b2f897 100644 --- a/Userland/Libraries/LibGL/SoftwareGLContext.cpp +++ b/Userland/Libraries/LibGL/SoftwareGLContext.cpp @@ -165,7 +165,9 @@ Optional<ContextParameter> SoftwareGLContext::get_context_parameter(GLenum name) return ContextParameter { .type = GL_BOOL, .is_capability = true, .value = { .boolean_value = scissor_enabled } }; } case GL_STENCIL_BITS: - return ContextParameter { .type = GL_INT, .value = { .integer_value = sizeof(float) * 8 } }; + return ContextParameter { .type = GL_INT, .value = { .integer_value = m_device_info.stencil_bits } }; + case GL_STENCIL_CLEAR_VALUE: + return ContextParameter { .type = GL_INT, .value = { .integer_value = m_clear_stencil } }; case GL_STENCIL_TEST: return ContextParameter { .type = GL_BOOL, .is_capability = true, .value = { .boolean_value = m_stencil_test_enabled } }; case GL_TEXTURE_1D: @@ -239,9 +241,8 @@ void SoftwareGLContext::gl_clear(GLbitfield mask) if (mask & GL_DEPTH_BUFFER_BIT) m_rasterizer.clear_depth(static_cast<float>(m_clear_depth)); - // FIXME: implement GL_STENCIL_BUFFER_BIT if (mask & GL_STENCIL_BUFFER_BIT) - dbgln_if(GL_DEBUG, "gl_clear(): GL_STENCIL_BUFFER_BIT is unimplemented"); + m_rasterizer.clear_stencil(m_clear_stencil); } void SoftwareGLContext::gl_clear_color(GLclampf red, GLclampf green, 
GLclampf blue, GLclampf alpha) @@ -268,9 +269,7 @@ void SoftwareGLContext::gl_clear_stencil(GLint s) RETURN_WITH_ERROR_IF(m_in_draw_state, GL_INVALID_OPERATION); - // FIXME: "s is masked with 2^m - 1 , where m is the number of bits in the stencil buffer" - - m_clear_stencil = s; + m_clear_stencil = static_cast<u8>(s & ((1 << m_device_info.stencil_bits) - 1)); } void SoftwareGLContext::gl_color(GLdouble r, GLdouble g, GLdouble b, GLdouble a) @@ -697,6 +696,8 @@ void SoftwareGLContext::gl_enable(GLenum capability) break; case GL_STENCIL_TEST: m_stencil_test_enabled = true; + rasterizer_options.enable_stencil_test = true; + update_rasterizer_options = true; break; case GL_TEXTURE_1D: m_active_texture_unit->set_texture_1d_enabled(true); @@ -808,6 +809,8 @@ void SoftwareGLContext::gl_disable(GLenum capability) break; case GL_STENCIL_TEST: m_stencil_test_enabled = false; + rasterizer_options.enable_stencil_test = false; + update_rasterizer_options = true; break; case GL_TEXTURE_1D: m_active_texture_unit->set_texture_1d_enabled(false); @@ -2631,13 +2634,28 @@ void SoftwareGLContext::gl_stencil_func_separate(GLenum face, GLenum func, GLint || func == GL_ALWAYS), GL_INVALID_ENUM); - // FIXME: "ref is clamped to the range 02^n - 1 , where n is the number of bitplanes in the stencil buffer" + ref = clamp(ref, 0, (1 << m_device_info.stencil_bits) - 1); StencilFunctionOptions new_options = { func, ref, mask }; if (face == GL_FRONT || face == GL_FRONT_AND_BACK) m_stencil_function[Face::Front] = new_options; if (face == GL_BACK || face == GL_FRONT_AND_BACK) m_stencil_function[Face::Back] = new_options; + + m_stencil_configuration_dirty = true; +} + +void SoftwareGLContext::gl_stencil_mask_separate(GLenum face, GLuint mask) +{ + APPEND_TO_CALL_LIST_AND_RETURN_IF_NEEDED(gl_stencil_mask_separate, face, mask); + RETURN_WITH_ERROR_IF(m_in_draw_state, GL_INVALID_OPERATION); + + if (face == GL_FRONT || face == GL_FRONT_AND_BACK) + m_stencil_operation[Face::Front].write_mask = mask; + if 
(face == GL_BACK || face == GL_FRONT_AND_BACK) + m_stencil_operation[Face::Back].write_mask = mask; + + m_stencil_configuration_dirty = true; } void SoftwareGLContext::gl_stencil_op_separate(GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass) @@ -2647,39 +2665,26 @@ void SoftwareGLContext::gl_stencil_op_separate(GLenum face, GLenum sfail, GLenum RETURN_WITH_ERROR_IF(!(face == GL_FRONT || face == GL_BACK || face == GL_FRONT_AND_BACK), GL_INVALID_ENUM); - RETURN_WITH_ERROR_IF(!(sfail == GL_KEEP - || sfail == GL_ZERO - || sfail == GL_REPLACE - || sfail == GL_INCR - || sfail == GL_INCR_WRAP - || sfail == GL_DECR - || sfail == GL_DECR_WRAP - || sfail == GL_INVERT), - GL_INVALID_ENUM); - RETURN_WITH_ERROR_IF(!(dpfail == GL_KEEP - || dpfail == GL_ZERO - || dpfail == GL_REPLACE - || dpfail == GL_INCR - || dpfail == GL_INCR_WRAP - || dpfail == GL_DECR - || dpfail == GL_DECR_WRAP - || dpfail == GL_INVERT), - GL_INVALID_ENUM); - RETURN_WITH_ERROR_IF(!(dppass == GL_KEEP - || dppass == GL_ZERO - || dppass == GL_REPLACE - || dppass == GL_INCR - || dppass == GL_INCR_WRAP - || dppass == GL_DECR - || dppass == GL_DECR_WRAP - || dppass == GL_INVERT), - GL_INVALID_ENUM); - - StencilOperationOptions new_options = { sfail, dpfail, dppass }; + auto is_valid_op = [](GLenum op) -> bool { + return op == GL_KEEP || op == GL_ZERO || op == GL_REPLACE || op == GL_INCR || op == GL_INCR_WRAP + || op == GL_DECR || op == GL_DECR_WRAP || op == GL_INVERT; + }; + RETURN_WITH_ERROR_IF(!is_valid_op(sfail), GL_INVALID_ENUM); + RETURN_WITH_ERROR_IF(!is_valid_op(dpfail), GL_INVALID_ENUM); + RETURN_WITH_ERROR_IF(!is_valid_op(dppass), GL_INVALID_ENUM); + + auto update_stencil_operation = [&](Face face, GLenum sfail, GLenum dpfail, GLenum dppass) { + auto& stencil_operation = m_stencil_operation[face]; + stencil_operation.op_fail = sfail; + stencil_operation.op_depth_fail = dpfail; + stencil_operation.op_pass = dppass; + }; if (face == GL_FRONT || face == GL_FRONT_AND_BACK) - 
m_stencil_operation[Face::Front] = new_options; + update_stencil_operation(Face::Front, sfail, dpfail, dppass); if (face == GL_BACK || face == GL_FRONT_AND_BACK) - m_stencil_operation[Face::Back] = new_options; + update_stencil_operation(Face::Back, sfail, dpfail, dppass); + + m_stencil_configuration_dirty = true; } void SoftwareGLContext::gl_normal(GLfloat nx, GLfloat ny, GLfloat nz) @@ -2908,6 +2913,7 @@ void SoftwareGLContext::sync_device_config() sync_device_sampler_config(); sync_device_texcoord_config(); sync_light_state(); + sync_stencil_configuration(); } void SoftwareGLContext::sync_device_sampler_config() @@ -3170,6 +3176,74 @@ void SoftwareGLContext::sync_device_texcoord_config() m_rasterizer.set_options(options); } +void SoftwareGLContext::sync_stencil_configuration() +{ + if (!m_stencil_configuration_dirty) + return; + m_stencil_configuration_dirty = false; + + auto set_device_stencil = [&](SoftGPU::Face face, StencilFunctionOptions func, StencilOperationOptions op) { + SoftGPU::StencilConfiguration device_configuration; + + // Stencil test function + auto map_func = [](GLenum func) -> SoftGPU::StencilTestFunction { + switch (func) { + case GL_ALWAYS: + return SoftGPU::StencilTestFunction::Always; + case GL_EQUAL: + return SoftGPU::StencilTestFunction::Equal; + case GL_GEQUAL: + return SoftGPU::StencilTestFunction::GreaterOrEqual; + case GL_GREATER: + return SoftGPU::StencilTestFunction::Greater; + case GL_LESS: + return SoftGPU::StencilTestFunction::Less; + case GL_LEQUAL: + return SoftGPU::StencilTestFunction::LessOrEqual; + case GL_NEVER: + return SoftGPU::StencilTestFunction::Never; + case GL_NOTEQUAL: + return SoftGPU::StencilTestFunction::NotEqual; + } + VERIFY_NOT_REACHED(); + }; + device_configuration.test_function = map_func(func.func); + device_configuration.reference_value = func.reference_value; + device_configuration.test_mask = func.mask; + + // Stencil operation + auto map_operation = [](GLenum operation) -> SoftGPU::StencilOperation { + 
switch (operation) { + case GL_DECR: + return SoftGPU::StencilOperation::Decrement; + case GL_DECR_WRAP: + return SoftGPU::StencilOperation::DecrementWrap; + case GL_INCR: + return SoftGPU::StencilOperation::Increment; + case GL_INCR_WRAP: + return SoftGPU::StencilOperation::IncrementWrap; + case GL_INVERT: + return SoftGPU::StencilOperation::Invert; + case GL_KEEP: + return SoftGPU::StencilOperation::Keep; + case GL_REPLACE: + return SoftGPU::StencilOperation::Replace; + case GL_ZERO: + return SoftGPU::StencilOperation::Zero; + } + VERIFY_NOT_REACHED(); + }; + device_configuration.on_stencil_test_fail = map_operation(op.op_fail); + device_configuration.on_depth_test_fail = map_operation(op.op_depth_fail); + device_configuration.on_pass = map_operation(op.op_pass); + device_configuration.write_mask = op.write_mask; + + m_rasterizer.set_stencil_configuration(face, device_configuration); + }; + set_device_stencil(SoftGPU::Face::Front, m_stencil_function[Face::Front], m_stencil_operation[Face::Front]); + set_device_stencil(SoftGPU::Face::Back, m_stencil_function[Face::Back], m_stencil_operation[Face::Back]); +} + void SoftwareGLContext::gl_lightf(GLenum light, GLenum pname, GLfloat param) { APPEND_TO_CALL_LIST_AND_RETURN_IF_NEEDED(gl_lightf, light, pname, param); diff --git a/Userland/Libraries/LibGL/SoftwareGLContext.h b/Userland/Libraries/LibGL/SoftwareGLContext.h index 5494642944..ff5ea056be 100644 --- a/Userland/Libraries/LibGL/SoftwareGLContext.h +++ b/Userland/Libraries/LibGL/SoftwareGLContext.h @@ -128,6 +128,7 @@ public: virtual void gl_pixel_storei(GLenum pname, GLint param) override; virtual void gl_scissor(GLint x, GLint y, GLsizei width, GLsizei height) override; virtual void gl_stencil_func_separate(GLenum face, GLenum func, GLint ref, GLuint mask) override; + virtual void gl_stencil_mask_separate(GLenum face, GLuint mask) override; virtual void gl_stencil_op_separate(GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass) override; virtual void 
gl_normal(GLfloat nx, GLfloat ny, GLfloat nz) override; virtual void gl_normal_pointer(GLenum type, GLsizei stride, void const* pointer) override; @@ -154,6 +155,7 @@ private: void sync_device_sampler_config(); void sync_device_texcoord_config(); void sync_light_state(); + void sync_stencil_configuration(); template<typename T> T* store_in_listing(T value) @@ -195,7 +197,7 @@ private: FloatVector4 m_clear_color { 0.0f, 0.0f, 0.0f, 0.0f }; double m_clear_depth { 1.0 }; - GLint m_clear_stencil { 0 }; + u8 m_clear_stencil { 0 }; FloatVector4 m_current_vertex_color = { 1.0f, 1.0f, 1.0f, 1.0f }; FloatVector4 m_current_vertex_tex_coord = { 0.0f, 0.0f, 0.0f, 1.0f }; @@ -225,6 +227,7 @@ private: // Stencil configuration bool m_stencil_test_enabled { false }; + bool m_stencil_configuration_dirty { true }; struct StencilFunctionOptions { GLenum func { GL_ALWAYS }; @@ -237,6 +240,7 @@ private: GLenum op_fail { GL_KEEP }; GLenum op_depth_fail { GL_KEEP }; GLenum op_pass { GL_KEEP }; + GLuint write_mask { NumericLimits<GLuint>::max() }; }; Array<StencilOperationOptions, 2u> m_stencil_operation; @@ -360,6 +364,7 @@ private: decltype(&SoftwareGLContext::gl_polygon_offset), decltype(&SoftwareGLContext::gl_scissor), decltype(&SoftwareGLContext::gl_stencil_func_separate), + decltype(&SoftwareGLContext::gl_stencil_mask_separate), decltype(&SoftwareGLContext::gl_stencil_op_separate), decltype(&SoftwareGLContext::gl_normal), decltype(&SoftwareGLContext::gl_raster_pos), diff --git a/Userland/Libraries/LibSoftGPU/CMakeLists.txt b/Userland/Libraries/LibSoftGPU/CMakeLists.txt index 96c04d4d42..03ee632187 100644 --- a/Userland/Libraries/LibSoftGPU/CMakeLists.txt +++ b/Userland/Libraries/LibSoftGPU/CMakeLists.txt @@ -4,6 +4,7 @@ set(SOURCES Device.cpp Image.cpp Sampler.cpp + StencilBuffer.cpp ) add_compile_options(-Wno-psabi) diff --git a/Userland/Libraries/LibSoftGPU/Device.cpp b/Userland/Libraries/LibSoftGPU/Device.cpp index b2f3c1bd66..fa4509b977 100644 --- 
a/Userland/Libraries/LibSoftGPU/Device.cpp +++ b/Userland/Libraries/LibSoftGPU/Device.cpp @@ -1,12 +1,14 @@ /* * Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org> * Copyright (c) 2021, Jesse Buhagiar <jooster669@gmail.com> + * Copyright (c) 2022, Jelle Raaijmakers <jelle@gmta.nl> * * SPDX-License-Identifier: BSD-2-Clause */ #include <AK/Function.h> #include <AK/Math.h> +#include <AK/NumericLimits.h> #include <AK/SIMDExtras.h> #include <AK/SIMDMath.h> #include <LibCore/ElapsedTimer.h> @@ -82,11 +84,11 @@ static Vector4<f32x4> to_vec4(u32x4 rgba) }; } -static Gfx::IntRect window_coordinates_to_target_coordinates(Gfx::IntRect const window_rect, Gfx::IntRect const target_rect) +Gfx::IntRect Device::window_coordinates_to_target_coordinates(Gfx::IntRect const& window_rect) { return { window_rect.x(), - target_rect.height() - window_rect.height() - window_rect.y(), + m_render_target->rect().height() - window_rect.height() - window_rect.y(), window_rect.width(), window_rect.height(), }; @@ -213,7 +215,7 @@ void Device::rasterize_triangle(const Triangle& triangle) auto render_bounds = m_render_target->rect(); if (m_options.scissor_enabled) - render_bounds.intersect(window_coordinates_to_target_coordinates(m_options.scissor_box, m_render_target->rect())); + render_bounds.intersect(window_coordinates_to_target_coordinates(m_options.scissor_box)); // Obey top-left rule: // This sets up "zero" for later pixel coverage tests. @@ -229,7 +231,7 @@ void Device::rasterize_triangle(const Triangle& triangle) zero.set_y(0); // This function calculates the 3 edge values for the pixel relative to the triangle. 
- auto calculate_edge_values4 = [v0, v1, v2](const Vector2<i32x4>& p) -> Vector3<i32x4> { + auto calculate_edge_values4 = [v0, v1, v2](Vector2<i32x4> const& p) -> Vector3<i32x4> { return { edge_function4(v1, v2, p), edge_function4(v2, v0, p), @@ -238,7 +240,7 @@ void Device::rasterize_triangle(const Triangle& triangle) }; // This function tests whether a point as identified by its 3 edge values lies within the triangle - auto test_point4 = [zero](const Vector3<i32x4>& edges) -> i32x4 { + auto test_point4 = [zero](Vector3<i32x4> const& edges) -> i32x4 { return edges.x() >= zero.x() && edges.y() >= zero.y() && edges.z() >= zero.z(); @@ -257,8 +259,6 @@ void Device::rasterize_triangle(const Triangle& triangle) float const vertex1_eye_absz = fabs(vertex1.eye_coordinates.z()); float const vertex2_eye_absz = fabs(vertex2.eye_coordinates.z()); - // FIXME: implement stencil testing - int const render_bounds_left = render_bounds.x(); int const render_bounds_right = render_bounds.x() + render_bounds.width(); int const render_bounds_top = render_bounds.y(); @@ -269,10 +269,46 @@ void Device::rasterize_triangle(const Triangle& triangle) expand4(subpixel_factor / 2), }; + // Stencil configuration and writing + auto const stencil_configuration = m_stencil_configuration[Face::Front]; + auto const stencil_reference_value = stencil_configuration.reference_value & stencil_configuration.test_mask; + + auto write_to_stencil = [](u8* stencil_ptrs[4], i32x4 stencil_value, StencilOperation op, u8 reference_value, u8 write_mask, i32x4 pixel_mask) { + if (write_mask == 0 || op == StencilOperation::Keep) + return; + + switch (op) { + case StencilOperation::Decrement: + stencil_value = (stencil_value & ~write_mask) | (max(stencil_value - 1, expand4(0)) & write_mask); + break; + case StencilOperation::DecrementWrap: + stencil_value = (stencil_value & ~write_mask) | (((stencil_value - 1) & 0xFF) & write_mask); + break; + case StencilOperation::Increment: + stencil_value = (stencil_value & 
~write_mask) | (min(stencil_value + 1, expand4(0xFF)) & write_mask); + break; + case StencilOperation::IncrementWrap: + stencil_value = (stencil_value & ~write_mask) | (((stencil_value + 1) & 0xFF) & write_mask); + break; + case StencilOperation::Invert: + stencil_value ^= write_mask; + break; + case StencilOperation::Replace: + stencil_value = (stencil_value & ~write_mask) | (reference_value & write_mask); + break; + case StencilOperation::Zero: + stencil_value &= ~write_mask; + break; + default: + VERIFY_NOT_REACHED(); + } + + store4_masked(stencil_value, stencil_ptrs[0], stencil_ptrs[1], stencil_ptrs[2], stencil_ptrs[3], pixel_mask); + }; + // Iterate over all blocks within the bounds of the triangle for (int by = by0; by < by1; by += 2) { for (int bx = bx0; bx < bx1; bx += 2) { - PixelQuad quad; quad.screen_coordinates = { @@ -306,14 +342,70 @@ void Device::rasterize_triangle(const Triangle& triangle) int coverage_bits = maskbits(quad.mask); + // Stencil testing + u8* stencil_ptrs[4]; + i32x4 stencil_value; + if (m_options.enable_stencil_test) { + stencil_ptrs[0] = coverage_bits & 1 ? &m_stencil_buffer->scanline(by)[bx] : nullptr; + stencil_ptrs[1] = coverage_bits & 2 ? &m_stencil_buffer->scanline(by)[bx + 1] : nullptr; + stencil_ptrs[2] = coverage_bits & 4 ? &m_stencil_buffer->scanline(by + 1)[bx] : nullptr; + stencil_ptrs[3] = coverage_bits & 8 ? 
&m_stencil_buffer->scanline(by + 1)[bx + 1] : nullptr; + + stencil_value = load4_masked(stencil_ptrs[0], stencil_ptrs[1], stencil_ptrs[2], stencil_ptrs[3], quad.mask); + stencil_value &= stencil_configuration.test_mask; + + i32x4 stencil_test_passed; + switch (stencil_configuration.test_function) { + case StencilTestFunction::Always: + stencil_test_passed = expand4(~0); + break; + case StencilTestFunction::Equal: + stencil_test_passed = stencil_value == stencil_reference_value; + break; + case StencilTestFunction::Greater: + stencil_test_passed = stencil_value > stencil_reference_value; + break; + case StencilTestFunction::GreaterOrEqual: + stencil_test_passed = stencil_value >= stencil_reference_value; + break; + case StencilTestFunction::Less: + stencil_test_passed = stencil_value < stencil_reference_value; + break; + case StencilTestFunction::LessOrEqual: + stencil_test_passed = stencil_value <= stencil_reference_value; + break; + case StencilTestFunction::Never: + stencil_test_passed = expand4(0); + break; + case StencilTestFunction::NotEqual: + stencil_test_passed = stencil_value != stencil_reference_value; + break; + default: + VERIFY_NOT_REACHED(); + } + + // Update stencil buffer for pixels that failed the stencil test + write_to_stencil( + stencil_ptrs, + stencil_value, + stencil_configuration.on_stencil_test_fail, + stencil_reference_value, + stencil_configuration.write_mask, + quad.mask & ~stencil_test_passed); + + // Update coverage mask + early quad rejection + quad.mask &= stencil_test_passed; + if (none(quad.mask)) + continue; + } + + // Depth testing float* depth_ptrs[4] = { coverage_bits & 1 ? &m_depth_buffer->scanline(by)[bx] : nullptr, coverage_bits & 2 ? &m_depth_buffer->scanline(by)[bx + 1] : nullptr, coverage_bits & 4 ? &m_depth_buffer->scanline(by + 1)[bx] : nullptr, coverage_bits & 8 ? 
&m_depth_buffer->scanline(by + 1)[bx + 1] : nullptr, }; - - // AND the depth mask onto the coverage mask if (m_options.enable_depth_test) { auto depth = load4_masked(depth_ptrs[0], depth_ptrs[1], depth_ptrs[2], depth_ptrs[3], quad.mask); @@ -321,31 +413,35 @@ void Device::rasterize_triangle(const Triangle& triangle) // FIXME: Also apply depth_offset_factor which depends on the depth gradient quad.depth += m_options.depth_offset_constant * NumericLimits<float>::epsilon(); + i32x4 depth_test_passed; switch (m_options.depth_func) { case DepthTestFunction::Always: + depth_test_passed = expand4(~0); break; case DepthTestFunction::Never: - quad.mask ^= quad.mask; + depth_test_passed = expand4(0); break; case DepthTestFunction::Greater: - quad.mask &= quad.depth > depth; + depth_test_passed = quad.depth > depth; break; case DepthTestFunction::GreaterOrEqual: - quad.mask &= quad.depth >= depth; + depth_test_passed = quad.depth >= depth; break; case DepthTestFunction::NotEqual: #ifdef __SSE__ - quad.mask &= quad.depth != depth; + depth_test_passed = quad.depth != depth; #else - quad.mask[0] = bit_cast<u32>(quad.depth[0]) != bit_cast<u32>(depth[0]) ? -1 : 0; - quad.mask[1] = bit_cast<u32>(quad.depth[1]) != bit_cast<u32>(depth[1]) ? -1 : 0; - quad.mask[2] = bit_cast<u32>(quad.depth[2]) != bit_cast<u32>(depth[2]) ? -1 : 0; - quad.mask[3] = bit_cast<u32>(quad.depth[3]) != bit_cast<u32>(depth[3]) ? -1 : 0; + depth_test_passed = i32x4 { + bit_cast<u32>(quad.depth[0]) != bit_cast<u32>(depth[0]) ? -1 : 0, + bit_cast<u32>(quad.depth[1]) != bit_cast<u32>(depth[1]) ? -1 : 0, + bit_cast<u32>(quad.depth[2]) != bit_cast<u32>(depth[2]) ? -1 : 0, + bit_cast<u32>(quad.depth[3]) != bit_cast<u32>(depth[3]) ? 
-1 : 0, + }; #endif break; case DepthTestFunction::Equal: #ifdef __SSE__ - quad.mask &= quad.depth == depth; + depth_test_passed = quad.depth == depth; #else // // This is an interesting quirk that occurs due to us using the x87 FPU when Serenity is @@ -358,25 +454,52 @@ void Device::rasterize_triangle(const Triangle& triangle) // the first 32-bits of this depth value is "good enough" that if we get a hit on it being // equal, we can pretty much guarantee that it's actually equal. // - quad.mask[0] = bit_cast<u32>(quad.depth[0]) == bit_cast<u32>(depth[0]) ? -1 : 0; - quad.mask[1] = bit_cast<u32>(quad.depth[1]) == bit_cast<u32>(depth[1]) ? -1 : 0; - quad.mask[2] = bit_cast<u32>(quad.depth[2]) == bit_cast<u32>(depth[2]) ? -1 : 0; - quad.mask[3] = bit_cast<u32>(quad.depth[3]) == bit_cast<u32>(depth[3]) ? -1 : 0; + depth_test_passed = i32x4 { + bit_cast<u32>(quad.depth[0]) == bit_cast<u32>(depth[0]) ? -1 : 0, + bit_cast<u32>(quad.depth[1]) == bit_cast<u32>(depth[1]) ? -1 : 0, + bit_cast<u32>(quad.depth[2]) == bit_cast<u32>(depth[2]) ? -1 : 0, + bit_cast<u32>(quad.depth[3]) == bit_cast<u32>(depth[3]) ? 
-1 : 0, + }; #endif break; case DepthTestFunction::LessOrEqual: - quad.mask &= quad.depth <= depth; + depth_test_passed = quad.depth <= depth; break; case DepthTestFunction::Less: - quad.mask &= quad.depth < depth; + depth_test_passed = quad.depth < depth; break; + default: + VERIFY_NOT_REACHED(); } - // Nice, no pixels passed the depth test -> block rejected by early z + // Update stencil buffer for pixels that failed the depth test + if (m_options.enable_stencil_test) { + write_to_stencil( + stencil_ptrs, + stencil_value, + stencil_configuration.on_depth_test_fail, + stencil_reference_value, + stencil_configuration.write_mask, + quad.mask & ~depth_test_passed); + } + + // Update coverage mask + early quad rejection + quad.mask &= depth_test_passed; if (none(quad.mask)) continue; } + // Update stencil buffer for passed pixels + if (m_options.enable_stencil_test) { + write_to_stencil( + stencil_ptrs, + stencil_value, + stencil_configuration.on_pass, + stencil_reference_value, + stencil_configuration.write_mask, + quad.mask); + } + INCREASE_STATISTICS_COUNTER(g_num_pixels_shaded, maskcount(quad.mask)); // Draw the pixels according to the previously generated mask @@ -415,9 +538,8 @@ void Device::rasterize_triangle(const Triangle& triangle) } // Write to depth buffer - if (m_options.enable_depth_test && m_options.enable_depth_write) { + if (m_options.enable_depth_test && m_options.enable_depth_write) store4_masked(quad.depth, depth_ptrs[0], depth_ptrs[1], depth_ptrs[2], depth_ptrs[3], quad.mask); - } // We will not update the color buffer at all if (!m_options.color_mask || !m_options.enable_color_write) @@ -465,8 +587,9 @@ void Device::rasterize_triangle(const Triangle& triangle) } Device::Device(const Gfx::IntSize& size) - : m_render_target { Gfx::Bitmap::try_create(Gfx::BitmapFormat::BGRA8888, size).release_value_but_fixme_should_propagate_errors() } - , m_depth_buffer { adopt_own(*new DepthBuffer(size)) } + : 
m_render_target(Gfx::Bitmap::try_create(Gfx::BitmapFormat::BGRA8888, size).release_value_but_fixme_should_propagate_errors()) + , m_depth_buffer(make<DepthBuffer>(size)) + , m_stencil_buffer(MUST(StencilBuffer::try_create(size))) { m_options.scissor_box = m_render_target->rect(); m_options.viewport = m_render_target->rect(); @@ -478,7 +601,8 @@ DeviceInfo Device::info() const .vendor_name = "SerenityOS", .device_name = "SoftGPU", .num_texture_units = NUM_SAMPLERS, - .num_lights = NUM_LIGHTS + .num_lights = NUM_LIGHTS, + .stencil_bits = sizeof(u8) * 8, }; } @@ -626,7 +750,7 @@ void Device::draw_primitives(PrimitiveType primitive_type, FloatMatrix4x4 const& } // Now let's transform each triangle and send that to the GPU - auto const viewport = window_coordinates_to_target_coordinates(m_options.viewport, m_render_target->rect()); + auto const viewport = window_coordinates_to_target_coordinates(m_options.viewport); auto const viewport_half_width = viewport.width() / 2.0f; auto const viewport_half_height = viewport.height() / 2.0f; auto const viewport_center_x = viewport.x() + viewport_half_width; @@ -956,7 +1080,7 @@ void Device::clear_color(const FloatVector4& color) if (m_options.scissor_enabled) { auto fill_rect = m_render_target->rect(); - fill_rect.intersect(window_coordinates_to_target_coordinates(m_options.scissor_box, fill_rect)); + fill_rect.intersect(window_coordinates_to_target_coordinates(m_options.scissor_box)); Gfx::Painter painter { *m_render_target }; painter.fill_rect(fill_rect, fill_color); return; @@ -970,13 +1094,23 @@ void Device::clear_depth(float depth) wait_for_all_threads(); if (m_options.scissor_enabled) { - m_depth_buffer->clear(window_coordinates_to_target_coordinates(m_options.scissor_box, m_render_target->rect()), depth); + m_depth_buffer->clear(window_coordinates_to_target_coordinates(m_options.scissor_box), depth); return; } m_depth_buffer->clear(depth); } +void Device::clear_stencil(u8 value) +{ + Gfx::IntRect clear_rect = 
m_stencil_buffer->rect(); + + if (m_options.scissor_enabled) + clear_rect.intersect(window_coordinates_to_target_coordinates(m_options.scissor_box)); + + m_stencil_buffer->clear(clear_rect, value); +} + void Device::blit_to_color_buffer_at_raster_position(Gfx::Bitmap const& source) { if (!m_raster_position.valid) @@ -1148,6 +1282,11 @@ void Device::set_material_state(Face face, Material const& material) m_materials[face] = material; } +void Device::set_stencil_configuration(Face face, StencilConfiguration const& stencil_configuration) +{ + m_stencil_configuration[face] = stencil_configuration; +} + void Device::set_raster_position(RasterPosition const& raster_position) { m_raster_position = raster_position; @@ -1192,7 +1331,7 @@ Gfx::IntRect Device::raster_rect_in_target_coordinates(Gfx::IntSize size) size.width(), size.height(), }; - return window_coordinates_to_target_coordinates(raster_rect, m_render_target->rect()); + return window_coordinates_to_target_coordinates(raster_rect); } } diff --git a/Userland/Libraries/LibSoftGPU/Device.h b/Userland/Libraries/LibSoftGPU/Device.h index fc38e2cebb..8d761a49f5 100644 --- a/Userland/Libraries/LibSoftGPU/Device.h +++ b/Userland/Libraries/LibSoftGPU/Device.h @@ -10,6 +10,7 @@ #include <AK/Array.h> #include <AK/NonnullRefPtr.h> #include <AK/OwnPtr.h> +#include <AK/Vector.h> #include <LibGfx/Bitmap.h> #include <LibGfx/Matrix3x3.h> #include <LibGfx/Matrix4x4.h> @@ -26,6 +27,7 @@ #include <LibSoftGPU/Light/Light.h> #include <LibSoftGPU/Light/Material.h> #include <LibSoftGPU/Sampler.h> +#include <LibSoftGPU/StencilBuffer.h> #include <LibSoftGPU/Triangle.h> #include <LibSoftGPU/Vertex.h> @@ -38,6 +40,7 @@ struct TexCoordGenerationConfig { struct RasterizerOptions { bool shade_smooth { true }; + bool enable_stencil_test { false }; bool enable_depth_test { false }; bool enable_depth_write { true }; bool enable_alpha_test { false }; @@ -94,6 +97,17 @@ struct RasterPosition { FloatVector4 texture_coordinates { 0.0f, 0.0f, 0.0f, 
1.0f }; }; +struct StencilConfiguration { + StencilTestFunction test_function; + u8 reference_value; + u8 test_mask; + + StencilOperation on_stencil_test_fail; + StencilOperation on_depth_test_fail; + StencilOperation on_pass; + u8 write_mask; +}; + class Device final { public: Device(const Gfx::IntSize& min_size); @@ -104,6 +118,7 @@ public: void resize(const Gfx::IntSize& min_size); void clear_color(const FloatVector4&); void clear_depth(float); + void clear_stencil(u8); void blit_to(Gfx::Bitmap&); void blit_to_color_buffer_at_raster_position(Gfx::Bitmap const&); void blit_to_depth_buffer_at_raster_position(Vector<float> const&, size_t, size_t); @@ -120,6 +135,7 @@ public: void set_sampler_config(unsigned, SamplerConfig const&); void set_light_state(unsigned, Light const&); void set_material_state(Face, Material const&); + void set_stencil_configuration(Face, StencilConfiguration const&); RasterPosition raster_position() const { return m_raster_position; } void set_raster_position(RasterPosition const& raster_position); @@ -128,15 +144,16 @@ public: private: void draw_statistics_overlay(Gfx::Bitmap&); Gfx::IntRect raster_rect_in_target_coordinates(Gfx::IntSize size); + Gfx::IntRect window_coordinates_to_target_coordinates(Gfx::IntRect const&); void rasterize_triangle(const Triangle& triangle); void setup_blend_factors(); void shade_fragments(PixelQuad&); bool test_alpha(PixelQuad&); -private: RefPtr<Gfx::Bitmap> m_render_target; - OwnPtr<DepthBuffer> m_depth_buffer; + NonnullOwnPtr<DepthBuffer> m_depth_buffer; + NonnullOwnPtr<StencilBuffer> m_stencil_buffer; RasterizerOptions m_options; LightModelParameters m_lighting_model; Clipper m_clipper; @@ -149,6 +166,7 @@ private: Array<Light, NUM_LIGHTS> m_lights; Array<Material, 2u> m_materials; RasterPosition m_raster_position; + Array<StencilConfiguration, 2u> m_stencil_configuration; }; } diff --git a/Userland/Libraries/LibSoftGPU/DeviceInfo.h b/Userland/Libraries/LibSoftGPU/DeviceInfo.h index 41038aa2a7..840d87536b 
100644 --- a/Userland/Libraries/LibSoftGPU/DeviceInfo.h +++ b/Userland/Libraries/LibSoftGPU/DeviceInfo.h @@ -15,6 +15,7 @@ struct DeviceInfo final { String device_name; unsigned num_texture_units; unsigned num_lights; + u8 stencil_bits; }; } diff --git a/Userland/Libraries/LibSoftGPU/Enums.h b/Userland/Libraries/LibSoftGPU/Enums.h index 38f5fa08fe..79c31d2ac7 100644 --- a/Userland/Libraries/LibSoftGPU/Enums.h +++ b/Userland/Libraries/LibSoftGPU/Enums.h @@ -87,6 +87,28 @@ enum class PrimitiveType { Quads, }; +enum StencilOperation { + Decrement, + DecrementWrap, + Increment, + IncrementWrap, + Invert, + Keep, + Replace, + Zero, +}; + +enum StencilTestFunction { + Always, + Equal, + Greater, + GreaterOrEqual, + Less, + LessOrEqual, + Never, + NotEqual, +}; + enum TexCoordGenerationCoordinate { None = 0x0, S = 0x1, diff --git a/Userland/Libraries/LibSoftGPU/StencilBuffer.cpp b/Userland/Libraries/LibSoftGPU/StencilBuffer.cpp new file mode 100644 index 0000000000..b9aad2e626 --- /dev/null +++ b/Userland/Libraries/LibSoftGPU/StencilBuffer.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2022, Jelle Raaijmakers <jelle@gmta.nl> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include <LibSoftGPU/StencilBuffer.h> + +namespace SoftGPU { + +ErrorOr<NonnullOwnPtr<StencilBuffer>> StencilBuffer::try_create(Gfx::IntSize const& size) +{ + auto rect = Gfx::IntRect { 0, 0, size.width(), size.height() }; + auto data = TRY(FixedArray<u8>::try_create(size.area())); + return adopt_own(*new StencilBuffer(rect, move(data))); +} + +StencilBuffer::StencilBuffer(Gfx::IntRect const& rect, FixedArray<u8> data) + : m_data(move(data)) + , m_rect(rect) +{ +} + +void StencilBuffer::clear(Gfx::IntRect rect, u8 value) +{ + rect.intersect(m_rect); + + for (int y = rect.top(); y <= rect.bottom(); ++y) { + auto* line = scanline(y); + for (int x = rect.left(); x <= rect.right(); ++x) + line[x] = value; + } +} + +u8* StencilBuffer::scanline(int y) +{ + VERIFY(m_rect.contains_vertically(y)); + return 
&m_data[y * m_rect.width()]; +} + +} diff --git a/Userland/Libraries/LibSoftGPU/StencilBuffer.h b/Userland/Libraries/LibSoftGPU/StencilBuffer.h new file mode 100644 index 0000000000..b79340557a --- /dev/null +++ b/Userland/Libraries/LibSoftGPU/StencilBuffer.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021, Jelle Raaijmakers <jelle@gmta.nl> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include <AK/Error.h> +#include <AK/FixedArray.h> +#include <AK/NonnullOwnPtr.h> +#include <AK/Try.h> +#include <LibGfx/Rect.h> +#include <LibGfx/Size.h> + +namespace SoftGPU { + +class StencilBuffer final { +public: + static ErrorOr<NonnullOwnPtr<StencilBuffer>> try_create(Gfx::IntSize const& size); + + void clear(Gfx::IntRect rect, u8 value); + Gfx::IntRect const& rect() const { return m_rect; } + u8* scanline(int y); + +private: + StencilBuffer(Gfx::IntRect const& rect, FixedArray<u8> data); + + FixedArray<u8> m_data; + Gfx::IntRect m_rect; +}; + +} |