diff options
author | Stephan Unverwerth <s.unverwerth@serenityos.org> | 2022-01-01 18:19:19 +0100 |
---|---|---|
committer | Ali Mohammad Pur <Ali.mpfard@gmail.com> | 2022-01-09 16:21:13 +0330 |
commit | 6f261d03626c96bc86d7c8b3e4b5e4ac3665ce02 (patch) | |
tree | 901b8f12e5a7c5502b3d4fa54ae876c318be46e3 /Userland | |
parent | 034dc480d2ed64167fc68878907a3fa1c18c05c0 (diff) | |
download | serenity-6f261d03626c96bc86d7c8b3e4b5e4ac3665ce02.zip |
LibSoftGPU: Use bitwise AND instead of modulus operator for power-of-two (POT) textures
Where possible (i.e. when the texture's width or height is a power of two), the sampler will wrap
texture coordinates using a bitwise AND with (size - 1) instead of a modulus. This speeds up the calculation quite a bit.
Diffstat (limited to 'Userland')
-rw-r--r-- | Userland/Libraries/LibSoftGPU/Image.cpp | 9 | ||||
-rw-r--r-- | Userland/Libraries/LibSoftGPU/Image.h | 6 | ||||
-rw-r--r-- | Userland/Libraries/LibSoftGPU/Sampler.cpp | 37 |
3 files changed, 46 insertions, 6 deletions
diff --git a/Userland/Libraries/LibSoftGPU/Image.cpp b/Userland/Libraries/LibSoftGPU/Image.cpp index 972073dfe4..34ad239f59 100644 --- a/Userland/Libraries/LibSoftGPU/Image.cpp +++ b/Userland/Libraries/LibSoftGPU/Image.cpp @@ -21,6 +21,15 @@ Image::Image(ImageFormat format, unsigned width, unsigned height, unsigned depth VERIFY(levels > 0); VERIFY(layers > 0); + if ((width & (width - 1)) == 0) + m_width_is_power_of_two = true; + + if ((height & (height - 1)) == 0) + m_height_is_power_of_two = true; + + if ((depth & (depth - 1)) == 0) + m_depth_is_power_of_two = true; + m_mipmap_sizes.append({ width, height, depth }); m_mipmap_offsets.append(0); diff --git a/Userland/Libraries/LibSoftGPU/Image.h b/Userland/Libraries/LibSoftGPU/Image.h index 0edc0ce9de..884e5d59c7 100644 --- a/Userland/Libraries/LibSoftGPU/Image.h +++ b/Userland/Libraries/LibSoftGPU/Image.h @@ -28,6 +28,9 @@ public: unsigned level_depth(unsigned level) const { return m_mipmap_sizes[level].z(); } unsigned num_levels() const { return m_num_levels; } unsigned num_layers() const { return m_num_layers; } + bool width_is_power_of_two() const { return m_width_is_power_of_two; } + bool height_is_power_of_two() const { return m_height_is_power_of_two; } + bool depth_is_power_of_two() const { return m_depth_is_power_of_two; } FloatVector4 texel(unsigned layer, unsigned level, unsigned x, unsigned y, unsigned z) const { @@ -68,6 +71,9 @@ private: Vector<size_t, 16> m_mipmap_offsets; Vector<Vector3<unsigned>, 16> m_mipmap_sizes; Vector<u8> m_data; + bool m_width_is_power_of_two { false }; + bool m_height_is_power_of_two { false }; + bool m_depth_is_power_of_two { false }; }; } diff --git a/Userland/Libraries/LibSoftGPU/Sampler.cpp b/Userland/Libraries/LibSoftGPU/Sampler.cpp index 973a7e9f0a..c713016181 100644 --- a/Userland/Libraries/LibSoftGPU/Sampler.cpp +++ b/Userland/Libraries/LibSoftGPU/Sampler.cpp @@ -128,6 +128,9 @@ Vector4<AK::SIMD::f32x4> Sampler::sample_2d(Vector2<AK::SIMD::f32x4> const& uv) 
image.level_height(level[3]), }; + u32x4 width_mask = width - 1; + u32x4 height_mask = height - 1; + f32x4 s = wrap(uv.x(), m_config.texture_wrap_u, width); f32x4 t = wrap(uv.y(), m_config.texture_wrap_v, height); @@ -135,21 +138,43 @@ Vector4<AK::SIMD::f32x4> Sampler::sample_2d(Vector2<AK::SIMD::f32x4> const& uv) f32x4 v = t * to_f32x4(height); if (m_config.texture_mag_filter == TextureFilter::Nearest) { - u32x4 i = to_i32x4(u) % width; - u32x4 j = to_i32x4(v) % height; + u32x4 i = to_u32x4(u); + u32x4 j = to_u32x4(v); u32x4 k = expand4(0u); + i = image.width_is_power_of_two() ? i & width_mask : i % width; + j = image.height_is_power_of_two() ? j & height_mask : j % height; + return texel4(image, layer, level, i, j, k); } u -= 0.5f; v -= 0.5f; - i32x4 i0 = m_config.texture_wrap_u == TextureWrapMode::Repeat ? to_i32x4(to_u32x4(floor_int_range(u)) % width) : to_i32x4(floor_int_range(u)); - i32x4 j0 = m_config.texture_wrap_v == TextureWrapMode::Repeat ? to_i32x4(to_u32x4(floor_int_range(v)) % height) : to_i32x4(floor_int_range(v)); + i32x4 i0 = to_i32x4(floor_int_range(u)); + i32x4 i1 = i0 + 1; + i32x4 j0 = to_i32x4(floor_int_range(v)); + i32x4 j1 = j0 + 1; + + if (m_config.texture_wrap_u == TextureWrapMode::Repeat) { + if (image.width_is_power_of_two()) { + i0 = (i32x4)(i0 & width_mask); + i1 = (i32x4)(i1 & width_mask); + } else { + i0 = (i32x4)(i0 % width); + i1 = (i32x4)(i1 % width); + } + } - i32x4 i1 = m_config.texture_wrap_u == TextureWrapMode::Repeat ? to_i32x4((i0 + 1) % width) : i0 + 1; - i32x4 j1 = m_config.texture_wrap_v == TextureWrapMode::Repeat ? to_i32x4((j0 + 1) % height) : j0 + 1; + if (m_config.texture_wrap_v == TextureWrapMode::Repeat) { + if (image.height_is_power_of_two()) { + j0 = (i32x4)(j0 & height_mask); + j1 = (i32x4)(j1 & height_mask); + } else { + j0 = (i32x4)(j0 % height); + j1 = (i32x4)(j1 % height); + } + } u32x4 k = expand4(0u); |