LibVideo/VP9: Split/clean up the token tree-parsing context function

Since the context information for parsing residual tokens depends on whether we're parsing the first coefficient or a subsequent one, the TreeParser::get_tokens_context function was split into two new functions, get_context_for_first_token and get_context_for_other_tokens, so that each one reads more cleanly. All variables now have meaningful names to aid readability as well. The math used in the function for the first token was changed to be more friendly to tile- or block-specific coordinates, to facilitate the use of range-restricted Spans of the above and left context arrays.
author: Zaggy1024 <zaggy1024@gmail.com> 2022-11-28 00:15:53 -0600
committer: Andreas Kling <kling@serenityos.org> 2022-11-30 08:28:30 +0100
commit: 06082d310f9664dbe816bda1e031013a3943d055 (patch)
tree: 7911b179094f28a9676d1bc1793685037f1d3958 /Userland/Libraries/LibVideo
parent: 3667f9bf2c7021cc4fa22baee7f92e7402d185af (diff)
download: serenity-06082d310f9664dbe816bda1e031013a3943d055.zip
3 files changed, 67 insertions, 51 deletions
diff --git a/Userland/Libraries/LibVideo/VP9/Parser.cpp b/Userland/Libraries/LibVideo/VP9/Parser.cpp
index 6753f5b2c8..c9dd89c6a4 100644
--- a/Userland/Libraries/LibVideo/VP9/Parser.cpp
+++ b/Userland/Libraries/LibVideo/VP9/Parser.cpp
@@ -1391,7 +1391,7 @@ DecoderErrorOr<bool> Parser::residual(BlockContext& block_context, bool has_bloc
                         TRY(m_decoder.predict_intra(plane, block_context, transform_x_in_px, transform_y_in_px, has_block_left || x > 0, has_block_above || y > 0, (x + transform_size_in_sub_blocks) < block_size_in_sub_blocks.width(), transform_size, sub_block_index));
                     if (!block_context.should_skip_residuals) {
                         auto transform_set = select_transform_type(block_context, plane, transform_size, sub_block_index);
-                        sub_block_had_non_zero_tokens = TRY(tokens(block_context, plane, transform_x_in_px, transform_y_in_px, transform_size, transform_set, token_cache));
+                        sub_block_had_non_zero_tokens = TRY(tokens(block_context, plane, x, y, transform_size, transform_set, token_cache));
                         block_had_non_zero_tokens = block_had_non_zero_tokens || sub_block_had_non_zero_tokens;
                         TRY(m_decoder.reconstruct(plane, block_context, transform_x_in_px, transform_y_in_px, transform_size, transform_set));
                     }
@@ -1445,7 +1445,7 @@ static u16 const* get_scan(TransformSize transform_size, TransformSet transform_
     return default_scan_32x32;
 }
 
-DecoderErrorOr<bool> Parser::tokens(BlockContext& block_context, size_t plane, u32 start_x, u32 start_y, TransformSize transform_size, TransformSet transform_set, Array<u8, 1024> token_cache)
+DecoderErrorOr<bool> Parser::tokens(BlockContext& block_context, size_t plane, u32 sub_block_column, u32 sub_block_row, TransformSize transform_size, TransformSet transform_set, Array<u8, 1024> token_cache)
 {
     block_context.residual_tokens.fill(0);
 
@@ -1453,17 +1453,21 @@ DecoderErrorOr<bool> Parser::tokens(BlockContext& block_context, size_t plane, u
 
     auto check_for_more_coefficients = true;
     u16 coef_index = 0;
-    u16 segment_eob = 16 << (transform_size << 1);
-    for (; coef_index < segment_eob; coef_index++) {
-        auto pos = scan[coef_index];
+    u16 transform_pixel_count = 16 << (transform_size << 1);
+    for (; coef_index < transform_pixel_count; coef_index++) {
         auto band = (transform_size == Transform_4x4) ? coefband_4x4[coef_index] : coefband_8x8plus[coef_index];
-        auto tokens_context = TreeParser::get_tokens_context(block_context.frame_context.color_config.subsampling_x, block_context.frame_context.color_config.subsampling_y, block_context.frame_context.rows(), block_context.frame_context.columns(), m_above_nonzero_context, m_left_nonzero_context, token_cache, transform_size, transform_set, plane, start_x, start_y, pos, block_context.is_inter_predicted(), band, coef_index);
+        auto token_position = scan[coef_index];
+        TokensContext tokens_context;
+        if (coef_index == 0)
+            tokens_context = TreeParser::get_context_for_first_token(block_context, m_above_nonzero_context, m_left_nonzero_context, transform_size, plane, sub_block_column, sub_block_row, block_context.is_inter_predicted(), band);
+        else
+            tokens_context = TreeParser::get_context_for_other_tokens(token_cache, transform_size, transform_set, plane, token_position, block_context.is_inter_predicted(), band);
 
         if (check_for_more_coefficients && !TRY_READ(TreeParser::parse_more_coefficients(*m_bit_stream, *m_probability_tables, *m_syntax_element_counter, tokens_context)))
             break;
 
         auto token = TRY_READ(TreeParser::parse_token(*m_bit_stream, *m_probability_tables, *m_syntax_element_counter, tokens_context));
-        token_cache[pos] = energy_class[token];
+        token_cache[token_position] = energy_class[token];
 
         i32 coef;
         if (token == ZeroToken) {
@@ -1473,7 +1477,7 @@ DecoderErrorOr<bool> Parser::tokens(BlockContext& block_context, size_t plane, u
             coef = TRY(read_coef(block_context.frame_context.color_config.bit_depth, token));
             check_for_more_coefficients = true;
         }
-        block_context.residual_tokens[pos] = coef;
+        block_context.residual_tokens[token_position] = coef;
     }
 
     return coef_index > 0;
diff --git a/Userland/Libraries/LibVideo/VP9/TreeParser.cpp b/Userland/Libraries/LibVideo/VP9/TreeParser.cpp
index 5c8c571cdb..a1dcb1dfaf 100644
--- a/Userland/Libraries/LibVideo/VP9/TreeParser.cpp
+++ b/Userland/Libraries/LibVideo/VP9/TreeParser.cpp
@@ -7,10 +7,12 @@
 
 #include <AK/Function.h>
 
+#include "Context.h"
 #include "Enums.h"
 #include "LookupTables.h"
 #include "Parser.h"
 #include "TreeParser.h"
+#include "Utilities.h"
 
 namespace Video::VP9 {
 
@@ -624,54 +626,62 @@ ErrorOr<bool> TreeParser::parse_motion_vector_hp(BitStream& bit_stream, Probabil
     return value;
 }
 
-TokensContext TreeParser::get_tokens_context(bool subsampling_x, bool subsampling_y, u32 rows, u32 columns, Array<Vector<bool>, 3> const& above_nonzero_context, Array<Vector<bool>, 3> const& left_nonzero_context, Array<u8, 1024> token_cache, TransformSize transform_size, TransformSet transform_set, u8 plane, u32 start_x, u32 start_y, u16 position, bool is_inter, u8 band, u16 coef_index)
+TokensContext TreeParser::get_context_for_first_token(BlockContext const& block_context, Array<Vector<bool>, 3> const& above_non_zero_tokens, Array<Vector<bool>, 3> const& left_non_zero_tokens, TransformSize transform_size, u8 plane, u32 sub_block_column, u32 sub_block_row, bool is_inter, u8 band)
 {
-    u8 context;
-    if (coef_index == 0) {
-        auto sx = plane > 0 ? subsampling_x : false;
-        auto sy = plane > 0 ? subsampling_y : false;
-        auto max_x = (2 * columns) >> sx;
-        auto max_y = (2 * rows) >> sy;
-        u8 numpts = 1 << transform_size;
-        auto x4 = start_x >> 2;
-        auto y4 = start_y >> 2;
-        u32 above = 0;
-        u32 left = 0;
-        for (size_t i = 0; i < numpts; i++) {
-            if (x4 + i < max_x)
-                above |= above_nonzero_context[plane][x4 + i];
-            if (y4 + i < max_y)
-                left |= left_nonzero_context[plane][y4 + i];
+    auto subsampling_x = plane > 0 ? block_context.frame_context.color_config.subsampling_x : false;
+    auto subsampling_y = plane > 0 ? block_context.frame_context.color_config.subsampling_y : false;
+    auto transform_top_in_sub_blocks = (blocks_to_sub_blocks(block_context.row) >> subsampling_y) + sub_block_row;
+    auto transform_left_in_sub_blocks = (blocks_to_sub_blocks(block_context.column) >> subsampling_x) + sub_block_column;
+    u8 transform_size_in_sub_blocks = transform_size_to_sub_blocks(transform_size);
+    bool above_has_non_zero_tokens = false;
+    for (u8 x = 0; x < transform_size_in_sub_blocks && x < above_non_zero_tokens[plane].size() - transform_left_in_sub_blocks; x++) {
+        if (above_non_zero_tokens[plane][transform_left_in_sub_blocks + x]) {
+            above_has_non_zero_tokens = true;
+            break;
         }
-        context = above + left;
-    } else {
-        u32 neighbor_0, neighbor_1;
-        auto n = 4 << transform_size;
-        auto i = position / n;
-        auto j = position % n;
-        auto a = i > 0 ? (i - 1) * n + j : 0;
-        auto a2 = i * n + j - 1;
-        if (i > 0 && j > 0) {
-            if (transform_set == TransformSet { TransformType::DCT, TransformType::ADST }) {
-                neighbor_0 = a;
-                neighbor_1 = a;
-            } else if (transform_set == TransformSet { TransformType::ADST, TransformType::DCT }) {
-                neighbor_0 = a2;
-                neighbor_1 = a2;
-            } else {
-                neighbor_0 = a;
-                neighbor_1 = a2;
-            }
-        } else if (i > 0) {
-            neighbor_0 = a;
-            neighbor_1 = a;
+    }
+    bool left_has_non_zero_tokens = false;
+    for (u8 y = 0; y < transform_size_in_sub_blocks && y < left_non_zero_tokens[plane].size() - transform_top_in_sub_blocks; y++) {
+        if (left_non_zero_tokens[plane][transform_top_in_sub_blocks + y]) {
+            left_has_non_zero_tokens = true;
+            break;
+        }
+    }
+
+    u8 context = above_has_non_zero_tokens + left_has_non_zero_tokens;
+    return TokensContext { transform_size, plane > 0, is_inter, band, context };
+}
+
+TokensContext TreeParser::get_context_for_other_tokens(Array<u8, 1024> token_cache, TransformSize transform_size, TransformSet transform_set, u8 plane, u16 token_position, bool is_inter, u8 band)
+{
+    auto transform_size_in_pixels = sub_blocks_to_pixels(transform_size_to_sub_blocks(transform_size));
+    auto log2_of_transform_size = transform_size + 2;
+    auto pixel_y = token_position >> log2_of_transform_size;
+    auto pixel_x = token_position - (pixel_y << log2_of_transform_size);
+    auto above_token_energy = pixel_y > 0 ? (pixel_y - 1) * transform_size_in_pixels + pixel_x : 0;
+    auto left_token_energy = pixel_y * transform_size_in_pixels + pixel_x - 1;
+
+    u32 neighbor_a, neighbor_b;
+    if (pixel_y > 0 && pixel_x > 0) {
+        if (transform_set == TransformSet { TransformType::DCT, TransformType::ADST }) {
+            neighbor_a = above_token_energy;
+            neighbor_b = above_token_energy;
+        } else if (transform_set == TransformSet { TransformType::ADST, TransformType::DCT }) {
+            neighbor_a = left_token_energy;
+            neighbor_b = left_token_energy;
         } else {
-            neighbor_0 = a2;
-            neighbor_1 = a2;
+            neighbor_a = above_token_energy;
+            neighbor_b = left_token_energy;
         }
-        context = (1 + token_cache[neighbor_0] + token_cache[neighbor_1]) >> 1;
+    } else if (pixel_y > 0) {
+        neighbor_a = above_token_energy;
+        neighbor_b = above_token_energy;
+    } else {
+        neighbor_a = left_token_energy;
+        neighbor_b = left_token_energy;
     }
 
+    u8 context = (1 + token_cache[neighbor_a] + token_cache[neighbor_b]) >> 1;
     return TokensContext { transform_size, plane > 0, is_inter, band, context };
 }
 
diff --git a/Userland/Libraries/LibVideo/VP9/TreeParser.h b/Userland/Libraries/LibVideo/VP9/TreeParser.h
index 0fd29241aa..349a4d4f16 100644
--- a/Userland/Libraries/LibVideo/VP9/TreeParser.h
+++ b/Userland/Libraries/LibVideo/VP9/TreeParser.h
@@ -17,6 +17,7 @@ namespace Video::VP9 {
 
 class Parser;
 
+struct BlockContext;
 struct FrameBlockContext;
 
 struct TokensContext {
@@ -86,7 +87,8 @@ public:
     static ErrorOr<u8> parse_motion_vector_fr(BitStream&, ProbabilityTables const&, SyntaxElementCounter&, u8 component);
     static ErrorOr<bool> parse_motion_vector_hp(BitStream&, ProbabilityTables const&, SyntaxElementCounter&, u8 component, bool use_hp);
 
-    static TokensContext get_tokens_context(bool subsampling_x, bool subsampling_y, u32 rows, u32 columns, Array<Vector<bool>, 3> const& above_nonzero_context, Array<Vector<bool>, 3> const& left_nonzero_context, Array<u8, 1024> token_cache, TransformSize, TransformSet, u8 plane, u32 start_x, u32 start_y, u16 position, bool is_inter, u8 band, u16 coef_index);
+    static TokensContext get_context_for_first_token(BlockContext const& block_context, Array<Vector<bool>, 3> const& above_non_zero_tokens, Array<Vector<bool>, 3> const& left_non_zero_tokens, TransformSize transform_size, u8 plane, u32 sub_block_column, u32 sub_block_row, bool is_inter, u8 band);
+    static TokensContext get_context_for_other_tokens(Array<u8, 1024> token_cache, TransformSize transform_size, TransformSet transform_set, u8 plane, u16 token_position, bool is_inter, u8 band);
     static ErrorOr<bool> parse_more_coefficients(BitStream&, ProbabilityTables const&, SyntaxElementCounter&, TokensContext const& context);
     static ErrorOr<Token> parse_token(BitStream&, ProbabilityTables const&, SyntaxElementCounter&, TokensContext const& context);
 };
author	Zaggy1024 <zaggy1024@gmail.com>	2022-11-28 00:15:53 -0600
committer	Andreas Kling <kling@serenityos.org>	2022-11-30 08:28:30 +0100
commit	06082d310f9664dbe816bda1e031013a3943d055 (patch)
tree	7911b179094f28a9676d1bc1793685037f1d3958 /Userland/Libraries/LibVideo
parent	3667f9bf2c7021cc4fa22baee7f92e7402d185af (diff)
download	serenity-06082d310f9664dbe816bda1e031013a3943d055.zip