summaryrefslogtreecommitdiff
path: root/Userland/Libraries/LibCompress/Lzma.cpp
blob: e933dbe2862fc9db6146de0db5b260ae25b44e97 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
/*
 * Copyright (c) 2023, Tim Schumacher <timschumi@gmx.de>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <LibCompress/Lzma.h>

namespace Compress {

u32 LzmaHeader::dictionary_size() const
{
    // "If the value of dictionary size in properties is smaller than (1 << 12),
    //  the LZMA decoder must set the dictionary size variable to (1 << 12)."
    constexpr u32 minimum_dictionary_size = (1 << 12);
    if (m_dictionary_size < minimum_dictionary_size)
        return minimum_dictionary_size;

    return m_dictionary_size;
}

Optional<u64> LzmaHeader::uncompressed_size() const
{
    // We are making a copy of the packed field here because we would otherwise
    // pass an unaligned reference to the constructor of Optional, which is
    // undefined behavior.
    auto uncompressed_size = m_uncompressed_size;

    // "If "Uncompressed size" field contains ones in all 64 bits, it means that
    //  uncompressed size is unknown and there is the "end marker" in stream,
    //  that indicates the end of decoding point."
    if (uncompressed_size == UINT64_MAX)
        return {};

    // "In opposite case, if the value from "Uncompressed size" field is not
    //  equal to ((2^64) - 1), the LZMA stream decoding must be finished after
    //  specified number of bytes (Uncompressed size) is decoded. And if there
    //  is the "end marker", the LZMA decoder must read that marker also."
    return uncompressed_size;
}

ErrorOr<LzmaModelProperties> LzmaHeader::decode_model_properties(u8 input_bits)
{
    // "Decodes the following values from the encoded model properties field:
    //
    //     name  Range          Description
    //       lc  [0, 8]         the number of "literal context" bits
    //       lp  [0, 4]         the number of "literal pos" bits
    //       pb  [0, 4]         the number of "pos" bits
    //
    //  Encoded using `((pb * 5 + lp) * 9 + lc)`."

    if (input_bits >= (9 * 5 * 5))
        return Error::from_string_literal("Encoded model properties value is larger than the highest possible value");

    u8 literal_context_bits = input_bits % 9;
    input_bits /= 9;
    VERIFY(literal_context_bits >= 0 && literal_context_bits <= 8);

    u8 literal_position_bits = input_bits % 5;
    input_bits /= 5;
    VERIFY(literal_position_bits >= 0 && literal_position_bits <= 4);

    u8 position_bits = input_bits;
    VERIFY(position_bits >= 0 && position_bits <= 4);

    return LzmaModelProperties {
        .literal_context_bits = literal_context_bits,
        .literal_position_bits = literal_position_bits,
        .position_bits = position_bits,
    };
}

ErrorOr<LzmaDecompressorOptions> LzmaHeader::as_decompressor_options() const
{
    auto model_properties = TRY(decode_model_properties(m_encoded_model_properties));

    return Compress::LzmaDecompressorOptions {
        .literal_context_bits = model_properties.literal_context_bits,
        .literal_position_bits = model_properties.literal_position_bits,
        .position_bits = model_properties.position_bits,
        .dictionary_size = dictionary_size(),
        .uncompressed_size = uncompressed_size(),
    };
}

void LzmaDecompressor::initialize_to_default_probability(Span<Probability> span)
{
    for (auto& entry : span)
        entry = default_probability;
}

ErrorOr<NonnullOwnPtr<LzmaDecompressor>> LzmaDecompressor::create_from_container(MaybeOwned<Stream> stream)
{
    auto header = TRY(stream->read_value<LzmaHeader>());

    return TRY(LzmaDecompressor::create_from_raw_stream(move(stream), TRY(header.as_decompressor_options())));
}

ErrorOr<NonnullOwnPtr<LzmaDecompressor>> LzmaDecompressor::create_from_raw_stream(MaybeOwned<Stream> stream, LzmaDecompressorOptions const& options)
{
    // "The LZMA Encoder always writes ZERO in initial byte of compressed stream.
    //  That scheme allows to simplify the code of the Range Encoder in the
    //  LZMA Encoder. If initial byte is not equal to ZERO, the LZMA Decoder must
    //  stop decoding and report error."
    {
        auto byte = TRY(stream->read_value<u8>());
        if (byte != 0)
            return Error::from_string_literal("Initial byte of data stream is not zero");
    }

    auto output_buffer = TRY(CircularBuffer::create_empty(options.dictionary_size));

    // "The LZMA Decoder uses (1 << (lc + lp)) tables with CProb values, where each table contains 0x300 CProb values."
    auto literal_probabilities = TRY(FixedArray<Probability>::create(literal_probability_table_size * (1 << (options.literal_context_bits + options.literal_position_bits))));

    auto decompressor = TRY(adopt_nonnull_own_or_enomem(new (nothrow) LzmaDecompressor(move(stream), options, move(output_buffer), move(literal_probabilities))));

    // Read the initial bytes into the range decoder.
    for (size_t i = 0; i < 4; i++) {
        auto byte = TRY(decompressor->m_stream->read_value<u8>());
        decompressor->m_range_decoder_code = decompressor->m_range_decoder_code << 8 | byte;
    }

    return decompressor;
}

LzmaDecompressor::LzmaDecompressor(MaybeOwned<Stream> stream, LzmaDecompressorOptions options, CircularBuffer output_buffer, FixedArray<Probability> literal_probabilities)
    : m_stream(move(stream))
    , m_options(move(options))
    , m_output_buffer(move(output_buffer))
    , m_literal_probabilities(move(literal_probabilities))
{
    initialize_to_default_probability(m_literal_probabilities.span());

    for (auto& array : m_length_to_position_states)
        initialize_to_default_probability(array);

    for (auto& array : m_binary_tree_distance_probabilities)
        initialize_to_default_probability(array);

    initialize_to_default_probability(m_alignment_bit_probabilities);

    initialize_to_default_probability(m_is_match_probabilities);
    initialize_to_default_probability(m_is_rep_probabilities);
    initialize_to_default_probability(m_is_rep_g0_probabilities);
    initialize_to_default_probability(m_is_rep_g1_probabilities);
    initialize_to_default_probability(m_is_rep_g2_probabilities);
    initialize_to_default_probability(m_is_rep0_long_probabilities);
}

ErrorOr<void> LzmaDecompressor::normalize_range_decoder()
{
    // "The value of the "Range" variable before each bit decoding can not be smaller
    //  than ((UInt32)1 << 24). The Normalize() function keeps the "Range" value in
    //  described range."
    constexpr u32 minimum_range_value = 1 << 24;

    if (m_range_decoder_range >= minimum_range_value)
        return {};

    m_range_decoder_range <<= 8;
    m_range_decoder_code <<= 8;

    m_range_decoder_code |= TRY(m_stream->read_value<u8>());

    VERIFY(m_range_decoder_range >= minimum_range_value);

    return {};
}

ErrorOr<u8> LzmaDecompressor::decode_direct_bit()
{
    m_range_decoder_range >>= 1;
    m_range_decoder_code -= m_range_decoder_range;

    u32 temp = 0 - (m_range_decoder_code >> 31);

    m_range_decoder_code += m_range_decoder_range & temp;

    if (m_range_decoder_code == m_range_decoder_range)
        return Error::from_string_literal("Reached an invalid state while decoding LZMA stream");

    TRY(normalize_range_decoder());

    return temp + 1;
}

ErrorOr<u8> LzmaDecompressor::decode_bit_with_probability(Probability& probability)
{
    // "The LZMA decoder provides the pointer to CProb variable that contains
    //  information about estimated probability for symbol 0 and the Range Decoder
    //  updates that CProb variable after decoding."

    // The significance of the shift width is not explained and appears to be a magic constant.
    constexpr size_t probability_shift_width = 5;

    u32 bound = (m_range_decoder_range >> probability_bit_count) * probability;

    if (m_range_decoder_code < bound) {
        probability += ((1 << probability_bit_count) - probability) >> probability_shift_width;
        m_range_decoder_range = bound;
        TRY(normalize_range_decoder());
        return 0;
    } else {
        probability -= probability >> probability_shift_width;
        m_range_decoder_code -= bound;
        m_range_decoder_range -= bound;
        TRY(normalize_range_decoder());
        return 1;
    }
}

ErrorOr<u16> LzmaDecompressor::decode_symbol_using_bit_tree(size_t bit_count, Span<Probability> probability_tree)
{
    VERIFY(bit_count <= sizeof(u16) * 8);
    VERIFY(probability_tree.size() >= 1ul << bit_count);

    // This has been modified from the reference implementation to unlink the result and the tree index,
    // which should allow for better readability.

    u16 result = 0;
    size_t tree_index = 1;

    for (size_t i = 0; i < bit_count; i++) {
        u16 next_bit = TRY(decode_bit_with_probability(probability_tree[tree_index]));
        result = (result << 1) | next_bit;
        tree_index = (tree_index << 1) | next_bit;
    }

    return result;
}

ErrorOr<u16> LzmaDecompressor::decode_symbol_using_reverse_bit_tree(size_t bit_count, Span<Probability> probability_tree)
{
    VERIFY(bit_count <= sizeof(u16) * 8);
    VERIFY(probability_tree.size() >= 1ul << bit_count);

    u16 result = 0;
    size_t tree_index = 1;

    for (size_t i = 0; i < bit_count; i++) {
        u16 next_bit = TRY(decode_bit_with_probability(probability_tree[tree_index]));
        result |= next_bit << i;
        tree_index = (tree_index << 1) | next_bit;
    }

    return result;
}

ErrorOr<void> LzmaDecompressor::decode_literal_to_output_buffer()
{
    u8 previous_byte = 0;
    if (m_total_decoded_bytes > 0) {
        auto read_bytes = MUST(m_output_buffer.read_with_seekback({ &previous_byte, sizeof(previous_byte) }, 1));
        VERIFY(read_bytes.size() == sizeof(previous_byte));
    }

    // "To select the table for decoding it uses the context that consists of
    //  (lc) high bits from previous literal and (lp) low bits from value that
    //  represents current position in outputStream."
    u16 literal_state_bits_from_position = m_total_decoded_bytes & ((1 << m_options.literal_position_bits) - 1);
    u16 literal_state_bits_from_output = previous_byte >> (8 - m_options.literal_context_bits);
    u16 literal_state = literal_state_bits_from_position << m_options.literal_context_bits | literal_state_bits_from_output;

    Span<Probability> selected_probability_table = m_literal_probabilities.span().slice(literal_probability_table_size * literal_state, literal_probability_table_size);

    // The result is defined as u16 here and initialized to 1, but we will cut off the top bits before queueing them into the output buffer.
    // The top bit is only used to track how much we have decoded already, and to select the correct probability table.
    u16 result = 1;

    // "If (State > 7), the Literal Decoder also uses "matchByte" that represents
    //  the byte in OutputStream at position the is the DISTANCE bytes before
    //  current position, where the DISTANCE is the distance in DISTANCE-LENGTH pair
    //  of latest decoded match."
    // Note: The specification says `(State > 7)`, but the reference implementation does `(State >= 7)`, which is a mismatch.
    //       Testing `(State > 7)` with actual test files yields errors, so the reference implementation appears to be the correct one.
    if (m_state >= 7) {
        u8 matched_byte = 0;
        auto read_bytes = TRY(m_output_buffer.read_with_seekback({ &matched_byte, sizeof(matched_byte) }, m_rep0 + 1));
        VERIFY(read_bytes.size() == sizeof(matched_byte));

        do {
            u8 match_bit = (matched_byte >> 7) & 1;
            matched_byte <<= 1;

            u8 decoded_bit = TRY(decode_bit_with_probability(selected_probability_table[((1 + match_bit) << 8) + result]));
            result = result << 1 | decoded_bit;

            if (match_bit != decoded_bit)
                break;
        } while (result < 0x100);
    }

    while (result < 0x100)
        result = (result << 1) | TRY(decode_bit_with_probability(selected_probability_table[result]));

    u8 actual_result = result - 0x100;

    size_t written_bytes = m_output_buffer.write({ &actual_result, sizeof(actual_result) });
    VERIFY(written_bytes == sizeof(actual_result));
    m_total_decoded_bytes += sizeof(actual_result);

    return {};
}

LzmaDecompressor::LzmaLengthDecoderState::LzmaLengthDecoderState()
{
    for (auto& array : m_low_length_probabilities)
        initialize_to_default_probability(array);

    for (auto& array : m_medium_length_probabilities)
        initialize_to_default_probability(array);

    initialize_to_default_probability(m_high_length_probabilities);
}

ErrorOr<u16> LzmaDecompressor::decode_normalized_match_length(LzmaLengthDecoderState& length_decoder_state)
{
    // "LZMA uses "posState" value as context to select the binary tree
    //  from LowCoder and MidCoder binary tree arrays:"
    u16 position_state = m_total_decoded_bytes & ((1 << m_options.position_bits) - 1);

    // "The following scheme is used for the match length encoding:
    //
    //   Binary encoding    Binary Tree structure    Zero-based match length
    //   sequence                                    (binary + decimal):
    //
    //   0 xxx              LowCoder[posState]       xxx
    if (TRY(decode_bit_with_probability(length_decoder_state.m_first_choice_probability)) == 0)
        return TRY(decode_symbol_using_bit_tree(3, length_decoder_state.m_low_length_probabilities[position_state].span()));

    //   1 0 yyy            MidCoder[posState]       yyy + 8
    if (TRY(decode_bit_with_probability(length_decoder_state.m_second_choice_probability)) == 0)
        return TRY(decode_symbol_using_bit_tree(3, length_decoder_state.m_medium_length_probabilities[position_state].span())) + 8;

    //   1 1 zzzzzzzz       HighCoder                zzzzzzzz + 16"
    return TRY(decode_symbol_using_bit_tree(8, length_decoder_state.m_high_length_probabilities.span())) + 16;
}

ErrorOr<u32> LzmaDecompressor::decode_normalized_match_distance(u16 normalized_match_length)
{
    // "LZMA uses normalized match length (zero-based length)
    //  to calculate the context state "lenState" do decode the distance value."
    u16 length_state = min(normalized_match_length, number_of_length_to_position_states - 1);

    // "At first stage the distance decoder decodes 6-bit "posSlot" value with bit
    //  tree decoder from PosSlotDecoder array."
    u16 position_slot = TRY(decode_symbol_using_bit_tree(6, m_length_to_position_states[length_state].span()));

    // "The encoding scheme for distance value is shown in the following table:
    //
    //  posSlot (decimal) /
    //       zero-based distance (binary)
    //  0    0
    //  1    1
    //  2    10
    //  3    11
    //
    //  4    10 x
    //  5    11 x
    //  6    10 xx
    //  7    11 xx
    //  8    10 xxx
    //  9    11 xxx
    //  10    10 xxxx
    //  11    11 xxxx
    //  12    10 xxxxx
    //  13    11 xxxxx
    //
    //  14    10 yy zzzz
    //  15    11 yy zzzz
    //  16    10 yyy zzzz
    //  17    11 yyy zzzz
    //  ...
    //  62    10 yyyyyyyyyyyyyyyyyyyyyyyyyy zzzz
    //  63    11 yyyyyyyyyyyyyyyyyyyyyyyyyy zzzz
    //
    //  where
    //   "x ... x" means the sequence of binary symbols encoded with binary tree and
    //       "Reverse" scheme. It uses separated binary tree for each posSlot from 4 to 13.
    //   "y" means direct bit encoded with range coder.
    //   "zzzz" means the sequence of four binary symbols encoded with binary
    //       tree with "Reverse" scheme, where one common binary tree "AlignDecoder"
    //       is used for all posSlot values."

    // "If (posSlot < 4), the "dist" value is equal to posSlot value."
    if (position_slot < first_position_slot_with_binary_tree_bits)
        return position_slot;

    // From here on, the first bit of the distance is always set and the second bit is set if the last bit of the position slot is set.
    u32 distance_prefix = ((1 << 1) | ((position_slot & 1) << 0));

    // "If (posSlot >= 4), the decoder uses "posSlot" value to calculate the value of
    //   the high bits of "dist" value and the number of the low bits.
    //   If (4 <= posSlot < kEndPosModelIndex), the decoder uses bit tree decoders.
    //     (one separated bit tree decoder per one posSlot value) and "Reverse" scheme."
    if (position_slot < first_position_slot_with_direct_encoded_bits) {
        size_t number_of_bits_to_decode = (position_slot / 2) - 1;
        auto& selected_probability_tree = m_binary_tree_distance_probabilities[position_slot - first_position_slot_with_binary_tree_bits];
        return (distance_prefix << number_of_bits_to_decode) | TRY(decode_symbol_using_reverse_bit_tree(number_of_bits_to_decode, selected_probability_tree));
    }

    // "  if (posSlot >= kEndPosModelIndex), the middle bits are decoded as direct
    //     bits from RangeDecoder and the low 4 bits are decoded with a bit tree
    //     decoder "AlignDecoder" with "Reverse" scheme."
    size_t number_of_direct_bits_to_decode = ((position_slot - first_position_slot_with_direct_encoded_bits) / 2) + 2;
    for (size_t i = 0; i < number_of_direct_bits_to_decode; i++) {
        distance_prefix = (distance_prefix << 1) | TRY(decode_direct_bit());
    }
    return (distance_prefix << number_of_alignment_bits) | TRY(decode_symbol_using_reverse_bit_tree(number_of_alignment_bits, m_alignment_bit_probabilities));
}

ErrorOr<Bytes> LzmaDecompressor::read_some(Bytes bytes)
{
    while (m_output_buffer.used_space() < bytes.size() && m_output_buffer.empty_space() != 0) {
        if (m_found_end_of_stream_marker) {
            if (m_options.uncompressed_size.has_value() && m_total_decoded_bytes < m_options.uncompressed_size.value())
                return Error::from_string_literal("Found end-of-stream marker earlier than expected");

            break;
        }

        if (m_options.uncompressed_size.has_value() && m_total_decoded_bytes >= m_options.uncompressed_size.value()) {
            // FIXME: This should validate that either EOF or the 'end of stream' marker follow immediately.
            //        Both of those cases count as the 'end of stream' marker being found and should check for a clean decoder state.
            break;
        }

        // "The decoder calculates "state2" variable value to select exact variable from
        //  "IsMatch" and "IsRep0Long" arrays."
        u16 position_state = m_total_decoded_bytes & ((1 << m_options.position_bits) - 1);
        u16 state2 = (m_state << maximum_number_of_position_bits) + position_state;

        auto update_state_after_literal = [&] {
            if (m_state < 4)
                m_state = 0;
            else if (m_state < 10)
                m_state -= 3;
            else
                m_state -= 6;
        };

        auto update_state_after_match = [&] {
            if (m_state < 7)
                m_state = 7;
            else
                m_state = 10;
        };

        auto update_state_after_rep = [&] {
            if (m_state < 7)
                m_state = 8;
            else
                m_state = 11;
        };

        auto update_state_after_short_rep = [&] {
            if (m_state < 7)
                m_state = 9;
            else
                m_state = 11;
        };

        auto copy_match_to_buffer = [&](u16 real_length) -> ErrorOr<void> {
            VERIFY(!m_leftover_match_length.has_value());

            if (m_options.uncompressed_size.has_value() && m_options.uncompressed_size.value() < m_total_decoded_bytes + real_length)
                return Error::from_string_literal("Tried to copy match beyond expected uncompressed file size");

            while (real_length > 0) {
                if (m_output_buffer.empty_space() == 0) {
                    m_leftover_match_length = real_length;
                    break;
                }

                u8 byte;
                auto read_bytes = TRY(m_output_buffer.read_with_seekback({ &byte, sizeof(byte) }, m_rep0 + 1));
                VERIFY(read_bytes.size() == sizeof(byte));

                auto written_bytes = m_output_buffer.write({ &byte, sizeof(byte) });
                VERIFY(written_bytes == sizeof(byte));
                m_total_decoded_bytes++;

                real_length--;
            }

            return {};
        };

        // If we have a leftover part of a repeating match, we should finish that first.
        if (m_leftover_match_length.has_value()) {
            TRY(copy_match_to_buffer(m_leftover_match_length.release_value()));
            continue;
        }

        // "The decoder uses the following code flow scheme to select exact
        //  type of LITERAL or MATCH:
        //
        //  IsMatch[state2] decode
        //   0 - the Literal"
        if (TRY(decode_bit_with_probability(m_is_match_probabilities[state2])) == 0) {
            // "At first the LZMA decoder must check that it doesn't exceed
            //  specified uncompressed size."
            // This is already checked for at the beginning of the loop.

            // "Then it decodes literal value and puts it to sliding window."
            TRY(decode_literal_to_output_buffer());

            // "Then the decoder must update the "state" value."
            update_state_after_literal();
            continue;
        }

        // " 1 - the Match
        //     IsRep[state] decode
        //       0 - Simple Match"
        if (TRY(decode_bit_with_probability(m_is_rep_probabilities[m_state])) == 0) {
            // "The distance history table is updated with the following scheme:"
            m_rep3 = m_rep2;
            m_rep2 = m_rep1;
            m_rep1 = m_rep0;

            // "The zero-based length is decoded with "LenDecoder"."
            u16 normalized_length = TRY(decode_normalized_match_length(m_length_decoder));

            // "The state is update with UpdateState_Match function."
            update_state_after_match();

            // "and the new "rep0" value is decoded with DecodeDistance."
            m_rep0 = TRY(decode_normalized_match_distance(normalized_length));

            // "If the value of "rep0" is equal to 0xFFFFFFFF, it means that we have
            //  "End of stream" marker, so we can stop decoding and check finishing
            //  condition in Range Decoder"
            if (m_rep0 == 0xFFFFFFFF) {
                // The range decoder condition is checked after breaking out of the loop.
                m_found_end_of_stream_marker = true;
                continue;
            }

            // "If uncompressed size is defined, LZMA decoder must check that it doesn't
            //  exceed that specified uncompressed size."
            // This is being checked for in the common "copy to buffer" implementation.

            // "Also the decoder must check that "rep0" value is not larger than dictionary size
            //  and is not larger than the number of already decoded bytes."
            if (m_rep0 > min(m_options.dictionary_size, m_total_decoded_bytes))
                return Error::from_string_literal("rep0 value is larger than the possible lookback size");

            // "Then the decoder must copy match bytes as described in
            //  "The match symbols copying" section."
            TRY(copy_match_to_buffer(normalized_length + normalized_to_real_match_length_offset));

            continue;
        }

        // "     1 - Rep Match
        //         IsRepG0[state] decode
        //           0 - the distance is rep0"
        if (TRY(decode_bit_with_probability(m_is_rep_g0_probabilities[m_state])) == 0) {
            // "LZMA doesn't update the distance history."

            // "       IsRep0Long[state2] decode
            //           0 - Short Rep Match"
            if (TRY(decode_bit_with_probability(m_is_rep0_long_probabilities[state2])) == 0) {
                // "If the subtype is "Short Rep Match", the decoder updates the state, puts
                //  the one byte from window to current position in window and goes to next
                //  MATCH/LITERAL symbol."
                update_state_after_short_rep();

                u8 byte;
                auto read_bytes = TRY(m_output_buffer.read_with_seekback({ &byte, sizeof(byte) }, m_rep0 + 1));
                VERIFY(read_bytes.size() == sizeof(byte));

                auto written_bytes = m_output_buffer.write({ &byte, sizeof(byte) });
                VERIFY(written_bytes == sizeof(byte));
                m_total_decoded_bytes++;

                continue;
            }
            // "         1 - Rep Match 0"
            // Intentional fallthrough, we just need to make sure to not run the detection for other match types and to not switch around the distance history.
        } else {
            // "     1 -
            //         IsRepG1[state] decode
            //           0 - Rep Match 1"
            if (TRY(decode_bit_with_probability(m_is_rep_g1_probabilities[m_state])) == 0) {
                u32 distance = m_rep1;
                m_rep1 = m_rep0;
                m_rep0 = distance;
            }

            // "         1 -
            //             IsRepG2[state] decode
            //               0 - Rep Match 2"
            else if (TRY(decode_bit_with_probability(m_is_rep_g2_probabilities[m_state])) == 0) {
                u32 distance = m_rep2;
                m_rep2 = m_rep1;
                m_rep1 = m_rep0;
                m_rep0 = distance;
            }

            // "             1 - Rep Match 3"
            else {
                u32 distance = m_rep3;
                m_rep3 = m_rep2;
                m_rep2 = m_rep1;
                m_rep1 = m_rep0;
                m_rep0 = distance;
            }
        }

        // "In other cases (Rep Match 0/1/2/3), it decodes the zero-based
        //  length of match with "RepLenDecoder" decoder."
        u16 normalized_length = TRY(decode_normalized_match_length(m_rep_length_decoder));

        // "Then it updates the state."
        update_state_after_rep();

        // "Then the decoder must copy match bytes as described in
        //  "The Match symbols copying" section."
        TRY(copy_match_to_buffer(normalized_length + normalized_to_real_match_length_offset));
    }

    if (m_found_end_of_stream_marker) {
        if (m_range_decoder_code != 0)
            return Error::from_string_literal("LZMA stream ends in an unclean state");
    }

    return m_output_buffer.read(bytes);
}

ErrorOr<size_t> LzmaDecompressor::write_some(ReadonlyBytes)
{
    return Error::from_errno(EBADF);
}

bool LzmaDecompressor::is_eof() const
{
    if (m_output_buffer.used_space() > 0)
        return false;

    if (m_options.uncompressed_size.has_value())
        return m_total_decoded_bytes >= m_options.uncompressed_size.value();

    return m_found_end_of_stream_marker;
}

bool LzmaDecompressor::is_open() const
{
    return true;
}

void LzmaDecompressor::close()
{
}

}