summaryrefslogtreecommitdiff
path: root/Userland/Libraries/LibDiff/Generator.cpp
blob: 3224ea6525defb738ba602a5c11ba073bc6eaa13 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
/*
 * Copyright (c) 2021, Mustafa Quraish <mustafa@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include "Generator.h"

namespace Diff {

Vector<Hunk> from_text(StringView old_text, StringView new_text)
{
    auto old_lines = old_text.lines();
    auto new_lines = new_text.lines();

    /**
     * This is a simple implementation of the Longest Common Subsequence algorithm (over
     * the lines of the text as opposed to the characters). A Dynamic programming approach
     * is used here.
     */

    enum class Direction {
        Down,     // Added a new line
        Right,    // Removed a line
        Diagonal, // Line remained the same
    };

    // A single cell in the DP-matrix. Cell (i, j) represents the longest common
    // sub-sequence of lines between old_lines[0 : i] and new_lines[0 : j].
    struct Cell {
        size_t length;
        Direction direction;
    };

    auto dp_matrix = Vector<Cell>();
    dp_matrix.resize((old_lines.size() + 1) * (new_lines.size() + 1));

    auto dp = [&dp_matrix, width = old_lines.size() + 1](size_t i, size_t j) -> Cell& {
        return dp_matrix[i + width * j];
    };

    // Initialize the first row and column
    for (size_t i = 0; i <= old_lines.size(); ++i)
        dp(i, new_lines.size()) = { 0, Direction::Right };

    for (size_t j = 0; j <= new_lines.size(); ++j)
        dp(old_lines.size(), 0) = { 0, Direction::Down };

    // Fill in the rest of the DP table
    for (int i = old_lines.size() - 1; i >= 0; --i) {
        for (int j = new_lines.size() - 1; j >= 0; --j) {
            if (old_lines[i] == new_lines[j]) {
                dp(i, j) = { dp(i + 1, j + 1).length + 1, Direction::Diagonal };
            } else {
                auto down = dp(i, j + 1).length;
                auto right = dp(i + 1, j).length;
                if (down > right)
                    dp(i, j) = { down, Direction::Down };
                else
                    dp(i, j) = { right, Direction::Right };
            }
        }
    }

    Vector<Hunk> hunks;
    Hunk cur_hunk;
    bool in_hunk = false;

    auto update_hunk = [&](size_t i, size_t j, Direction direction) {
        if (!in_hunk) {
            in_hunk = true;
            cur_hunk = { i, j, {}, {} };
        }
        if (direction == Direction::Down) {
            cur_hunk.added_lines.append(new_lines[j]);
        } else if (direction == Direction::Right) {
            cur_hunk.removed_lines.append(old_lines[i]);
        }
    };

    auto flush_hunk = [&]() {
        if (in_hunk) {
            if (cur_hunk.added_lines.size() > 0)
                cur_hunk.target_start_line++;
            if (cur_hunk.removed_lines.size() > 0)
                cur_hunk.original_start_line++;
            hunks.append(cur_hunk);
            in_hunk = false;
        }
    };

    size_t i = 0;
    size_t j = 0;

    while (i < old_lines.size() && j < new_lines.size()) {
        auto& cell = dp(i, j);
        if (cell.direction == Direction::Down) {
            update_hunk(i, j, cell.direction);
            ++j;
        } else if (cell.direction == Direction::Right) {
            update_hunk(i, j, cell.direction);
            ++i;
        } else {
            ++i;
            ++j;
            flush_hunk();
        }
    }
    flush_hunk();

    return hunks;
}

}