diff options
author | Mustafa Quraish <mustafaq9@gmail.com> | 2021-09-15 21:58:53 -0400 |
---|---|---|
committer | Brian Gianforcaro <b.gianfo@gmail.com> | 2021-09-17 16:56:59 +0000 |
commit | 5e28da1aa40ee7d1626bb1fcef12d5b0b5bc3a16 (patch) | |
tree | 803fdb7299d0b8ae9f4874257791161cc238ff73 | |
parent | 27f28998b10eba3c17cbc3a0bb1ae6fc638a42b3 (diff) | |
download | serenity-5e28da1aa40ee7d1626bb1fcef12d5b0b5bc3a16.zip |
LibDiff: Add new API to generate hunks from two pieces of text
For now this is just a standard implementation of the longest
common subsequence algorithm over the lines, except that it doesn't
do any coalescing of the lines. This isn't really ideal since
we get a single Hunk per changed line, and is definitely something
to improve in the future.
-rw-r--r-- | Userland/Libraries/LibDiff/CMakeLists.txt | 3 | ||||
-rw-r--r-- | Userland/Libraries/LibDiff/Generator.cpp | 88 | ||||
-rw-r--r-- | Userland/Libraries/LibDiff/Generator.h | 15 |
3 files changed, 105 insertions, 1 deletions
diff --git a/Userland/Libraries/LibDiff/CMakeLists.txt b/Userland/Libraries/LibDiff/CMakeLists.txt index fe81fedda9..1276fc59cf 100644 --- a/Userland/Libraries/LibDiff/CMakeLists.txt +++ b/Userland/Libraries/LibDiff/CMakeLists.txt @@ -1,7 +1,8 @@ set(SOURCES - Hunks.cpp Format.cpp + Generator.cpp + Hunks.cpp ) serenity_lib(LibDiff diff) diff --git a/Userland/Libraries/LibDiff/Generator.cpp b/Userland/Libraries/LibDiff/Generator.cpp new file mode 100644 index 0000000000..8537e749be --- /dev/null +++ b/Userland/Libraries/LibDiff/Generator.cpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2021, Mustafa Quraish <mustafa@serenityos.org> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include "Generator.h" + +namespace Diff { + +Vector<Hunk> from_text(StringView const& old_text, StringView const& new_text) +{ + auto old_lines = old_text.lines(); + auto new_lines = new_text.lines(); + + /** + * This is a simple implementation of the Longest Common Subsequence algorithm (over + * the lines of the text as opposed to the characters). A Dynamic programming approach + * is used here. + */ + + enum class Direction { + Up, // Added a new line + Left, // Removed a line + Diagonal, // Line remained the same + }; + + // A single cell in the DP-matrix. Cell (i, j) represents the longest common + // sub-sequence of lines between old_lines[0 : i] and new_lines[0 : j]. + struct Cell { + size_t length; + Direction direction; + }; + + auto dp_matrix = Vector<Cell>(); + dp_matrix.resize((old_lines.size() + 1) * (new_lines.size() + 1)); + + auto dp = [&dp_matrix, width = old_lines.size() + 1](size_t i, size_t j) -> Cell& { + return dp_matrix[i + width * j]; + }; + + // Initialize the first row and column + for (size_t i = 0; i <= old_lines.size(); ++i) + dp(i, 0) = { 0, Direction::Left }; + + for (size_t j = 0; j <= new_lines.size(); ++j) + dp(0, j) = { 0, Direction::Up }; + + // Fill in the rest of the DP table + for (size_t i = 1; i <= old_lines.size(); ++i) { + for (size_t j = 1; j <= new_lines.size(); ++j) { + if (old_lines[i - 1] == new_lines[j - 1]) { + dp(i, j) = { dp(i - 1, j - 1).length + 1, Direction::Diagonal }; + } else { + auto up = dp(i, j - 1).length; + auto left = dp(i - 1, j).length; + if (up > left) + dp(i, j) = { up, Direction::Up }; + else + dp(i, j) = { left, Direction::Left }; + } + } + } + + Vector<Hunk> hunks; + size_t i = old_lines.size(); + size_t j = new_lines.size(); + + // FIXME: This creates a hunk per line, very inefficient. + while (i > 0 && j > 0) { + auto& cell = dp(i, j); + if (cell.direction == Direction::Up) { + --j; + hunks.append({ i, j, {}, { new_lines[j] } }); + } else if (cell.direction == Direction::Left) { + --i; + hunks.append({ i, j, { old_lines[i] }, {} }); + } else if (cell.direction == Direction::Diagonal) { + --i; + --j; + } + } + + hunks.reverse(); + return hunks; +} + +} diff --git a/Userland/Libraries/LibDiff/Generator.h b/Userland/Libraries/LibDiff/Generator.h new file mode 100644 index 0000000000..599cdd8461 --- /dev/null +++ b/Userland/Libraries/LibDiff/Generator.h @@ -0,0 +1,15 @@ +/* + * Copyright (c) 2021, Mustafa Quraish <mustafa@serenityos.org> + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include "Hunks.h" + +namespace Diff { + +Vector<Hunk> from_text(StringView const& old_text, StringView const& new_text); + +} |