summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMustafa Quraish <mustafaq9@gmail.com>2021-09-15 21:58:53 -0400
committerBrian Gianforcaro <b.gianfo@gmail.com>2021-09-17 16:56:59 +0000
commit5e28da1aa40ee7d1626bb1fcef12d5b0b5bc3a16 (patch)
tree803fdb7299d0b8ae9f4874257791161cc238ff73
parent27f28998b10eba3c17cbc3a0bb1ae6fc638a42b3 (diff)
downloadserenity-5e28da1aa40ee7d1626bb1fcef12d5b0b5bc3a16.zip
LibDiff: Add new API to generate hunks from two pieces of text
For now this is just a standard implementation of the longest common subsequence algorithm over the lines, except that it doesn't do any coalescing of the lines. This isn't really ideal since we get a single Hunk per changed line, and is definitely something to improve in the future.
-rw-r--r--Userland/Libraries/LibDiff/CMakeLists.txt3
-rw-r--r--Userland/Libraries/LibDiff/Generator.cpp88
-rw-r--r--Userland/Libraries/LibDiff/Generator.h15
3 files changed, 105 insertions, 1 deletions
diff --git a/Userland/Libraries/LibDiff/CMakeLists.txt b/Userland/Libraries/LibDiff/CMakeLists.txt
index fe81fedda9..1276fc59cf 100644
--- a/Userland/Libraries/LibDiff/CMakeLists.txt
+++ b/Userland/Libraries/LibDiff/CMakeLists.txt
@@ -1,7 +1,8 @@
set(SOURCES
- Hunks.cpp
Format.cpp
+ Generator.cpp
+ Hunks.cpp
)
serenity_lib(LibDiff diff)
diff --git a/Userland/Libraries/LibDiff/Generator.cpp b/Userland/Libraries/LibDiff/Generator.cpp
new file mode 100644
index 0000000000..8537e749be
--- /dev/null
+++ b/Userland/Libraries/LibDiff/Generator.cpp
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2021, Mustafa Quraish <mustafa@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include "Generator.h"
+
+namespace Diff {
+
+Vector<Hunk> from_text(StringView const& old_text, StringView const& new_text)
+{
+ auto old_lines = old_text.lines();
+ auto new_lines = new_text.lines();
+
+ /**
+ * This is a simple implementation of the Longest Common Subsequence algorithm (over
+ * the lines of the text as opposed to the characters). A Dynamic programming approach
+ * is used here.
+ */
+
+ enum class Direction {
+ Up, // Added a new line
+ Left, // Removed a line
+ Diagonal, // Line remained the same
+ };
+
+ // A single cell in the DP-matrix. Cell (i, j) represents the longest common
+ // sub-sequence of lines between old_lines[0 : i] and new_lines[0 : j].
+ struct Cell {
+ size_t length;
+ Direction direction;
+ };
+
+ auto dp_matrix = Vector<Cell>();
+ dp_matrix.resize((old_lines.size() + 1) * (new_lines.size() + 1));
+
+ auto dp = [&dp_matrix, width = old_lines.size() + 1](size_t i, size_t j) -> Cell& {
+ return dp_matrix[i + width * j];
+ };
+
+ // Initialize the first row and column
+ for (size_t i = 0; i <= old_lines.size(); ++i)
+ dp(i, 0) = { 0, Direction::Left };
+
+ for (size_t j = 0; j <= new_lines.size(); ++j)
+ dp(0, j) = { 0, Direction::Up };
+
+ // Fill in the rest of the DP table
+ for (size_t i = 1; i <= old_lines.size(); ++i) {
+ for (size_t j = 1; j <= new_lines.size(); ++j) {
+ if (old_lines[i - 1] == new_lines[j - 1]) {
+ dp(i, j) = { dp(i - 1, j - 1).length + 1, Direction::Diagonal };
+ } else {
+ auto up = dp(i, j - 1).length;
+ auto left = dp(i - 1, j).length;
+ if (up > left)
+ dp(i, j) = { up, Direction::Up };
+ else
+ dp(i, j) = { left, Direction::Left };
+ }
+ }
+ }
+
+ Vector<Hunk> hunks;
+ size_t i = old_lines.size();
+ size_t j = new_lines.size();
+
+ // FIXME: This creates a hunk per line, very inefficient.
+ while (i > 0 && j > 0) {
+ auto& cell = dp(i, j);
+ if (cell.direction == Direction::Up) {
+ --j;
+ hunks.append({ i, j, {}, { new_lines[j] } });
+ } else if (cell.direction == Direction::Left) {
+ --i;
+ hunks.append({ i, j, { old_lines[i] }, {} });
+ } else if (cell.direction == Direction::Diagonal) {
+ --i;
+ --j;
+ }
+ }
+
+ hunks.reverse();
+ return hunks;
+}
+
+}
diff --git a/Userland/Libraries/LibDiff/Generator.h b/Userland/Libraries/LibDiff/Generator.h
new file mode 100644
index 0000000000..599cdd8461
--- /dev/null
+++ b/Userland/Libraries/LibDiff/Generator.h
@@ -0,0 +1,15 @@
+/*
+ * Copyright (c) 2021, Mustafa Quraish <mustafa@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include "Hunks.h"
+
+namespace Diff {
+
+Vector<Hunk> from_text(StringView const& old_text, StringView const& new_text);
+
+}