summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorItamar <itamar8910@gmail.com>2020-09-12 20:38:55 +0300
committerAndreas Kling <kling@serenityos.org>2020-09-15 21:43:29 +0200
commit11440fa78f0404a377bfbaf74d35904025ae36dc (patch)
treea09628c25bc328bcec9037b01a89575c8e41f095
parent7c70183f3f7fb8e3e154e150dadf817bea1d7da9 (diff)
downloadserenity-11440fa78f0404a377bfbaf74d35904025ae36dc.zip
LibDiff: Add library for working with diffs
LibDiff currently contains functionality for parsing diffs in the "unified format" and for generating a simple diff that contains only additions.
-rw-r--r--Libraries/CMakeLists.txt1
-rw-r--r--Libraries/LibDiff/CMakeLists.txt8
-rw-r--r--Libraries/LibDiff/Format.cpp43
-rw-r--r--Libraries/LibDiff/Format.h33
-rw-r--r--Libraries/LibDiff/Hunks.cpp151
-rw-r--r--Libraries/LibDiff/Hunks.h57
6 files changed, 293 insertions, 0 deletions
diff --git a/Libraries/CMakeLists.txt b/Libraries/CMakeLists.txt
index 23aa1451c8..b254c8046b 100644
--- a/Libraries/CMakeLists.txt
+++ b/Libraries/CMakeLists.txt
@@ -27,3 +27,4 @@ add_subdirectory(LibTLS)
add_subdirectory(LibVT)
add_subdirectory(LibWeb)
add_subdirectory(LibX86)
+add_subdirectory(LibDiff)
diff --git a/Libraries/LibDiff/CMakeLists.txt b/Libraries/LibDiff/CMakeLists.txt
new file mode 100644
index 0000000000..fe81fedda9
--- /dev/null
+++ b/Libraries/LibDiff/CMakeLists.txt
@@ -0,0 +1,8 @@
+
+set(SOURCES # Translation units that make up LibDiff.
+ Hunks.cpp
+ Format.cpp
+)
+
+serenity_lib(LibDiff diff) # Project macro; presumably builds target LibDiff from SOURCES - confirm against the macro's definition.
+target_link_libraries(LibDiff LibC) # LibDiff links against LibC.
diff --git a/Libraries/LibDiff/Format.cpp b/Libraries/LibDiff/Format.cpp
new file mode 100644
index 0000000000..d1a5711247
--- /dev/null
+++ b/Libraries/LibDiff/Format.cpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2020, Itamar S. <itamar8910@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "Format.h"
+#include <AK/String.h>
+#include <AK/StringBuilder.h>
+#include <AK/Vector.h>
+
+namespace Diff {
+// Builds a single unified-diff hunk in which every line of `text` is an added ('+') line.
+// `text` is split on '\n' with empty lines kept, so a blank input line is emitted as a bare "+".
+// The hunk header reports the number of lines in both of its length fields.
+String generate_only_additions(const String& text)
+{
+    auto lines = text.split('\n', true); // Keep empty
+    StringBuilder builder;
+    // The header contains two conversions, so the line count has to be passed twice;
+    // %zu is the conversion that matches size_t.
+    builder.appendf("@@ -1,%zu +1,%zu @@\n", lines.size(), lines.size());
+    for (const auto& line : lines) {
+        builder.appendf("+%s\n", line.characters());
+    }
+    return builder.to_string();
+}
+};
diff --git a/Libraries/LibDiff/Format.h b/Libraries/LibDiff/Format.h
new file mode 100644
index 0000000000..73cbbcf407
--- /dev/null
+++ b/Libraries/LibDiff/Format.h
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2020, Itamar S. <itamar8910@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include <AK/String.h>
+
+namespace Diff {
+String generate_only_additions(const String&); // Returns a unified-diff hunk in which every line of the given text appears as an added ('+') line.
+};
diff --git a/Libraries/LibDiff/Hunks.cpp b/Libraries/LibDiff/Hunks.cpp
new file mode 100644
index 0000000000..c7828c2f64
--- /dev/null
+++ b/Libraries/LibDiff/Hunks.cpp
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2020, Itamar S. <itamar8910@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "Hunks.h"
+
+// #define DEBUG_HUNKS
+
+namespace Diff {
+// Parses the hunks of a unified-format diff.
+//
+// Everything before the first line starting with '@' is skipped. Each "@@ ... @@" line resets the
+// current location via parse_hunk_location(); context lines (leading ' ') advance both sides of
+// that location. A run of '-' lines followed by a run of '+' lines forms one Hunk, recorded with
+// the location at which the run started and with the leading marker stripped from each stored line.
+//
+// Returns an empty vector when the input has no lines or contains no hunk header.
+//
+// NOTE: file header lines ("--- a/...", "+++ b/...") that appear after the first hunk start with
+// '-' / '+' and are therefore still read as removed / added lines.
+Vector<Hunk> parse_hunks(const String& diff)
+{
+    Vector<String> diff_lines = diff.split('\n');
+    if (diff_lines.is_empty())
+        return {};
+
+    Vector<Hunk> hunks;
+
+    size_t line_index = 0;
+    HunkLocation current_location {};
+
+    // Skip to first hunk. The bounds check keeps input without any hunk header from being
+    // indexed past its last line.
+    while (line_index < diff_lines.size() && diff_lines[line_index][0] != '@') {
+        ++line_index;
+    }
+
+    while (line_index < diff_lines.size()) {
+        const char marker = diff_lines[line_index][0];
+        if (marker == '@') {
+            current_location = parse_hunk_location(diff_lines[line_index]);
+            ++line_index;
+            continue;
+        }
+        if (marker == ' ') {
+            current_location.apply_offset(1, HunkLocation::LocationType::Both);
+            ++line_index;
+            continue;
+        }
+        if (marker != '-' && marker != '+') {
+            // Any other prefix (e.g. the "\ No newline at end of file" marker) is neither a change
+            // nor context. Skip it: none of the loops below would consume such a line, so without
+            // this the outer loop would never advance and would keep appending empty hunks.
+            ++line_index;
+            continue;
+        }
+
+        Hunk hunk {};
+        hunk.original_start_line = current_location.original_start_line;
+        hunk.target_start_line = current_location.target_start_line;
+
+        // Removed lines: store the text without the leading '-'.
+        while (line_index < diff_lines.size() && diff_lines[line_index][0] == '-') {
+            hunk.removed_lines.append(diff_lines[line_index].substring(1, diff_lines[line_index].length() - 1));
+            current_location.apply_offset(1, HunkLocation::LocationType::Original);
+            ++line_index;
+        }
+        // Added lines: store the text without the leading '+'.
+        while (line_index < diff_lines.size() && diff_lines[line_index][0] == '+') {
+            hunk.added_lines.append(diff_lines[line_index].substring(1, diff_lines[line_index].length() - 1));
+            current_location.apply_offset(1, HunkLocation::LocationType::Target);
+            ++line_index;
+        }
+
+        // Trailing context only moves the location forward.
+        while (line_index < diff_lines.size() && diff_lines[line_index][0] == ' ') {
+            current_location.apply_offset(1, HunkLocation::LocationType::Both);
+            ++line_index;
+        }
+        hunks.append(hunk);
+    }
+
+#ifdef DEBUG_HUNKS
+    for (const auto& hunk : hunks) {
+        dbg() << "Hunk location:";
+        dbg() << "orig: " << hunk.original_start_line;
+        dbg() << "target: " << hunk.target_start_line;
+        dbg() << "removed:";
+        for (const auto& line : hunk.removed_lines) {
+            dbg() << "- " << line;
+        }
+        dbg() << "added:";
+        for (const auto& line : hunk.added_lines) {
+            dbg() << "+ " << line;
+        }
+    }
+#endif
+
+    return hunks;
+}
+
+// Parses a hunk header of the form "@@ -<start>[,<length>] +<start>[,<length>] @@".
+//
+// For each of the two ranges the start and the length are stored reduced by one (clamped at
+// zero), so a header start of 1 becomes 0. When ",<length>" is omitted the length is taken to
+// be 1, as the unified format specifies for single-line ranges.
+//
+// Malformed headers are not tolerated: the ASSERTs below, and the value() calls on the parsed
+// numbers, expect a '-' range, then " +" introducing the target range, then a space after it.
+HunkLocation parse_hunk_location(const String& location_line)
+{
+    size_t char_index = 0;
+    struct StartAndLength {
+        size_t start { 0 };
+        size_t length { 0 };
+    };
+    auto parse_start_and_length_pair = [](const String& raw) {
+        auto index_of_separator = raw.index_of(",");
+        unsigned start = 0;
+        // The ",<length>" part may be left out, which means a length of 1.
+        unsigned length = 1;
+        if (index_of_separator.has_value()) {
+            auto separator = index_of_separator.value();
+            start = raw.substring(0, separator).to_uint().value();
+            length = raw.substring(separator + 1, raw.length() - separator - 1).to_uint().value();
+        } else {
+            start = raw.to_uint().value();
+        }
+        // Both values are stored reduced by one. Clamp at zero so that a "0" (as in "-0,0")
+        // does not wrap around to a huge unsigned value.
+        StartAndLength res;
+        res.start = start > 0 ? start - 1 : 0;
+        res.length = length > 0 ? length - 1 : 0;
+        return res;
+    };
+
+    // Advance to just past the '-' that introduces the original range.
+    while (char_index < location_line.length() && location_line[char_index++] != '-') {
+    }
+    ASSERT(char_index < location_line.length());
+
+    size_t original_location_start_index = char_index;
+
+    // Advance to just past the space that ends the original range; a '+' must follow.
+    while (char_index < location_line.length() && location_line[char_index++] != ' ') {
+    }
+    ASSERT(char_index < location_line.length() && location_line[char_index] == '+');
+    // char_index is on the '+', so the range's last character sits two positions back.
+    size_t original_location_end_index = char_index - 2;
+
+    size_t target_location_start_index = char_index + 1;
+
+    char_index += 1;
+    // Advance to just past the space that ends the target range.
+    while (char_index < location_line.length() && location_line[char_index++] != ' ') {
+    }
+    ASSERT(char_index < location_line.length());
+
+    size_t target_location_end_index = char_index - 2;
+
+    auto original_pair = parse_start_and_length_pair(location_line.substring(original_location_start_index, original_location_end_index - original_location_start_index + 1));
+    auto target_pair = parse_start_and_length_pair(location_line.substring(target_location_start_index, target_location_end_index - target_location_start_index + 1));
+    return { original_pair.start, original_pair.length, target_pair.start, target_pair.length };
+}
+
+// Moves the selected side(s) of this location forward by `offset` lines: the start line grows by
+// `offset` and the matching length shrinks by the same amount.
+void HunkLocation::apply_offset(size_t offset, HunkLocation::LocationType type)
+{
+    auto advance = [offset](size_t& start_line, size_t& length) {
+        start_line += offset;
+        length -= offset;
+    };
+
+    switch (type) {
+    case LocationType::Original:
+        advance(original_start_line, original_length);
+        break;
+    case LocationType::Target:
+        advance(target_start_line, target_length);
+        break;
+    case LocationType::Both:
+        advance(original_start_line, original_length);
+        advance(target_start_line, target_length);
+        break;
+    }
+}
+
+};
diff --git a/Libraries/LibDiff/Hunks.h b/Libraries/LibDiff/Hunks.h
new file mode 100644
index 0000000000..a24dd2e72c
--- /dev/null
+++ b/Libraries/LibDiff/Hunks.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2020, Itamar S. <itamar8910@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include <AK/String.h>
+#include <AK/Vector.h>
+
+namespace Diff {
+
+struct HunkLocation { // Position on both sides of a diff, as produced by parse_hunk_location() and advanced by apply_offset().
+ size_t original_start_line { 0 }; // Start line on the original side; parse_hunk_location() stores the header value minus one.
+ size_t original_length { 0 }; // Length on the original side; stored as the header value minus one, then reduced by apply_offset().
+ size_t target_start_line { 0 }; // Start line on the target side; parse_hunk_location() stores the header value minus one.
+ size_t target_length { 0 }; // Length on the target side; stored as the header value minus one, then reduced by apply_offset().
+
+ enum class LocationType { // Selects which side(s) apply_offset() adjusts.
+ Original, // Only original_start_line / original_length.
+ Target, // Only target_start_line / target_length.
+ Both // Both pairs.
+ };
+ void apply_offset(size_t offset, LocationType); // Adds offset to the selected start line(s) and subtracts it from the matching length(s).
+};
+
+struct Hunk { // One run of removed lines followed by added lines, as collected by parse_hunks().
+ size_t original_start_line { 0 }; // Original-side start line of the location at which the run began.
+ size_t target_start_line { 0 }; // Target-side start line of the location at which the run began.
+ Vector<String> removed_lines; // Text of the '-' lines, without the leading marker.
+ Vector<String> added_lines; // Text of the '+' lines, without the leading marker.
+};
+
+Vector<Hunk> parse_hunks(const String& diff); // Splits `diff` into lines and collects the runs of removed/added lines as Hunks.
+HunkLocation parse_hunk_location(const String& location_line); // Parses the original and target ranges out of an "@@ -a,b +c,d @@" header line.
+};