diff options
author | AnotherTest <ali.mpfard@gmail.com> | 2020-11-23 11:52:45 +0330 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2020-11-24 21:38:13 +0100 |
commit | 31523f6c644f193dea221b82f37ef91c1077b723 (patch) | |
tree | c1313e036724fa58328efa0f717e1e31eee78fa6 | |
parent | 438829a1d5c0244aaa1b432d553885ae2542e668 (diff) | |
download | serenity-31523f6c644f193dea221b82f37ef91c1077b723.zip |
Spreadsheet: Add a CSV reader and writer
This is not utilised yet.
-rw-r--r-- | Applications/Spreadsheet/CMakeLists.txt | 1 | ||||
-rw-r--r-- | Applications/Spreadsheet/Readers/CSV.h | 43 | ||||
-rw-r--r-- | Applications/Spreadsheet/Readers/Test/TestXSV.cpp | 110 | ||||
-rw-r--r-- | Applications/Spreadsheet/Readers/XSV.cpp | 272 | ||||
-rw-r--r-- | Applications/Spreadsheet/Readers/XSV.h | 208 | ||||
-rw-r--r-- | Applications/Spreadsheet/Writers/CSV.h | 44 | ||||
-rw-r--r-- | Applications/Spreadsheet/Writers/Test/TestXSVWriter.cpp | 96 | ||||
-rw-r--r-- | Applications/Spreadsheet/Writers/XSV.h | 215 |
8 files changed, 989 insertions, 0 deletions
diff --git a/Applications/Spreadsheet/CMakeLists.txt b/Applications/Spreadsheet/CMakeLists.txt index a80b4f07b7..9ba555450d 100644 --- a/Applications/Spreadsheet/CMakeLists.txt +++ b/Applications/Spreadsheet/CMakeLists.txt @@ -15,6 +15,7 @@ set(SOURCES CondFormattingViewUI.h HelpWindow.cpp JSIntegration.cpp + Readers/XSV.cpp Spreadsheet.cpp SpreadsheetModel.cpp SpreadsheetView.cpp diff --git a/Applications/Spreadsheet/Readers/CSV.h b/Applications/Spreadsheet/Readers/CSV.h new file mode 100644 index 0000000000..866ae67141 --- /dev/null +++ b/Applications/Spreadsheet/Readers/CSV.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2020, the SerenityOS developers. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include "XSV.h" +#include <AK/Forward.h> +#include <AK/StringView.h> + +namespace Reader { + +class CSV : public XSV { +public: + CSV(StringView source, ParserBehaviour behaviours = default_behaviours()) + : XSV(source, { ",", "\"", ParserTraits::Repeat }, behaviours) + { + } +}; + +} diff --git a/Applications/Spreadsheet/Readers/Test/TestXSV.cpp b/Applications/Spreadsheet/Readers/Test/TestXSV.cpp new file mode 100644 index 0000000000..b80093d556 --- /dev/null +++ b/Applications/Spreadsheet/Readers/Test/TestXSV.cpp @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2020, the SerenityOS developers. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <AK/TestSuite.h> + +#include "../CSV.h" +#include "../XSV.h" +#include <LibCore/File.h> + +TEST_CASE(should_parse_valid_data) +{ + { + auto data = R"~~~(Foo, Bar, Baz + 1, 2, 3 + 4, 5, 6 + """x", y"z, 9)~~~"; + auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces }; + EXPECT(!csv.has_error()); + + EXPECT_EQ(csv[0]["Foo"], "1"); + EXPECT_EQ(csv[2]["Foo"], "\"x"); + EXPECT_EQ(csv[2]["Bar"], "y\"z"); + } + + { + auto data = R"~~~(Foo, Bar, Baz + 1 , 2, 3 + 4, "5 " , 6 + """x", y"z, 9 )~~~"; + auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces | Reader::ParserBehaviour::TrimTrailingFieldSpaces }; + EXPECT(!csv.has_error()); + + EXPECT_EQ(csv[0]["Foo"], "1"); + EXPECT_EQ(csv[1]["Bar"], "5 "); + EXPECT_EQ(csv[2]["Foo"], "\"x"); + EXPECT_EQ(csv[2]["Baz"], "9"); + } +} + +TEST_CASE(should_fail_nicely) +{ + { + auto data = R"~~~(Foo, Bar, Baz + x, y)~~~"; + auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces }; + EXPECT(csv.has_error()); + EXPECT_EQ(csv.error(), Reader::ReadError::NonConformingColumnCount); + } + + { + auto data = R"~~~(Foo, Bar, Baz + x, y, "z)~~~"; + auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces }; + EXPECT(csv.has_error()); + EXPECT_EQ(csv.error(), Reader::ReadError::QuoteFailure); + } +} + +TEST_CASE(should_iterate_rows) +{ + auto data = R"~~~(Foo, Bar, Baz + 1, 2, 3 + 4, 5, 6 + """x", y"z, 9)~~~"; + auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces }; + EXPECT(!csv.has_error()); + + bool ran = false; + for (auto row : csv) + ran = !row[0].is_empty(); + + EXPECT(ran); +} + +BENCHMARK_CASE(fairly_big_data) +{ + auto file_or_error = Core::File::open(__FILE__ ".data", Core::IODevice::OpenMode::ReadOnly); + EXPECT_EQ_FORCE(file_or_error.is_error(), false); + + auto data = file_or_error.value()->read_all(); + auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders }; + + EXPECT(!csv.has_error()); + EXPECT_EQ(csv.size(), 100000u); +} + +TEST_MAIN(XSV) diff --git a/Applications/Spreadsheet/Readers/XSV.cpp b/Applications/Spreadsheet/Readers/XSV.cpp new file mode 100644 index 0000000000..d87c86748e --- /dev/null +++ b/Applications/Spreadsheet/Readers/XSV.cpp @@ -0,0 +1,272 @@ +/* + * Copyright (c) 2020, the SerenityOS developers. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "XSV.h" +#include <AK/StringBuilder.h> + +namespace Reader { + +ParserBehaviour operator&(ParserBehaviour left, ParserBehaviour right) +{ + return static_cast<ParserBehaviour>(static_cast<u32>(left) & static_cast<u32>(right)); +} + +ParserBehaviour operator|(ParserBehaviour left, ParserBehaviour right) +{ + return static_cast<ParserBehaviour>(static_cast<u32>(left) | static_cast<u32>(right)); +} + +void XSV::set_error(ReadError error) +{ + if (m_error == ReadError::None) + m_error = error; +} + +Vector<String> XSV::headers() const +{ + Vector<String> headers; + for (auto& field : m_names) + headers.append(field.is_string_view ? field.as_string_view : field.as_string.view()); + + return headers; +} + +void XSV::parse() +{ + if ((m_behaviours & ParserBehaviour::ReadHeaders) != ParserBehaviour::None) + read_headers(); + + while (!has_error() && !m_lexer.is_eof()) + m_rows.append(read_row()); + + if (!m_lexer.is_eof()) + set_error(ReadError::DataPastLogicalEnd); +} + +void XSV::read_headers() +{ + if (!m_names.is_empty()) { + set_error(ReadError::InternalError); + m_names.clear(); + } + + m_names = read_row(true); +} + +Vector<XSV::Field> XSV::read_row(bool header_row) +{ + Vector<Field> row; + bool first = true; + while (!(m_lexer.is_eof() || m_lexer.next_is('\n') || m_lexer.next_is("\r\n")) && (first || m_lexer.consume_specific(m_traits.separator))) { + first = false; + row.append(read_one_field()); + } + + if (!m_lexer.is_eof()) { + auto crlf_ok = m_lexer.consume_specific("\r\n"); + if (!crlf_ok) { + auto lf_ok = m_lexer.consume_specific('\n'); + if (!lf_ok) + set_error(ReadError::DataPastLogicalEnd); + } + } + + if (!header_row && (m_behaviours & ParserBehaviour::ReadHeaders) != ParserBehaviour::None && row.size() != m_names.size()) + set_error(ReadError::NonConformingColumnCount); + + return row; +} + +XSV::Field XSV::read_one_field() +{ + if ((m_behaviours & ParserBehaviour::TrimLeadingFieldSpaces) != ParserBehaviour::None) + m_lexer.consume_while(is_any_of(" \t\v")); + + bool is_quoted = false; + Field field; + if (m_lexer.next_is(m_traits.quote.view())) { + is_quoted = true; + field = read_one_quoted_field(); + } else { + field = read_one_unquoted_field(); + } + + if ((m_behaviours & ParserBehaviour::TrimTrailingFieldSpaces) != ParserBehaviour::None) { + m_lexer.consume_while(is_any_of(" \t\v")); + + if (!is_quoted) { + // Also have to trim trailing spaces from unquoted fields. + StringView view; + if (field.is_string_view) + view = field.as_string_view; + else + view = field.as_string; + + if (!view.is_empty()) { + ssize_t i = view.length() - 1; + for (; i >= 0; --i) { + if (!view.substring_view(i, 1).is_one_of(" ", "\t", "\v")) + break; + } + view = view.substring_view(0, i + 1); + } + + if (field.is_string_view) + field.as_string_view = view; + else + field.as_string = field.as_string.substring(0, view.length()); + } + } + + return field; +} + +XSV::Field XSV::read_one_quoted_field() +{ + if (!m_lexer.consume_specific(m_traits.quote)) + set_error(ReadError::InternalError); + + size_t start = m_lexer.tell(), end = start; + bool is_copy = false; + StringBuilder builder; + auto allow_newlines = (m_behaviours & ParserBehaviour::AllowNewlinesInFields) != ParserBehaviour::None; + + for (; !m_lexer.is_eof();) { + char ch; + switch (m_traits.quote_escape) { + case ParserTraits::Backslash: + if (m_lexer.consume_specific('\\') && m_lexer.consume_specific(m_traits.quote)) { + // If there is an escaped quote, we have no choice but to make a copy. + if (!is_copy) { + is_copy = true; + builder.append(m_source.substring_view(start, end - start)); + } + builder.append(m_traits.quote); + end = m_lexer.tell(); + continue; + } + break; + case ParserTraits::Repeat: + if (m_lexer.consume_specific(m_traits.quote)) { + if (m_lexer.consume_specific(m_traits.quote)) { + // If there is an escaped quote, we have no choice but to make a copy. + if (!is_copy) { + is_copy = true; + builder.append(m_source.substring_view(start, end - start)); + } + builder.append(m_traits.quote); + end = m_lexer.tell(); + continue; + } + for (size_t i = 0; i < m_traits.quote.length(); ++i) + m_lexer.retreat(); + goto end; + } + break; + } + + if (m_lexer.next_is(m_traits.quote.view())) + goto end; + + if (!allow_newlines) { + if (m_lexer.next_is('\n') || m_lexer.next_is("\r\n")) + goto end; + } + + ch = m_lexer.consume(); + if (is_copy) + builder.append(ch); + end = m_lexer.tell(); + continue; + + end: + break; + } + + if (!m_lexer.consume_specific(m_traits.quote)) + set_error(ReadError::QuoteFailure); + + if (is_copy) + return { {}, builder.to_string(), false }; + + return { m_source.substring_view(start, end - start), {}, true }; +} + +XSV::Field XSV::read_one_unquoted_field() +{ + size_t start = m_lexer.tell(), end = start; + bool allow_quote_in_field = (m_behaviours & ParserBehaviour::QuoteOnlyInFieldStart) != ParserBehaviour::None; + + for (; !m_lexer.is_eof();) { + if (m_lexer.next_is(m_traits.separator.view())) + break; + + if (m_lexer.next_is("\r\n") || m_lexer.next_is("\n")) + break; + + if (m_lexer.consume_specific(m_traits.quote)) { + if (!allow_quote_in_field) + set_error(ReadError::QuoteFailure); + end = m_lexer.tell(); + continue; + } + + m_lexer.consume(); + end = m_lexer.tell(); + } + + return { m_source.substring_view(start, end - start), {}, true }; +} + +StringView XSV::Row::operator[](StringView name) const +{ + ASSERT(!m_xsv.m_names.is_empty()); + auto it = m_xsv.m_names.find([&](auto&& entry) { return name == entry; }); + ASSERT(!it.is_end()); + + return (*this)[it.index()]; +} + +StringView XSV::Row::operator[](size_t column) const +{ + auto& field = m_xsv.m_rows[m_index][column]; + if (field.is_string_view) + return field.as_string_view; + return field.as_string; +} + +const XSV::Row XSV::operator[](size_t index) const +{ + return const_cast<XSV&>(*this)[index]; +} + +XSV::Row XSV::operator[](size_t index) +{ + ASSERT(m_rows.size() > index); + return Row { *this, index }; +} + +} diff --git a/Applications/Spreadsheet/Readers/XSV.h b/Applications/Spreadsheet/Readers/XSV.h new file mode 100644 index 0000000000..0b32ca767d --- /dev/null +++ b/Applications/Spreadsheet/Readers/XSV.h @@ -0,0 +1,208 @@ +/* + * Copyright (c) 2020, the SerenityOS developers. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include <AK/GenericLexer.h> +#include <AK/String.h> +#include <AK/StringView.h> +#include <AK/Types.h> +#include <AK/Vector.h> + +namespace Reader { + +enum class ParserBehaviour : u32 { + None = 0, + ReadHeaders = 1, + AllowNewlinesInFields = ReadHeaders << 1, + TrimLeadingFieldSpaces = ReadHeaders << 2, + TrimTrailingFieldSpaces = ReadHeaders << 3, + QuoteOnlyInFieldStart = ReadHeaders << 4, +}; + +ParserBehaviour operator&(ParserBehaviour left, ParserBehaviour right); +ParserBehaviour operator|(ParserBehaviour left, ParserBehaviour right); + +struct ParserTraits { + String separator; + String quote { "\"" }; + enum { + Repeat, + Backslash, + } quote_escape { Repeat }; +}; + +#define ENUMERATE_READ_ERRORS() \ + E(None, "No errors") \ + E(NonConformingColumnCount, "Header count does not match given column count") \ + E(QuoteFailure, "Quoting failure") \ + E(InternalError, "Internal error") \ + E(DataPastLogicalEnd, "Exrta data past the logical end of the rows") + +enum class ReadError { +#define E(name, _) name, + ENUMERATE_READ_ERRORS() +#undef E +}; + +inline constexpr ParserBehaviour default_behaviours() +{ + return ParserBehaviour::QuoteOnlyInFieldStart; +} + +class XSV { +public: + XSV(StringView source, const ParserTraits& traits, ParserBehaviour behaviours = default_behaviours()) + : m_source(source) + , m_lexer(m_source) + , m_traits(traits) + , m_behaviours(behaviours) + { + parse(); + } + + virtual ~XSV() { } + + bool has_error() const { return m_error != ReadError::None; } + ReadError error() const { return m_error; } + String error_string() const + { + switch (m_error) { +#define E(x, y) \ + case ReadError::x: \ + return y; + + ENUMERATE_READ_ERRORS(); +#undef E + } + ASSERT_NOT_REACHED(); + } + + size_t size() const { return m_rows.size(); } + Vector<String> headers() const; + + class Row { + public: + explicit Row(XSV& xsv, size_t index) + : m_xsv(xsv) + , m_index(index) + { + } + + StringView operator[](StringView name) const; + StringView operator[](size_t column) const; + + size_t index() const { return m_index; } + + // FIXME: Implement begin() and end(), keeping `Field' out of the API. + + private: + XSV& m_xsv; + size_t m_index { 0 }; + }; + + template<bool const_> + class RowIterator { + public: + explicit RowIterator(const XSV& xsv, size_t init_index = 0) requires(const_) + : m_xsv(const_cast<XSV&>(xsv)) + , m_index(init_index) + { + } + + explicit RowIterator(XSV& xsv, size_t init_index = 0) requires(!const_) + : m_xsv(xsv) + , m_index(init_index) + { + } + + Row operator*() const { return Row { m_xsv, m_index }; } + Row operator*() requires(!const_) { return Row { m_xsv, m_index }; } + + RowIterator& operator++() + { + ++m_index; + return *this; + } + + bool is_end() const { return m_index == m_xsv.m_rows.size(); } + bool operator==(const RowIterator& other) const + { + return m_index == other.m_index && &m_xsv == &other.m_xsv; + } + bool operator==(const RowIterator<!const_>& other) const + { + return m_index == other.m_index && &m_xsv == &other.m_xsv; + } + + private: + XSV& m_xsv; + size_t m_index { 0 }; + }; + + const Row operator[](size_t index) const; + Row operator[](size_t index); + + auto begin() { return RowIterator<false>(*this); } + auto end() { return RowIterator<false>(*this, m_rows.size()); } + + auto begin() const { return RowIterator<true>(*this); } + auto end() const { return RowIterator<true>(*this, m_rows.size()); } + + using ConstIterator = RowIterator<true>; + using Iterator = RowIterator<false>; + +private: + struct Field { + StringView as_string_view; + String as_string; // This member only used if the parser couldn't use the original source verbatim. + bool is_string_view { true }; + + bool operator==(StringView other) const + { + if (is_string_view) + return other == as_string_view; + return as_string == other; + } + }; + void set_error(ReadError error); + void parse(); + void read_headers(); + Vector<Field> read_row(bool header_row = false); + Field read_one_field(); + Field read_one_quoted_field(); + Field read_one_unquoted_field(); + + StringView m_source; + GenericLexer m_lexer; + const ParserTraits& m_traits; + ParserBehaviour m_behaviours; + Vector<Field> m_names; + Vector<Vector<Field>> m_rows; + ReadError m_error { ReadError::None }; +}; + +} diff --git a/Applications/Spreadsheet/Writers/CSV.h b/Applications/Spreadsheet/Writers/CSV.h new file mode 100644 index 0000000000..49940fbdf1 --- /dev/null +++ b/Applications/Spreadsheet/Writers/CSV.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2020, the SerenityOS developers. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include "XSV.h" +#include <AK/Forward.h> +#include <AK/StringView.h> + +namespace Writer { + +template<typename ContainerType> +class CSV : public XSV<ContainerType> { +public: + CSV(OutputStream& output, const ContainerType& data, const Vector<StringView>& headers = {}, WriterBehaviour behaviours = default_behaviours()) + : XSV<ContainerType>(output, data, { ",", "\"", WriterTraits::Repeat }, headers, behaviours) + { + } +}; + +} diff --git a/Applications/Spreadsheet/Writers/Test/TestXSVWriter.cpp b/Applications/Spreadsheet/Writers/Test/TestXSVWriter.cpp new file mode 100644 index 0000000000..4971658431 --- /dev/null +++ b/Applications/Spreadsheet/Writers/Test/TestXSVWriter.cpp @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2020, the SerenityOS developers. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <AK/TestSuite.h> + +#include "../CSV.h" +#include "../XSV.h" +#include <AK/MemoryStream.h> + +TEST_CASE(can_write) +{ + Vector<Vector<int>> data = { + { 1, 2, 3 }, + { 4, 5, 6 }, + { 7, 8, 9 }, + }; + + auto buffer = ByteBuffer::create_uninitialized(1024); + OutputMemoryStream stream { buffer }; + + Writer::CSV csv(stream, data); + + auto expected_output = R"~(1,2,3 +4,5,6 +7,8,9 +)~"; + + EXPECT_EQ(StringView { stream.bytes() }, expected_output); +} + +TEST_CASE(can_write_with_header) +{ + Vector<Vector<int>> data = { + { 1, 2, 3 }, + { 4, 5, 6 }, + { 7, 8, 9 }, + }; + + auto buffer = ByteBuffer::create_uninitialized(1024); + OutputMemoryStream stream { buffer }; + + Writer::CSV csv(stream, data, { "A", "B\"", "C" }); + + auto expected_output = R"~(A,"B""",C +1,2,3 +4,5,6 +7,8,9 +)~"; + + EXPECT_EQ(StringView { stream.bytes() }, expected_output); +} + +TEST_CASE(can_write_with_different_behaviours) +{ + Vector<Vector<String>> data = { + { "Well", "Hello\"", "Friends" }, + { "We\"ll", "Hello,", " Friends" }, + }; + + auto buffer = ByteBuffer::create_uninitialized(1024); + OutputMemoryStream stream { buffer }; + + Writer::CSV csv(stream, data, { "A", "B\"", "C" }, Writer::WriterBehaviour::QuoteOnlyInFieldStart | Writer::WriterBehaviour::WriteHeaders); + + auto expected_output = R"~(A,B",C +Well,Hello",Friends +We"ll,"Hello,", Friends +)~"; + + EXPECT_EQ(StringView { stream.bytes() }, expected_output); +} + +TEST_MAIN(XSV) diff --git a/Applications/Spreadsheet/Writers/XSV.h b/Applications/Spreadsheet/Writers/XSV.h new file mode 100644 index 0000000000..7a065f87d1 --- /dev/null +++ b/Applications/Spreadsheet/Writers/XSV.h @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2020, the SerenityOS developers. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include <AK/GenericLexer.h> +#include <AK/OwnPtr.h> +#include <AK/Stream.h> +#include <AK/String.h> +#include <AK/StringView.h> +#include <AK/Types.h> +#include <AK/Vector.h> + +namespace Writer { + +enum class WriterBehaviour : u32 { + None = 0, + WriteHeaders = 1, + AllowNewlinesInFields = WriteHeaders << 1, + QuoteOnlyInFieldStart = WriteHeaders << 2, + QuoteAll = WriteHeaders << 3, +}; + +inline WriterBehaviour operator&(WriterBehaviour left, WriterBehaviour right) +{ + return static_cast<WriterBehaviour>(static_cast<u32>(left) & static_cast<u32>(right)); +} + +inline WriterBehaviour operator|(WriterBehaviour left, WriterBehaviour right) +{ + return static_cast<WriterBehaviour>(static_cast<u32>(left) | static_cast<u32>(right)); +} + +struct WriterTraits { + String separator; + String quote { "\"" }; + enum { + Repeat, + Backslash, + } quote_escape { Repeat }; +}; + +#define ENUMERATE_WRITE_ERRORS() \ + E(None, "No errors") \ + E(NonConformingColumnCount, "Header count does not match given column count") \ + E(InternalError, "Internal error") + +enum class WriteError { +#define E(name, _) name, + ENUMERATE_WRITE_ERRORS() +#undef E +}; + +inline constexpr WriterBehaviour default_behaviours() +{ + return WriterBehaviour::None; +} + +template<typename ContainerType> +class XSV { +public: + XSV(OutputStream& output, const ContainerType& data, const WriterTraits& traits, const Vector<StringView>& headers = {}, WriterBehaviour behaviours = default_behaviours()) + : m_data(data) + , m_traits(traits) + , m_behaviours(behaviours) + , m_names(headers) + , m_output(output) + { + if (!headers.is_empty()) + m_behaviours = m_behaviours | WriterBehaviour::WriteHeaders; + + generate(); + } + + virtual ~XSV() { } + + bool has_error() const { return m_error != WriteError::None; } + WriteError error() const { return m_error; } + String error_string() const + { + switch (m_error) { +#define E(x, y) \ + case WriteError::x: \ + return y; + + ENUMERATE_WRITE_ERRORS(); +#undef E + } + ASSERT_NOT_REACHED(); + } + +private: + void set_error(WriteError error) + { + if (m_error == WriteError::None) + m_error = error; + } + + void generate() + { + auto with_headers = (m_behaviours & WriterBehaviour::WriteHeaders) != WriterBehaviour::None; + if (with_headers) { + write_row(m_names); + if (m_output.write({ "\n", 1 }) != 1) + set_error(WriteError::InternalError); + } + + for (auto&& row : m_data) { + if (with_headers) { + if (row.size() != m_names.size()) + set_error(WriteError::NonConformingColumnCount); + } + + write_row(row); + if (m_output.write({ "\n", 1 }) != 1) + set_error(WriteError::InternalError); + } + } + + template<typename T> + void write_row(T&& row) + { + bool first = true; + for (auto&& entry : row) { + if (!first) { + if (m_output.write(m_traits.separator.bytes()) != m_traits.separator.length()) + set_error(WriteError::InternalError); + } + first = false; + write_entry(entry); + } + } + + template<typename T> + void write_entry(T&& entry) + { + auto string = String::formatted("{}", FormatIfSupported(entry)); + + auto safe_to_write_normally = !string.contains("\n") && !string.contains(m_traits.separator); + if (safe_to_write_normally) { + if ((m_behaviours & WriterBehaviour::QuoteOnlyInFieldStart) == WriterBehaviour::None) + safe_to_write_normally = !string.contains(m_traits.quote); + else + safe_to_write_normally = !string.starts_with(m_traits.quote); + } + if (safe_to_write_normally) { + if (m_output.write(string.bytes()) != string.length()) + set_error(WriteError::InternalError); + return; + } + + if (m_output.write(m_traits.quote.bytes()) != m_traits.quote.length()) + set_error(WriteError::InternalError); + + GenericLexer lexer(string); + while (!lexer.is_eof()) { + if (lexer.consume_specific(m_traits.quote)) { + switch (m_traits.quote_escape) { + case WriterTraits::Repeat: + if (m_output.write(m_traits.quote.bytes()) != m_traits.quote.length()) + set_error(WriteError::InternalError); + if (m_output.write(m_traits.quote.bytes()) != m_traits.quote.length()) + set_error(WriteError::InternalError); + break; + case WriterTraits::Backslash: + if (m_output.write({ "\\", 1 }) != 1) + set_error(WriteError::InternalError); + if (m_output.write(m_traits.quote.bytes()) != m_traits.quote.length()) + set_error(WriteError::InternalError); + break; + } + continue; + } + + auto ch = lexer.consume(); + if (m_output.write({ &ch, 1 }) != 1) + set_error(WriteError::InternalError); + } + + if (m_output.write(m_traits.quote.bytes()) != m_traits.quote.length()) + set_error(WriteError::InternalError); + } + + const ContainerType& m_data; + const WriterTraits& m_traits; + WriterBehaviour m_behaviours; + const Vector<StringView>& m_names; + WriteError m_error { WriteError::None }; + OutputStream& m_output; +}; + +} |