diff options
author | Ali Mohammad Pur <ali.mpfard@gmail.com> | 2021-06-16 08:34:19 +0430 |
---|---|---|
committer | Ali Mohammad Pur <Ali.mpfard@gmail.com> | 2021-06-17 01:08:27 +0430 |
commit | b11b3c2f1c7d279dc8a2ff39326b25e9dc388471 (patch) | |
tree | 5ff0df85b95de7405f8575eac6a9a150419c44c1 | |
parent | 88b168ff16238507f4dded770bf7c162c75cda86 (diff) | |
download | serenity-b11b3c2f1c7d279dc8a2ff39326b25e9dc388471.zip |
Spreadsheet: Make the XSV parser start with a preview parse
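The constructor now parses only a short preview instead of the whole
document, which was really wasteful and super slow. Callers that need
every row must call parse() explicitly.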
4 files changed, 36 insertions, 7 deletions
diff --git a/Userland/Applications/Spreadsheet/ImportDialog.cpp b/Userland/Applications/Spreadsheet/ImportDialog.cpp index 293ae767cb..7e95c4ea95 100644 --- a/Userland/Applications/Spreadsheet/ImportDialog.cpp +++ b/Userland/Applications/Spreadsheet/ImportDialog.cpp @@ -147,7 +147,7 @@ auto CSVImportDialogPage::make_reader() -> Optional<Reader::XSV> if (should_trim_trailing) behaviours = behaviours | Reader::ParserBehaviour::TrimTrailingFieldSpaces; - return Reader::XSV(m_csv, traits, behaviours); + return Reader::XSV(m_csv, move(traits), behaviours); }; void CSVImportDialogPage::update_preview() @@ -195,6 +195,7 @@ Result<NonnullRefPtrVector<Sheet>, String> ImportDialog::make_and_run_for(String NonnullRefPtrVector<Sheet> sheets; if (reader.has_value()) { + reader->parse(); if (reader.value().has_error()) return String::formatted("CSV Import failed: {}", reader.value().error_string()); diff --git a/Userland/Applications/Spreadsheet/Readers/Test/TestXSV.cpp b/Userland/Applications/Spreadsheet/Readers/Test/TestXSV.cpp index 7a25cba567..0569b59dd4 100644 --- a/Userland/Applications/Spreadsheet/Readers/Test/TestXSV.cpp +++ b/Userland/Applications/Spreadsheet/Readers/Test/TestXSV.cpp @@ -18,6 +18,7 @@ TEST_CASE(should_parse_valid_data) 4, 5, 6 """x", y"z, 9)~~~"; auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces }; + csv.parse(); EXPECT(!csv.has_error()); EXPECT_EQ(csv[0]["Foo"], "1"); @@ -31,6 +32,7 @@ TEST_CASE(should_parse_valid_data) 4, "5 " , 6 """x", y"z, 9 )~~~"; auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces | Reader::ParserBehaviour::TrimTrailingFieldSpaces }; + csv.parse(); EXPECT(!csv.has_error()); EXPECT_EQ(csv[0]["Foo"], "1"); @@ -46,6 +48,7 @@ TEST_CASE(should_fail_nicely) auto data = R"~~~(Foo, Bar, Baz x, y)~~~"; auto csv = Reader::CSV { data, 
Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces }; + csv.parse(); EXPECT(csv.has_error()); EXPECT_EQ(csv.error(), Reader::ReadError::NonConformingColumnCount); } @@ -54,6 +57,7 @@ TEST_CASE(should_fail_nicely) auto data = R"~~~(Foo, Bar, Baz x, y, "z)~~~"; auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces }; + csv.parse(); EXPECT(csv.has_error()); EXPECT_EQ(csv.error(), Reader::ReadError::QuoteFailure); } @@ -66,6 +70,7 @@ TEST_CASE(should_iterate_rows) 4, 5, 6 """x", y"z, 9)~~~"; auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces }; + csv.parse(); EXPECT(!csv.has_error()); bool ran = false; @@ -82,6 +87,7 @@ BENCHMARK_CASE(fairly_big_data) auto data = file_or_error.value()->read_all(); auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders }; + csv.parse(); EXPECT(!csv.has_error()); EXPECT_EQ(csv.size(), 100000u); diff --git a/Userland/Applications/Spreadsheet/Readers/XSV.cpp b/Userland/Applications/Spreadsheet/Readers/XSV.cpp index bdce748884..9e3b737fd7 100644 --- a/Userland/Applications/Spreadsheet/Readers/XSV.cpp +++ b/Userland/Applications/Spreadsheet/Readers/XSV.cpp @@ -11,12 +11,12 @@ namespace Reader { ParserBehaviour operator&(ParserBehaviour left, ParserBehaviour right) { - return static_cast<ParserBehaviour>(static_cast<u32>(left) & static_cast<u32>(right)); + return static_cast<ParserBehaviour>(to_underlying(left) & to_underlying(right)); } ParserBehaviour operator|(ParserBehaviour left, ParserBehaviour right) { - return static_cast<ParserBehaviour>(static_cast<u32>(left) | static_cast<u32>(right)); + return static_cast<ParserBehaviour>(to_underlying(left) | to_underlying(right)); } void XSV::set_error(ReadError error) @@ -43,8 +43,22 @@ 
Vector<String> XSV::headers() const return headers; } +void XSV::parse_preview() +{ + reset(); + if ((m_behaviours & ParserBehaviour::ReadHeaders) != ParserBehaviour::None) + read_headers(); + + while (!has_error() && !m_lexer.is_eof()) { + if (m_rows.size() >= 10) + break; + m_rows.append(read_row()); + } +} + void XSV::parse() { + reset(); if ((m_behaviours & ParserBehaviour::ReadHeaders) != ParserBehaviour::None) read_headers(); diff --git a/Userland/Applications/Spreadsheet/Readers/XSV.h b/Userland/Applications/Spreadsheet/Readers/XSV.h index b36fb0d0ed..d5d7bcfb07 100644 --- a/Userland/Applications/Spreadsheet/Readers/XSV.h +++ b/Userland/Applications/Spreadsheet/Readers/XSV.h @@ -59,17 +59,18 @@ constexpr ParserBehaviour default_behaviours() class XSV { public: - XSV(StringView source, const ParserTraits& traits, ParserBehaviour behaviours = default_behaviours()) + XSV(StringView source, ParserTraits traits, ParserBehaviour behaviours = default_behaviours()) : m_source(source) , m_lexer(m_source) , m_traits(traits) , m_behaviours(behaviours) { - parse(); + parse_preview(); } virtual ~XSV() { } + void parse(); bool has_error() const { return m_error != ReadError::None; } ReadError error() const { return m_error; } String error_string() const @@ -180,8 +181,15 @@ private: } }; void set_error(ReadError error); - void parse(); + void parse_preview(); void read_headers(); + void reset() + { + m_lexer = GenericLexer { m_source }; + m_rows.clear(); + m_names.clear(); + m_error = ReadError::None; + } Vector<Field> read_row(bool header_row = false); Field read_one_field(); Field read_one_quoted_field(); @@ -189,7 +197,7 @@ private: StringView m_source; GenericLexer m_lexer; - const ParserTraits& m_traits; + ParserTraits m_traits; ParserBehaviour m_behaviours; Vector<Field> m_names; Vector<Vector<Field>> m_rows; |