summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ali Mohammad Pur <ali.mpfard@gmail.com> 2021-06-16 08:34:19 +0430
committer: Ali Mohammad Pur <Ali.mpfard@gmail.com> 2021-06-17 01:08:27 +0430
commit: b11b3c2f1c7d279dc8a2ff39326b25e9dc388471 (patch)
tree: 5ff0df85b95de7405f8575eac6a9a150419c44c1
parent: 88b168ff16238507f4dded770bf7c162c75cda86 (diff)
download: serenity-b11b3c2f1c7d279dc8a2ff39326b25e9dc388471.zip
Spreadsheet: Make the XSV parser start with a preview parse
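Instead of parsing the whole document in the constructor, which is really wasteful and super slow. The constructor now only parses a short preview (at most 10 rows); callers that need the full document call parse() explicitly.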
-rw-r--r-- Userland/Applications/Spreadsheet/ImportDialog.cpp | 3
-rw-r--r-- Userland/Applications/Spreadsheet/Readers/Test/TestXSV.cpp | 6
-rw-r--r-- Userland/Applications/Spreadsheet/Readers/XSV.cpp | 18
-rw-r--r-- Userland/Applications/Spreadsheet/Readers/XSV.h | 16
4 files changed, 36 insertions, 7 deletions
diff --git a/Userland/Applications/Spreadsheet/ImportDialog.cpp b/Userland/Applications/Spreadsheet/ImportDialog.cpp
index 293ae767cb..7e95c4ea95 100644
--- a/Userland/Applications/Spreadsheet/ImportDialog.cpp
+++ b/Userland/Applications/Spreadsheet/ImportDialog.cpp
@@ -147,7 +147,7 @@ auto CSVImportDialogPage::make_reader() -> Optional<Reader::XSV>
if (should_trim_trailing)
behaviours = behaviours | Reader::ParserBehaviour::TrimTrailingFieldSpaces;
- return Reader::XSV(m_csv, traits, behaviours);
+ return Reader::XSV(m_csv, move(traits), behaviours);
};
void CSVImportDialogPage::update_preview()
@@ -195,6 +195,7 @@ Result<NonnullRefPtrVector<Sheet>, String> ImportDialog::make_and_run_for(String
NonnullRefPtrVector<Sheet> sheets;
if (reader.has_value()) {
+ reader->parse();
if (reader.value().has_error())
return String::formatted("CSV Import failed: {}", reader.value().error_string());
diff --git a/Userland/Applications/Spreadsheet/Readers/Test/TestXSV.cpp b/Userland/Applications/Spreadsheet/Readers/Test/TestXSV.cpp
index 7a25cba567..0569b59dd4 100644
--- a/Userland/Applications/Spreadsheet/Readers/Test/TestXSV.cpp
+++ b/Userland/Applications/Spreadsheet/Readers/Test/TestXSV.cpp
@@ -18,6 +18,7 @@ TEST_CASE(should_parse_valid_data)
4, 5, 6
"""x", y"z, 9)~~~";
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces };
+ csv.parse();
EXPECT(!csv.has_error());
EXPECT_EQ(csv[0]["Foo"], "1");
@@ -31,6 +32,7 @@ TEST_CASE(should_parse_valid_data)
4, "5 " , 6
"""x", y"z, 9 )~~~";
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces | Reader::ParserBehaviour::TrimTrailingFieldSpaces };
+ csv.parse();
EXPECT(!csv.has_error());
EXPECT_EQ(csv[0]["Foo"], "1");
@@ -46,6 +48,7 @@ TEST_CASE(should_fail_nicely)
auto data = R"~~~(Foo, Bar, Baz
x, y)~~~";
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces };
+ csv.parse();
EXPECT(csv.has_error());
EXPECT_EQ(csv.error(), Reader::ReadError::NonConformingColumnCount);
}
@@ -54,6 +57,7 @@ TEST_CASE(should_fail_nicely)
auto data = R"~~~(Foo, Bar, Baz
x, y, "z)~~~";
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces };
+ csv.parse();
EXPECT(csv.has_error());
EXPECT_EQ(csv.error(), Reader::ReadError::QuoteFailure);
}
@@ -66,6 +70,7 @@ TEST_CASE(should_iterate_rows)
4, 5, 6
"""x", y"z, 9)~~~";
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders | Reader::ParserBehaviour::TrimLeadingFieldSpaces };
+ csv.parse();
EXPECT(!csv.has_error());
bool ran = false;
@@ -82,6 +87,7 @@ BENCHMARK_CASE(fairly_big_data)
auto data = file_or_error.value()->read_all();
auto csv = Reader::CSV { data, Reader::default_behaviours() | Reader::ParserBehaviour::ReadHeaders };
+ csv.parse();
EXPECT(!csv.has_error());
EXPECT_EQ(csv.size(), 100000u);
diff --git a/Userland/Applications/Spreadsheet/Readers/XSV.cpp b/Userland/Applications/Spreadsheet/Readers/XSV.cpp
index bdce748884..9e3b737fd7 100644
--- a/Userland/Applications/Spreadsheet/Readers/XSV.cpp
+++ b/Userland/Applications/Spreadsheet/Readers/XSV.cpp
@@ -11,12 +11,12 @@ namespace Reader {
ParserBehaviour operator&(ParserBehaviour left, ParserBehaviour right)
{
- return static_cast<ParserBehaviour>(static_cast<u32>(left) & static_cast<u32>(right));
+ return static_cast<ParserBehaviour>(to_underlying(left) & to_underlying(right));
}
ParserBehaviour operator|(ParserBehaviour left, ParserBehaviour right)
{
- return static_cast<ParserBehaviour>(static_cast<u32>(left) | static_cast<u32>(right));
+ return static_cast<ParserBehaviour>(to_underlying(left) | to_underlying(right));
}
void XSV::set_error(ReadError error)
@@ -43,8 +43,22 @@ Vector<String> XSV::headers() const
return headers;
}
+void XSV::parse_preview()
+{
+ reset();
+ if ((m_behaviours & ParserBehaviour::ReadHeaders) != ParserBehaviour::None)
+ read_headers();
+
+ while (!has_error() && !m_lexer.is_eof()) {
+ if (m_rows.size() >= 10)
+ break;
+ m_rows.append(read_row());
+ }
+}
+
void XSV::parse()
{
+ reset();
if ((m_behaviours & ParserBehaviour::ReadHeaders) != ParserBehaviour::None)
read_headers();
diff --git a/Userland/Applications/Spreadsheet/Readers/XSV.h b/Userland/Applications/Spreadsheet/Readers/XSV.h
index b36fb0d0ed..d5d7bcfb07 100644
--- a/Userland/Applications/Spreadsheet/Readers/XSV.h
+++ b/Userland/Applications/Spreadsheet/Readers/XSV.h
@@ -59,17 +59,18 @@ constexpr ParserBehaviour default_behaviours()
class XSV {
public:
- XSV(StringView source, const ParserTraits& traits, ParserBehaviour behaviours = default_behaviours())
+ XSV(StringView source, ParserTraits traits, ParserBehaviour behaviours = default_behaviours())
: m_source(source)
, m_lexer(m_source)
, m_traits(traits)
, m_behaviours(behaviours)
{
- parse();
+ parse_preview();
}
virtual ~XSV() { }
+ void parse();
bool has_error() const { return m_error != ReadError::None; }
ReadError error() const { return m_error; }
String error_string() const
@@ -180,8 +181,15 @@ private:
}
};
void set_error(ReadError error);
- void parse();
+ void parse_preview();
void read_headers();
+ void reset()
+ {
+ m_lexer = GenericLexer { m_source };
+ m_rows.clear();
+ m_names.clear();
+ m_error = ReadError::None;
+ }
Vector<Field> read_row(bool header_row = false);
Field read_one_field();
Field read_one_quoted_field();
@@ -189,7 +197,7 @@ private:
StringView m_source;
GenericLexer m_lexer;
- const ParserTraits& m_traits;
+ ParserTraits m_traits;
ParserBehaviour m_behaviours;
Vector<Field> m_names;
Vector<Vector<Field>> m_rows;