diff options
author | AnotherTest <ali.mpfard@gmail.com> | 2021-03-27 16:59:25 +0430 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2021-03-28 11:46:47 +0200 |
commit | 894bfa30a21f2cfa5f046144f1e524a7ae2c7807 (patch) | |
tree | 8fe0a283873c40701e464cc22c65b6a5f95eb5d2 /Userland/Applications | |
parent | 102065a8a9e0aa2a11821931f7f497c4afae9ec1 (diff) | |
download | serenity-894bfa30a21f2cfa5f046144f1e524a7ae2c7807.zip |
Spreadsheet: Make the CSV reader more lenient
This adds an option "Lenient" that makes the reader conform to what
appears to be the norm in spreadsheet-land:
- Treat missing values as empty ones
- Update previously read rows if another row with more columns is seen
afterwards
Diffstat (limited to 'Userland/Applications')
-rw-r--r-- | Userland/Applications/Spreadsheet/ImportDialog.cpp | 2 | ||||
-rw-r--r-- | Userland/Applications/Spreadsheet/Readers/XSV.cpp | 27 | ||||
-rw-r--r-- | Userland/Applications/Spreadsheet/Readers/XSV.h | 4 |
3 files changed, 28 insertions, 5 deletions
diff --git a/Userland/Applications/Spreadsheet/ImportDialog.cpp b/Userland/Applications/Spreadsheet/ImportDialog.cpp index d607a1e4d8..3d920c3357 100644 --- a/Userland/Applications/Spreadsheet/ImportDialog.cpp +++ b/Userland/Applications/Spreadsheet/ImportDialog.cpp @@ -158,7 +158,7 @@ auto CSVImportDialogPage::make_reader() -> Optional<Reader::XSV> quote_escape, }; - auto behaviours = Reader::default_behaviours(); + auto behaviours = Reader::default_behaviours() | Reader::ParserBehaviour::Lenient; if (should_read_headers) behaviours = behaviours | Reader::ParserBehaviour::ReadHeaders; diff --git a/Userland/Applications/Spreadsheet/Readers/XSV.cpp b/Userland/Applications/Spreadsheet/Readers/XSV.cpp index 8ed6ce9dac..ca9c3eb4a1 100644 --- a/Userland/Applications/Spreadsheet/Readers/XSV.cpp +++ b/Userland/Applications/Spreadsheet/Readers/XSV.cpp @@ -103,10 +103,29 @@ Vector<XSV::Field> XSV::read_row(bool header_row) } } - if (!header_row && (m_behaviours & ParserBehaviour::ReadHeaders) != ParserBehaviour::None && row.size() != m_names.size()) - set_error(ReadError::NonConformingColumnCount); - else if (!header_row && !has_explicit_headers() && !m_rows.is_empty() && m_rows.first().size() != row.size()) - set_error(ReadError::NonConformingColumnCount); + auto is_lenient = (m_behaviours & ParserBehaviour::Lenient) != ParserBehaviour::None; + if (is_lenient) { + if (m_rows.is_empty()) + return row; + + auto& last_row = m_rows.last(); + if (row.size() < last_row.size()) { + if (!m_names.is_empty()) + row.resize(m_names.size()); + else + row.resize(last_row.size()); + } else if (row.size() > last_row.size()) { + auto new_size = row.size(); + for (auto& row : m_rows) + row.resize(new_size); + } + } else { + auto should_read_headers = (m_behaviours & ParserBehaviour::ReadHeaders) != ParserBehaviour::None; + if (!header_row && should_read_headers && row.size() != m_names.size()) + set_error(ReadError::NonConformingColumnCount); + else if (!header_row && 
!has_explicit_headers() && !m_rows.is_empty() && m_rows.first().size() != row.size()) + set_error(ReadError::NonConformingColumnCount); + } return row; } diff --git a/Userland/Applications/Spreadsheet/Readers/XSV.h b/Userland/Applications/Spreadsheet/Readers/XSV.h index 309e0ec384..444c81992d 100644 --- a/Userland/Applications/Spreadsheet/Readers/XSV.h +++ b/Userland/Applications/Spreadsheet/Readers/XSV.h @@ -41,6 +41,10 @@ enum class ParserBehaviour : u32 { TrimLeadingFieldSpaces = ReadHeaders << 2, TrimTrailingFieldSpaces = ReadHeaders << 3, QuoteOnlyInFieldStart = ReadHeaders << 4, + Lenient = ReadHeaders << 5, // This is the typical "spreadsheet import" behavior + // Currently, it: + // - fills in missing fields with empty values + // - updates previous rows with extra columns }; ParserBehaviour operator&(ParserBehaviour left, ParserBehaviour right); |