diff options
author | Andreas Kling <kling@serenityos.org> | 2022-11-19 20:23:18 +0100 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2022-12-08 09:54:20 +0100 |
commit | d6a3be1615ea5dbe2219f8da10b3233164210dcc (patch) | |
tree | def9fc78de44ffc021fdce5c209224d0ebe495b5 /Userland/Libraries | |
parent | f4f5b045caea7f735987687cf4d1b9d630c4676a (diff) | |
download | serenity-d6a3be1615ea5dbe2219f8da10b3233164210dcc.zip |
LibPDF: Add missing character quirk for WinAnsiEncoding fonts
Fonts that use the "WinAnsiEncoding" encoding should render every unused
character code greater than 040 (octal) as the "bullet" character.
This patch adds Encoding::should_map_to_bullet(char_code) which is then
called by char_code_to_code_point() to check if the given char code
should be displayed as a bullet instead.
I didn't have a good way to test this, so I've only verified that it
works by manually overriding inputs to the function during the rendering
stage.
This takes care of a FIXME in Encoding.h that referred to Note 3 in Annex D of the PDF specification.
Diffstat (limited to 'Userland/Libraries')
-rw-r--r-- | Userland/Libraries/LibPDF/Encoding.cpp | 9 | ||||
-rw-r--r-- | Userland/Libraries/LibPDF/Encoding.h | 6 | ||||
-rw-r--r-- | Userland/Libraries/LibPDF/Fonts/TrueTypeFont.cpp | 3 | ||||
-rw-r--r-- | Userland/Libraries/LibPDF/Fonts/Type1Font.cpp | 3 |
4 files changed, 20 insertions, 1 deletions
diff --git a/Userland/Libraries/LibPDF/Encoding.cpp b/Userland/Libraries/LibPDF/Encoding.cpp index e8cd5c7ba0..f18152dafe 100644 --- a/Userland/Libraries/LibPDF/Encoding.cpp +++ b/Userland/Libraries/LibPDF/Encoding.cpp @@ -115,6 +115,7 @@ NonnullRefPtr<Encoding> Encoding::windows_encoding() encoding->m_name_mapping.set(#name, name##_code_point); ENUMERATE_LATIN_CHARACTER_SET(ENUMERATE) #undef ENUMERATE + encoding->m_windows = true; } return encoding; @@ -170,4 +171,12 @@ CharDescriptor const& Encoding::get_char_code_descriptor(u16 char_code) const return const_cast<Encoding*>(this)->m_descriptors.ensure(char_code); } +bool Encoding::should_map_to_bullet(u16 char_code) const +{ + // PDF Annex D table D.2, note 3: + // In WinAnsiEncoding, all unused codes greater than 40 (octal) map to the bullet character. However, only + // code 225 (octal) shall be specifically assigned to the bullet character; other codes are subject to future re-assignment. + return m_windows && char_code > 040 && !m_descriptors.contains(char_code); +} + } diff --git a/Userland/Libraries/LibPDF/Encoding.h b/Userland/Libraries/LibPDF/Encoding.h index 44b56cb4de..819eaa33d0 100644 --- a/Userland/Libraries/LibPDF/Encoding.h +++ b/Userland/Libraries/LibPDF/Encoding.h @@ -96,7 +96,7 @@ FN("]", bracketright, 93, 93, 93, 93) \ FN(" ̆", breve, 198, 249, -1, 24) \ FN("¦", brokenbar, -1, -1, 166, 166) \ - FN("•", bullet, 183, 165, 149, 128) /* FIXME: Note 3 */ \ + FN("•", bullet, 183, 165, 149, 128) \ FN("c", c, 99, 99, 99, 99) \ FN("ˇ", caron, 207, 255, -1, 25) \ FN("ç", ccedilla, -1, 141, 231, 231) \ @@ -647,9 +647,13 @@ public: CharDescriptor const& get_char_code_descriptor(u16 char_code) const; + bool should_map_to_bullet(u16 char_code) const; + protected: HashMap<u16, CharDescriptor> m_descriptors; HashMap<DeprecatedString, u16> m_name_mapping; + + bool m_windows { false }; }; } diff --git a/Userland/Libraries/LibPDF/Fonts/TrueTypeFont.cpp b/Userland/Libraries/LibPDF/Fonts/TrueTypeFont.cpp index 
f6b07249d8..71376bfd0b 100644 --- a/Userland/Libraries/LibPDF/Fonts/TrueTypeFont.cpp +++ b/Userland/Libraries/LibPDF/Fonts/TrueTypeFont.cpp @@ -48,6 +48,9 @@ u32 TrueTypeFont::char_code_to_code_point(u16 char_code) const if (m_data.to_unicode) TODO(); + if (m_data.encoding->should_map_to_bullet(char_code)) + return 8226; // Bullet. + auto descriptor = m_data.encoding->get_char_code_descriptor(char_code); return descriptor.code_point; } diff --git a/Userland/Libraries/LibPDF/Fonts/Type1Font.cpp b/Userland/Libraries/LibPDF/Fonts/Type1Font.cpp index 392dad2f90..89f64ec45d 100644 --- a/Userland/Libraries/LibPDF/Fonts/Type1Font.cpp +++ b/Userland/Libraries/LibPDF/Fonts/Type1Font.cpp @@ -57,6 +57,9 @@ u32 Type1Font::char_code_to_code_point(u16 char_code) const if (m_data.to_unicode) TODO(); + if (m_data.encoding->should_map_to_bullet(char_code)) + return 8226; // Bullet. + auto descriptor = m_data.encoding->get_char_code_descriptor(char_code); return descriptor.code_point; } |