diff options
author | fossterer <shashank@linux.com> | 2019-10-24 23:20:31 -0400 |
---|---|---|
committer | fossterer <shashank@linux.com> | 2019-10-24 23:20:31 -0400 |
commit | ca83c5953727d4096ddb7af353575b23324dce80 (patch) | |
tree | 562983ca2746e3f94089ffcb68e66c8d30ec9ae6 /core/src/main/java/de/danoeh/antennapod | |
parent | 437f3f29c06e4a253e841fc959c768aec8d565a0 (diff) | |
download | AntennaPod-ca83c5953727d4096ddb7af353575b23324dce80.zip |
Fixes #3444 - Strip off HTML from podcast descriptions
Diffstat (limited to 'core/src/main/java/de/danoeh/antennapod')
-rw-r--r-- | core/src/main/java/de/danoeh/antennapod/core/util/syndication/HtmlToPlainText.java | 40 |
1 files changed, 40 insertions, 0 deletions
diff --git a/core/src/main/java/de/danoeh/antennapod/core/util/syndication/HtmlToPlainText.java b/core/src/main/java/de/danoeh/antennapod/core/util/syndication/HtmlToPlainText.java
index 61072f1ad..3550f28c6 100644
--- a/core/src/main/java/de/danoeh/antennapod/core/util/syndication/HtmlToPlainText.java
+++ b/core/src/main/java/de/danoeh/antennapod/core/util/syndication/HtmlToPlainText.java
@@ -1,12 +1,19 @@
 package de.danoeh.antennapod.core.util.syndication;
 
+import android.text.TextUtils;
+
+import org.apache.commons.lang3.StringUtils;
+import org.jsoup.Jsoup;
 import org.jsoup.helper.StringUtil;
+import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.jsoup.nodes.Node;
 import org.jsoup.nodes.TextNode;
 import org.jsoup.select.NodeTraversor;
 import org.jsoup.select.NodeVisitor;
 
+import java.util.regex.Pattern;
+
 /**
  * This class is based on <code>HtmlToPlainText</code> from jsoup's examples package.
  *
@@ -27,6 +34,39 @@ import org.jsoup.select.NodeVisitor;
 public class HtmlToPlainText {
 
     /**
+     * Use this method to strip off HTML encoding from given text
+     * <p>
+     * Replaces bullet points with *, ignores colors/bold/...
+     *
+     * @param str String with any encoding
+     * @return Human readable text with minimal HTML formatting
+     */
+    public static String getPlainText(String str) {
+        if (!TextUtils.isEmpty(str) && isHtml(str)) {
+            HtmlToPlainText formatter = new HtmlToPlainText();
+            Document feedDescription = Jsoup.parse(str);
+            str = StringUtils.trim(formatter.getPlainText(feedDescription));
+        } else if (TextUtils.isEmpty(str)) {
+            str = "";
+        }
+
+        return str;
+    }
+
+    /**
+     * Use this method to determine if a given text has any HTML tag
+     *
+     * @param str String to be tested for presence of HTML content
+     * @return <b>True</b> if text contains any HTML tags</br><b>False</b> is no HTML tag is found
+     */
+    private static boolean isHtml(String str) {
+        final String HTML_TAG_PATTERN = "<(\"[^\"]*\"|'[^']*'|[^'\">])*>";
+        Pattern htmlValidator = TextUtils.isEmpty(HTML_TAG_PATTERN) ? null : Pattern.compile(HTML_TAG_PATTERN);
+
+        return htmlValidator.matcher(str).find();
+    }
+
+    /**
      * Format an Element to plain-text
      * @param element the root element to format
      * @return formatted text