diff options
author | Martin Fietz <Martin.Fietz@gmail.com> | 2016-11-01 20:49:16 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2016-11-01 20:49:16 +0100 |
commit | 3322f11612125c4309f4c6a7ae4eb487ceadd8d5 (patch) | |
tree | bea256ddf8ec9c5c25f0420c9e76e1200298cafe | |
parent | c2a7adc6c4a1600749437de624391aff7fbfbff3 (diff) | |
parent | ab182c4b63fd52c67692ade92cff43461f0b9587 (diff) | |
download | AntennaPod-3322f11612125c4309f4c6a7ae4eb487ceadd8d5.zip |
Merge pull request #2138 from mfietz/2126-atom-html
Sanitize HTML from Atom feed descriptions/subtitles
4 files changed, 174 insertions, 76 deletions
diff --git a/app/src/main/java/de/danoeh/antennapod/activity/FeedInfoActivity.java b/app/src/main/java/de/danoeh/antennapod/activity/FeedInfoActivity.java index 31b405329..19aabfc88 100644 --- a/app/src/main/java/de/danoeh/antennapod/activity/FeedInfoActivity.java +++ b/app/src/main/java/de/danoeh/antennapod/activity/FeedInfoActivity.java @@ -28,6 +28,10 @@ import android.widget.Toast; import com.bumptech.glide.Glide; import com.joanzapata.iconify.Iconify; +import org.apache.commons.lang3.StringUtils; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; + import de.danoeh.antennapod.R; import de.danoeh.antennapod.core.dialog.ConfirmationDialog; import de.danoeh.antennapod.core.dialog.DownloadRequestErrorDialogCreator; @@ -41,6 +45,7 @@ import de.danoeh.antennapod.core.storage.DBWriter; import de.danoeh.antennapod.core.storage.DownloadRequestException; import de.danoeh.antennapod.core.util.IntentUtils; import de.danoeh.antennapod.core.util.LangUtils; +import de.danoeh.antennapod.core.util.syndication.HtmlToPlainText; import de.danoeh.antennapod.menuhandler.FeedMenuHandler; import rx.Observable; import rx.Subscription; @@ -51,11 +56,10 @@ import rx.schedulers.Schedulers; * Displays information about a feed. 
*/ public class FeedInfoActivity extends AppCompatActivity { - private static final String TAG = "FeedInfoActivity"; - private boolean autoDeleteChanged = false; public static final String EXTRA_FEED_ID = "de.danoeh.antennapod.extra.feedId"; - + private static final String TAG = "FeedInfoActivity"; + private boolean autoDeleteChanged = false; private Feed feed; private ImageView imgvCover; @@ -78,6 +82,7 @@ public class FeedInfoActivity extends AppCompatActivity { private Subscription subscription; + private final View.OnClickListener copyUrlToClipboard = new View.OnClickListener() { @Override public void onClick(View v) { @@ -99,6 +104,40 @@ public class FeedInfoActivity extends AppCompatActivity { } }; + private boolean authInfoChanged = false; + + private TextWatcher authTextWatcher = new TextWatcher() { + @Override + public void beforeTextChanged(CharSequence s, int start, int count, int after) { + } + + @Override + public void onTextChanged(CharSequence s, int start, int before, int count) { + } + + @Override + public void afterTextChanged(Editable s) { + authInfoChanged = true; + } + }; + + private boolean filterTextChanged = false; + + private TextWatcher filterTextWatcher = new TextWatcher() { + @Override + public void beforeTextChanged(CharSequence s, int start, int count, int after) { + } + + @Override + public void onTextChanged(CharSequence s, int start, int before, int count) { + } + + @Override + public void afterTextChanged(Editable s) { + filterTextChanged = true; + } + }; + @Override protected void onCreate(Bundle savedInstanceState) { setTheme(UserPreferences.getTheme()); @@ -157,8 +196,19 @@ public class FeedInfoActivity extends AppCompatActivity { .into(imgvCover); txtvTitle.setText(feed.getTitle()); + String description = feed.getDescription(); - txtvDescription.setText((description != null) ? 
description.trim() : ""); + if(description != null) { + if(Feed.TYPE_ATOM1.equals(feed.getType())) { + HtmlToPlainText formatter = new HtmlToPlainText(); + Document feedDescription = Jsoup.parse(feed.getDescription()); + description = StringUtils.trim(formatter.getPlainText(feedDescription)); + } + } else { + description = ""; + } + txtvDescription.setText(description); + if (!TextUtils.isEmpty(feed.getAuthor())) { txtvAuthor.setText(feed.getAuthor()); } else { @@ -251,53 +301,6 @@ public class FeedInfoActivity extends AppCompatActivity { } @Override - public void onDestroy() { - super.onDestroy(); - if(subscription != null) { - subscription.unsubscribe(); - } - } - - - private boolean authInfoChanged = false; - - private TextWatcher authTextWatcher = new TextWatcher() { - @Override - public void beforeTextChanged(CharSequence s, int start, int count, int after) { - - } - - @Override - public void onTextChanged(CharSequence s, int start, int before, int count) { - - } - - @Override - public void afterTextChanged(Editable s) { - authInfoChanged = true; - } - }; - - private boolean filterTextChanged = false; - - private TextWatcher filterTextWatcher = new TextWatcher() { - @Override - public void beforeTextChanged(CharSequence s, int start, int count, int after) { - - } - - @Override - public void onTextChanged(CharSequence s, int start, int before, int count) { - - } - - @Override - public void afterTextChanged(Editable s) { - filterTextChanged = true; - } - }; - - @Override protected void onPause() { super.onPause(); if (feed != null) { @@ -329,6 +332,14 @@ public class FeedInfoActivity extends AppCompatActivity { } @Override + public void onDestroy() { + super.onDestroy(); + if(subscription != null) { + subscription.unsubscribe(); + } + } + + @Override public boolean onCreateOptionsMenu(Menu menu) { super.onCreateOptionsMenu(menu); MenuInflater inflater = getMenuInflater(); @@ -379,7 +390,7 @@ public class FeedInfoActivity extends AppCompatActivity { private final 
Feed feed; private final boolean autoDownload; - public ApplyToEpisodesDialog(Context context, Feed feed, boolean autoDownload) { + ApplyToEpisodesDialog(Context context, Feed feed, boolean autoDownload) { super(context, R.string.auto_download_apply_to_items_title, R.string.auto_download_apply_to_items_message); this.feed = feed; diff --git a/app/src/main/java/de/danoeh/antennapod/activity/OnlineFeedViewActivity.java b/app/src/main/java/de/danoeh/antennapod/activity/OnlineFeedViewActivity.java index a40877832..99f3bcc00 100644 --- a/app/src/main/java/de/danoeh/antennapod/activity/OnlineFeedViewActivity.java +++ b/app/src/main/java/de/danoeh/antennapod/activity/OnlineFeedViewActivity.java @@ -29,7 +29,6 @@ import com.bumptech.glide.Glide; import org.apache.commons.lang3.StringUtils; import org.jsoup.Jsoup; -import org.jsoup.examples.HtmlToPlainText; import org.jsoup.nodes.Document; import java.io.File; @@ -63,6 +62,7 @@ import de.danoeh.antennapod.core.util.FileNameGenerator; import de.danoeh.antennapod.core.util.StorageUtils; import de.danoeh.antennapod.core.util.URLChecker; import de.danoeh.antennapod.core.util.syndication.FeedDiscoverer; +import de.danoeh.antennapod.core.util.syndication.HtmlToPlainText; import de.danoeh.antennapod.dialog.AuthenticationDialog; import de.greenrobot.event.EventBus; import rx.Observable; @@ -81,17 +81,12 @@ import rx.schedulers.Schedulers; */ public class OnlineFeedViewActivity extends AppCompatActivity { - private static final String TAG = "OnlineFeedViewActivity"; - public static final String ARG_FEEDURL = "arg.feedurl"; - // Optional argument: specify a title for the actionbar. 
public static final String ARG_TITLE = "title"; - - private static final int EVENTS = EventDistributor.FEED_LIST_UPDATE; - public static final int RESULT_ERROR = 2; - + private static final String TAG = "OnlineFeedViewActivity"; + private static final int EVENTS = EventDistributor.FEED_LIST_UPDATE; private volatile List<Feed> feeds; private Feed feed; private String selectedDownloadUrl; @@ -106,17 +101,11 @@ public class OnlineFeedViewActivity extends AppCompatActivity { private Subscription download; private Subscription parser; private Subscription updater; - - public void onEventMainThread(DownloadEvent event) { - Log.d(TAG, "onEventMainThread() called with: " + "event = [" + event + "]"); - setSubscribeButtonState(feed); - } - private EventDistributor.EventListener listener = new EventDistributor.EventListener() { @Override public void update(EventDistributor eventDistributor, Integer arg) { if ((arg & EventDistributor.FEED_LIST_UPDATE) != 0) { - updater = Observable.fromCallable(() -> DBReader.getFeedList()) + updater = Observable.fromCallable(DBReader::getFeedList) .subscribeOn(Schedulers.newThread()) .observeOn(AndroidSchedulers.mainThread()) .subscribe( @@ -133,6 +122,11 @@ public class OnlineFeedViewActivity extends AppCompatActivity { } }; + public void onEventMainThread(DownloadEvent event) { + Log.d(TAG, "onEventMainThread() called with: " + "event = [" + event + "]"); + setSubscribeButtonState(feed); + } + @Override protected void onCreate(Bundle savedInstanceState) { setTheme(UserPreferences.getTheme()); @@ -284,7 +278,7 @@ public class OnlineFeedViewActivity extends AppCompatActivity { }) .subscribeOn(Schedulers.newThread()) .observeOn(AndroidSchedulers.mainThread()) - .subscribe(status -> checkDownloadResult(status), + .subscribe(this::checkDownloadResult, error -> Log.e(TAG, Log.getStackTraceString(error))); } @@ -360,14 +354,19 @@ public class OnlineFeedViewActivity extends AppCompatActivity { * This method is executed on a background thread */ 
private void beforeShowFeedInformation(Feed feed) { - // remove HTML tags from descriptions + final HtmlToPlainText formatter = new HtmlToPlainText(); + if(Feed.TYPE_ATOM1.equals(feed.getType())) { + // remove HTML tags from descriptions + Log.d(TAG, "Removing HTML from feed description"); + Document feedDescription = Jsoup.parse(feed.getDescription()); + feed.setDescription(StringUtils.trim(formatter.getPlainText(feedDescription))); + } Log.d(TAG, "Removing HTML from shownotes"); if (feed.getItems() != null) { - HtmlToPlainText formatter = new HtmlToPlainText(); for (FeedItem item : feed.getItems()) { if (item.getDescription() != null) { - Document description = Jsoup.parse(item.getDescription()); - item.setDescription(StringUtils.trim(formatter.getPlainText(description))); + Document itemDescription = Jsoup.parse(item.getDescription()); + item.setDescription(StringUtils.trim(formatter.getPlainText(itemDescription))); } } } @@ -589,7 +588,7 @@ public class OnlineFeedViewActivity extends AppCompatActivity { private String feedUrl; - public FeedViewAuthenticationDialog(Context context, int titleRes, String feedUrl) { + FeedViewAuthenticationDialog(Context context, int titleRes, String feedUrl) { super(context, titleRes, true, false, null, null); this.feedUrl = feedUrl; } diff --git a/app/src/main/java/de/danoeh/antennapod/adapter/AdapterUtils.java b/app/src/main/java/de/danoeh/antennapod/adapter/AdapterUtils.java index 8aaf0055a..5c58d00f2 100644 --- a/app/src/main/java/de/danoeh/antennapod/adapter/AdapterUtils.java +++ b/app/src/main/java/de/danoeh/antennapod/adapter/AdapterUtils.java @@ -15,7 +15,7 @@ import de.danoeh.antennapod.core.util.NetworkUtils; /** * Utility methods for adapters */ -public class AdapterUtils { +class AdapterUtils { private static final String TAG = AdapterUtils.class.getSimpleName(); @@ -26,7 +26,7 @@ public class AdapterUtils { /** * Updates the contents of the TextView that shows the current playback position and the ProgressBar. 
*/ - public static void updateEpisodePlaybackProgress(FeedItem item, TextView txtvPos, ProgressBar episodeProgress) { + static void updateEpisodePlaybackProgress(FeedItem item, TextView txtvPos, ProgressBar episodeProgress) { FeedMedia media = item.getMedia(); episodeProgress.setVisibility(View.GONE); if (media == null) { @@ -47,7 +47,6 @@ public class AdapterUtils { - media.getPosition())); } } else if (!media.isDownloaded()) { - Log.d(TAG, "size: " + media.getSize()); if (media.getSize() > 0) { txtvPos.setText(Converter.byteToString(media.getSize())); } else if(NetworkUtils.isDownloadAllowed() && !media.checkedOnSizeButUnknown()) { diff --git a/core/src/main/java/de/danoeh/antennapod/core/util/syndication/HtmlToPlainText.java b/core/src/main/java/de/danoeh/antennapod/core/util/syndication/HtmlToPlainText.java new file mode 100644 index 000000000..bd40f398d --- /dev/null +++ b/core/src/main/java/de/danoeh/antennapod/core/util/syndication/HtmlToPlainText.java @@ -0,0 +1,89 @@ +package de.danoeh.antennapod.core.util.syndication; + +import org.jsoup.helper.StringUtil; +import org.jsoup.nodes.Element; +import org.jsoup.nodes.Node; +import org.jsoup.nodes.TextNode; +import org.jsoup.select.NodeTraversor; +import org.jsoup.select.NodeVisitor; + +/** + * This class is based on <code>HtmlToPlainText</code> from jsoup's examples package. + * + * HTML to plain-text. This example program demonstrates the use of jsoup to convert HTML input to lightly-formatted + * plain-text. That is divergent from the general goal of jsoup's .text() methods, which is to get clean data from a + * scrape. + * <p> + * Note that this is a fairly simplistic formatter -- for real world use you'll want to embrace and extend. 
+ * </p> + * <p> + * To invoke from the command line, assuming you've downloaded the jsoup jar to your current directory:</p> + * <p><code>java -cp jsoup.jar org.jsoup.examples.HtmlToPlainText url [selector]</code></p> + * where <i>url</i> is the URL to fetch, and <i>selector</i> is an optional CSS selector. + * + * @author Jonathan Hedley, jonathan@hedley.net + * @author AntennaPod open source community + */ +public class HtmlToPlainText { + + /** + * Format an Element to plain-text + * @param element the root element to format + * @return formatted text + */ + public String getPlainText(Element element) { + FormattingVisitor formatter = new FormattingVisitor(); + NodeTraversor traversor = new NodeTraversor(formatter); + traversor.traverse(element); // walk the DOM, and call .head() and .tail() for each node + + return formatter.toString(); + } + + // the formatting rules, implemented in a depth-first DOM traverse + private class FormattingVisitor implements NodeVisitor { + + private StringBuilder accum = new StringBuilder(); // holds the accumulated text + + // hit when the node is first seen + public void head(Node node, int depth) { + String name = node.nodeName(); + if (node instanceof TextNode) { + append(((TextNode) node).text()); // TextNodes carry all user-readable text in the DOM. 
+ } + else if (name.equals("li")) { + append("\n * "); + } + else if (name.equals("dt")) { + append(" "); + } + else if (StringUtil.in(name, "p", "h1", "h2", "h3", "h4", "h5", "tr")) { + append("\n"); + } + } + + // hit when all of the node's children (if any) have been visited + public void tail(Node node, int depth) { + String name = node.nodeName(); + if (StringUtil.in(name, "br", "dd", "dt", "p", "h1", "h2", "h3", "h4", "h5")) { + append("\n"); + } else if (name.equals("a")) { + append(String.format(" <%s>", node.absUrl("href"))); + } + } + + // appends text to the string builder, skipping a lone space that would follow existing whitespace + private void append(String text) { + if (text.equals(" ") && + (accum.length() == 0 || StringUtil.in(accum.substring(accum.length() - 1), " ", "\n"))) { + return; // don't accumulate long runs of empty spaces + } + + accum.append(text); + } + + @Override + public String toString() { + return accum.toString(); + } + } +} |