Implemented FeedDiscoverer

author: daniel oeh <daniel.oeh@gmail.com> 2014-06-16 00:16:48 +0200
committer: daniel oeh <daniel.oeh@gmail.com> 2014-06-16 00:16:48 +0200
commit: 7fc0e73ea7bcf21f843a0d94426e8df515182271 (patch)
tree: 878557166488da12e821059c68135d635696e5c2 /src/de
parent: 859eabb7a302d79948ca9da4ceb886908932482a (diff)
download: AntennaPod-7fc0e73ea7bcf21f843a0d94426e8df515182271.zip
1 files changed, 78 insertions, 0 deletions
diff --git a/src/de/danoeh/antennapod/util/syndication/FeedDiscoverer.java b/src/de/danoeh/antennapod/util/syndication/FeedDiscoverer.java
new file mode 100644
index 000000000..ac38ec876
--- /dev/null
+++ b/src/de/danoeh/antennapod/util/syndication/FeedDiscoverer.java
@@ -0,0 +1,78 @@
+package de.danoeh.antennapod.util.syndication;
+
+import android.net.Uri;
+import org.apache.commons.lang3.StringUtils;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+/**
+ * Finds RSS/Atom URLs in an HTML document using the auto-discovery techniques described here:
+ * <p/>
+ * http://www.rssboard.org/rss-autodiscovery
+ * <p/>
+ * http://blog.whatwg.org/feed-autodiscovery
+ */
+public class FeedDiscoverer {
+
+    private static final String MIME_RSS = "application/rss+xml";
+    private static final String MIME_ATOM = "application/atom+xml";
+    /** Matches rel token lists containing "alternate" or "feed" (space-separated, case-insensitive). */
+    private static final String FEED_REL = "(?is)(.*\\s)?(alternate|feed)(\\s.*)?";
+
+    /**
+     * Discovers links to RSS and Atom feeds in the given File which must be an HTML document.
+     *
+     * @param baseUrl URL of the document; relative feed links are resolved against it.
+     * @return A map of feed URLs (keys) to titles (values); the feed URL is used as title if none is found.
+     */
+    public Map<String, String> findLinks(File in, String baseUrl) throws IOException {
+        return findLinks(Jsoup.parse(in, null), baseUrl);
+    }
+
+    /**
+     * Discovers links to RSS and Atom feeds in the given String which must contain an HTML document.
+     *
+     * @param baseUrl URL of the document; relative feed links are resolved against it.
+     * @return A map of feed URLs (keys) to titles (values); the feed URL is used as title if none is found.
+     */
+    public Map<String, String> findLinks(String in, String baseUrl) throws IOException {
+        return findLinks(Jsoup.parse(in), baseUrl);
+    }
+
+    /** Collects the feed links in the head of document; links whose URL cannot be resolved are skipped. */
+    private Map<String, String> findLinks(Document document, String baseUrl) {
+        Map<String, String> res = new LinkedHashMap<String, String>();
+        for (Element link : document.head().getElementsByTag("link")) {
+            String href = link.attr("href").trim();
+            String type = link.attr("type").trim();
+            boolean feedType = type.equalsIgnoreCase(MIME_RSS) || type.equalsIgnoreCase(MIME_ATOM);
+            if (!StringUtils.isEmpty(href) && feedType && link.attr("rel").matches(FEED_REL)) {
+                String processedUrl = processURL(baseUrl, href);
+                if (processedUrl != null) {
+                    String title = link.attr("title");
+                    res.put(processedUrl, StringUtils.isEmpty(title) ? processedUrl : title);
+                }
+            }
+        }
+        return res;
+    }
+
+    /** Returns absolute URLs unchanged and resolves relative ones against baseUrl; null if that fails. */
+    private String processURL(String baseUrl, String strUrl) {
+        if (!Uri.parse(strUrl).isRelative()) {
+            return strUrl;
+        }
+        try { // unlike Uri.Builder#path, this keeps the base directory and does not escape "?query"
+            return new java.net.URL(new java.net.URL(baseUrl), strUrl).toString();
+        } catch (java.net.MalformedURLException e) {
+            return null;
+        }
+    }
+}
author	daniel oeh <daniel.oeh@gmail.com>	2014-06-16 00:16:48 +0200
committer	daniel oeh <daniel.oeh@gmail.com>	2014-06-16 00:16:48 +0200
commit	7fc0e73ea7bcf21f843a0d94426e8df515182271 (patch)
tree	878557166488da12e821059c68135d635696e5c2 /src/de
parent	859eabb7a302d79948ca9da4ceb886908932482a (diff)
download	AntennaPod-7fc0e73ea7bcf21f843a0d94426e8df515182271.zip