summaryrefslogtreecommitdiff
path: root/src/de/danoeh/antennapod/util/syndication/FeedDiscoverer.java
blob: ac38ec8767dec1732af54925a7b4f8d4e53977d4 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
package de.danoeh.antennapod.util.syndication;

import android.net.Uri;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.LinkedHashMap;
import java.util.Map;

/**
 * Finds RSS/Atom URLs in a HTML document using the auto-discovery techniques described here:
 * <p/>
 * http://www.rssboard.org/rss-autodiscovery
 * <p/>
 * http://blog.whatwg.org/feed-autodiscovery
 */
public class FeedDiscoverer {

    private static final String MIME_RSS = "application/rss+xml";
    private static final String MIME_ATOM = "application/atom+xml";

    private static final String REL_ALTERNATE = "alternate";
    private static final String REL_FEED = "feed";

    /**
     * Discovers links to RSS and Atom feeds in the given File which must be a HTML document.
     *
     * @param in      HTML file to scan. No charset is passed to Jsoup (null), so Jsoup decides how to decode it.
     * @param baseUrl URL that relative feed links are resolved against.
     * @return A map which contains the feed URLs as keys and titles as values, in document order. If a link has no
     * title, its href attribute (as written in the document) is used as the title instead.
     * @throws IOException if Jsoup fails to read the file.
     */
    public Map<String, String> findLinks(File in, String baseUrl) throws IOException {
        return findLinks(Jsoup.parse(in, null), baseUrl);
    }

    /**
     * Discovers links to RSS and Atom feeds in the given String which must contain a HTML document.
     *
     * @param in      HTML markup to scan.
     * @param baseUrl URL that relative feed links are resolved against.
     * @return A map which contains the feed URLs as keys and titles as values, in document order. If a link has no
     * title, its href attribute (as written in the document) is used as the title instead.
     * @throws IOException kept in the signature so that existing callers which handle it keep compiling.
     */
    public Map<String, String> findLinks(String in, String baseUrl) throws IOException {
        return findLinks(Jsoup.parse(in), baseUrl);
    }

    /**
     * Collects every feed link found among the {@code <link>} elements of the document's head.
     * A link qualifies when it has a non-empty href, a rel containing "alternate" or "feed" and an
     * RSS or Atom MIME type. Links whose URL cannot be resolved are skipped.
     */
    private Map<String, String> findLinks(Document document, String baseUrl) {
        Map<String, String> res = new LinkedHashMap<String, String>();
        Elements links = document.head().getElementsByTag("link");
        for (Element link : links) {
            // Surrounding whitespace is not part of the URL.
            String href = link.attr("href").trim();
            if (StringUtils.isEmpty(href)
                    || !isFeedRel(link.attr("rel"))
                    || !isFeedType(link.attr("type"))) {
                continue;
            }
            String processedUrl = processURL(baseUrl, href);
            if (processedUrl == null) {
                continue;
            }
            String title = link.attr("title");
            res.put(processedUrl, StringUtils.isEmpty(title) ? href : title);
        }
        return res;
    }

    /**
     * Returns true if the rel attribute value contains the "alternate" or "feed" keyword.
     * The value is treated as a whitespace-separated list of case-insensitive tokens, so values
     * such as "alternate feed" or "Alternate" are recognized as well.
     */
    private static boolean isFeedRel(String rel) {
        for (String token : rel.trim().split("\\s+")) {
            if (REL_ALTERNATE.equalsIgnoreCase(token) || REL_FEED.equalsIgnoreCase(token)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns true if the type attribute value is the RSS or Atom MIME type, ignoring case and
     * surrounding whitespace.
     */
    private static boolean isFeedType(String type) {
        String trimmed = type.trim();
        return MIME_RSS.equalsIgnoreCase(trimmed) || MIME_ATOM.equalsIgnoreCase(trimmed);
    }

    /**
     * Turns the href of a feed link into an absolute URL.
     *
     * @param baseUrl URL of the document that contained the link.
     * @param strUrl  href value of the link.
     * @return strUrl unchanged if it already has a scheme, otherwise strUrl resolved against baseUrl, or null if
     * it cannot be resolved (missing or malformed base URL).
     */
    private String processURL(String baseUrl, String strUrl) {
        if (!Uri.parse(strUrl).isRelative()) {
            // Absolute links are passed through untouched, whatever their scheme is.
            return strUrl;
        }
        if (baseUrl == null) {
            return null;
        }
        try {
            // Resolve against the base URL instead of overwriting its path: this keeps the base directory for
            // links like "feed.xml", leaves a query string in the href intact and does not carry over the
            // query/fragment of the base URL.
            return new URL(new URL(baseUrl), strUrl).toString();
        } catch (MalformedURLException e) {
            // The link cannot be resolved; the caller skips null results.
            return null;
        }
    }
}