summaryrefslogtreecommitdiff
path: root/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/Rss20.java
blob: 1c1ba9f5341c60aa2a2ce14b30d2d7dd9a981c51 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
package de.danoeh.antennapod.parser.feed.namespace;

import android.text.TextUtils;
import android.util.Log;

import androidx.core.text.HtmlCompat;
import de.danoeh.antennapod.parser.feed.HandlerState;
import de.danoeh.antennapod.parser.feed.element.SyndElement;
import de.danoeh.antennapod.parser.feed.util.DateUtils;
import de.danoeh.antennapod.parser.feed.util.SyndStringUtils;
import org.xml.sax.Attributes;

import de.danoeh.antennapod.model.feed.FeedItem;
import de.danoeh.antennapod.model.feed.FeedMedia;
import de.danoeh.antennapod.parser.feed.util.MimeTypeUtils;

import java.util.Locale;

/**
 * SAX-Parser for reading RSS-Feeds.
 *
 * <p>Creates a {@link FeedItem} for every {@code item} that is a direct child of {@code channel},
 * attaches a {@link FeedMedia} for the first usable {@code enclosure} of an item, and copies the text
 * of guid, title, link, pubDate, image url, description and language elements into the current feed
 * or item when the corresponding element ends.
 */
public class Rss20 extends Namespace {

    private static final String TAG = "NSRSS20";

    public static final String CHANNEL = "channel";
    public static final String ITEM = "item";
    private static final String GUID = "guid";
    private static final String TITLE = "title";
    private static final String LINK = "link";
    private static final String DESCR = "description";
    private static final String PUBDATE = "pubDate";
    private static final String ENCLOSURE = "enclosure";
    private static final String IMAGE = "image";
    private static final String URL = "url";
    private static final String LANGUAGE = "language";

    private static final String ENC_URL = "url";
    private static final String ENC_LEN = "length";
    private static final String ENC_TYPE = "type";

    /** Enclosure lengths below this many bytes are treated as unknown (stored as 0). */
    private static final long MIN_PLAUSIBLE_ENCLOSURE_SIZE = 16384;

    /**
     * Handles the start of an element.
     *
     * <p>An {@code item} directly below {@code channel} starts a new current item that is added to the
     * state's item list. An {@code enclosure} directly below {@code item} becomes the item's media if
     * the item has none yet, the url is non-empty and the mime type is a media type.
     *
     * @param localName  local name of the element that was opened
     * @param state      parser state holding the tag stack, the feed and the current item
     * @param attributes attributes of the opened element
     * @return a new {@link SyndElement} for {@code localName} bound to this namespace
     */
    @Override
    public SyndElement handleElementStart(String localName, HandlerState state, Attributes attributes) {
        if (ITEM.equals(localName) && CHANNEL.equals(parentName(state))) {
            state.setCurrentItem(new FeedItem());
            state.getItems().add(state.getCurrentItem());
            state.getCurrentItem().setFeed(state.getFeed());
        } else if (ENCLOSURE.equals(localName) && ITEM.equals(parentName(state))) {
            String url = attributes.getValue(ENC_URL);
            String mimeType = MimeTypeUtils.getMimeType(attributes.getValue(ENC_TYPE), url);

            boolean validUrl = !TextUtils.isEmpty(url);
            if (state.getCurrentItem() != null && state.getCurrentItem().getMedia() == null
                    && MimeTypeUtils.isMediaFile(mimeType) && validUrl) {
                long size = 0;
                try {
                    size = Long.parseLong(attributes.getValue(ENC_LEN));
                    if (size < MIN_PLAUSIBLE_ENCLOSURE_SIZE) {
                        // less than 16kb is suspicious, check manually
                        size = 0;
                    }
                } catch (NumberFormatException e) {
                    // Also reached when the length attribute is missing (parseLong rejects null).
                    Log.d(TAG, "Length attribute could not be parsed.");
                }
                FeedMedia media = new FeedMedia(state.getCurrentItem(), url, size, mimeType);
                state.getCurrentItem().setMedia(media);
            }
        }
        return new SyndElement(localName, this);
    }

    /**
     * Handles the end of an element.
     *
     * <p>The end of an {@code item} finalizes and clears the current item. For any other element the
     * buffered text is applied to the feed or the current item, provided at least two tags are open
     * and a content buffer exists.
     *
     * @param localName local name of the element that was closed
     * @param state     parser state holding the tag stack, content buffer, feed and current item
     */
    @Override
    public void handleElementEnd(String localName, HandlerState state) {
        if (ITEM.equals(localName)) {
            finishCurrentItem(state);
        } else if (state.getTagstack().size() >= 2 && state.getContentBuf() != null) {
            applyTextContent(localName, state);
        }
    }

    /**
     * Returns the name of the element on top of the tag stack, or {@code null} if no element is open.
     */
    private static String parentName(HandlerState state) {
        if (state.getTagstack().isEmpty()) {
            return null;
        }
        return state.getTagstack().peek().getName();
    }

    /**
     * Runs the text through {@link HtmlCompat#fromHtml(String, int)} in compact mode and returns the
     * result as a plain string.
     */
    private static String htmlToText(String html) {
        return HtmlCompat.fromHtml(html, HtmlCompat.FROM_HTML_MODE_COMPACT).toString();
    }

    /**
     * Applies the title fallback and a pending iTunes duration to the current item, then clears it.
     */
    private void finishCurrentItem(HandlerState state) {
        FeedItem currentItem = state.getCurrentItem();
        if (currentItem != null) {
            // the title tag is optional in RSS 2.0. The description is used
            // as a title if the item has no title-tag.
            if (currentItem.getTitle() == null) {
                currentItem.setTitle(currentItem.getDescription());
            }

            if (state.getTempObjects().containsKey(Itunes.DURATION)) {
                if (currentItem.hasMedia()) {
                    Integer duration = (Integer) state.getTempObjects().get(Itunes.DURATION);
                    currentItem.getMedia().setDuration(duration);
                }
                // The stored duration is dropped even when the item has no media to receive it.
                state.getTempObjects().remove(Itunes.DURATION);
            }
        }
        state.setCurrentItem(null);
    }

    /**
     * Copies the buffered text of the element that just ended into the feed or the current item,
     * depending on the element and its ancestors on the tag stack.
     */
    private void applyTextContent(String localName, HandlerState state) {
        String contentRaw = state.getContentBuf().toString();
        String content = SyndStringUtils.trimAllWhitespace(contentRaw);
        String top = state.getTagstack().peek().getName();
        String second = state.getSecondTag().getName();
        String third = null;
        if (state.getTagstack().size() >= 3) {
            third = state.getThirdTag().getName();
        }

        if (GUID.equals(top) && ITEM.equals(second)) {
            // some feed creators include an empty or non-standard guid-element in their feed,
            // which should be ignored
            if (!TextUtils.isEmpty(contentRaw) && state.getCurrentItem() != null) {
                state.getCurrentItem().setItemIdentifier(contentRaw);
            }
        } else if (TITLE.equals(top)) {
            // The HTML conversion is only performed in the branches that actually store its result.
            if (ITEM.equals(second) && state.getCurrentItem() != null) {
                state.getCurrentItem().setTitle(htmlToText(content));
            } else if (CHANNEL.equals(second) && state.getFeed() != null) {
                state.getFeed().setTitle(htmlToText(content));
            }
        } else if (LINK.equals(top)) {
            if (CHANNEL.equals(second) && state.getFeed() != null) {
                state.getFeed().setLink(content);
            } else if (ITEM.equals(second) && state.getCurrentItem() != null) {
                state.getCurrentItem().setLink(content);
            }
        } else if (PUBDATE.equals(top) && ITEM.equals(second) && state.getCurrentItem() != null) {
            state.getCurrentItem().setPubDate(DateUtils.parseOrNullIfFuture(content));
        } else if (URL.equals(top) && IMAGE.equals(second) && CHANNEL.equals(third)) {
            // prefer itunes:image
            if (state.getFeed() != null && state.getFeed().getImageUrl() == null) {
                state.getFeed().setImageUrl(content);
            }
        } else if (DESCR.equals(localName)) {
            if (CHANNEL.equals(second) && state.getFeed() != null) {
                state.getFeed().setDescription(htmlToText(content));
            } else if (ITEM.equals(second) && state.getCurrentItem() != null) {
                state.getCurrentItem().setDescriptionIfLonger(htmlToText(content));
            }
        } else if (LANGUAGE.equals(localName) && state.getFeed() != null) {
            // Note: the parent element is not checked here, so any language element is applied.
            state.getFeed().setLanguage(content.toLowerCase(Locale.US));
        }
    }

}