diff options
author | ByteHamster <info@bytehamster.com> | 2021-08-28 00:12:48 +0200 |
---|---|---|
committer | ByteHamster <info@bytehamster.com> | 2021-08-28 09:57:41 +0200 |
commit | 24389d42e89037b205fff2bc681e4ad998895286 (patch) | |
tree | 1c3d979e043510431d54bd9ff0fab86ac7159737 /parser/feed/src | |
parent | 85c8a419acb385cdf249662866715965de219c93 (diff) | |
download | AntennaPod-24389d42e89037b205fff2bc681e4ad998895286.zip |
Moved feed parser to its own module
Diffstat (limited to 'parser/feed/src')
36 files changed, 2180 insertions, 0 deletions
diff --git a/parser/feed/src/main/AndroidManifest.xml b/parser/feed/src/main/AndroidManifest.xml
new file mode 100644
index 000000000..44b10f29a
--- /dev/null
+++ b/parser/feed/src/main/AndroidManifest.xml
@@ -0,0 +1 @@
+<manifest package="de.danoeh.antennapod.parser.feed" />
diff --git a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/FeedHandler.java b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/FeedHandler.java
new file mode 100644
index 000000000..c7f5c4f21
--- /dev/null
+++ b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/FeedHandler.java
@@ -0,0 +1,91 @@
+package de.danoeh.antennapod.parser.feed;
+
+import android.text.TextUtils;
+import android.util.Log;
+
+import de.danoeh.antennapod.parser.feed.util.TypeGetter;
+import org.apache.commons.io.input.XmlStreamReader;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+import de.danoeh.antennapod.model.feed.Feed;
+import de.danoeh.antennapod.model.feed.FeedItem;
+
+/**
+ * Entry point of the feed parser: runs a namespace-aware SAX parse over the
+ * file a {@link Feed} points to and removes duplicate episodes afterwards.
+ */
+public class FeedHandler {
+    private static final String TAG = "FeedHandler";
+
+    /**
+     * Parses the file at {@code feed.getFile_url()} with a {@link SyndHandler}
+     * and replaces the feed's items with a de-duplicated copy.
+     *
+     * @param feed feed whose local file is parsed; its items are overwritten
+     * @return the feed held by the handler state together with the alternate feed URLs found
+     * @throws SAXException                 if the XML cannot be parsed
+     * @throws IOException                  if the file cannot be read
+     * @throws ParserConfigurationException if no SAX parser can be created
+     * @throws UnsupportedFeedtypeException declared for the type detection step
+     */
+    public FeedHandlerResult parseFeed(Feed feed) throws SAXException, IOException,
+            ParserConfigurationException, UnsupportedFeedtypeException {
+        TypeGetter tg = new TypeGetter();
+        TypeGetter.Type type = tg.getType(feed);
+        SyndHandler handler = new SyndHandler(feed, type);
+
+        SAXParserFactory factory = SAXParserFactory.newInstance();
+        factory.setNamespaceAware(true);
+        SAXParser saxParser = factory.newSAXParser();
+        File file = new File(feed.getFile_url());
+        // try-with-resources: the reader is closed even when parse() throws,
+        // which the previous explicit close() after parse() did not guarantee.
+        try (Reader inputStreamReader = new XmlStreamReader(file)) {
+            InputSource inputSource = new InputSource(inputStreamReader);
+            saxParser.parse(inputSource, handler);
+        }
+        feed.setItems(dedupItems(feed.getItems()));
+        return new FeedHandlerResult(handler.state.feed, handler.state.alternateUrls);
+    }
+
+    /**
+     * Returns a copy of {@code items} without duplicates. An item is dropped when its
+     * guid, its media stream URL, or its title concatenated with its pubDate has
+     * already been recorded for an earlier item. All three kinds of keys share one set.
+     * The guid is only recorded at the end of an iteration, so items that leave the
+     * loop body early (no media / stream URL, or no title / pubDate) do not register it.
+     *
+     * For updating items that are stored in the database, see also: DBTasks.searchFeedItemByIdentifyingValue
+     *
+     * @param items items in feed order, may be null
+     * @return a new list without duplicates, or null if {@code items} is null
+     */
+    public static List<FeedItem> dedupItems(List<FeedItem> items) {
+        if (items == null) {
+            return null;
+        }
+        List<FeedItem> list = new ArrayList<>(items);
+        Set<String> seen = new HashSet<>();
+        Iterator<FeedItem> it = list.iterator();
+        while (it.hasNext()) {
+            FeedItem item = it.next();
+            if (!TextUtils.isEmpty(item.getItemIdentifier()) && seen.contains(item.getItemIdentifier())) {
+                Log.d(TAG, "Removing duplicate episode guid " + item.getItemIdentifier());
+                it.remove();
+                continue;
+            }
+
+            if (item.getMedia() == null || TextUtils.isEmpty(item.getMedia().getStreamUrl())) {
+                continue;
+            }
+            if (seen.contains(item.getMedia().getStreamUrl())) {
+                Log.d(TAG, "Removing duplicate episode stream url " + item.getMedia().getStreamUrl());
+                it.remove();
+            } else {
+                seen.add(item.getMedia().getStreamUrl());
+                if (TextUtils.isEmpty(item.getTitle()) || item.getPubDate() == null) {
+                    continue;
+                }
+                if (!seen.contains(item.getTitle() + item.getPubDate().toString())) {
+                    seen.add(item.getTitle() + item.getPubDate().toString());
+                } else {
+                    Log.d(TAG, "Removing duplicate episode title and pubDate "
+                            + item.getTitle()
+                            + " " + item.getPubDate());
+                    it.remove();
+                }
+            }
+            seen.add(item.getItemIdentifier());
+        }
+        return list;
+    }
+}
diff --git a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/FeedHandlerResult.java b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/FeedHandlerResult.java
new file mode 100644
index 000000000..43b3387a0
--- /dev/null
+++ b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/FeedHandlerResult.java
@@ -0,0 +1,19 @@
+package de.danoeh.antennapod.parser.feed;
+
+import
java.util.Map;
+
+import de.danoeh.antennapod.model.feed.Feed;
+
+/**
+ * Container for results returned by the Feed parser
+ */
+public class FeedHandlerResult {
+
+    public final Feed feed;
+    public final Map<String, String> alternateFeedUrls;
+
+    public FeedHandlerResult(Feed feed, Map<String, String> alternateFeedUrls) {
+        this.feed = feed;
+        this.alternateFeedUrls = alternateFeedUrls;
+    }
+}
diff --git a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/HandlerState.java b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/HandlerState.java
new file mode 100644
index 000000000..706a328e8
--- /dev/null
+++ b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/HandlerState.java
@@ -0,0 +1,120 @@
+package de.danoeh.antennapod.parser.feed;
+
+import java.util.ArrayList;
+import java.util.EmptyStackException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Stack;
+
+import de.danoeh.antennapod.model.feed.Feed;
+import de.danoeh.antennapod.model.feed.FeedFunding;
+import de.danoeh.antennapod.model.feed.FeedItem;
+import de.danoeh.antennapod.parser.feed.namespace.Namespace;
+import de.danoeh.antennapod.parser.feed.element.SyndElement;
+
+/**
+ * Contains all relevant information to describe the current state of a
+ * SyndHandler.
+ */
+public class HandlerState {
+
+    /**
+     * Feed that the Handler is currently processing.
+     */
+    public Feed feed;
+    /**
+     * Contains links to related feeds, e.g. feeds with enclosures in other formats. The key of the map is the
+     * URL of the feed, the value is the title
+     */
+    public final Map<String, String> alternateUrls;
+    private final ArrayList<FeedItem> items;
+    private FeedItem currentItem;
+    private FeedFunding currentFunding;
+    final Stack<SyndElement> tagstack;
+    /**
+     * Namespaces that have been defined so far.
+     */
+    final Map<String, Namespace> namespaces;
+    final Stack<Namespace> defaultNamespaces;
+    /**
+     * Buffer for saving characters.
+     */
+    protected StringBuilder contentBuf;
+
+    /**
+     * Temporarily saved objects.
+     */
+    private final Map<String, Object> tempObjects;
+
+    /**
+     * Creates an empty state for the given feed; {@code contentBuf} stays
+     * unset until the handler assigns it.
+     */
+    public HandlerState(Feed feed) {
+        this.feed = feed;
+        alternateUrls = new HashMap<>();
+        items = new ArrayList<>();
+        tagstack = new Stack<>();
+        namespaces = new HashMap<>();
+        defaultNamespaces = new Stack<>();
+        tempObjects = new HashMap<>();
+    }
+
+    public Feed getFeed() {
+        return feed;
+    }
+
+    public ArrayList<FeedItem> getItems() {
+        return items;
+    }
+
+    public FeedItem getCurrentItem() {
+        return currentItem;
+    }
+
+    public Stack<SyndElement> getTagstack() {
+        return tagstack;
+    }
+
+    public void setFeed(Feed feed) {
+        this.feed = feed;
+    }
+
+    public void setCurrentItem(FeedItem currentItem) {
+        this.currentItem = currentItem;
+    }
+
+    public FeedFunding getCurrentFunding() {
+        return currentFunding;
+    }
+
+    public void setCurrentFunding(FeedFunding currentFunding) {
+        this.currentFunding = currentFunding;
+    }
+
+    /**
+     * Returns the SyndElement that comes after the top element of the tagstack.
+     * The stack itself is not modified.
+     *
+     * @throws EmptyStackException if the tagstack holds fewer than two elements
+     */
+    public SyndElement getSecondTag() {
+        return getTagBelowTop(1);
+    }
+
+    /**
+     * Returns the SyndElement two positions below the top element of the tagstack.
+     * The stack itself is not modified.
+     *
+     * @throws EmptyStackException if the tagstack holds fewer than three elements
+     */
+    public SyndElement getThirdTag() {
+        return getTagBelowTop(2);
+    }
+
+    /**
+     * Reads the element {@code depth} positions below the top by index. The former
+     * pop/peek/push sequence left the stack truncated when peek() failed on a stack
+     * that was too shallow; the exception type is kept so callers see no difference.
+     */
+    private SyndElement getTagBelowTop(int depth) {
+        int index = tagstack.size() - 1 - depth;
+        if (index < 0) {
+            throw new EmptyStackException();
+        }
+        return tagstack.get(index);
+    }
+
+    public StringBuilder getContentBuf() {
+        return contentBuf;
+    }
+
+    /**
+     * Records an alternate feed; note that the map is keyed by URL, not by title.
+     */
+    public void addAlternateFeedUrl(String title, String url) {
+        alternateUrls.put(url, title);
+    }
+
+    public Map<String, Object> getTempObjects() {
+        return tempObjects;
+    }
+}
diff --git a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/SyndHandler.java b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/SyndHandler.java
new file mode 100644
index 000000000..16bbecbb8
--- /dev/null
+++ b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/SyndHandler.java
@@ -0,0 +1,139 @@
+package de.danoeh.antennapod.parser.feed;
+
+import
android.util.Log;
+
+import de.danoeh.antennapod.parser.feed.util.TypeGetter;
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+import de.danoeh.antennapod.model.feed.Feed;
+import de.danoeh.antennapod.parser.feed.namespace.Content;
+import de.danoeh.antennapod.parser.feed.namespace.DublinCore;
+import de.danoeh.antennapod.parser.feed.namespace.Itunes;
+import de.danoeh.antennapod.parser.feed.namespace.Media;
+import de.danoeh.antennapod.parser.feed.namespace.Rss20;
+import de.danoeh.antennapod.parser.feed.namespace.SimpleChapters;
+import de.danoeh.antennapod.parser.feed.namespace.Namespace;
+import de.danoeh.antennapod.parser.feed.namespace.PodcastIndex;
+import de.danoeh.antennapod.parser.feed.element.SyndElement;
+import de.danoeh.antennapod.parser.feed.namespace.Atom;
+
+/** Superclass for all SAX Handlers which process Syndication formats */
+public class SyndHandler extends DefaultHandler {
+    private static final String TAG = "SyndHandler";
+    private static final String DEFAULT_PREFIX = "";
+    public final HandlerState state;
+
+    /**
+     * Creates the handler state for {@code feed}. For RSS 2.0 and RSS 0.91 the
+     * Rss20 namespace is installed as default namespace up front.
+     */
+    public SyndHandler(Feed feed, TypeGetter.Type type) {
+        state = new HandlerState(feed);
+        if (type == TypeGetter.Type.RSS20 || type == TypeGetter.Type.RSS091) {
+            state.defaultNamespaces.push(new Rss20());
+        }
+    }
+
+    /**
+     * Starts a fresh character buffer and, if a namespace handles the element,
+     * pushes the element it returns onto the tagstack.
+     */
+    @Override
+    public void startElement(String uri, String localName, String qualifiedName,
+            Attributes attributes) throws SAXException {
+        state.contentBuf = new StringBuilder();
+        Namespace handler = getHandlingNamespace(uri, qualifiedName);
+        if (handler != null) {
+            SyndElement element = handler.handleElementStart(localName, state,
+                    attributes);
+            state.tagstack.push(element);
+
+        }
+    }
+
+    /**
+     * Buffers character data, but only while at least two handled elements are
+     * open and a buffer exists.
+     */
+    @Override
+    public void characters(char[] ch, int start, int length)
+            throws SAXException {
+        if (state.getTagstack().size() >= 2 && state.contentBuf != null) {
+            state.contentBuf.append(ch, start, length);
+        }
+    }
+
+    /**
+     * Lets the handling namespace finish the element, pops it from the tagstack
+     * and discards the character buffer.
+     */
+    @Override
+    public void endElement(String uri, String localName, String qualifiedName)
+            throws SAXException {
+        Namespace handler = getHandlingNamespace(uri, qualifiedName);
+        if (handler != null) {
+            handler.handleElementEnd(localName, state);
+            state.tagstack.pop();
+
+        }
+        state.contentBuf = null;
+
+    }
+
+    /**
+     * Drops the innermost default namespace when a default-prefix mapping ends;
+     * the bottom-most default namespace is always kept.
+     */
+    @Override
+    public void endPrefixMapping(String prefix) throws SAXException {
+        if (state.defaultNamespaces.size() > 1 && prefix.equals(DEFAULT_PREFIX)) {
+            state.defaultNamespaces.pop();
+        }
+    }
+
+    /**
+     * Registers the handler for a known namespace URI the first time it is declared
+     * with its expected prefix. Atom declared with the default prefix becomes a
+     * default namespace instead of an entry in the namespace map.
+     */
+    @Override
+    public void startPrefixMapping(String prefix, String uri)
+            throws SAXException {
+        // Find the right namespace
+        if (!state.namespaces.containsKey(uri)) {
+            if (uri.equals(Atom.NSURI)) {
+                if (prefix.equals(DEFAULT_PREFIX)) {
+                    state.defaultNamespaces.push(new Atom());
+                } else if (prefix.equals(Atom.NSTAG)) {
+                    state.namespaces.put(uri, new Atom());
+                    Log.d(TAG, "Recognized Atom namespace");
+                }
+            } else if (uri.equals(Content.NSURI)
+                    && prefix.equals(Content.NSTAG)) {
+                state.namespaces.put(uri, new Content());
+                Log.d(TAG, "Recognized Content namespace");
+            } else if (uri.equals(Itunes.NSURI)
+                    && prefix.equals(Itunes.NSTAG)) {
+                state.namespaces.put(uri, new Itunes());
+                Log.d(TAG, "Recognized ITunes namespace");
+            } else if (uri.equals(SimpleChapters.NSURI)
+                    // NOTE(review): NSTAG is used as a regex here, presumably to accept
+                    // several prefixes - confirm against SimpleChapters.NSTAG.
+                    && prefix.matches(SimpleChapters.NSTAG)) {
+                state.namespaces.put(uri, new SimpleChapters());
+                Log.d(TAG, "Recognized SimpleChapters namespace");
+            } else if (uri.equals(Media.NSURI)
+                    && prefix.equals(Media.NSTAG)) {
+                state.namespaces.put(uri, new Media());
+                Log.d(TAG, "Recognized media namespace");
+            } else if (uri.equals(DublinCore.NSURI)
+                    && prefix.equals(DublinCore.NSTAG)) {
+                state.namespaces.put(uri, new DublinCore());
+                Log.d(TAG, "Recognized DublinCore namespace");
+            // Parentheses are required: && binds tighter than ||, so without them the
+            // first URI was accepted with any prefix, unlike every other namespace.
+            } else if ((uri.equals(PodcastIndex.NSURI) || uri.equals(PodcastIndex.NSURI2))
+                    && prefix.equals(PodcastIndex.NSTAG)) {
+                state.namespaces.put(uri, new PodcastIndex());
+                Log.d(TAG, "Recognized PodcastIndex namespace");
+            }
+        }
+    }
+
+    /**
+     * Looks up the namespace registered for {@code uri}; elements without a prefix
+     * fall back to the current default namespace, if any.
+     */
+    private Namespace getHandlingNamespace(String uri, String qualifiedName) {
+        Namespace handler = state.namespaces.get(uri);
+        if (handler == null && !state.defaultNamespaces.empty()
+                && !qualifiedName.contains(":")) {
+            handler = state.defaultNamespaces.peek();
+        }
+        return handler;
+    }
+
+    /** Hands the collected items over to the feed once the document is complete. */
+    @Override
+    public void endDocument() throws SAXException {
+        super.endDocument();
+        state.getFeed().setItems(state.getItems());
+    }
+
+    public HandlerState getState() {
+        return state;
+    }
+
+}
diff --git a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/UnsupportedFeedtypeException.java b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/UnsupportedFeedtypeException.java
new file mode 100644
index 000000000..74c126a50
--- /dev/null
+++ b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/UnsupportedFeedtypeException.java
@@ -0,0 +1,45 @@
+package de.danoeh.antennapod.parser.feed;
+
+import de.danoeh.antennapod.parser.feed.util.TypeGetter;
+import de.danoeh.antennapod.parser.feed.util.TypeGetter.Type;
+
+/**
+ * Signals a feed type the parser cannot handle. The message is either the one
+ * passed in explicitly or derived from the type, see {@link #getMessage()}.
+ */
+public class UnsupportedFeedtypeException extends Exception {
+    private static final long serialVersionUID = 9105878964928170669L;
+    private final TypeGetter.Type type;
+    private String rootElement;
+    private String message = null;
+
+    public UnsupportedFeedtypeException(Type type) {
+        super();
+        this.type = type;
+    }
+
+    public UnsupportedFeedtypeException(Type type, String rootElement) {
+        this.type = type;
+        this.rootElement = rootElement;
+    }
+
+    /** Creates an exception with a custom message; the type is set to INVALID. */
+    public UnsupportedFeedtypeException(String message) {
+        this.message = message;
+        type = Type.INVALID;
+    }
+
+    public TypeGetter.Type getType() {
+        return type;
+    }
+
+    public String getRootElement() {
+        return rootElement;
+    }
+
+    @Override
+    public String getMessage() {
+        if (message != null) {
+            return message;
+        } else if (type == TypeGetter.Type.INVALID) {
+            return "Invalid type";
+        } else {
+            return "Type " + type + " not supported";
+        }
+    }
+}
diff --git
a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/element/AtomText.java b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/element/AtomText.java
new file mode 100644
index 000000000..8acd9cbb4
--- /dev/null
+++ b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/element/AtomText.java
@@ -0,0 +1,36 @@
+package de.danoeh.antennapod.parser.feed.element;
+
+import androidx.core.text.HtmlCompat;
+
+import de.danoeh.antennapod.parser.feed.namespace.Namespace;
+
+/** Represents Atom Element which contains text (content, title, summary). */
+public class AtomText extends SyndElement {
+    public static final String TYPE_HTML = "html";
+    private static final String TYPE_XHTML = "xhtml";
+
+    private final String type;
+    private String content;
+
+    public AtomText(String name, Namespace namespace, String type) {
+        super(name, namespace);
+        this.type = type;
+    }
+
+    /**
+     * Returns the stored content, converted according to the element's type:
+     * only "html" content is run through HtmlCompat, everything else is
+     * returned exactly as it was set.
+     */
+    public String getProcessedContent() {
+        if (type == null) {
+            return content;
+        }
+        switch (type) {
+            case TYPE_HTML:
+                return HtmlCompat.fromHtml(content, HtmlCompat.FROM_HTML_MODE_LEGACY).toString();
+            case TYPE_XHTML:
+            default: // Handle as text by default
+                return content;
+        }
+    }
+
+    public void setContent(String content) {
+        this.content = content;
+    }
+}
diff --git a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/element/SimpleChapter.java b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/element/SimpleChapter.java
new file mode 100644
index 000000000..069e49f09
--- /dev/null
+++ b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/element/SimpleChapter.java
@@ -0,0 +1,16 @@
+package de.danoeh.antennapod.parser.feed.element;
+
+import de.danoeh.antennapod.model.feed.Chapter;
+
+/** Chapter whose chapter type is reported as {@link #CHAPTERTYPE_SIMPLECHAPTER}. */
+public class SimpleChapter extends Chapter {
+    public static final int CHAPTERTYPE_SIMPLECHAPTER = 0;
+
+    public SimpleChapter(long start, String title, String link, String imageUrl) {
+        super(start, title, link, imageUrl);
+    }
+
+    @Override
+    public int getChapterType() {
+        return CHAPTERTYPE_SIMPLECHAPTER;
+    }
+}
diff --git a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/element/SyndElement.java b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/element/SyndElement.java
new file mode 100644
index 000000000..98dbe2801
--- /dev/null
+++ b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/element/SyndElement.java
@@ -0,0 +1,22 @@
+package de.danoeh.antennapod.parser.feed.element;
+
+import de.danoeh.antennapod.parser.feed.namespace.Namespace;
+
+/** Defines a XML Element that is pushed on the tagstack */
+public class SyndElement {
+    private final String name;
+    private final Namespace namespace;
+
+    /** Stores the element's name together with the namespace it belongs to. */
+    public SyndElement(String name, Namespace namespace) {
+        this.name = name;
+        this.namespace = namespace;
+    }
+
+    public Namespace getNamespace() {
+        return namespace;
+    }
+
+    public String getName() {
+        return name;
+    }
+}
diff --git a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/Atom.java b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/Atom.java
new file mode 100644
index 000000000..ef802c355
--- /dev/null
+++ b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/Atom.java
@@ -0,0 +1,224 @@
+package de.danoeh.antennapod.parser.feed.namespace;
+
+import android.text.TextUtils;
+import android.util.Log;
+
+import de.danoeh.antennapod.model.feed.FeedFunding;
+import de.danoeh.antennapod.parser.feed.HandlerState;
+import de.danoeh.antennapod.parser.feed.element.AtomText;
+import de.danoeh.antennapod.parser.feed.util.DateUtils;
+import de.danoeh.antennapod.parser.feed.util.SyndStringUtils;
+import org.xml.sax.Attributes;
+
+import de.danoeh.antennapod.model.feed.FeedItem;
+import de.danoeh.antennapod.model.feed.FeedMedia;
+import de.danoeh.antennapod.parser.feed.element.SyndElement;
+import de.danoeh.antennapod.parser.feed.util.SyndTypeUtils;
+
+/**
+ * Handles elements of the Atom namespace: entries, text constructs, links,
+ * ids, dates, images and author names.
+ */
+public class Atom extends Namespace {
+    private static final String TAG = "NSAtom";
+    public static final String NSTAG = "atom";
+    public static final String NSURI = "http://www.w3.org/2005/Atom";
+
+    private static final String FEED = "feed";
+    private static final String ID = "id";
+    private static final String TITLE = "title";
+    private static final String ENTRY = "entry";
+    private static final String LINK = "link";
+    private static final String UPDATED = "updated";
+    private static final String AUTHOR = "author";
+    private static final String AUTHOR_NAME = "name";
+    private static final String CONTENT = "content";
+    private static final String SUMMARY = "summary";
+    private static final String IMAGE_LOGO = "logo";
+    private static final String IMAGE_ICON = "icon";
+    private static final String SUBTITLE = "subtitle";
+    private static final String PUBLISHED = "published";
+
+    private static final String TEXT_TYPE = "type";
+    // Link
+    private static final String LINK_HREF = "href";
+    private static final String LINK_REL = "rel";
+    private static final String LINK_TYPE = "type";
+    private static final String LINK_TITLE = "title";
+    private static final String LINK_LENGTH = "length";
+    // rel-values
+    private static final String LINK_REL_ALTERNATE = "alternate";
+    private static final String LINK_REL_ARCHIVES = "archives";
+    private static final String LINK_REL_ENCLOSURE = "enclosure";
+    private static final String LINK_REL_PAYMENT = "payment";
+    private static final String LINK_REL_NEXT = "next";
+    // type-values
+    private static final String LINK_TYPE_ATOM = "application/atom+xml";
+    private static final String LINK_TYPE_HTML = "text/html";
+    // Registered XHTML media type. The previous value had the two parts swapped.
+    private static final String LINK_TYPE_XHTML = "application/xhtml+xml";
+    // Former, misordered spelling; still accepted so existing behavior is kept.
+    private static final String LINK_TYPE_XHTML_MISORDERED = "application/xml+xhtml";
+
+    private static final String LINK_TYPE_RSS = "application/rss+xml";
+
+    /**
+     * Regexp to test whether an Element is a Text Element.
+     */
+    private static final String isText = TITLE + "|" + CONTENT + "|"
+            + SUBTITLE + "|" + SUMMARY;
+
+    private static final String isFeed = FEED + "|" + Rss20.CHANNEL;
+    private static final String isFeedItem = ENTRY + "|" + Rss20.ITEM;
+
+    /** Returns true for the HTML and XHTML link types (both XHTML spellings). */
+    private static boolean isHtmlLinkType(String type) {
+        return LINK_TYPE_HTML.equals(type) || LINK_TYPE_XHTML.equals(type)
+                || LINK_TYPE_XHTML_MISORDERED.equals(type);
+    }
+
+    /**
+     * Handles the start of an Atom element: creates a new item for "entry",
+     * returns an {@link AtomText} for text elements, and evaluates "link"
+     * attributes relative to the enclosing item or feed element.
+     *
+     * @return the element to push on the tagstack
+     */
+    @Override
+    public SyndElement handleElementStart(String localName, HandlerState state,
+            Attributes attributes) {
+        if (ENTRY.equals(localName)) {
+            state.setCurrentItem(new FeedItem());
+            state.getItems().add(state.getCurrentItem());
+            state.getCurrentItem().setFeed(state.getFeed());
+        } else if (localName.matches(isText)) {
+            String type = attributes.getValue(TEXT_TYPE);
+            return new AtomText(localName, this, type);
+        // A link without any handled parent element cannot be attributed to a feed or
+        // an item; peek() on the empty tagstack used to throw EmptyStackException here.
+        } else if (LINK.equals(localName) && !state.getTagstack().isEmpty()) {
+            String href = attributes.getValue(LINK_HREF);
+            String rel = attributes.getValue(LINK_REL);
+            SyndElement parent = state.getTagstack().peek();
+            if (parent.getName().matches(isFeedItem)) {
+                if (rel == null || LINK_REL_ALTERNATE.equals(rel)) {
+                    state.getCurrentItem().setLink(href);
+                } else if (LINK_REL_ENCLOSURE.equals(rel)) {
+                    String strSize = attributes.getValue(LINK_LENGTH);
+                    long size = 0;
+                    try {
+                        if (strSize != null) {
+                            size = Long.parseLong(strSize);
+                        }
+                    } catch (NumberFormatException e) {
+                        Log.d(TAG, "Length attribute could not be parsed.");
+                    }
+                    String type = attributes.getValue(LINK_TYPE);
+
+                    if (type == null) {
+                        type = SyndTypeUtils.getMimeTypeFromUrl(href);
+                    }
+
+                    FeedItem currItem = state.getCurrentItem();
+                    // Only the first valid enclosure of an item becomes its media
+                    if (SyndTypeUtils.enclosureTypeValid(type) && currItem != null && !currItem.hasMedia()) {
+                        currItem.setMedia(new FeedMedia(currItem, href, size, type));
+                    }
+                } else if (LINK_REL_PAYMENT.equals(rel)) {
+                    state.getCurrentItem().setPaymentLink(href);
+                }
+            } else if (parent.getName().matches(isFeed)) {
+                if (rel == null || LINK_REL_ALTERNATE.equals(rel)) {
+                    String type = attributes.getValue(LINK_TYPE);
+                    /*
+                     * Use as link if a) no type-attribute is given and
+                     * feed-object has no link yet b) type of link is
+                     * LINK_TYPE_HTML or LINK_TYPE_XHTML
+                     */
+                    if (state.getFeed() != null &&
+                            ((type == null && state.getFeed().getLink() == null) ||
+                                    isHtmlLinkType(type))) {
+                        state.getFeed().setLink(href);
+                    } else if (LINK_TYPE_ATOM.equals(type) || LINK_TYPE_RSS.equals(type)) {
+                        // treat as podlove alternate feed
+                        String title = attributes.getValue(LINK_TITLE);
+                        if (TextUtils.isEmpty(title)) {
+                            title = href;
+                        }
+                        state.addAlternateFeedUrl(title, href);
+                    }
+                } else if (LINK_REL_ARCHIVES.equals(rel) && state.getFeed() != null) {
+                    String type = attributes.getValue(LINK_TYPE);
+                    if (LINK_TYPE_ATOM.equals(type) || LINK_TYPE_RSS.equals(type)) {
+                        String title = attributes.getValue(LINK_TITLE);
+                        if (TextUtils.isEmpty(title)) {
+                            title = href;
+                        }
+                        state.addAlternateFeedUrl(title, href);
+                    } else if (isHtmlLinkType(type)) {
+                        //A Link such as to a directory such as iTunes
+                    }
+                } else if (LINK_REL_PAYMENT.equals(rel) && state.getFeed() != null) {
+                    state.getFeed().addPayment(new FeedFunding(href, ""));
+                } else if (LINK_REL_NEXT.equals(rel) && state.getFeed() != null) {
+                    state.getFeed().setPaged(true);
+                    state.getFeed().setNextPageLink(href);
+                }
+            }
+        }
+        return new SyndElement(localName, this);
+    }
+
+    /**
+     * Handles the end of an Atom element. Closing an "entry" transfers a stored
+     * iTunes duration to the item's media and clears the current item. For all
+     * elements with a parent on the tagstack, the buffered text is assigned to
+     * the matching feed or item property.
+     */
+    @Override
+    public void handleElementEnd(String localName, HandlerState state) {
+        if (ENTRY.equals(localName)) {
+            if (state.getCurrentItem() != null &&
+                    state.getTempObjects().containsKey(Itunes.DURATION)) {
+                FeedItem currentItem = state.getCurrentItem();
+                if (currentItem.hasMedia()) {
+                    Integer duration = (Integer) state.getTempObjects().get(Itunes.DURATION);
+                    currentItem.getMedia().setDuration(duration);
+                }
+                state.getTempObjects().remove(Itunes.DURATION);
+            }
+            state.setCurrentItem(null);
+        }
+
+        if (state.getTagstack().size() >= 2) {
+            AtomText textElement = null;
+            String contentRaw;
+            if (state.getContentBuf() != null) {
+                contentRaw = state.getContentBuf().toString();
+            } else {
+                contentRaw = "";
+            }
+            String content = SyndStringUtils.trimAllWhitespace(contentRaw);
+            SyndElement topElement = state.getTagstack().peek();
+            String top = topElement.getName();
+            SyndElement secondElement = state.getSecondTag();
+            String second = secondElement.getName();
+
+            if (top.matches(isText)) {
+                textElement = (AtomText) topElement;
+                textElement.setContent(content);
+            }
+
+            if (ID.equals(top)) {
+                // Identifiers are stored untrimmed (contentRaw), unlike the other values
+                if (FEED.equals(second) && state.getFeed() != null) {
+                    state.getFeed().setFeedIdentifier(contentRaw);
+                } else if (ENTRY.equals(second) && state.getCurrentItem() != null) {
+                    state.getCurrentItem().setItemIdentifier(contentRaw);
+                }
+            } else if (TITLE.equals(top) && textElement != null) {
+                if (FEED.equals(second) && state.getFeed() != null) {
+                    state.getFeed().setTitle(textElement.getProcessedContent());
+                } else if (ENTRY.equals(second) && state.getCurrentItem() != null) {
+                    state.getCurrentItem().setTitle(textElement.getProcessedContent());
+                }
+            } else if (SUBTITLE.equals(top) && FEED.equals(second) && textElement != null &&
+                    state.getFeed() != null) {
+                state.getFeed().setDescription(textElement.getProcessedContent());
+            } else if (CONTENT.equals(top) && ENTRY.equals(second) && textElement != null &&
+                    state.getCurrentItem() != null) {
+                state.getCurrentItem().setDescriptionIfLonger(textElement.getProcessedContent());
+            } else if (SUMMARY.equals(top) && ENTRY.equals(second) && textElement != null
+                    && state.getCurrentItem() != null) {
+                state.getCurrentItem().setDescriptionIfLonger(textElement.getProcessedContent());
+            } else if (UPDATED.equals(top) && ENTRY.equals(second) && state.getCurrentItem() != null &&
+                    state.getCurrentItem().getPubDate() == null) {
+                // "updated" only fills in a missing date, "published" below always wins
+                state.getCurrentItem().setPubDate(DateUtils.parseOrNullIfFuture(content));
+            } else if (PUBLISHED.equals(top) && ENTRY.equals(second) && state.getCurrentItem() != null) {
+                state.getCurrentItem().setPubDate(DateUtils.parseOrNullIfFuture(content));
+            } else if (IMAGE_LOGO.equals(top) && state.getFeed() != null && state.getFeed().getImageUrl() == null) {
+                state.getFeed().setImageUrl(content);
+            } else if (IMAGE_ICON.equals(top) && state.getFeed() != null) {
+                state.getFeed().setImageUrl(content);
+            } else if (AUTHOR_NAME.equals(top) && AUTHOR.equals(second) &&
+                    state.getFeed() != null && state.getCurrentItem() == null) {
+                // Several feed-level authors are joined into one comma separated string
+                String currentName = state.getFeed().getAuthor();
+                if (currentName == null) {
+                    state.getFeed().setAuthor(content);
+                } else {
+                    state.getFeed().setAuthor(currentName + ", " + content);
+                }
+            }
+        }
+    }
+}
diff --git a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/Content.java b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/Content.java
new file mode 100644
index 000000000..3a7d5ac3a
--- /dev/null
+++ b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/Content.java
@@ -0,0 +1,24 @@
+package de.danoeh.antennapod.parser.feed.namespace;
+
+import de.danoeh.antennapod.parser.feed.HandlerState;
+import de.danoeh.antennapod.parser.feed.element.SyndElement;
+import org.xml.sax.Attributes;
+
+/** Handles the "encoded" element of the RSS content module namespace. */
+public class Content extends Namespace {
+    public static final String NSTAG = "content";
+    public static final String NSURI = "http://purl.org/rss/1.0/modules/content/";
+
+    private static final String ENCODED = "encoded";
+
+    @Override
+    public SyndElement handleElementStart(String localName, HandlerState state, Attributes attributes) {
+        return new SyndElement(localName, this);
+    }
+
+    /** Offers the buffered text of "encoded" to the current item as its description. */
+    @Override
+    public void handleElementEnd(String localName, HandlerState state) {
+        if (ENCODED.equals(localName) && state.getCurrentItem() != null && state.getContentBuf() != null) {
+            state.getCurrentItem().setDescriptionIfLonger(state.getContentBuf().toString());
+        }
+    }
+}
diff --git a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/DublinCore.java b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/DublinCore.java
new file mode 100644
index 000000000..003f72e9b
--- /dev/null
+++
b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/DublinCore.java
@@ -0,0 +1,38 @@
+package de.danoeh.antennapod.parser.feed.namespace;
+
+import de.danoeh.antennapod.parser.feed.HandlerState;
+import de.danoeh.antennapod.parser.feed.element.SyndElement;
+import de.danoeh.antennapod.parser.feed.util.DateUtils;
+import org.xml.sax.Attributes;
+
+import de.danoeh.antennapod.model.feed.FeedItem;
+
+/** Handles the "date" element of the Dublin Core namespace inside an "item". */
+public class DublinCore extends Namespace {
+    private static final String TAG = "NSDublinCore";
+    public static final String NSTAG = "dc";
+    public static final String NSURI = "http://purl.org/dc/elements/1.1/";
+
+    private static final String ITEM = "item";
+    private static final String DATE = "date";
+
+    @Override
+    public SyndElement handleElementStart(String localName, HandlerState state,
+            Attributes attributes) {
+        return new SyndElement(localName, this);
+    }
+
+    /** Sets the current item's publication date from a "date" element whose parent is "item". */
+    @Override
+    public void handleElementEnd(String localName, HandlerState state) {
+        if (state.getCurrentItem() != null && state.getContentBuf() != null &&
+                state.getTagstack() != null && state.getTagstack().size() >= 2) {
+            FeedItem currentItem = state.getCurrentItem();
+            String top = state.getTagstack().peek().getName();
+            String second = state.getSecondTag().getName();
+            if (DATE.equals(top) && ITEM.equals(second)) {
+                String content = state.getContentBuf().toString();
+                currentItem.setPubDate(DateUtils.parseOrNullIfFuture(content));
+            }
+        }
+    }
+
+}
diff --git a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/Itunes.java b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/Itunes.java
new file mode 100644
index 000000000..5f47f8377
--- /dev/null
+++ b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/Itunes.java
@@ -0,0 +1,114 @@
+package de.danoeh.antennapod.parser.feed.namespace;
+
+import android.text.TextUtils;
+import android.util.Log;
+
+import androidx.core.text.HtmlCompat;
+
+import de.danoeh.antennapod.parser.feed.HandlerState;
+import de.danoeh.antennapod.parser.feed.element.SyndElement;
+import de.danoeh.antennapod.parser.feed.util.DurationParser;
+import org.xml.sax.Attributes;
+
+
+/** Handles image, author, duration, subtitle and summary of the iTunes namespace. */
+public class Itunes extends Namespace {
+
+    public static final String NSTAG = "itunes";
+    public static final String NSURI = "http://www.itunes.com/dtds/podcast-1.0.dtd";
+
+    private static final String IMAGE = "image";
+    private static final String IMAGE_HREF = "href";
+
+    private static final String AUTHOR = "author";
+    public static final String DURATION = "duration";
+    private static final String SUBTITLE = "subtitle";
+    private static final String SUMMARY = "summary";
+
+    /**
+     * Applies the "href" of an image element to the current item if there is
+     * one, otherwise (when not empty) to the feed.
+     */
+    @Override
+    public SyndElement handleElementStart(String localName, HandlerState state,
+            Attributes attributes) {
+        if (IMAGE.equals(localName)) {
+            String url = attributes.getValue(IMAGE_HREF);
+
+            if (state.getCurrentItem() != null) {
+                state.getCurrentItem().setImageUrl(url);
+            } else {
+                // this is the feed image
+                // prefer to all other images
+                if (!TextUtils.isEmpty(url)) {
+                    state.getFeed().setImageUrl(url);
+                }
+            }
+        }
+        return new SyndElement(localName, this);
+    }
+
+    /** Dispatches the buffered text of the closing element to the matching parse method. */
+    @Override
+    public void handleElementEnd(String localName, HandlerState state) {
+        if (state.getContentBuf() == null) {
+            return;
+        }
+
+        if (AUTHOR.equals(localName)) {
+            parseAuthor(state);
+        } else if (DURATION.equals(localName)) {
+            parseDuration(state);
+        } else if (SUBTITLE.equals(localName)) {
+            parseSubtitle(state);
+        } else if (SUMMARY.equals(localName)) {
+            // getSecondTag() throws on a tagstack with fewer than two elements, so a
+            // summary without a handled parent is passed on with an unknown parent name.
+            String parentName = null;
+            if (state.getTagstack().size() >= 2) {
+                parentName = state.getSecondTag().getName();
+            }
+            parseSummary(state, parentName);
+        }
+    }
+
+    /** Sets the feed author from the buffered text after converting it from HTML. */
+    private void parseAuthor(HandlerState state) {
+        if (state.getFeed() != null) {
+            String author = state.getContentBuf().toString();
+            state.getFeed().setAuthor(HtmlCompat.fromHtml(author,
+                    HtmlCompat.FROM_HTML_MODE_LEGACY).toString());
+        }
+    }
+
+    /** Stores the parsed duration in milliseconds as a temp object under {@link #DURATION}. */
+    private void parseDuration(HandlerState state) {
+        String durationStr = state.getContentBuf().toString();
+        if (TextUtils.isEmpty(durationStr)) {
+            return;
+        }
+
+        try {
+            long durationMs = DurationParser.inMillis(durationStr);
+            state.getTempObjects().put(DURATION, (int) durationMs);
+        } catch (NumberFormatException e) {
+            Log.e(NSTAG, String.format("Duration '%s' could not be parsed", durationStr));
+        }
+    }
+
+    /** Uses the subtitle as description, but only where no description exists yet. */
+    private void parseSubtitle(HandlerState state) {
+        String subtitle = state.getContentBuf().toString();
+        if (TextUtils.isEmpty(subtitle)) {
+            return;
+        }
+        if (state.getCurrentItem() != null) {
+            if (TextUtils.isEmpty(state.getCurrentItem().getDescription())) {
+                state.getCurrentItem().setDescriptionIfLonger(subtitle);
+            }
+        } else {
+            if (state.getFeed() != null && TextUtils.isEmpty(state.getFeed().getDescription())) {
+                state.getFeed().setDescription(subtitle);
+            }
+        }
+    }
+
+    /**
+     * Uses the summary as item description, or as feed description when the
+     * parent element is the RSS channel.
+     *
+     * @param secondElementName name of the parent element, may be null
+     */
+    private void parseSummary(HandlerState state, String secondElementName) {
+        String summary = state.getContentBuf().toString();
+        if (TextUtils.isEmpty(summary)) {
+            return;
+        }
+
+        if (state.getCurrentItem() != null) {
+            state.getCurrentItem().setDescriptionIfLonger(summary);
+        } else if (Rss20.CHANNEL.equals(secondElementName) && state.getFeed() != null) {
+            state.getFeed().setDescription(summary);
+        }
+    }
+}
diff --git a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/Media.java b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/Media.java
new file mode 100644
index 000000000..f480a0417
--- /dev/null
+++ b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/Media.java
@@ -0,0 +1,133 @@
+package de.danoeh.antennapod.parser.feed.namespace;
+
+import android.text.TextUtils;
+import android.util.Log;
+
+import de.danoeh.antennapod.parser.feed.HandlerState;
+import de.danoeh.antennapod.parser.feed.element.SyndElement;
+import org.xml.sax.Attributes;
+
+import java.util.concurrent.TimeUnit;
+
+import de.danoeh.antennapod.model.feed.FeedMedia;
+import
de.danoeh.antennapod.parser.feed.element.AtomText; +import de.danoeh.antennapod.parser.feed.util.SyndTypeUtils; + +/** Processes tags from the http://search.yahoo.com/mrss/ namespace. */ +public class Media extends Namespace { + private static final String TAG = "NSMedia"; + + public static final String NSTAG = "media"; + public static final String NSURI = "http://search.yahoo.com/mrss/"; + + private static final String CONTENT = "content"; + private static final String DOWNLOAD_URL = "url"; + private static final String SIZE = "fileSize"; + private static final String MIME_TYPE = "type"; + private static final String DURATION = "duration"; + private static final String DEFAULT = "isDefault"; + private static final String MEDIUM = "medium"; + + private static final String MEDIUM_IMAGE = "image"; + private static final String MEDIUM_AUDIO = "audio"; + private static final String MEDIUM_VIDEO = "video"; + + private static final String IMAGE = "thumbnail"; + private static final String IMAGE_URL = "url"; + + private static final String DESCRIPTION = "description"; + private static final String DESCRIPTION_TYPE = "type"; + + @Override + public SyndElement handleElementStart(String localName, HandlerState state, + Attributes attributes) { + if (CONTENT.equals(localName)) { + String url = attributes.getValue(DOWNLOAD_URL); + String type = attributes.getValue(MIME_TYPE); + String defaultStr = attributes.getValue(DEFAULT); + String medium = attributes.getValue(MEDIUM); + boolean validTypeMedia = false; + boolean validTypeImage = false; + boolean isDefault = "true".equals(defaultStr); + String guessedType = SyndTypeUtils.getMimeTypeFromUrl(url); + + if (MEDIUM_AUDIO.equals(medium)) { + validTypeMedia = true; + type = "audio/*"; + } else if (MEDIUM_VIDEO.equals(medium)) { + validTypeMedia = true; + type = "video/*"; + } else if (MEDIUM_IMAGE.equals(medium) && (guessedType == null + || (!guessedType.startsWith("audio/") && !guessedType.startsWith("video/")))) { + // 
Apparently, some publishers explicitly specify the audio file as an image + validTypeImage = true; + type = "image/*"; + } else { + if (type == null) { + type = guessedType; + } + + if (SyndTypeUtils.enclosureTypeValid(type)) { + validTypeMedia = true; + } else if (SyndTypeUtils.imageTypeValid(type)) { + validTypeImage = true; + } + } + + if (state.getCurrentItem() != null && (state.getCurrentItem().getMedia() == null || isDefault) + && url != null && validTypeMedia) { + long size = 0; + String sizeStr = attributes.getValue(SIZE); + try { + size = Long.parseLong(sizeStr); + } catch (NumberFormatException e) { + Log.e(TAG, "Size \"" + sizeStr + "\" could not be parsed."); + } + + int durationMs = 0; + String durationStr = attributes.getValue(DURATION); + if (!TextUtils.isEmpty(durationStr)) { + try { + long duration = Long.parseLong(durationStr); + durationMs = (int) TimeUnit.MILLISECONDS.convert(duration, TimeUnit.SECONDS); + } catch (NumberFormatException e) { + Log.e(TAG, "Duration \"" + durationStr + "\" could not be parsed"); + } + } + FeedMedia media = new FeedMedia(state.getCurrentItem(), url, size, type); + if (durationMs > 0) { + media.setDuration(durationMs); + } + state.getCurrentItem().setMedia(media); + } else if (state.getCurrentItem() != null && url != null && validTypeImage) { + state.getCurrentItem().setImageUrl(url); + } + } else if (IMAGE.equals(localName)) { + String url = attributes.getValue(IMAGE_URL); + if (url != null) { + if (state.getCurrentItem() != null) { + state.getCurrentItem().setImageUrl(url); + } else { + if (state.getFeed().getImageUrl() == null) { + state.getFeed().setImageUrl(url); + } + } + } + } else if (DESCRIPTION.equals(localName)) { + String type = attributes.getValue(DESCRIPTION_TYPE); + return new AtomText(localName, this, type); + } + return new SyndElement(localName, this); + } + + @Override + public void handleElementEnd(String localName, HandlerState state) { + if (DESCRIPTION.equals(localName)) { + String content = 
/**
 * Base class of all namespace handlers used while parsing a feed.
 *
 * <p>Each concrete handler declares its own {@code NSTAG} (prefix) and {@code NSURI}
 * (namespace URI) constants; the values declared here are only placeholders.
 */
public abstract class Namespace {
    // Placeholder values; concrete namespaces declare constants with the same names.
    public static final String NSTAG = null;
    public static final String NSURI = null;

    /**
     * Called by a Feedhandler when in startElement and it detects a namespace element.
     *
     * @param localName  local name of the element that was opened
     * @param state      shared parser state
     * @param attributes attributes of the opened element
     * @return The SyndElement to push onto the stack
     */
    public abstract SyndElement handleElementStart(String localName, HandlerState state, Attributes attributes);

    /**
     * Called by a Feedhandler when in endElement and it detects a namespace element.
     *
     * @param localName local name of the element that was closed
     * @param state     shared parser state
     */
    public abstract void handleElementEnd(String localName, HandlerState state);
}
NSTAG = "podcast"; + public static final String NSURI = "https://github.com/Podcastindex-org/podcast-namespace/blob/main/docs/1.0.md"; + public static final String NSURI2 = "https://podcastindex.org/namespace/1.0"; + private static final String URL = "url"; + private static final String FUNDING = "funding"; + + @Override + public SyndElement handleElementStart(String localName, HandlerState state, + Attributes attributes) { + if (FUNDING.equals(localName)) { + String href = attributes.getValue(URL); + FeedFunding funding = new FeedFunding(href, ""); + state.setCurrentFunding(funding); + state.getFeed().addPayment(state.getCurrentFunding()); + } + return new SyndElement(localName, this); + } + + @Override + public void handleElementEnd(String localName, HandlerState state) { + if (state.getContentBuf() == null) { + return; + } + String content = state.getContentBuf().toString(); + if (FUNDING.equals(localName) && state.getCurrentFunding() != null && !StringUtil.isBlank(content)) { + state.getCurrentFunding().setContent(content); + } + } +} diff --git a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/Rss20.java b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/Rss20.java new file mode 100644 index 000000000..a49cd16dd --- /dev/null +++ b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/Rss20.java @@ -0,0 +1,148 @@ +package de.danoeh.antennapod.parser.feed.namespace; + +import android.text.TextUtils; +import android.util.Log; + +import de.danoeh.antennapod.parser.feed.HandlerState; +import de.danoeh.antennapod.parser.feed.element.SyndElement; +import de.danoeh.antennapod.parser.feed.util.DateUtils; +import de.danoeh.antennapod.parser.feed.util.SyndStringUtils; +import org.xml.sax.Attributes; + +import de.danoeh.antennapod.model.feed.FeedItem; +import de.danoeh.antennapod.model.feed.FeedMedia; +import de.danoeh.antennapod.parser.feed.util.SyndTypeUtils; + +import java.util.Locale; + +/** + * SAX-Parser for 
reading RSS-Feeds. + */ +public class Rss20 extends Namespace { + + private static final String TAG = "NSRSS20"; + + public static final String CHANNEL = "channel"; + public static final String ITEM = "item"; + private static final String GUID = "guid"; + private static final String TITLE = "title"; + private static final String LINK = "link"; + private static final String DESCR = "description"; + private static final String PUBDATE = "pubDate"; + private static final String ENCLOSURE = "enclosure"; + private static final String IMAGE = "image"; + private static final String URL = "url"; + private static final String LANGUAGE = "language"; + + private static final String ENC_URL = "url"; + private static final String ENC_LEN = "length"; + private static final String ENC_TYPE = "type"; + + @Override + public SyndElement handleElementStart(String localName, HandlerState state, + Attributes attributes) { + if (ITEM.equals(localName)) { + state.setCurrentItem(new FeedItem()); + state.getItems().add(state.getCurrentItem()); + state.getCurrentItem().setFeed(state.getFeed()); + + } else if (ENCLOSURE.equals(localName)) { + String type = attributes.getValue(ENC_TYPE); + String url = attributes.getValue(ENC_URL); + + boolean validType = SyndTypeUtils.enclosureTypeValid(type); + if (!validType) { + type = SyndTypeUtils.getMimeTypeFromUrl(url); + validType = SyndTypeUtils.enclosureTypeValid(type); + } + + boolean validUrl = !TextUtils.isEmpty(url); + if (state.getCurrentItem() != null && state.getCurrentItem().getMedia() == null + && validType && validUrl) { + long size = 0; + try { + size = Long.parseLong(attributes.getValue(ENC_LEN)); + if (size < 16384) { + // less than 16kb is suspicious, check manually + size = 0; + } + } catch (NumberFormatException e) { + Log.d(TAG, "Length attribute could not be parsed."); + } + FeedMedia media = new FeedMedia(state.getCurrentItem(), url, size, type); + state.getCurrentItem().setMedia(media); + } + + } + return new 
SyndElement(localName, this); + } + + @Override + public void handleElementEnd(String localName, HandlerState state) { + if (ITEM.equals(localName)) { + if (state.getCurrentItem() != null) { + FeedItem currentItem = state.getCurrentItem(); + // the title tag is optional in RSS 2.0. The description is used + // as a title if the item has no title-tag. + if (currentItem.getTitle() == null) { + currentItem.setTitle(currentItem.getDescription()); + } + + if (state.getTempObjects().containsKey(Itunes.DURATION)) { + if (currentItem.hasMedia()) { + Integer duration = (Integer) state.getTempObjects().get(Itunes.DURATION); + currentItem.getMedia().setDuration(duration); + } + state.getTempObjects().remove(Itunes.DURATION); + } + } + state.setCurrentItem(null); + } else if (state.getTagstack().size() >= 2 && state.getContentBuf() != null) { + String contentRaw = state.getContentBuf().toString(); + String content = SyndStringUtils.trimAllWhitespace(contentRaw); + SyndElement topElement = state.getTagstack().peek(); + String top = topElement.getName(); + SyndElement secondElement = state.getSecondTag(); + String second = secondElement.getName(); + String third = null; + if (state.getTagstack().size() >= 3) { + third = state.getThirdTag().getName(); + } + if (GUID.equals(top) && ITEM.equals(second)) { + // some feed creators include an empty or non-standard guid-element in their feed, + // which should be ignored + if (!TextUtils.isEmpty(contentRaw) && state.getCurrentItem() != null) { + state.getCurrentItem().setItemIdentifier(contentRaw); + } + } else if (TITLE.equals(top)) { + if (ITEM.equals(second) && state.getCurrentItem() != null) { + state.getCurrentItem().setTitle(content); + } else if (CHANNEL.equals(second) && state.getFeed() != null) { + state.getFeed().setTitle(content); + } + } else if (LINK.equals(top)) { + if (CHANNEL.equals(second) && state.getFeed() != null) { + state.getFeed().setLink(content); + } else if (ITEM.equals(second) && state.getCurrentItem() != 
null) { + state.getCurrentItem().setLink(content); + } + } else if (PUBDATE.equals(top) && ITEM.equals(second) && state.getCurrentItem() != null) { + state.getCurrentItem().setPubDate(DateUtils.parseOrNullIfFuture(content)); + } else if (URL.equals(top) && IMAGE.equals(second) && CHANNEL.equals(third)) { + // prefer itunes:image + if (state.getFeed() != null && state.getFeed().getImageUrl() == null) { + state.getFeed().setImageUrl(content); + } + } else if (DESCR.equals(localName)) { + if (CHANNEL.equals(second) && state.getFeed() != null) { + state.getFeed().setDescription(content); + } else if (ITEM.equals(second) && state.getCurrentItem() != null) { + state.getCurrentItem().setDescriptionIfLonger(content); + } + } else if (LANGUAGE.equals(localName) && state.getFeed() != null) { + state.getFeed().setLanguage(content.toLowerCase(Locale.US)); + } + } + } + +} diff --git a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/SimpleChapters.java b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/SimpleChapters.java new file mode 100644 index 000000000..e1912ed45 --- /dev/null +++ b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/namespace/SimpleChapters.java @@ -0,0 +1,54 @@ +package de.danoeh.antennapod.parser.feed.namespace; + +import android.util.Log; + +import de.danoeh.antennapod.parser.feed.HandlerState; +import de.danoeh.antennapod.parser.feed.element.SimpleChapter; +import de.danoeh.antennapod.parser.feed.element.SyndElement; +import de.danoeh.antennapod.parser.feed.util.DateUtils; +import org.xml.sax.Attributes; + +import java.util.ArrayList; + +import de.danoeh.antennapod.model.feed.FeedItem; + +public class SimpleChapters extends Namespace { + private static final String TAG = "NSSimpleChapters"; + + public static final String NSTAG = "psc|sc"; + public static final String NSURI = "http://podlove.org/simple-chapters"; + + private static final String CHAPTERS = "chapters"; + private static final String CHAPTER 
= "chapter"; + private static final String START = "start"; + private static final String TITLE = "title"; + private static final String HREF = "href"; + private static final String IMAGE = "image"; + + @Override + public SyndElement handleElementStart(String localName, HandlerState state, Attributes attributes) { + FeedItem currentItem = state.getCurrentItem(); + if (currentItem != null) { + if (localName.equals(CHAPTERS)) { + currentItem.setChapters(new ArrayList<>()); + } else if (localName.equals(CHAPTER)) { + try { + long start = DateUtils.parseTimeString(attributes.getValue(START)); + String title = attributes.getValue(TITLE); + String link = attributes.getValue(HREF); + String imageUrl = attributes.getValue(IMAGE); + SimpleChapter chapter = new SimpleChapter(start, title, link, imageUrl); + currentItem.getChapters().add(chapter); + } catch (NumberFormatException e) { + Log.e(TAG, "Unable to read chapter", e); + } + } + } + return new SyndElement(localName, this); + } + + @Override + public void handleElementEnd(String localName, HandlerState state) { + } + +} diff --git a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/util/DateUtils.java b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/util/DateUtils.java new file mode 100644 index 000000000..9b7f48769 --- /dev/null +++ b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/util/DateUtils.java @@ -0,0 +1,163 @@ +package de.danoeh.antennapod.parser.feed.util; + +import android.util.Log; + +import androidx.annotation.Nullable; +import org.apache.commons.lang3.StringUtils; + +import java.text.ParsePosition; +import java.text.SimpleDateFormat; +import java.util.Date; +import java.util.Locale; +import java.util.TimeZone; + +/** + * Parses several date formats. 
/**
 * Parses a date string in one of several formats commonly found in feeds.
 *
 * <p>The input is normalised first (separators, time zone notation, month abbreviation,
 * fractional seconds) and then tried against a fixed list of patterns. A pattern is
 * accepted only if it consumes the whole normalised string. If nothing matches and the
 * string starts with a word followed by a comma, everything after the first comma is
 * parsed again.
 *
 * @param input the date string, must not be null
 * @return the parsed date, or null if no pattern matched
 * @throws IllegalArgumentException if {@code input} is null
 */
public static Date parse(final String input) {
    if (input == null) {
        throw new IllegalArgumentException("Date must not be null");
    }
    // trim, turn '/' into '-', and collapse runs of spaces into a single space
    String date = input.trim().replace('/', '-').replaceAll("( ){2,}+", " ");

    // remove colon from timezone to avoid differences between Android and Java SimpleDateFormat
    date = date.replaceAll("([+-]\\d\\d):(\\d\\d)$", "$1$2");

    // CEST is widely used but not in the "ISO 8601 Time zone" list. Let's hack around.
    date = date.replaceAll("CEST$", "+0200");
    date = date.replaceAll("CET$", "+0100");

    // some generators use "Sept" for September
    date = date.replaceAll("\\bSept\\b", "Sep");

    // if datetime is more precise than seconds, make sure the value is in ms
    if (date.contains(".")) {
        // start: index of the first '.', current: index just past the digits following it
        int start = date.indexOf('.');
        int current = start + 1;
        while (current < date.length() && Character.isDigit(date.charAt(current))) {
            current++;
        }
        // NOTE(review): in both cases below the suffix is kept only if more than one
        // character follows the digits; a single trailing character is dropped. Confirm
        // whether that is intended before changing the comparison.
        // more than three fractional digits: discard the digits after the third
        if (current - start > 4) {
            if (current < date.length() - 1) {
                date = date.substring(0, start + 4) + date.substring(current);
            } else {
                date = date.substring(0, start + 4);
            }
            // fewer than three fractional digits: pad with zeros to have a consistent format
        } else if (current - start < 4) {
            if (current < date.length() - 1) {
                date = date.substring(0, current) + StringUtils.repeat("0", 4 - (current - start))
                        + date.substring(current);
            } else {
                date = date.substring(0, current) + StringUtils.repeat("0", 4 - (current - start));
            }
        }
    }
    // Patterns are tried in this order; the first one that consumes the whole string wins.
    final String[] patterns = {
            "dd MMM yy HH:mm:ss Z",
            "dd MMM yy HH:mm Z",
            "EEE, dd MMM yyyy HH:mm:ss Z",
            "EEE, dd MMM yyyy HH:mm:ss",
            "EEE, dd MMMM yyyy HH:mm:ss Z",
            "EEE, dd MMMM yyyy HH:mm:ss",
            "EEEE, dd MMM yyyy HH:mm:ss Z",
            "EEEE, dd MMM yy HH:mm:ss Z",
            "EEEE, dd MMM yyyy HH:mm:ss",
            "EEEE, dd MMM yy HH:mm:ss",
            "EEE MMM d HH:mm:ss yyyy",
            "EEE, dd MMM yyyy HH:mm Z",
            "EEE, dd MMM yyyy HH:mm",
            "EEE, dd MMMM yyyy HH:mm Z",
            "EEE, dd MMMM yyyy HH:mm",
            "EEEE, dd MMM yyyy HH:mm Z",
            "EEEE, dd MMM yy HH:mm Z",
            "EEEE, dd MMM yyyy HH:mm",
            "EEEE, dd MMM yy HH:mm",
            "EEE MMM d HH:mm yyyy",
            "yyyy-MM-dd'T'HH:mm:ss",
            "yyyy-MM-dd'T'HH:mm:ss.SSS Z",
            "yyyy-MM-dd'T'HH:mm:ss.SSS",
            "yyyy-MM-dd'T'HH:mm:ssZ",
            "yyyy-MM-dd'T'HH:mm:ss'Z'",
            "yyyy-MM-dd'T'HH:mm:ss.SSSZ",
            "yyyy-MM-ddZ",
            "yyyy-MM-dd",
            "EEE d MMM yyyy HH:mm:ss 'GMT'Z (z)"
    };

    // One strict (non-lenient) formatter is reused for all patterns; it uses the US locale
    // and the class-level default time zone.
    SimpleDateFormat parser = new SimpleDateFormat("", Locale.US);
    parser.setLenient(false);
    parser.setTimeZone(defaultTimezone);

    ParsePosition pos = new ParsePosition(0);
    for (String pattern : patterns) {
        parser.applyPattern(pattern);
        pos.setIndex(0);
        try {
            Date result = parser.parse(date, pos);
            // reject partial matches: the pattern has to consume the whole string
            if (result != null && pos.getIndex() == date.length()) {
                return result;
            }
        } catch (Exception e) {
            Log.e(TAG, Log.getStackTraceString(e));
        }
    }

    // if date string starts with a weekday, try parsing date string without it
    if (date.matches("^\\w+, .*$")) {
        return parse(date.substring(date.indexOf(',') + 1));
    }

    Log.d(TAG, "Could not parse date string \"" + input + "\" [" + date + "]");
    return null;
}
/**
 * Parses duration strings of the form {@code [[HH:]MM:]SS[.fraction]} into milliseconds.
 */
public class DurationParser {
    /**
     * Converts a duration string to milliseconds.
     *
     * <p>Accepted forms are {@code SS}, {@code MM:SS} and {@code HH:MM:SS}; the seconds part
     * may have a decimal fraction. Surrounding whitespace is ignored.
     *
     * @param durationStr the duration to parse
     * @return the duration in milliseconds
     * @throws NumberFormatException if the string is null, has more than three segments or
     *                               contains a segment that is not a number
     */
    public static long inMillis(String durationStr) throws NumberFormatException {
        if (durationStr == null) {
            throw new NumberFormatException("Duration must not be null");
        }
        String[] parts = durationStr.trim().split(":");

        if (parts.length == 1) {
            return toMillis(parts[0]);
        } else if (parts.length == 2) {
            return toMillis("0", parts[0], parts[1]);
        } else if (parts.length == 3) {
            return toMillis(parts[0], parts[1], parts[2]);
        } else {
            throw new NumberFormatException("Unsupported duration format: \"" + durationStr + "\"");
        }
    }

    /** Combines separately given hours, minutes and seconds into milliseconds. */
    private static long toMillis(String hours, String minutes, String seconds) {
        return HOURS.toMillis(Long.parseLong(hours))
                + MINUTES.toMillis(Long.parseLong(minutes))
                + toMillis(seconds);
    }

    /** Converts a seconds value, optionally with a decimal fraction, into milliseconds. */
    private static long toMillis(String seconds) {
        if (seconds.contains(".")) {
            // Rounding a double avoids the off-by-one results that float arithmetic with
            // truncation produced (e.g. "2.3" became 2299 ms instead of 2300 ms).
            return Math.round(Double.parseDouble(seconds) * 1000.0);
        } else {
            return SECONDS.toMillis(Long.parseLong(seconds));
        }
    }
}
/**
 * String helpers used while parsing feeds.
 */
public class SyndStringUtils {
    /**
     * Matches either the run of whitespace at the very beginning or the run at the very end
     * of a string. Compiled once because trimming runs for many elements of every feed.
     */
    private static final java.util.regex.Pattern EDGE_WHITESPACE =
            java.util.regex.Pattern.compile("(^\\s*)|(\\s*$)");

    private SyndStringUtils() {

    }

    /**
     * Removes all leading and trailing characters that the regular expression class
     * {@code \s} treats as whitespace. Whitespace inside the string is left untouched.
     *
     * <p>NOTE(review): unlike {@link String#trim()}, the set of removed characters depends
     * on the platform's definition of {@code \s} - confirm before replacing one with the
     * other.
     *
     * @param string the text to trim, must not be null
     * @return the text without leading and trailing whitespace
     */
    public static String trimAllWhitespace(String string) {
        return EDGE_WHITESPACE.matcher(string).replaceAll("");
    }
}
+ */ + public static String getMimeTypeFromUrl(String url) { + if (url == null) { + return null; + } + String extension = FilenameUtils.getExtension(url); + return MimeTypeMap.getSingleton().getMimeTypeFromExtension(extension); + } +} diff --git a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/util/TypeGetter.java b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/util/TypeGetter.java new file mode 100644 index 000000000..12834f94f --- /dev/null +++ b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/util/TypeGetter.java @@ -0,0 +1,121 @@ +package de.danoeh.antennapod.parser.feed.util; + +import android.util.Log; + +import de.danoeh.antennapod.parser.feed.UnsupportedFeedtypeException; +import org.apache.commons.io.input.XmlStreamReader; +import org.jsoup.Jsoup; +import org.xmlpull.v1.XmlPullParser; +import org.xmlpull.v1.XmlPullParserException; +import org.xmlpull.v1.XmlPullParserFactory; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.Reader; + +import de.danoeh.antennapod.model.feed.Feed; + +/** Gets the type of a specific feed by reading the root element. 
/**
 * Determines the feed type by looking at the first start tag of the downloaded feed file.
 *
 * <p>For Atom and RSS 2.0 documents the type is also stored in the feed; for Atom an
 * {@code xml:lang} attribute on the root element is stored as the feed language.
 *
 * @param feed the feed whose local file should be inspected
 * @return the recognised type
 * @throws UnsupportedFeedtypeException if the feed has no file, the file cannot be read or
 *                                      parsed, or its root element is not a supported
 *                                      feed root
 */
public Type getType(Feed feed) throws UnsupportedFeedtypeException {
    XmlPullParserFactory factory;
    if (feed.getFile_url() != null) {
        Reader reader = null;
        try {
            factory = XmlPullParserFactory.newInstance();
            factory.setNamespaceAware(true);
            XmlPullParser xpp = factory.newPullParser();
            // NOTE(review): createReader returns null if the file cannot be opened;
            // confirm how the pull parser behaves when given a null reader.
            reader = createReader(feed);
            xpp.setInput(reader);
            int eventType = xpp.getEventType();

            // Skip ahead to the first start tag; every branch below returns or throws,
            // so only the root element is ever examined.
            while (eventType != XmlPullParser.END_DOCUMENT) {
                if (eventType == XmlPullParser.START_TAG) {
                    String tag = xpp.getName();
                    switch (tag) {
                        case ATOM_ROOT:
                            feed.setType(Feed.TYPE_ATOM1);
                            Log.d(TAG, "Recognized type Atom");

                            String strLang = xpp.getAttributeValue("http://www.w3.org/XML/1998/namespace", "lang");
                            if (strLang != null) {
                                feed.setLanguage(strLang);
                            }

                            return Type.ATOM;
                        case RSS_ROOT:
                            String strVersion = xpp.getAttributeValue(null, "version");
                            if (strVersion == null) {
                                // no version attribute: treated as RSS 2.0
                                feed.setType(Feed.TYPE_RSS2);
                                Log.d(TAG, "Assuming type RSS 2.0");
                                return Type.RSS20;
                            } else if (strVersion.equals("2.0")) {
                                feed.setType(Feed.TYPE_RSS2);
                                Log.d(TAG, "Recognized type RSS 2.0");
                                return Type.RSS20;
                            } else if (strVersion.equals("0.91") || strVersion.equals("0.92")) {
                                // the feed's type field is not updated in this branch
                                Log.d(TAG, "Recognized type RSS 0.91/0.92");
                                return Type.RSS091;
                            }
                            throw new UnsupportedFeedtypeException("Unsupported rss version");
                        default:
                            Log.d(TAG, "Type is invalid");
                            throw new UnsupportedFeedtypeException(Type.INVALID, tag);
                    }
                } else {
                    eventType = xpp.next();
                }
            }
        } catch (XmlPullParserException e) {
            e.printStackTrace();
            // XML document might actually be a HTML document -> try to parse as HTML
            String rootElement = null;
            try {
                if (Jsoup.parse(new File(feed.getFile_url()), null) != null) {
                    rootElement = "html";
                }
            } catch (IOException e1) {
                e1.printStackTrace();
            }
            throw new UnsupportedFeedtypeException(Type.INVALID, rootElement);

        } catch (IOException e) {
            // falls through to the generic "invalid type" exception at the end
            e.printStackTrace();
        } finally {
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
    // reached if there is no file, no start tag was found, or reading failed
    Log.d(TAG, "Type is invalid");
    throw new UnsupportedFeedtypeException(Type.INVALID);
}
+ */ +@RunWith(RobolectricTestRunner.class) +public class AtomTextTest { + + private static final String[][] TEST_DATA = { + {">", ">"}, + {">", ">"}, + {"<Français>", "<Français>"}, + {"ßÄÖÜ", "ßÄÖÜ"}, + {""", "\""}, + {"ß", "ß"}, + {"’", "’"}, + {"‰", "‰"}, + {"€", "€"} + }; + + @Test + public void testProcessingHtml() { + for (String[] pair : TEST_DATA) { + final AtomText atomText = new AtomText("", new Atom(), AtomText.TYPE_HTML); + atomText.setContent(pair[0]); + assertEquals(pair[1], atomText.getProcessedContent()); + } + } +} diff --git a/parser/feed/src/test/java/de/danoeh/antennapod/parser/feed/element/namespace/AtomParserTest.java b/parser/feed/src/test/java/de/danoeh/antennapod/parser/feed/element/namespace/AtomParserTest.java new file mode 100644 index 000000000..ba8aaf4f0 --- /dev/null +++ b/parser/feed/src/test/java/de/danoeh/antennapod/parser/feed/element/namespace/AtomParserTest.java @@ -0,0 +1,98 @@ +package de.danoeh.antennapod.parser.feed.element.namespace; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.robolectric.RobolectricTestRunner; + +import java.io.File; +import java.util.Date; + +import de.danoeh.antennapod.model.feed.Feed; +import de.danoeh.antennapod.model.feed.FeedItem; +import de.danoeh.antennapod.model.feed.FeedMedia; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +/** + * Tests for Atom feeds in FeedHandler. 
+ */ +@RunWith(RobolectricTestRunner.class) +public class AtomParserTest { + + @Test + public void testAtomBasic() throws Exception { + File feedFile = FeedParserTestHelper.getFeedFile("feed-atom-testAtomBasic.xml"); + Feed feed = FeedParserTestHelper.runFeedParser(feedFile); + assertEquals(Feed.TYPE_ATOM1, feed.getType()); + assertEquals("title", feed.getTitle()); + assertEquals("http://example.com/feed", feed.getFeedIdentifier()); + assertEquals("http://example.com", feed.getLink()); + assertEquals("This is the description", feed.getDescription()); + assertEquals("http://example.com/payment", feed.getPaymentLinks().get(0).url); + assertEquals("http://example.com/picture", feed.getImageUrl()); + assertEquals(10, feed.getItems().size()); + for (int i = 0; i < feed.getItems().size(); i++) { + FeedItem item = feed.getItems().get(i); + assertEquals("http://example.com/item-" + i, item.getItemIdentifier()); + assertEquals("item-" + i, item.getTitle()); + assertNull(item.getDescription()); + assertEquals("http://example.com/items/" + i, item.getLink()); + assertEquals(new Date(i * 60000), item.getPubDate()); + assertNull(item.getPaymentLink()); + assertEquals("http://example.com/picture", item.getImageLocation()); + // media + assertTrue(item.hasMedia()); + FeedMedia media = item.getMedia(); + //noinspection ConstantConditions + assertEquals("http://example.com/media-" + i, media.getDownload_url()); + assertEquals(1024 * 1024, media.getSize()); + assertEquals("audio/mp3", media.getMime_type()); + // chapters + assertNull(item.getChapters()); + } + } + + @Test + public void testEmptyRelLinks() throws Exception { + File feedFile = FeedParserTestHelper.getFeedFile("feed-atom-testEmptyRelLinks.xml"); + Feed feed = FeedParserTestHelper.runFeedParser(feedFile); + assertEquals(Feed.TYPE_ATOM1, feed.getType()); + assertEquals("title", feed.getTitle()); + assertEquals("http://example.com/feed", feed.getFeedIdentifier()); + assertEquals("http://example.com", feed.getLink()); + 
assertEquals("This is the description", feed.getDescription()); + assertNull(feed.getPaymentLinks()); + assertEquals("http://example.com/picture", feed.getImageUrl()); + assertEquals(1, feed.getItems().size()); + + // feed entry + FeedItem item = feed.getItems().get(0); + assertEquals("http://example.com/item-0", item.getItemIdentifier()); + assertEquals("item-0", item.getTitle()); + assertNull(item.getDescription()); + assertEquals("http://example.com/items/0", item.getLink()); + assertEquals(new Date(0), item.getPubDate()); + assertNull(item.getPaymentLink()); + assertEquals("http://example.com/picture", item.getImageLocation()); + // media + assertFalse(item.hasMedia()); + // chapters + assertNull(item.getChapters()); + } + + @Test + public void testLogoWithWhitespace() throws Exception { + File feedFile = FeedParserTestHelper.getFeedFile("feed-atom-testLogoWithWhitespace.xml"); + Feed feed = FeedParserTestHelper.runFeedParser(feedFile); + assertEquals("title", feed.getTitle()); + assertEquals("http://example.com/feed", feed.getFeedIdentifier()); + assertEquals("http://example.com", feed.getLink()); + assertEquals("This is the description", feed.getDescription()); + assertEquals("http://example.com/payment", feed.getPaymentLinks().get(0).url); + assertEquals("https://example.com/image.png", feed.getImageUrl()); + assertEquals(0, feed.getItems().size()); + } +} diff --git a/parser/feed/src/test/java/de/danoeh/antennapod/parser/feed/element/namespace/FeedParserTestHelper.java b/parser/feed/src/test/java/de/danoeh/antennapod/parser/feed/element/namespace/FeedParserTestHelper.java new file mode 100644 index 000000000..5cc52d8cb --- /dev/null +++ b/parser/feed/src/test/java/de/danoeh/antennapod/parser/feed/element/namespace/FeedParserTestHelper.java @@ -0,0 +1,36 @@ +package de.danoeh.antennapod.parser.feed.element.namespace; + +import androidx.annotation.NonNull; + +import java.io.File; + +import de.danoeh.antennapod.model.feed.Feed; +import 
de.danoeh.antennapod.parser.feed.FeedHandler; + +/** + * Tests for FeedHandler. + */ +public abstract class FeedParserTestHelper { + + /** + * Returns the File object for a file in the resources folder. + */ + @NonNull + static File getFeedFile(@NonNull String fileName) { + //noinspection ConstantConditions + return new File(FeedParserTestHelper.class.getClassLoader().getResource(fileName).getFile()); + } + + /** + * Runs the feed parser on the given file. + */ + @NonNull + static Feed runFeedParser(@NonNull File feedFile) throws Exception { + FeedHandler handler = new FeedHandler(); + Feed parsedFeed = new Feed("http://example.com/feed", null); + parsedFeed.setFile_url(feedFile.getAbsolutePath()); + parsedFeed.setDownloaded(true); + handler.parseFeed(parsedFeed); + return parsedFeed; + } +} diff --git a/parser/feed/src/test/java/de/danoeh/antennapod/parser/feed/element/namespace/RssParserTest.java b/parser/feed/src/test/java/de/danoeh/antennapod/parser/feed/element/namespace/RssParserTest.java new file mode 100644 index 000000000..8f8942d7b --- /dev/null +++ b/parser/feed/src/test/java/de/danoeh/antennapod/parser/feed/element/namespace/RssParserTest.java @@ -0,0 +1,99 @@ +package de.danoeh.antennapod.parser.feed.element.namespace; + +import android.text.TextUtils; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.robolectric.RobolectricTestRunner; + +import java.io.File; +import java.util.Date; + +import de.danoeh.antennapod.model.feed.Feed; +import de.danoeh.antennapod.model.feed.FeedItem; +import de.danoeh.antennapod.model.feed.FeedMedia; +import de.danoeh.antennapod.model.playback.MediaType; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +/** + * Tests for RSS feeds in FeedHandler. 
+ */ +@RunWith(RobolectricTestRunner.class) +public class RssParserTest { + + @Test + public void testRss2Basic() throws Exception { + File feedFile = FeedParserTestHelper.getFeedFile("feed-rss-testRss2Basic.xml"); + Feed feed = FeedParserTestHelper.runFeedParser(feedFile); + assertEquals(Feed.TYPE_RSS2, feed.getType()); + assertEquals("title", feed.getTitle()); + assertEquals("en", feed.getLanguage()); + assertEquals("http://example.com", feed.getLink()); + assertEquals("This is the description", feed.getDescription()); + assertEquals("http://example.com/payment", feed.getPaymentLinks().get(0).url); + assertEquals("http://example.com/picture", feed.getImageUrl()); + assertEquals(10, feed.getItems().size()); + for (int i = 0; i < feed.getItems().size(); i++) { + FeedItem item = feed.getItems().get(i); + assertEquals("http://example.com/item-" + i, item.getItemIdentifier()); + assertEquals("item-" + i, item.getTitle()); + assertNull(item.getDescription()); + assertEquals("http://example.com/items/" + i, item.getLink()); + assertEquals(new Date(i * 60000), item.getPubDate()); + assertNull(item.getPaymentLink()); + assertEquals("http://example.com/picture", item.getImageLocation()); + // media + assertTrue(item.hasMedia()); + FeedMedia media = item.getMedia(); + //noinspection ConstantConditions + assertEquals("http://example.com/media-" + i, media.getDownload_url()); + assertEquals(1024 * 1024, media.getSize()); + assertEquals("audio/mp3", media.getMime_type()); + // chapters + assertNull(item.getChapters()); + } + } + + @Test + public void testImageWithWhitespace() throws Exception { + File feedFile = FeedParserTestHelper.getFeedFile("feed-rss-testImageWithWhitespace.xml"); + Feed feed = FeedParserTestHelper.runFeedParser(feedFile); + assertEquals("title", feed.getTitle()); + assertEquals("http://example.com", feed.getLink()); + assertEquals("This is the description", feed.getDescription()); + assertEquals("http://example.com/payment", 
feed.getPaymentLinks().get(0).url); + assertEquals("https://example.com/image.png", feed.getImageUrl()); + assertEquals(0, feed.getItems().size()); + } + + @Test + public void testMediaContentMime() throws Exception { + File feedFile = FeedParserTestHelper.getFeedFile("feed-rss-testMediaContentMime.xml"); + Feed feed = FeedParserTestHelper.runFeedParser(feedFile); + assertEquals("title", feed.getTitle()); + assertEquals("http://example.com", feed.getLink()); + assertEquals("This is the description", feed.getDescription()); + assertEquals("http://example.com/payment", feed.getPaymentLinks().get(0).url); + assertNull(feed.getImageUrl()); + assertEquals(1, feed.getItems().size()); + FeedItem feedItem = feed.getItems().get(0); + //noinspection ConstantConditions + assertEquals(MediaType.VIDEO, feedItem.getMedia().getMediaType()); + assertEquals("https://www.example.com/file.mp4", feedItem.getMedia().getDownload_url()); + } + + @Test + public void testMultipleFundingTags() throws Exception { + File feedFile = FeedParserTestHelper.getFeedFile("feed-rss-testMultipleFundingTags.xml"); + Feed feed = FeedParserTestHelper.runFeedParser(feedFile); + assertEquals(3, feed.getPaymentLinks().size()); + assertEquals("Text 1", feed.getPaymentLinks().get(0).content); + assertEquals("https://example.com/funding1", feed.getPaymentLinks().get(0).url); + assertEquals("Text 2", feed.getPaymentLinks().get(1).content); + assertEquals("https://example.com/funding2", feed.getPaymentLinks().get(1).url); + assertTrue(TextUtils.isEmpty(feed.getPaymentLinks().get(2).content)); + assertEquals("https://example.com/funding3", feed.getPaymentLinks().get(2).url); + } +} diff --git a/parser/feed/src/test/java/de/danoeh/antennapod/parser/feed/element/util/DateUtilsTest.java b/parser/feed/src/test/java/de/danoeh/antennapod/parser/feed/element/util/DateUtilsTest.java new file mode 100644 index 000000000..1f039d703 --- /dev/null +++ 
b/parser/feed/src/test/java/de/danoeh/antennapod/parser/feed/element/util/DateUtilsTest.java @@ -0,0 +1,175 @@ +package de.danoeh.antennapod.parser.feed.element.util; + +import de.danoeh.antennapod.parser.feed.util.DateUtils; +import org.junit.Test; + +import java.util.Calendar; +import java.util.Date; +import java.util.GregorianCalendar; +import java.util.TimeZone; + +import static org.junit.Assert.assertEquals; + +/** + * Unit test for {@link DateUtils}. + */ +public class DateUtilsTest { + + @Test + public void testParseDateWithMicroseconds() { + GregorianCalendar exp = new GregorianCalendar(2015, 2, 28, 13, 31, 4); + exp.setTimeZone(TimeZone.getTimeZone("UTC")); + Date expected = new Date(exp.getTimeInMillis() + 963); + Date actual = DateUtils.parse("2015-03-28T13:31:04.963870"); + assertEquals(expected, actual); + } + + @Test + public void testParseDateWithCentiseconds() { + GregorianCalendar exp = new GregorianCalendar(2015, 2, 28, 13, 31, 4); + exp.setTimeZone(TimeZone.getTimeZone("UTC")); + Date expected = new Date(exp.getTimeInMillis() + 960); + Date actual = DateUtils.parse("2015-03-28T13:31:04.96"); + assertEquals(expected, actual); + } + + @Test + public void testParseDateWithDeciseconds() { + GregorianCalendar exp = new GregorianCalendar(2015, 2, 28, 13, 31, 4); + exp.setTimeZone(TimeZone.getTimeZone("UTC")); + Date expected = new Date(exp.getTimeInMillis() + 900); + Date actual = DateUtils.parse("2015-03-28T13:31:04.9"); + assertEquals(expected.getTime() / 1000, actual.getTime() / 1000); + assertEquals(900, actual.getTime() % 1000); + } + + @Test + public void testParseDateWithMicrosecondsAndTimezone() { + GregorianCalendar exp = new GregorianCalendar(2015, 2, 28, 6, 31, 4); + exp.setTimeZone(TimeZone.getTimeZone("UTC")); + Date expected = new Date(exp.getTimeInMillis() + 963); + Date actual = DateUtils.parse("2015-03-28T13:31:04.963870 +0700"); + assertEquals(expected, actual); + } + + @Test + public void testParseDateWithCentisecondsAndTimezone() { 
+ GregorianCalendar exp = new GregorianCalendar(2015, 2, 28, 6, 31, 4); + exp.setTimeZone(TimeZone.getTimeZone("UTC")); + Date expected = new Date(exp.getTimeInMillis() + 960); + Date actual = DateUtils.parse("2015-03-28T13:31:04.96 +0700"); + assertEquals(expected, actual); + } + + @Test + public void testParseDateWithDecisecondsAndTimezone() { + GregorianCalendar exp = new GregorianCalendar(2015, 2, 28, 6, 31, 4); + exp.setTimeZone(TimeZone.getTimeZone("UTC")); + Date expected = new Date(exp.getTimeInMillis() + 900); + Date actual = DateUtils.parse("2015-03-28T13:31:04.9 +0700"); + assertEquals(expected.getTime() / 1000, actual.getTime() / 1000); + assertEquals(900, actual.getTime() % 1000); + } + + @Test + public void testParseDateWithTimezoneName() { + GregorianCalendar exp = new GregorianCalendar(2015, 2, 28, 6, 31, 4); + exp.setTimeZone(TimeZone.getTimeZone("UTC")); + Date expected = new Date(exp.getTimeInMillis()); + Date actual = DateUtils.parse("Sat, 28 Mar 2015 01:31:04 EST"); + assertEquals(expected, actual); + } + + @Test + public void testParseDateWithTimezoneName2() { + GregorianCalendar exp = new GregorianCalendar(2015, 2, 28, 6, 31, 0); + exp.setTimeZone(TimeZone.getTimeZone("UTC")); + Date expected = new Date(exp.getTimeInMillis()); + Date actual = DateUtils.parse("Sat, 28 Mar 2015 01:31 EST"); + assertEquals(expected, actual); + } + + @Test + public void testParseDateWithTimeZoneOffset() { + GregorianCalendar exp = new GregorianCalendar(2015, 2, 28, 12, 16, 12); + exp.setTimeZone(TimeZone.getTimeZone("UTC")); + Date expected = new Date(exp.getTimeInMillis()); + Date actual = DateUtils.parse("Sat, 28 March 2015 08:16:12 -0400"); + assertEquals(expected, actual); + } + + @Test + public void testAsctime() { + GregorianCalendar exp = new GregorianCalendar(2011, 4, 25, 12, 33, 0); + exp.setTimeZone(TimeZone.getTimeZone("UTC")); + Date expected = new Date(exp.getTimeInMillis()); + Date actual = DateUtils.parse("Wed, 25 May 2011 12:33:00"); + 
assertEquals(expected, actual); + } + + @Test + public void testMultipleConsecutiveSpaces() { + GregorianCalendar exp = new GregorianCalendar(2010, 2, 23, 6, 6, 26); + exp.setTimeZone(TimeZone.getTimeZone("UTC")); + Date expected = new Date(exp.getTimeInMillis()); + Date actual = DateUtils.parse("Tue, 23 Mar 2010 01:06:26 -0500"); + assertEquals(expected, actual); + } + + @Test + public void testParseDateWithNoTimezonePadding() { + GregorianCalendar exp = new GregorianCalendar(2017, 1, 22, 22, 28, 0); + exp.setTimeZone(TimeZone.getTimeZone("UTC")); + Date expected = new Date(exp.getTimeInMillis() + 2); + Date actual = DateUtils.parse("2017-02-22T14:28:00.002-08:00"); + assertEquals(expected, actual); + } + + /** + * Requires Android platform. Root cause: {@link DateUtils} implementation makes + * use of ISO 8601 time zone, which does not work on standard JDK. + * + * @see #testParseDateWithNoTimezonePadding() + */ + @Test + public void testParseDateWithForCest() { + GregorianCalendar exp1 = new GregorianCalendar(2017, 0, 28, 22, 0, 0); + exp1.setTimeZone(TimeZone.getTimeZone("UTC")); + Date expected1 = new Date(exp1.getTimeInMillis()); + Date actual1 = DateUtils.parse("Sun, 29 Jan 2017 00:00:00 CEST"); + assertEquals(expected1, actual1); + + GregorianCalendar exp2 = new GregorianCalendar(2017, 0, 28, 23, 0, 0); + exp2.setTimeZone(TimeZone.getTimeZone("UTC")); + Date expected2 = new Date(exp2.getTimeInMillis()); + Date actual2 = DateUtils.parse("Sun, 29 Jan 2017 00:00:00 CET"); + assertEquals(expected2, actual2); + } + + @Test + public void testParseDateWithIncorrectWeekday() { + GregorianCalendar exp1 = new GregorianCalendar(2014, 9, 8, 9, 0, 0); + exp1.setTimeZone(TimeZone.getTimeZone("GMT")); + Date expected = new Date(exp1.getTimeInMillis()); + Date actual = DateUtils.parse("Thu, 8 Oct 2014 09:00:00 GMT"); // actually a Wednesday + assertEquals(expected, actual); + } + + @Test + public void testParseDateWithBadAbbreviation() { + GregorianCalendar exp1 = new 
GregorianCalendar(2014, 8, 8, 0, 0, 0); + exp1.setTimeZone(TimeZone.getTimeZone("GMT")); + Date expected = new Date(exp1.getTimeInMillis()); + Date actual = DateUtils.parse("Mon, 8 Sept 2014 00:00:00 GMT"); // should be Sep + assertEquals(expected, actual); + } + + @Test + public void testParseDateWithTwoTimezones() { + final GregorianCalendar exp1 = new GregorianCalendar(2015, Calendar.MARCH, 1, 1, 0, 0); + exp1.setTimeZone(TimeZone.getTimeZone("GMT-4")); + final Date expected = new Date(exp1.getTimeInMillis()); + final Date actual = DateUtils.parse("Sun 01 Mar 2015 01:00:00 GMT-0400 (EDT)"); + assertEquals(expected, actual); + } +} diff --git a/parser/feed/src/test/java/de/danoeh/antennapod/parser/feed/element/util/DurationParserTest.java b/parser/feed/src/test/java/de/danoeh/antennapod/parser/feed/element/util/DurationParserTest.java new file mode 100644 index 000000000..91d9ea5ed --- /dev/null +++ b/parser/feed/src/test/java/de/danoeh/antennapod/parser/feed/element/util/DurationParserTest.java @@ -0,0 +1,44 @@ +package de.danoeh.antennapod.parser.feed.element.util; + +import de.danoeh.antennapod.parser.feed.util.DurationParser; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class DurationParserTest { + private int milliseconds = 1; + private int seconds = 1000 * milliseconds; + private int minutes = 60 * seconds; + private int hours = 60 * minutes; + + @Test + public void testSecondDurationInMillis() { + long duration = DurationParser.inMillis("00:45"); + assertEquals(45 * seconds, duration); + } + + @Test + public void testSingleNumberDurationInMillis() { + int twoHoursInSeconds = 2 * 60 * 60; + long duration = DurationParser.inMillis(String.valueOf(twoHoursInSeconds)); + assertEquals(2 * hours, duration); + } + + @Test + public void testMinuteSecondDurationInMillis() { + long duration = DurationParser.inMillis("05:10"); + assertEquals(5 * minutes + 10 * seconds, duration); + } + + @Test + public void 
testHourMinuteSecondDurationInMillis() { + long duration = DurationParser.inMillis("02:15:45"); + assertEquals(2 * hours + 15 * minutes + 45 * seconds, duration); + } + + @Test + public void testSecondsWithMillisecondsInMillis() { + long duration = DurationParser.inMillis("00:00:00.123"); + assertEquals(123, duration); + } +} diff --git a/parser/feed/src/test/resources/feed-atom-testAtomBasic.xml b/parser/feed/src/test/resources/feed-atom-testAtomBasic.xml new file mode 100644 index 000000000..cefc4f979 --- /dev/null +++ b/parser/feed/src/test/resources/feed-atom-testAtomBasic.xml @@ -0,0 +1 @@ +<?xml version='1.0' encoding='UTF-8' ?><feed xmlns="http://www.w3.org/2005/Atom"><id>http://example.com/feed</id><title>title</title><link rel="alternate" href="http://example.com" /><subtitle>This is the description</subtitle><logo>http://example.com/picture</logo><link rel="payment" href="http://example.com/payment" type="text/html" /><entry><id>http://example.com/item-0</id><title>item-0</title><link rel="alternate" href="http://example.com/items/0" /><published>1970-01-01T00:00:00Z</published><link rel="enclosure" href="http://example.com/media-0" type="audio/mp3" length="1048576" /></entry><entry><id>http://example.com/item-1</id><title>item-1</title><link rel="alternate" href="http://example.com/items/1" /><published>1970-01-01T00:01:00Z</published><link rel="enclosure" href="http://example.com/media-1" type="audio/mp3" length="1048576" /></entry><entry><id>http://example.com/item-2</id><title>item-2</title><link rel="alternate" href="http://example.com/items/2" /><published>1970-01-01T00:02:00Z</published><link rel="enclosure" href="http://example.com/media-2" type="audio/mp3" length="1048576" /></entry><entry><id>http://example.com/item-3</id><title>item-3</title><link rel="alternate" href="http://example.com/items/3" /><published>1970-01-01T00:03:00Z</published><link rel="enclosure" href="http://example.com/media-3" type="audio/mp3" length="1048576" 
/></entry><entry><id>http://example.com/item-4</id><title>item-4</title><link rel="alternate" href="http://example.com/items/4" /><published>1970-01-01T00:04:00Z</published><link rel="enclosure" href="http://example.com/media-4" type="audio/mp3" length="1048576" /></entry><entry><id>http://example.com/item-5</id><title>item-5</title><link rel="alternate" href="http://example.com/items/5" /><published>1970-01-01T00:05:00Z</published><link rel="enclosure" href="http://example.com/media-5" type="audio/mp3" length="1048576" /></entry><entry><id>http://example.com/item-6</id><title>item-6</title><link rel="alternate" href="http://example.com/items/6" /><published>1970-01-01T00:06:00Z</published><link rel="enclosure" href="http://example.com/media-6" type="audio/mp3" length="1048576" /></entry><entry><id>http://example.com/item-7</id><title>item-7</title><link rel="alternate" href="http://example.com/items/7" /><published>1970-01-01T00:07:00Z</published><link rel="enclosure" href="http://example.com/media-7" type="audio/mp3" length="1048576" /></entry><entry><id>http://example.com/item-8</id><title>item-8</title><link rel="alternate" href="http://example.com/items/8" /><published>1970-01-01T00:08:00Z</published><link rel="enclosure" href="http://example.com/media-8" type="audio/mp3" length="1048576" /></entry><entry><id>http://example.com/item-9</id><title>item-9</title><link rel="alternate" href="http://example.com/items/9" /><published>1970-01-01T00:09:00Z</published><link rel="enclosure" href="http://example.com/media-9" type="audio/mp3" length="1048576" /></entry></feed>
\ No newline at end of file diff --git a/parser/feed/src/test/resources/feed-atom-testEmptyRelLinks.xml b/parser/feed/src/test/resources/feed-atom-testEmptyRelLinks.xml new file mode 100644 index 000000000..04c28ef67 --- /dev/null +++ b/parser/feed/src/test/resources/feed-atom-testEmptyRelLinks.xml @@ -0,0 +1,14 @@ +<?xml version="1.0" encoding="UTF-8"?> +<feed xmlns="http://www.w3.org/2005/Atom"> + <id>http://example.com/feed</id> + <title>title</title> + <link href="http://example.com" /> + <subtitle>This is the description</subtitle> + <logo>http://example.com/picture</logo> + <entry> + <id>http://example.com/item-0</id> + <title>item-0</title> + <link href="http://example.com/items/0" /> + <published>1970-01-01T00:00:00Z</published> + </entry> +</feed> diff --git a/parser/feed/src/test/resources/feed-atom-testLogoWithWhitespace.xml b/parser/feed/src/test/resources/feed-atom-testLogoWithWhitespace.xml new file mode 100644 index 000000000..f4886d56a --- /dev/null +++ b/parser/feed/src/test/resources/feed-atom-testLogoWithWhitespace.xml @@ -0,0 +1,2 @@ +<?xml version='1.0' encoding='UTF-8' ?><feed xmlns="http://www.w3.org/2005/Atom"><id>http://example.com/feed</id><title>title</title><link rel="alternate" href="http://example.com" /><subtitle>This is the description</subtitle><link rel="payment" href="http://example.com/payment" type="text/html" /><logo> https://example.com/image.png +</logo></feed>
\ No newline at end of file diff --git a/parser/feed/src/test/resources/feed-rss-testImageWithWhitespace.xml b/parser/feed/src/test/resources/feed-rss-testImageWithWhitespace.xml new file mode 100644 index 000000000..2be9401d2 --- /dev/null +++ b/parser/feed/src/test/resources/feed-rss-testImageWithWhitespace.xml @@ -0,0 +1,2 @@ +<?xml version='1.0' encoding='UTF-8' ?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>title</title><description>This is the description</description><link>http://example.com</link><language>en</language><atom:link rel="payment" href="http://example.com/payment" type="text/html" /><image><url> https://example.com/image.png +</url></image></channel></rss>
\ No newline at end of file diff --git a/parser/feed/src/test/resources/feed-rss-testMediaContentMime.xml b/parser/feed/src/test/resources/feed-rss-testMediaContentMime.xml new file mode 100644 index 000000000..a715abb37 --- /dev/null +++ b/parser/feed/src/test/resources/feed-rss-testMediaContentMime.xml @@ -0,0 +1 @@ +<?xml version='1.0' encoding='UTF-8' ?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>title</title><description>This is the description</description><link>http://example.com</link><language>en</language><atom:link rel="payment" href="http://example.com/payment" type="text/html" /><item xmlns:media="http://search.yahoo.com/mrss/"><media:content url="https://www.example.com/file.mp4" medium="video" /></item></channel></rss>
\ No newline at end of file diff --git a/parser/feed/src/test/resources/feed-rss-testMultipleFundingTags.xml b/parser/feed/src/test/resources/feed-rss-testMultipleFundingTags.xml new file mode 100644 index 000000000..2535bda32 --- /dev/null +++ b/parser/feed/src/test/resources/feed-rss-testMultipleFundingTags.xml @@ -0,0 +1,9 @@ +<?xml version='1.0' encoding='UTF-8' ?> +<rss version="2.0" xmlns:podcast="https://podcastindex.org/namespace/1.0"> + <channel> + <title>title</title> + </channel> + <podcast:funding url="https://example.com/funding1">Text 1</podcast:funding> + <podcast:funding url="https://example.com/funding2">Text 2</podcast:funding> + <podcast:funding url="https://example.com/funding3" /> +</rss> diff --git a/parser/feed/src/test/resources/feed-rss-testRss2Basic.xml b/parser/feed/src/test/resources/feed-rss-testRss2Basic.xml new file mode 100644 index 000000000..dd771b61a --- /dev/null +++ b/parser/feed/src/test/resources/feed-rss-testRss2Basic.xml @@ -0,0 +1 @@ +<?xml version='1.0' encoding='UTF-8' ?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom"><channel><title>title</title><description>This is the description</description><link>http://example.com</link><language>en</language><image><url>http://example.com/picture</url></image><atom:link rel="payment" href="http://example.com/payment" type="text/html" /><item><title>item-0</title><link>http://example.com/items/0</link><pubDate>01 Jan 70 01:00:00 +0100</pubDate><guid>http://example.com/item-0</guid><enclosure url="http://example.com/media-0" length="1048576" type="audio/mp3" /></item><item><title>item-1</title><link>http://example.com/items/1</link><pubDate>01 Jan 70 01:01:00 +0100</pubDate><guid>http://example.com/item-1</guid><enclosure url="http://example.com/media-1" length="1048576" type="audio/mp3" /></item><item><title>item-2</title><link>http://example.com/items/2</link><pubDate>01 Jan 70 01:02:00 +0100</pubDate><guid>http://example.com/item-2</guid><enclosure 
url="http://example.com/media-2" length="1048576" type="audio/mp3" /></item><item><title>item-3</title><link>http://example.com/items/3</link><pubDate>01 Jan 70 01:03:00 +0100</pubDate><guid>http://example.com/item-3</guid><enclosure url="http://example.com/media-3" length="1048576" type="audio/mp3" /></item><item><title>item-4</title><link>http://example.com/items/4</link><pubDate>01 Jan 70 01:04:00 +0100</pubDate><guid>http://example.com/item-4</guid><enclosure url="http://example.com/media-4" length="1048576" type="audio/mp3" /></item><item><title>item-5</title><link>http://example.com/items/5</link><pubDate>01 Jan 70 01:05:00 +0100</pubDate><guid>http://example.com/item-5</guid><enclosure url="http://example.com/media-5" length="1048576" type="audio/mp3" /></item><item><title>item-6</title><link>http://example.com/items/6</link><pubDate>01 Jan 70 01:06:00 +0100</pubDate><guid>http://example.com/item-6</guid><enclosure url="http://example.com/media-6" length="1048576" type="audio/mp3" /></item><item><title>item-7</title><link>http://example.com/items/7</link><pubDate>01 Jan 70 01:07:00 +0100</pubDate><guid>http://example.com/item-7</guid><enclosure url="http://example.com/media-7" length="1048576" type="audio/mp3" /></item><item><title>item-8</title><link>http://example.com/items/8</link><pubDate>01 Jan 70 01:08:00 +0100</pubDate><guid>http://example.com/item-8</guid><enclosure url="http://example.com/media-8" length="1048576" type="audio/mp3" /></item><item><title>item-9</title><link>http://example.com/items/9</link><pubDate>01 Jan 70 01:09:00 +0100</pubDate><guid>http://example.com/item-9</guid><enclosure url="http://example.com/media-9" length="1048576" type="audio/mp3" /></item></channel></rss>
\ No newline at end of file |