summaryrefslogtreecommitdiff
path: root/core/src/main/java
diff options
context:
space:
mode:
Diffstat (limited to 'core/src/main/java')
-rw-r--r--core/src/main/java/de/danoeh/antennapod/core/storage/DBTasks.java35
-rw-r--r--core/src/main/java/de/danoeh/antennapod/core/syndication/handler/FeedHandler.java88
2 files changed, 101 insertions, 22 deletions
diff --git a/core/src/main/java/de/danoeh/antennapod/core/storage/DBTasks.java b/core/src/main/java/de/danoeh/antennapod/core/storage/DBTasks.java
index 4ccd34e28..ee3cf31a1 100644
--- a/core/src/main/java/de/danoeh/antennapod/core/storage/DBTasks.java
+++ b/core/src/main/java/de/danoeh/antennapod/core/storage/DBTasks.java
@@ -334,11 +334,36 @@ public final class DBTasks {
}
/**
- * Get a FeedItem by its identifying value.
+ * Get a FeedItem by its identifying value, falling back to a matching stream URL or title + pubDate.
+ * For de-duplicating items that are not stored yet, see also FeedHandler.dedupItems
*/
- private static FeedItem searchFeedItemByIdentifyingValue(Feed feed, String identifier) {
+ private static FeedItem searchFeedItemByIdentifyingValue(Feed feed, FeedItem searchItem) {
for (FeedItem item : feed.getItems()) {
- if (TextUtils.equals(item.getIdentifyingValue(), identifier)) {
+ if (TextUtils.equals(item.getIdentifyingValue(), searchItem.getIdentifyingValue())) {
+ return item;
+ }
+ }
+ // Did not find item with same ID. Try to guess duplicates based on other metadata.
+ for (FeedItem item : feed.getItems()) {
+ if (item.getMedia() == null || TextUtils.isEmpty(item.getMedia().getStreamUrl())) {
+ continue;
+ }
+
+ boolean isDuplicate = false;
+ if (TextUtils.equals(item.getMedia().getStreamUrl(), searchItem.getMedia().getStreamUrl())) {
+ Log.d(TAG, "Removing duplicate episode stream url " + item.getMedia().getStreamUrl());
+ isDuplicate = true;
+ } else if (TextUtils.equals(item.getTitle(), searchItem.getTitle())
+ && item.getPubDate().equals(searchItem.getPubDate())) {
+ Log.d(TAG, "Removing duplicate episode title + pubDate " + item.getTitle() + " " + item.getPubDate());
+ isDuplicate = true;
+ }
+ if (isDuplicate) {
+ DBWriter.addDownloadStatus(new DownloadStatus(feed,
+ searchItem.getTitle(), DownloadError.ERROR_PARSER_EXCEPTION, false,
+ "The podcast host changed the ID of an existing episode instead of just "
+ + "updating the episode itself. AntennaPod attempted to repair it.", false));
+ item.setItemIdentifier(searchItem.getItemIdentifier());
return item;
}
}
@@ -411,7 +436,7 @@ public final class DBTasks {
// Look for new or updated Items
for (int idx = 0; idx < newFeed.getItems().size(); idx++) {
final FeedItem item = newFeed.getItems().get(idx);
- FeedItem oldItem = searchFeedItemByIdentifyingValue(savedFeed, item.getIdentifyingValue());
+ FeedItem oldItem = searchFeedItemByIdentifyingValue(savedFeed, item);
if (oldItem == null) {
// item is new
item.setFeed(savedFeed);
@@ -445,7 +470,7 @@ public final class DBTasks {
Iterator<FeedItem> it = savedFeed.getItems().iterator();
while (it.hasNext()) {
FeedItem feedItem = it.next();
- if (searchFeedItemByIdentifyingValue(newFeed, feedItem.getIdentifyingValue()) == null) {
+ if (searchFeedItemByIdentifyingValue(newFeed, feedItem) == null) {
unlistedItems.add(feedItem);
it.remove();
}
diff --git a/core/src/main/java/de/danoeh/antennapod/core/syndication/handler/FeedHandler.java b/core/src/main/java/de/danoeh/antennapod/core/syndication/handler/FeedHandler.java
index c9e6ce5fa..fb28d58c4 100644
--- a/core/src/main/java/de/danoeh/antennapod/core/syndication/handler/FeedHandler.java
+++ b/core/src/main/java/de/danoeh/antennapod/core/syndication/handler/FeedHandler.java
@@ -1,5 +1,8 @@
package de.danoeh.antennapod.core.syndication.handler;
+import android.text.TextUtils;
+import android.util.Log;
+
import org.apache.commons.io.input.XmlStreamReader;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
@@ -7,30 +10,81 @@ import org.xml.sax.SAXException;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import de.danoeh.antennapod.model.feed.Feed;
+import de.danoeh.antennapod.model.feed.FeedItem;
public class FeedHandler {
+ private static final String TAG = "FeedHandler";
+
+ public FeedHandlerResult parseFeed(Feed feed) throws SAXException, IOException,
+ ParserConfigurationException, UnsupportedFeedtypeException {
+ TypeGetter tg = new TypeGetter();
+ TypeGetter.Type type = tg.getType(feed);
+ SyndHandler handler = new SyndHandler(feed, type);
+
+ SAXParserFactory factory = SAXParserFactory.newInstance();
+ factory.setNamespaceAware(true);
+ SAXParser saxParser = factory.newSAXParser();
+ File file = new File(feed.getFile_url());
+ Reader inputStreamReader = new XmlStreamReader(file);
+ InputSource inputSource = new InputSource(inputStreamReader);
+
+ saxParser.parse(inputSource, handler);
+ inputStreamReader.close();
+ feed.setItems(dedupItems(feed.getItems()));
+ return new FeedHandlerResult(handler.state.feed, handler.state.alternateUrls);
+ }
+
+ /**
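+ * Returns a copy of the list without items that repeat an earlier guid, stream URL, or title + pubDate. For updating items that are stored in the database, see also: DBTasks.searchFeedItemByIdentifyingValue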
+ */
+ public static List<FeedItem> dedupItems(List<FeedItem> items) {
+ if (items == null) {
+ return null;
+ }
+ List<FeedItem> list = new ArrayList<>(items);
+ Set<String> seen = new HashSet<>();
+ Iterator<FeedItem> it = list.iterator();
+ while (it.hasNext()) {
+ FeedItem item = it.next();
+ if (seen.contains(item.getItemIdentifier())) {
+ Log.d(TAG, "Removing duplicate episode guid " + item.getItemIdentifier());
+ it.remove();
+ continue;
+ }
- public FeedHandlerResult parseFeed(Feed feed) throws SAXException, IOException,
- ParserConfigurationException, UnsupportedFeedtypeException {
- TypeGetter tg = new TypeGetter();
- TypeGetter.Type type = tg.getType(feed);
- SyndHandler handler = new SyndHandler(feed, type);
-
- SAXParserFactory factory = SAXParserFactory.newInstance();
- factory.setNamespaceAware(true);
- SAXParser saxParser = factory.newSAXParser();
- File file = new File(feed.getFile_url());
- Reader inputStreamReader = new XmlStreamReader(file);
- InputSource inputSource = new InputSource(inputStreamReader);
-
- saxParser.parse(inputSource, handler);
- inputStreamReader.close();
- return new FeedHandlerResult(handler.state.feed, handler.state.alternateUrls);
- }
+ if (item.getMedia() == null || TextUtils.isEmpty(item.getMedia().getStreamUrl())) {
+ continue;
+ }
+ if (seen.contains(item.getMedia().getStreamUrl())) {
+ Log.d(TAG, "Removing duplicate episode stream url " + item.getMedia().getStreamUrl());
+ it.remove();
+ } else {
+ seen.add(item.getMedia().getStreamUrl());
+ if (TextUtils.isEmpty(item.getTitle()) || TextUtils.isEmpty(item.getPubDate().toString())) {
+ continue;
+ }
+ if (!seen.contains(item.getTitle() + item.getPubDate().toString())) {
+ seen.add(item.getTitle() + item.getPubDate().toString());
+ } else {
+ Log.d(TAG, "Removing duplicate episode title and pubDate "
+ + item.getTitle()
+ + " " + item.getPubDate());
+ it.remove();
+ }
+ }
+ seen.add(item.getItemIdentifier());
+ }
+ return list;
+ }
}