summary | refs | log | tree | commit | diff
path: root/core/src
diff options
context:
space:
mode:
author: Tony Tam <149837+tonytamsf@users.noreply.github.com> 2021-07-11 00:58:54 -0700
committer: GitHub <noreply@github.com> 2021-07-11 09:58:54 +0200
commit: 81ea42a2a46fea1f633bc34352fde02f4c412f7c (patch)
tree: f01a46798f7709f34b3756a175c0609b959d16da /core/src
parent: b4558efe4abe2b0b55963c4585bf413af72e2699 (diff)
download: AntennaPod-81ea42a2a46fea1f633bc34352fde02f4c412f7c.zip
Dedup based on item unique id, media url or title (#4839)
Diffstat (limited to 'core/src')
-rw-r--r-- core/src/main/java/de/danoeh/antennapod/core/storage/DBTasks.java 35
-rw-r--r-- core/src/main/java/de/danoeh/antennapod/core/syndication/handler/FeedHandler.java 88
-rw-r--r-- core/src/test/java/de/danoeh/antennapod/core/storage/DbCleanupTests.java 2
-rw-r--r-- core/src/test/java/de/danoeh/antennapod/core/storage/DbNullCleanupAlgorithmTest.java 2
-rw-r--r-- core/src/test/java/de/danoeh/antennapod/core/storage/DbTasksTest.java 34
5 files changed, 136 insertions, 25 deletions
diff --git a/core/src/main/java/de/danoeh/antennapod/core/storage/DBTasks.java b/core/src/main/java/de/danoeh/antennapod/core/storage/DBTasks.java
index 4ccd34e28..ee3cf31a1 100644
--- a/core/src/main/java/de/danoeh/antennapod/core/storage/DBTasks.java
+++ b/core/src/main/java/de/danoeh/antennapod/core/storage/DBTasks.java
@@ -334,11 +334,36 @@ public final class DBTasks {
}
/**
- * Get a FeedItem by its identifying value.
+ * Get a FeedItem by its identifying value or download_url.
+ * For de-duplicating items that are not stored yet, see also FeedHandler.dedupItems
*/
- private static FeedItem searchFeedItemByIdentifyingValue(Feed feed, String identifier) {
+ private static FeedItem searchFeedItemByIdentifyingValue(Feed feed, FeedItem searchItem) {
for (FeedItem item : feed.getItems()) {
- if (TextUtils.equals(item.getIdentifyingValue(), identifier)) {
+ if (TextUtils.equals(item.getIdentifyingValue(), searchItem.getIdentifyingValue())) {
+ return item;
+ }
+ }
+ // Did not find item with same ID. Try to guess duplicates based on other metadata.
+ for (FeedItem item : feed.getItems()) {
+ if (item.getMedia() == null || TextUtils.isEmpty(item.getMedia().getStreamUrl())) {
+ continue;
+ }
+
+ boolean isDuplicate = false;
+ if (TextUtils.equals(item.getMedia().getStreamUrl(), searchItem.getMedia().getStreamUrl())) {
+ Log.d(TAG, "Removing duplicate episode stream url " + item.getMedia().getStreamUrl());
+ isDuplicate = true;
+ } else if (TextUtils.equals(item.getTitle(), searchItem.getTitle())
+ && item.getPubDate().equals(searchItem.getPubDate())) {
+ Log.d(TAG, "Removing duplicate episode title + pubDate " + item.getTitle() + " " + item.getPubDate());
+ isDuplicate = true;
+ }
+ if (isDuplicate) {
+ DBWriter.addDownloadStatus(new DownloadStatus(feed,
+ searchItem.getTitle(), DownloadError.ERROR_PARSER_EXCEPTION, false,
+ "The podcast host changed the ID of an existing episode instead of just "
+ + "updating the episode itself. AntennaPod attempted to repair it.", false));
+ item.setItemIdentifier(searchItem.getItemIdentifier());
return item;
}
}
@@ -411,7 +436,7 @@ public final class DBTasks {
// Look for new or updated Items
for (int idx = 0; idx < newFeed.getItems().size(); idx++) {
final FeedItem item = newFeed.getItems().get(idx);
- FeedItem oldItem = searchFeedItemByIdentifyingValue(savedFeed, item.getIdentifyingValue());
+ FeedItem oldItem = searchFeedItemByIdentifyingValue(savedFeed, item);
if (oldItem == null) {
// item is new
item.setFeed(savedFeed);
@@ -445,7 +470,7 @@ public final class DBTasks {
Iterator<FeedItem> it = savedFeed.getItems().iterator();
while (it.hasNext()) {
FeedItem feedItem = it.next();
- if (searchFeedItemByIdentifyingValue(newFeed, feedItem.getIdentifyingValue()) == null) {
+ if (searchFeedItemByIdentifyingValue(newFeed, feedItem) == null) {
unlistedItems.add(feedItem);
it.remove();
}
diff --git a/core/src/main/java/de/danoeh/antennapod/core/syndication/handler/FeedHandler.java b/core/src/main/java/de/danoeh/antennapod/core/syndication/handler/FeedHandler.java
index c9e6ce5fa..fb28d58c4 100644
--- a/core/src/main/java/de/danoeh/antennapod/core/syndication/handler/FeedHandler.java
+++ b/core/src/main/java/de/danoeh/antennapod/core/syndication/handler/FeedHandler.java
@@ -1,5 +1,8 @@
package de.danoeh.antennapod.core.syndication.handler;
+import android.text.TextUtils;
+import android.util.Log;
+
import org.apache.commons.io.input.XmlStreamReader;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
@@ -7,30 +10,81 @@ import org.xml.sax.SAXException;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import de.danoeh.antennapod.model.feed.Feed;
+import de.danoeh.antennapod.model.feed.FeedItem;
public class FeedHandler {
+ private static final String TAG = "FeedHandler";
+
+ public FeedHandlerResult parseFeed(Feed feed) throws SAXException, IOException,
+ ParserConfigurationException, UnsupportedFeedtypeException {
+ TypeGetter tg = new TypeGetter();
+ TypeGetter.Type type = tg.getType(feed);
+ SyndHandler handler = new SyndHandler(feed, type);
+
+ SAXParserFactory factory = SAXParserFactory.newInstance();
+ factory.setNamespaceAware(true);
+ SAXParser saxParser = factory.newSAXParser();
+ File file = new File(feed.getFile_url());
+ Reader inputStreamReader = new XmlStreamReader(file);
+ InputSource inputSource = new InputSource(inputStreamReader);
+
+ saxParser.parse(inputSource, handler);
+ inputStreamReader.close();
+ feed.setItems(dedupItems(feed.getItems()));
+ return new FeedHandlerResult(handler.state.feed, handler.state.alternateUrls);
+ }
+
+ /**
+ * For updating items that are stored in the database, see also: DBTasks.searchFeedItemByIdentifyingValue
+ */
+ public static List<FeedItem> dedupItems(List<FeedItem> items) {
+ if (items == null) {
+ return null;
+ }
+ List<FeedItem> list = new ArrayList<>(items);
+ Set<String> seen = new HashSet<>();
+ Iterator<FeedItem> it = list.iterator();
+ while (it.hasNext()) {
+ FeedItem item = it.next();
+ if (seen.contains(item.getItemIdentifier())) {
+ Log.d(TAG, "Removing duplicate episode guid " + item.getItemIdentifier());
+ it.remove();
+ continue;
+ }
- public FeedHandlerResult parseFeed(Feed feed) throws SAXException, IOException,
- ParserConfigurationException, UnsupportedFeedtypeException {
- TypeGetter tg = new TypeGetter();
- TypeGetter.Type type = tg.getType(feed);
- SyndHandler handler = new SyndHandler(feed, type);
-
- SAXParserFactory factory = SAXParserFactory.newInstance();
- factory.setNamespaceAware(true);
- SAXParser saxParser = factory.newSAXParser();
- File file = new File(feed.getFile_url());
- Reader inputStreamReader = new XmlStreamReader(file);
- InputSource inputSource = new InputSource(inputStreamReader);
-
- saxParser.parse(inputSource, handler);
- inputStreamReader.close();
- return new FeedHandlerResult(handler.state.feed, handler.state.alternateUrls);
- }
+ if (item.getMedia() == null || TextUtils.isEmpty(item.getMedia().getStreamUrl())) {
+ continue;
+ }
+ if (seen.contains(item.getMedia().getStreamUrl())) {
+ Log.d(TAG, "Removing duplicate episode stream url " + item.getMedia().getStreamUrl());
+ it.remove();
+ } else {
+ seen.add(item.getMedia().getStreamUrl());
+ if (TextUtils.isEmpty(item.getTitle()) || TextUtils.isEmpty(item.getPubDate().toString())) {
+ continue;
+ }
+ if (!seen.contains(item.getTitle() + item.getPubDate().toString())) {
+ seen.add(item.getTitle() + item.getPubDate().toString());
+ } else {
+ Log.d(TAG, "Removing duplicate episode title and pubDate "
+ + item.getTitle()
+ + " " + item.getPubDate());
+ it.remove();
+ }
+ }
+ seen.add(item.getItemIdentifier());
+ }
+ return list;
+ }
}
diff --git a/core/src/test/java/de/danoeh/antennapod/core/storage/DbCleanupTests.java b/core/src/test/java/de/danoeh/antennapod/core/storage/DbCleanupTests.java
index 1c4c92574..b5c2e5c73 100644
--- a/core/src/test/java/de/danoeh/antennapod/core/storage/DbCleanupTests.java
+++ b/core/src/test/java/de/danoeh/antennapod/core/storage/DbCleanupTests.java
@@ -134,7 +134,7 @@ public class DbCleanupTests {
if (itemState == FeedItem.PLAYED) {
playbackCompletionDate = itemDate;
}
- FeedItem item = new FeedItem(0, "title", "id", "link", itemDate, itemState, feed);
+ FeedItem item = new FeedItem(0, "title", "id" + i, "link", itemDate, itemState, feed);
File f = new File(destFolder, "file " + i);
assertTrue(f.createNewFile());
diff --git a/core/src/test/java/de/danoeh/antennapod/core/storage/DbNullCleanupAlgorithmTest.java b/core/src/test/java/de/danoeh/antennapod/core/storage/DbNullCleanupAlgorithmTest.java
index 733318724..6485c9515 100644
--- a/core/src/test/java/de/danoeh/antennapod/core/storage/DbNullCleanupAlgorithmTest.java
+++ b/core/src/test/java/de/danoeh/antennapod/core/storage/DbNullCleanupAlgorithmTest.java
@@ -92,7 +92,7 @@ public class DbNullCleanupAlgorithmTest {
feed.setItems(items);
List<File> files = new ArrayList<>();
for (int i = 0; i < numItems; i++) {
- FeedItem item = new FeedItem(0, "title", "id", "link", new Date(), FeedItem.PLAYED, feed);
+ FeedItem item = new FeedItem(0, "title", "id" + i, "link", new Date(), FeedItem.PLAYED, feed);
File f = new File(destFolder, "file " + i);
assertTrue(f.createNewFile());
diff --git a/core/src/test/java/de/danoeh/antennapod/core/storage/DbTasksTest.java b/core/src/test/java/de/danoeh/antennapod/core/storage/DbTasksTest.java
index 9e3dca0f7..0d98ba294 100644
--- a/core/src/test/java/de/danoeh/antennapod/core/storage/DbTasksTest.java
+++ b/core/src/test/java/de/danoeh/antennapod/core/storage/DbTasksTest.java
@@ -197,6 +197,38 @@ public class DbTasksTest {
assertEquals(8, feedFromDB.getItems().size()); // 10 - 2 = 8 items
}
+ @Test
+ public void testUpdateFeedSetDuplicate() {
+ final Feed feed = new Feed("url", null, "title");
+ feed.setItems(new ArrayList<>());
+ for (int i = 0; i < 10; i++) {
+ FeedItem item =
+ new FeedItem(0, "item " + i, "id " + i, "link " + i, new Date(i), FeedItem.PLAYED, feed);
+ FeedMedia media = new FeedMedia(item, "download url " + i, 123, "media/mp3");
+ item.setMedia(media);
+ feed.getItems().add(item);
+ }
+ PodDBAdapter adapter = PodDBAdapter.getInstance();
+ adapter.open();
+ adapter.setCompleteFeed(feed);
+ adapter.close();
+
+ // change the guid of the first item, but leave the download url the same
+ FeedItem item = feed.getItemAtIndex(0);
+ item.setItemIdentifier("id 0-duplicate");
+ item.setTitle("item 0 duplicate");
+ Feed newFeed = DBTasks.updateFeed(context, feed, false);
+ assertEquals(10, newFeed.getItems().size()); // id 1-duplicate replaces because the stream url is the same
+
+ Feed feedFromDB = DBReader.getFeed(newFeed.getId());
+ assertEquals(10, feedFromDB.getItems().size()); // id1-duplicate should override id 1
+
+ FeedItem updatedItem = feedFromDB.getItemAtIndex(9);
+ assertEquals("item 0 duplicate", updatedItem.getTitle());
+ assertEquals("id 0-duplicate", updatedItem.getItemIdentifier()); // Should use the new ID for sync etc
+ }
+
+
@SuppressWarnings("SameParameterValue")
private void updatedFeedTest(final Feed newFeed, long feedID, List<Long> itemIDs,
int numItemsOld, int numItemsNew) {
@@ -285,7 +317,7 @@ public class DbTasksTest {
if (numFeedItems > 0) {
List<FeedItem> items = new ArrayList<>(numFeedItems);
for (int i = 1; i <= numFeedItems; i++) {
- FeedItem item = new FeedItem(0, "item " + i + " of " + title, "id", "link",
+ FeedItem item = new FeedItem(0, "item " + i + " of " + title, "id" + title + i, "link",
new Date(), FeedItem.UNPLAYED, feed);
items.add(item);
}