summary | refs | log | tree | commit | diff
path: root/parser
diff options
context:
space:
mode:
author: ByteHamster <ByteHamster@users.noreply.github.com> 2021-09-06 17:59:17 +0200
committer: GitHub <noreply@github.com> 2021-09-06 17:59:17 +0200
commit: b36cdb0c4ecb987b1a8e0168c15552c7c11d03a1 (patch)
tree: 02c4645f0ef82fb76d7c2fe575df8aa3ac33d130 /parser
parent: b9f578ed5c83dff7ebf70e2fb5d6ded9c9d4482f (diff)
download: antennapod-b36cdb0c4ecb987b1a8e0168c15552c7c11d03a1.zip
Improvements related to duplicate detection (#5387)
* Move duplicate detection to one single place
* Canonicalize some common characters that are often confused
* Assume same episode even when date is off by 1 week
* Display duplicate detection as warning, not error
Diffstat (limited to 'parser')
-rw-r--r-- parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/FeedHandler.java | 55
1 files changed, 0 insertions, 55 deletions
diff --git a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/FeedHandler.java b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/FeedHandler.java
index c7f5c4f21..6b364fa73 100644
--- a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/FeedHandler.java
+++ b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/FeedHandler.java
@@ -1,8 +1,5 @@
package de.danoeh.antennapod.parser.feed;
-import android.text.TextUtils;
-import android.util.Log;
-
import de.danoeh.antennapod.parser.feed.util.TypeGetter;
import org.apache.commons.io.input.XmlStreamReader;
import org.xml.sax.InputSource;
@@ -11,22 +8,14 @@ import org.xml.sax.SAXException;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Set;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import de.danoeh.antennapod.model.feed.Feed;
-import de.danoeh.antennapod.model.feed.FeedItem;
public class FeedHandler {
- private static final String TAG = "FeedHandler";
-
public FeedHandlerResult parseFeed(Feed feed) throws SAXException, IOException,
ParserConfigurationException, UnsupportedFeedtypeException {
TypeGetter tg = new TypeGetter();
@@ -42,50 +31,6 @@ public class FeedHandler {
saxParser.parse(inputSource, handler);
inputStreamReader.close();
- feed.setItems(dedupItems(feed.getItems()));
return new FeedHandlerResult(handler.state.feed, handler.state.alternateUrls);
}
-
- /**
- * For updating items that are stored in the database, see also: DBTasks.searchFeedItemByIdentifyingValue
- */
- public static List<FeedItem> dedupItems(List<FeedItem> items) {
- if (items == null) {
- return null;
- }
- List<FeedItem> list = new ArrayList<>(items);
- Set<String> seen = new HashSet<>();
- Iterator<FeedItem> it = list.iterator();
- while (it.hasNext()) {
- FeedItem item = it.next();
- if (!TextUtils.isEmpty(item.getItemIdentifier()) && seen.contains(item.getItemIdentifier())) {
- Log.d(TAG, "Removing duplicate episode guid " + item.getItemIdentifier());
- it.remove();
- continue;
- }
-
- if (item.getMedia() == null || TextUtils.isEmpty(item.getMedia().getStreamUrl())) {
- continue;
- }
- if (seen.contains(item.getMedia().getStreamUrl())) {
- Log.d(TAG, "Removing duplicate episode stream url " + item.getMedia().getStreamUrl());
- it.remove();
- } else {
- seen.add(item.getMedia().getStreamUrl());
- if (TextUtils.isEmpty(item.getTitle()) || item.getPubDate() == null) {
- continue;
- }
- if (!seen.contains(item.getTitle() + item.getPubDate().toString())) {
- seen.add(item.getTitle() + item.getPubDate().toString());
- } else {
- Log.d(TAG, "Removing duplicate episode title and pubDate "
- + item.getTitle()
- + " " + item.getPubDate());
- it.remove();
- }
- }
- seen.add(item.getItemIdentifier());
- }
- return list;
- }
}