diff options
author | ByteHamster <ByteHamster@users.noreply.github.com> | 2021-09-06 17:59:17 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-09-06 17:59:17 +0200 |
commit | b36cdb0c4ecb987b1a8e0168c15552c7c11d03a1 (patch) | |
tree | 02c4645f0ef82fb76d7c2fe575df8aa3ac33d130 /parser/feed/src | |
parent | b9f578ed5c83dff7ebf70e2fb5d6ded9c9d4482f (diff) | |
download | AntennaPod-b36cdb0c4ecb987b1a8e0168c15552c7c11d03a1.zip |
Improvements related to duplicate detection (#5387)
* Move duplicate detection to one single place
* Canonicalize some common characters that are often confused
* Assume same episode even when date is off by 1 week
* Display duplicate detection as warning, not error
Diffstat (limited to 'parser/feed/src')
-rw-r--r-- | parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/FeedHandler.java | 55 |
1 file changed, 0 insertions, 55 deletions
diff --git a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/FeedHandler.java b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/FeedHandler.java index c7f5c4f21..6b364fa73 100644 --- a/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/FeedHandler.java +++ b/parser/feed/src/main/java/de/danoeh/antennapod/parser/feed/FeedHandler.java @@ -1,8 +1,5 @@ package de.danoeh.antennapod.parser.feed; -import android.text.TextUtils; -import android.util.Log; - import de.danoeh.antennapod.parser.feed.util.TypeGetter; import org.apache.commons.io.input.XmlStreamReader; import org.xml.sax.InputSource; @@ -11,22 +8,14 @@ import org.xml.sax.SAXException; import java.io.File; import java.io.IOException; import java.io.Reader; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Set; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import de.danoeh.antennapod.model.feed.Feed; -import de.danoeh.antennapod.model.feed.FeedItem; public class FeedHandler { - private static final String TAG = "FeedHandler"; - public FeedHandlerResult parseFeed(Feed feed) throws SAXException, IOException, ParserConfigurationException, UnsupportedFeedtypeException { TypeGetter tg = new TypeGetter(); @@ -42,50 +31,6 @@ public class FeedHandler { saxParser.parse(inputSource, handler); inputStreamReader.close(); - feed.setItems(dedupItems(feed.getItems())); return new FeedHandlerResult(handler.state.feed, handler.state.alternateUrls); } - - /** - * For updating items that are stored in the database, see also: DBTasks.searchFeedItemByIdentifyingValue - */ - public static List<FeedItem> dedupItems(List<FeedItem> items) { - if (items == null) { - return null; - } - List<FeedItem> list = new ArrayList<>(items); - Set<String> seen = new HashSet<>(); - Iterator<FeedItem> it = list.iterator(); - while (it.hasNext()) { - FeedItem item 
= it.next(); - if (!TextUtils.isEmpty(item.getItemIdentifier()) && seen.contains(item.getItemIdentifier())) { - Log.d(TAG, "Removing duplicate episode guid " + item.getItemIdentifier()); - it.remove(); - continue; - } - - if (item.getMedia() == null || TextUtils.isEmpty(item.getMedia().getStreamUrl())) { - continue; - } - if (seen.contains(item.getMedia().getStreamUrl())) { - Log.d(TAG, "Removing duplicate episode stream url " + item.getMedia().getStreamUrl()); - it.remove(); - } else { - seen.add(item.getMedia().getStreamUrl()); - if (TextUtils.isEmpty(item.getTitle()) || item.getPubDate() == null) { - continue; - } - if (!seen.contains(item.getTitle() + item.getPubDate().toString())) { - seen.add(item.getTitle() + item.getPubDate().toString()); - } else { - Log.d(TAG, "Removing duplicate episode title and pubDate " - + item.getTitle() - + " " + item.getPubDate()); - it.remove(); - } - } - seen.add(item.getItemIdentifier()); - } - return list; - } } |