summary | refs | log | tree | commit | diff
path: root/core
diff options
context:
space:
mode:
author ByteHamster <info@bytehamster.com> 2021-12-14 22:11:36 +0100
committer ByteHamster <info@bytehamster.com> 2021-12-18 20:44:01 +0100
commit dde499f5b1fd9a7c4c15cd4125cdb877d2d11aed (patch)
tree 9d71d666345faa1491882e82d3e5dc5b37f3616f /core
parent 19dfa08905e33943995edb1703b8ddc3bb9fc0ea (diff)
download antennapod-dde499f5b1fd9a7c4c15cd4125cdb877d2d11aed.zip
Only mark items as duplicates if duration and date are similar
Diffstat (limited to 'core')
-rw-r--r-- core/src/main/java/de/danoeh/antennapod/core/storage/DBTasks.java 31
-rw-r--r-- core/src/main/java/de/danoeh/antennapod/core/storage/FeedItemDuplicateGuesser.java 70
-rw-r--r-- core/src/test/java/de/danoeh/antennapod/core/storage/FeedItemDuplicateGuesserTest.java 66
3 files changed, 137 insertions, 30 deletions
diff --git a/core/src/main/java/de/danoeh/antennapod/core/storage/DBTasks.java b/core/src/main/java/de/danoeh/antennapod/core/storage/DBTasks.java
index 719620202..a0c1e54ad 100644
--- a/core/src/main/java/de/danoeh/antennapod/core/storage/DBTasks.java
+++ b/core/src/main/java/de/danoeh/antennapod/core/storage/DBTasks.java
@@ -352,42 +352,13 @@ public final class DBTasks {
*/
private static FeedItem searchFeedItemGuessDuplicate(List<FeedItem> items, FeedItem searchItem) {
for (FeedItem item : items) {
- if ((item.getMedia() != null)
- && (searchItem.getMedia() != null)
- && !TextUtils.isEmpty(item.getMedia().getStreamUrl())
- && !TextUtils.isEmpty(searchItem.getMedia().getStreamUrl())
- && TextUtils.equals(item.getMedia().getStreamUrl(), searchItem.getMedia().getStreamUrl())) {
+ if (FeedItemDuplicateGuesser.seemDuplicates(item, searchItem)) {
return item;
- } else if (titlesLookSimilar(item.getTitle(), searchItem.getTitle())) {
- if (searchItem.getPubDate() == null || item.getPubDate() == null) {
- continue;
- }
- long dateOriginal = item.getPubDate().getTime();
- long dateNew = searchItem.getPubDate().getTime();
- if (Math.abs(dateOriginal - dateNew) < 7L * 24L * 3600L * 1000L) { // Same week
- return item;
- }
}
}
return null;
}
- private static boolean titlesLookSimilar(String title1, String title2) {
- if (TextUtils.isEmpty(title1) || TextUtils.isEmpty(title2)) {
- return false;
- }
- return canonicalizeTitle(title1).equals(canonicalizeTitle(title2));
- }
-
- private static String canonicalizeTitle(String title) {
- return title
- .trim()
- .replace('“', '"')
- .replace('”', '"')
- .replace('„', '"')
- .replace('—', '-');
- }
-
/**
* Adds new Feeds to the database or updates the old versions if they already exists. If another Feed with the same
* identifying value already exists, this method will add new FeedItems from the new Feed to the existing Feed.
diff --git a/core/src/main/java/de/danoeh/antennapod/core/storage/FeedItemDuplicateGuesser.java b/core/src/main/java/de/danoeh/antennapod/core/storage/FeedItemDuplicateGuesser.java
new file mode 100644
index 000000000..35d77ae4a
--- /dev/null
+++ b/core/src/main/java/de/danoeh/antennapod/core/storage/FeedItemDuplicateGuesser.java
@@ -0,0 +1,70 @@
+package de.danoeh.antennapod.core.storage;
+
+import android.text.TextUtils;
+import de.danoeh.antennapod.model.feed.FeedItem;
+import de.danoeh.antennapod.model.feed.FeedMedia;
+
+import java.text.DateFormat;
+import java.util.Locale;
+
+/**
+ * Publishers sometimes mess up their feed by adding episodes twice or by changing the ID of existing episodes.
+ * This class tries to guess if publishers actually meant the same episode,
+ * even if their feed explicitly says that the episodes are different.
+ */
+public class FeedItemDuplicateGuesser {
+ public static boolean seemDuplicates(FeedItem item1, FeedItem item2) {
+ if (sameAndNotEmpty(item1.getItemIdentifier(), item2.getItemIdentifier())) {
+ return true;
+ }
+ FeedMedia media1 = item1.getMedia();
+ FeedMedia media2 = item2.getMedia();
+ if (media1 == null || media2 == null) {
+ return false;
+ }
+ if (sameAndNotEmpty(media1.getStreamUrl(), media2.getStreamUrl())) {
+ return true;
+ }
+ return titlesLookSimilar(item1, item2)
+ && datesLookSimilar(item1, item2)
+ && durationsLookSimilar(media1, media2)
+ && TextUtils.equals(media1.getMime_type(), media2.getMime_type());
+ }
+
+ private static boolean sameAndNotEmpty(String string1, String string2) {
+ if (TextUtils.isEmpty(string1) || TextUtils.isEmpty(string2)) {
+ return false;
+ }
+ return string1.equals(string2);
+ }
+
+ private static boolean datesLookSimilar(FeedItem item1, FeedItem item2) {
+ if (item1.getPubDate() == null || item2.getPubDate() == null) {
+ return false;
+ }
+ DateFormat dateFormat = DateFormat.getDateInstance(DateFormat.SHORT, Locale.US); // MM/DD/YY
+ String dateOriginal = dateFormat.format(item2.getPubDate());
+ String dateNew = dateFormat.format(item1.getPubDate());
+ return TextUtils.equals(dateOriginal, dateNew); // Same date; time is ignored.
+ }
+
+ private static boolean durationsLookSimilar(FeedMedia media1, FeedMedia media2) {
+ return Math.abs(media1.getDuration() - media2.getDuration()) < 10 * 60L * 1000L;
+ }
+
+ private static boolean titlesLookSimilar(FeedItem item1, FeedItem item2) {
+ return sameAndNotEmpty(canonicalizeTitle(item1.getTitle()), canonicalizeTitle(item2.getTitle()));
+ }
+
+ private static String canonicalizeTitle(String title) {
+ if (title == null) {
+ return "";
+ }
+ return title
+ .trim()
+ .replace('“', '"')
+ .replace('”', '"')
+ .replace('„', '"')
+ .replace('—', '-');
+ }
+}
diff --git a/core/src/test/java/de/danoeh/antennapod/core/storage/FeedItemDuplicateGuesserTest.java b/core/src/test/java/de/danoeh/antennapod/core/storage/FeedItemDuplicateGuesserTest.java
new file mode 100644
index 000000000..ac7cdee1f
--- /dev/null
+++ b/core/src/test/java/de/danoeh/antennapod/core/storage/FeedItemDuplicateGuesserTest.java
@@ -0,0 +1,66 @@
+package de.danoeh.antennapod.core.storage;
+
+import de.danoeh.antennapod.model.feed.FeedItem;
+import de.danoeh.antennapod.model.feed.FeedMedia;
+import org.junit.Test;
+
+import java.util.Date;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Test class for {@link FeedItemDuplicateGuesser}.
+ */
+public class FeedItemDuplicateGuesserTest {
+ private static final long MINUTES = 1000 * 60;
+ private static final long DAYS = 24 * 60 * MINUTES;
+
+ @Test
+ public void testSameId() {
+ assertTrue(FeedItemDuplicateGuesser.seemDuplicates(
+ item("id", "Title1", "example.com/episode1", 0, 5 * MINUTES, "audio/*"),
+ item("id", "Title2", "example.com/episode2", 0, 20 * MINUTES, "video/*")));
+ }
+
+ @Test
+ public void testDuplicateDownloadUrl() {
+ assertTrue(FeedItemDuplicateGuesser.seemDuplicates(
+ item("id1", "Title1", "example.com/episode", 0, 5 * MINUTES, "audio/*"),
+ item("id2", "Title2", "example.com/episode", 0, 5 * MINUTES, "audio/*")));
+ assertFalse(FeedItemDuplicateGuesser.seemDuplicates(
+ item("id1", "Title1", "example.com/episode1", 0, 5 * MINUTES, "audio/*"),
+ item("id2", "Title2", "example.com/episode2", 0, 5 * MINUTES, "audio/*")));
+ }
+
+ @Test
+ public void testOtherAttributes() {
+ assertTrue(FeedItemDuplicateGuesser.seemDuplicates(
+ item("id1", "Title", "example.com/episode1", 10, 5 * MINUTES, "audio/*"),
+ item("id2", "Title", "example.com/episode2", 10, 5 * MINUTES, "audio/*")));
+ assertTrue(FeedItemDuplicateGuesser.seemDuplicates(
+ item("id1", "Title", "example.com/episode1", 10, 5 * MINUTES, "audio/*"),
+ item("id2", "Title", "example.com/episode2", 20, 6 * MINUTES, "audio/*")));
+ assertFalse(FeedItemDuplicateGuesser.seemDuplicates(
+ item("id1", "Title", "example.com/episode1", 10, 5 * MINUTES, "audio/*"),
+ item("id2", "Title", "example.com/episode2", 10, 5 * MINUTES, "video/*")));
+ assertFalse(FeedItemDuplicateGuesser.seemDuplicates(
+ item("id1", "Title", "example.com/episode1", 5 * DAYS, 5 * MINUTES, "audio/*"),
+ item("id2", "Title", "example.com/episode2", 2 * DAYS, 5 * MINUTES, "audio/*")));
+ }
+
+ @Test
+ public void testNoMediaType() {
+ assertTrue(FeedItemDuplicateGuesser.seemDuplicates(
+ item("id1", "Title", "example.com/episode1", 2 * DAYS, 5 * MINUTES, ""),
+ item("id2", "Title", "example.com/episode2", 2 * DAYS, 5 * MINUTES, "")));
+ }
+
+ private FeedItem item(String guid, String title, String downloadUrl,
+ long date, long duration, String mime) {
+ FeedItem item = new FeedItem(0, title, guid, "link", new Date(date), FeedItem.PLAYED, null);
+ FeedMedia media = new FeedMedia(item, downloadUrl, duration, mime);
+ item.setMedia(media);
+ return item;
+ }
+}
\ No newline at end of file