Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,7 @@ internal class FeedbinAccountDelegate(
image_url = entry.images?.size_1?.cdn_url,
published_at = entry.published.toDateTime?.toEpochSecond(),
enclosure_type = enclosureType,
content_hash = null,
)

articleRecords.createStatus(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import com.jocmp.capy.Feed
import com.jocmp.capy.accounts.AddFeedResult
import com.jocmp.capy.accounts.FeedOption
import com.jocmp.capy.common.ContentFormatter
import com.jocmp.capy.common.MD5
import com.jocmp.capy.common.TimeHelpers.nowUTC
import com.jocmp.capy.common.TimeHelpers.published
import com.jocmp.capy.common.transactionWithErrorHandling
Expand Down Expand Up @@ -239,49 +240,62 @@ internal class LocalAccountDelegate(
) {
val filters = preferences.filterKeywords.get()

val parsedItems = items.mapNotNull { item ->
val publishedAt = published(item.pubDate, fallback = updatedAt).toEpochSecond()
val parsedItem = ParsedItem(item, siteURL = feed.siteURL)
val withinCutoff = cutoffDate == null || publishedAt > cutoffDate.toEpochSecond()
val blocked = containsFilteredText(parsedItem, filters)

if (parsedItem.id != null && withinCutoff && !blocked) {
val contentHash = MD5.from(parsedItem.title + parsedItem.contentHTML.orEmpty())
ParsedArticle(parsedItem, item, publishedAt, contentHash)
} else {
null
}
}

val contentHashes = parsedItems.map { it.contentHash }
val existingHashes = database.articlesQueries
.findExistingHashes(feedID = feed.id, contentHashes = contentHashes)
.executeAsList()
.mapNotNull { it.content_hash }
.toSet()

val newItems = parsedItems.filter { it.contentHash !in existingHashes }

database.transactionWithErrorHandling {
items.forEach { item ->
val publishedAt = published(item.pubDate, fallback = updatedAt).toEpochSecond()
val parsedItem = ParsedItem(
item,
siteURL = feed.siteURL
newItems.forEach { (parsedItem, item, publishedAt, contentHash) ->
val enclosureType = parsedItem.enclosures.firstOrNull()?.type

database.articlesQueries.create(
id = parsedItem.id!!,
feed_id = feed.id,
title = parsedItem.title,
author = item.author,
content_html = parsedItem.contentHTML,
url = parsedItem.url,
summary = item.summary,
extracted_content_url = null,
image_url = parsedItem.imageURL,
published_at = publishedAt,
enclosure_type = enclosureType,
content_hash = contentHash,
)

val withinCutoff = cutoffDate == null || publishedAt > cutoffDate.toEpochSecond()
val blocked = containsFilteredText(parsedItem, filters)

if (parsedItem.id != null && withinCutoff && !blocked) {
val enclosureType = parsedItem.enclosures.firstOrNull()?.type

database.articlesQueries.create(
id = parsedItem.id,
feed_id = feed.id,
title = parsedItem.title,
author = item.author,
content_html = parsedItem.contentHTML,
url = parsedItem.url,
summary = item.summary,
extracted_content_url = null,
image_url = parsedItem.imageURL,
published_at = publishedAt,
enclosure_type = enclosureType,
)
articleRecords.createStatus(
articleID = parsedItem.id,
updatedAt = updatedAt,
read = false,
)

articleRecords.createStatus(
parsedItem.enclosures.forEach {
enclosureRecords.create(
url = it.url.toString(),
type = it.type,
articleID = parsedItem.id,
updatedAt = updatedAt,
read = false
itunesDurationSeconds = it.itunesDurationSeconds?.toString(),
itunesImage = it.itunesImage,
)

parsedItem.enclosures.forEach {
enclosureRecords.create(
url = it.url.toString(),
type = it.type,
articleID = parsedItem.id,
itunesDurationSeconds = it.itunesDurationSeconds?.toString(),
itunesImage = it.itunesImage,
)
}
}
}
}
Expand Down Expand Up @@ -340,6 +354,13 @@ internal class LocalAccountDelegate(
}
}

/**
 * A feed entry that passed the ID, cutoff-date and keyword-filter checks, bundled with the
 * values computed for it before it is written to the database.
 *
 * Property order is significant: instances are destructured positionally as
 * `(parsedItem, item, publishedAt, contentHash)`.
 *
 * @property parsedItem the [ParsedItem] built from [item] and the feed's site URL
 * @property item the original [RssItem] the article was parsed from
 * @property publishedAt publication time in epoch seconds
 * @property contentHash MD5 hex digest of the parsed title concatenated with the content HTML
 */
private data class ParsedArticle(
val parsedItem: ParsedItem,
val item: RssItem,
val publishedAt: Long,
val contentHash: String,
)

internal val RssItem.contentHTML: String?
get() {
val currentContent = content.orEmpty().ifBlank {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,7 @@ internal class MinifluxAccountDelegate(
image_url = imageURL,
published_at = entry.published_at.toDateTime?.toEpochSecond(),
enclosure_type = enclosures.firstOrNull()?.mime_type,
content_hash = null,
)

articleRecords.createStatus(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,7 @@ internal class ReaderAccountDelegate(
image_url = ReaderEnclosureParsing.parsedImageURL(item),
published_at = item.published,
enclosure_type = enclosureType,
content_hash = null,
)

articleRecords.updateStatus(
Expand Down
12 changes: 12 additions & 0 deletions capy/src/main/java/com/jocmp/capy/common/MD5.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package com.jocmp.capy.common

import java.security.MessageDigest

/**
 * Produces MD5 digests of strings, rendered as hexadecimal text.
 *
 * Intended for content fingerprinting, not for anything security-sensitive.
 */
@OptIn(ExperimentalStdlibApi::class)
object MD5 {
    /**
     * Hashes [value] with MD5 and returns the digest as a hex string.
     *
     * The string is converted to bytes with [String.toByteArray]'s default charset and the
     * digest is formatted with the default [ByteArray.toHexString] format.
     *
     * @param value the text to hash; may be empty
     * @return the hex-encoded digest
     */
    fun from(value: String): String =
        MessageDigest.getInstance("MD5")
            .digest(value.toByteArray())
            .toHexString()
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ internal fun articleMapper(
imageURL: String?,
publishedAt: Long?,
enclosureType: String?,
@Suppress("UNUSED_PARAMETER") contentHash: String?,
feedTitle: String?,
faviconURL: String?,
enableStickyContent: Boolean,
Expand Down Expand Up @@ -85,6 +86,7 @@ internal fun listMapper(
imageURL = imageURL,
publishedAt = publishedAt,
enclosureType = enclosureType,
contentHash = null,
feedTitle = feedTitle,
faviconURL = faviconURL,
enableStickyContent = false,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Adds a nullable content_hash column to articles; rows that already exist keep NULL.
ALTER TABLE articles ADD COLUMN content_hash TEXT;

-- Composite index covering lookups that filter on feed_id together with content_hash.
CREATE INDEX articles_feed_content_hash ON articles(feed_id, content_hash);
14 changes: 11 additions & 3 deletions capy/src/main/sqldelight/com/jocmp/capy/db/articles.sq
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,8 @@ INSERT INTO articles(
summary,
image_url,
published_at,
enclosure_type
enclosure_type,
content_hash
)
VALUES (
:id,
Expand All @@ -78,7 +79,8 @@ VALUES (
:summary,
:image_url,
:published_at,
:enclosure_type
:enclosure_type,
:content_hash
)
ON CONFLICT(id) DO UPDATE
SET
Expand All @@ -92,7 +94,8 @@ url = excluded.url,
summary = excluded.summary,
image_url = excluded.image_url,
published_at = published_at,
enclosure_type = excluded.enclosure_type;
enclosure_type = excluded.enclosure_type,
content_hash = excluded.content_hash;

createStatus:
INSERT INTO article_statuses(
Expand Down Expand Up @@ -282,6 +285,11 @@ deletePageByID {
DELETE FROM articles WHERE id = :articleID;
}

-- Returns the content_hash of every article in the given feed whose hash is one of
-- :contentHashes. One row is returned per matching article, so the same hash can appear
-- more than once. Rows whose content_hash is NULL never match the IN clause.
findExistingHashes:
SELECT content_hash FROM articles
WHERE feed_id = :feedID
AND content_hash IN :contentHashes;

findIDsByFeed:
SELECT id FROM articles WHERE feed_id = :feedID;

Expand Down
3 changes: 2 additions & 1 deletion capy/src/test/java/com/jocmp/capy/fixtures/ArticleFixture.kt
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ class ArticleFixture(private val database: Database = InMemoryDatabaseProvider()
published_at = publishedAt,
summary = summary,
url = url,
enclosure_type = null
enclosure_type = null,
content_hash = null,
)
database.articlesQueries.createStatus(
article_id = id,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class ArticleMapperTest {
imageURL = "https://cdn.vox-cdn.com/thumbor/r-eWiuX74LfGvTxwenExmwmkPlk=/0x0:1800x1200/1310x873/cdn.vox-cdn.com/uploads/chorus_image/image/73010063/Vizio_TV_D_Series_Lifestyle.0.jpg",
publishedAt = 1703960809,
enclosureType = null,
contentHash = null,
feedTitle = "",
faviconURL = null,
enableStickyContent = false,
Expand Down
Loading