From 485486c7ff986712ecb09e92217236d276d317c4 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 12 Jan 2026 08:46:55 +0000 Subject: feat: deduplicate feed items by guid Use ROW_NUMBER() window function to filter out duplicate articles with the same guid across different feeds. This ensures each guid appears only once in unread/read article lists. - Add idx_articles_guid index for PARTITION BY performance - Modify GetUnreadArticles and GetReadArticles queries with CTE --- backend/db/articles.sql.go | 40 ++++++++++++++++++---------- backend/db/migrations/005_add_guid_index.sql | 2 ++ backend/db/queries/articles.sql | 40 ++++++++++++++++++---------- backend/db/schema.sql | 2 ++ 4 files changed, 56 insertions(+), 28 deletions(-) create mode 100644 backend/db/migrations/005_add_guid_index.sql (limited to 'backend') diff --git a/backend/db/articles.sql.go b/backend/db/articles.sql.go index bf6ea1d..2f6f80d 100644 --- a/backend/db/articles.sql.go +++ b/backend/db/articles.sql.go @@ -179,13 +179,19 @@ func (q *Queries) GetArticlesByFeed(ctx context.Context, feedID int64) ([]Articl } const getReadArticles = `-- name: GetReadArticles :many -SELECT - a.id, a.feed_id, a.guid, a.title, a.url, a.is_read, - f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed -FROM articles AS a -INNER JOIN feeds AS f ON a.feed_id = f.id -WHERE a.is_read = 1 AND f.is_subscribed = 1 AND f.user_id = ? -ORDER BY a.id DESC +WITH ranked AS ( + SELECT + a.id, a.feed_id, a.guid, a.title, a.url, a.is_read, + f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed, + ROW_NUMBER() OVER (PARTITION BY a.guid ORDER BY a.id) as rn + FROM articles AS a + INNER JOIN feeds AS f ON a.feed_id = f.id + WHERE a.is_read = 1 AND f.is_subscribed = 1 AND f.user_id = ? +) +SELECT id, feed_id, guid, title, url, is_read, feed_id_2, feed_url, feed_title, feed_is_subscribed +FROM ranked +WHERE rn = 1 +ORDER BY id DESC LIMIT 100 ` @@ -237,13 +243,19 @@ func (q *Queries) GetReadArticles(ctx context.Context, userID int64) ([]GetReadA } const getUnreadArticles = `-- name: GetUnreadArticles :many -SELECT - a.id, a.feed_id, a.guid, a.title, a.url, a.is_read, - f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed -FROM articles AS a -INNER JOIN feeds AS f ON a.feed_id = f.id -WHERE a.is_read = 0 AND f.is_subscribed = 1 AND f.user_id = ? -ORDER BY a.id DESC +WITH ranked AS ( + SELECT + a.id, a.feed_id, a.guid, a.title, a.url, a.is_read, + f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed, + ROW_NUMBER() OVER (PARTITION BY a.guid ORDER BY a.id) as rn + FROM articles AS a + INNER JOIN feeds AS f ON a.feed_id = f.id + WHERE a.is_read = 0 AND f.is_subscribed = 1 AND f.user_id = ? +) +SELECT id, feed_id, guid, title, url, is_read, feed_id_2, feed_url, feed_title, feed_is_subscribed +FROM ranked +WHERE rn = 1 +ORDER BY id DESC LIMIT 100 ` diff --git a/backend/db/migrations/005_add_guid_index.sql b/backend/db/migrations/005_add_guid_index.sql new file mode 100644 index 0000000..a653d79 --- /dev/null +++ b/backend/db/migrations/005_add_guid_index.sql @@ -0,0 +1,2 @@ +-- Add index on guid for deduplication queries +CREATE INDEX IF NOT EXISTS idx_articles_guid ON articles(guid); diff --git a/backend/db/queries/articles.sql b/backend/db/queries/articles.sql index 5acdada..bd4cd91 100644 --- a/backend/db/queries/articles.sql +++ b/backend/db/queries/articles.sql @@ -7,23 +7,35 @@ INNER JOIN feeds AS f ON a.feed_id = f.id WHERE a.id = ?; -- name: GetUnreadArticles :many -SELECT - a.id, a.feed_id, a.guid, a.title, a.url, a.is_read, - f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed -FROM articles AS a -INNER JOIN feeds AS f ON a.feed_id = f.id -WHERE a.is_read = 0 AND f.is_subscribed = 1 AND f.user_id = ? -ORDER BY a.id DESC +WITH ranked AS ( + SELECT + a.id, a.feed_id, a.guid, a.title, a.url, a.is_read, + f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed, + ROW_NUMBER() OVER (PARTITION BY a.guid ORDER BY a.id) as rn + FROM articles AS a + INNER JOIN feeds AS f ON a.feed_id = f.id + WHERE a.is_read = 0 AND f.is_subscribed = 1 AND f.user_id = ? +) +SELECT id, feed_id, guid, title, url, is_read, feed_id_2, feed_url, feed_title, feed_is_subscribed +FROM ranked +WHERE rn = 1 +ORDER BY id DESC LIMIT 100; -- name: GetReadArticles :many -SELECT - a.id, a.feed_id, a.guid, a.title, a.url, a.is_read, - f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed -FROM articles AS a -INNER JOIN feeds AS f ON a.feed_id = f.id -WHERE a.is_read = 1 AND f.is_subscribed = 1 AND f.user_id = ? -ORDER BY a.id DESC +WITH ranked AS ( + SELECT + a.id, a.feed_id, a.guid, a.title, a.url, a.is_read, + f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed, + ROW_NUMBER() OVER (PARTITION BY a.guid ORDER BY a.id) as rn + FROM articles AS a + INNER JOIN feeds AS f ON a.feed_id = f.id + WHERE a.is_read = 1 AND f.is_subscribed = 1 AND f.user_id = ? +) +SELECT id, feed_id, guid, title, url, is_read, feed_id_2, feed_url, feed_title, feed_is_subscribed +FROM ranked +WHERE rn = 1 +ORDER BY id DESC LIMIT 100; -- name: GetArticlesByFeed :many diff --git a/backend/db/schema.sql b/backend/db/schema.sql index adf3b37..07ac72d 100644 --- a/backend/db/schema.sql +++ b/backend/db/schema.sql @@ -34,4 +34,6 @@ CREATE INDEX IF NOT EXISTS idx_articles_feed_guid ON articles(feed_id, guid); CREATE INDEX IF NOT EXISTS idx_articles_is_read ON articles(is_read); +CREATE INDEX IF NOT EXISTS idx_articles_guid ON articles(guid); + CREATE INDEX IF NOT EXISTS idx_feeds_user_id ON feeds(user_id); -- cgit v1.2.3-70-g09d2