aboutsummaryrefslogtreecommitdiffhomepage
path: root/backend
diff options
context:
space:
mode:
authorClaude <noreply@anthropic.com>2026-01-12 08:46:55 +0000
committerClaude <noreply@anthropic.com>2026-01-12 08:46:55 +0000
commit485486c7ff986712ecb09e92217236d276d317c4 (patch)
tree8c8c49747fd4d9db6cc24c821a2e27aced01a271 /backend
parent67bfaf8f103295c5562db02a0044e311e3504518 (diff)
downloadfeedaka-485486c7ff986712ecb09e92217236d276d317c4.tar.gz
feedaka-485486c7ff986712ecb09e92217236d276d317c4.tar.zst
feedaka-485486c7ff986712ecb09e92217236d276d317c4.zip
feat: deduplicate feed items by guid
Use ROW_NUMBER() window function to filter out duplicate articles with the same guid across different feeds. This ensures each guid appears only once in unread/read article lists. - Add idx_articles_guid index for PARTITION BY performance - Modify GetUnreadArticles and GetReadArticles queries with CTE
Diffstat (limited to 'backend')
-rw-r--r--backend/db/articles.sql.go40
-rw-r--r--backend/db/migrations/005_add_guid_index.sql2
-rw-r--r--backend/db/queries/articles.sql40
-rw-r--r--backend/db/schema.sql2
4 files changed, 56 insertions, 28 deletions
diff --git a/backend/db/articles.sql.go b/backend/db/articles.sql.go
index bf6ea1d..2f6f80d 100644
--- a/backend/db/articles.sql.go
+++ b/backend/db/articles.sql.go
@@ -179,13 +179,19 @@ func (q *Queries) GetArticlesByFeed(ctx context.Context, feedID int64) ([]Articl
}
const getReadArticles = `-- name: GetReadArticles :many
-SELECT
- a.id, a.feed_id, a.guid, a.title, a.url, a.is_read,
- f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed
-FROM articles AS a
-INNER JOIN feeds AS f ON a.feed_id = f.id
-WHERE a.is_read = 1 AND f.is_subscribed = 1 AND f.user_id = ?
-ORDER BY a.id DESC
+WITH ranked AS (
+ SELECT
+ a.id, a.feed_id, a.guid, a.title, a.url, a.is_read,
+ f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed,
+ ROW_NUMBER() OVER (PARTITION BY a.guid ORDER BY a.id) as rn
+ FROM articles AS a
+ INNER JOIN feeds AS f ON a.feed_id = f.id
+ WHERE a.is_read = 1 AND f.is_subscribed = 1 AND f.user_id = ?
+)
+SELECT id, feed_id, guid, title, url, is_read, feed_id_2, feed_url, feed_title, feed_is_subscribed
+FROM ranked
+WHERE rn = 1
+ORDER BY id DESC
LIMIT 100
`
@@ -237,13 +243,19 @@ func (q *Queries) GetReadArticles(ctx context.Context, userID int64) ([]GetReadA
}
const getUnreadArticles = `-- name: GetUnreadArticles :many
-SELECT
- a.id, a.feed_id, a.guid, a.title, a.url, a.is_read,
- f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed
-FROM articles AS a
-INNER JOIN feeds AS f ON a.feed_id = f.id
-WHERE a.is_read = 0 AND f.is_subscribed = 1 AND f.user_id = ?
-ORDER BY a.id DESC
+WITH ranked AS (
+ SELECT
+ a.id, a.feed_id, a.guid, a.title, a.url, a.is_read,
+ f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed,
+ ROW_NUMBER() OVER (PARTITION BY a.guid ORDER BY a.id) as rn
+ FROM articles AS a
+ INNER JOIN feeds AS f ON a.feed_id = f.id
+ WHERE a.is_read = 0 AND f.is_subscribed = 1 AND f.user_id = ?
+)
+SELECT id, feed_id, guid, title, url, is_read, feed_id_2, feed_url, feed_title, feed_is_subscribed
+FROM ranked
+WHERE rn = 1
+ORDER BY id DESC
LIMIT 100
`
diff --git a/backend/db/migrations/005_add_guid_index.sql b/backend/db/migrations/005_add_guid_index.sql
new file mode 100644
index 0000000..a653d79
--- /dev/null
+++ b/backend/db/migrations/005_add_guid_index.sql
@@ -0,0 +1,2 @@
+-- Add index on guid for deduplication queries
+CREATE INDEX IF NOT EXISTS idx_articles_guid ON articles(guid);
diff --git a/backend/db/queries/articles.sql b/backend/db/queries/articles.sql
index 5acdada..bd4cd91 100644
--- a/backend/db/queries/articles.sql
+++ b/backend/db/queries/articles.sql
@@ -7,23 +7,35 @@ INNER JOIN feeds AS f ON a.feed_id = f.id
WHERE a.id = ?;
-- name: GetUnreadArticles :many
-SELECT
- a.id, a.feed_id, a.guid, a.title, a.url, a.is_read,
- f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed
-FROM articles AS a
-INNER JOIN feeds AS f ON a.feed_id = f.id
-WHERE a.is_read = 0 AND f.is_subscribed = 1 AND f.user_id = ?
-ORDER BY a.id DESC
+WITH ranked AS (
+ SELECT
+ a.id, a.feed_id, a.guid, a.title, a.url, a.is_read,
+ f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed,
+ ROW_NUMBER() OVER (PARTITION BY a.guid ORDER BY a.id) as rn
+ FROM articles AS a
+ INNER JOIN feeds AS f ON a.feed_id = f.id
+ WHERE a.is_read = 0 AND f.is_subscribed = 1 AND f.user_id = ?
+)
+SELECT id, feed_id, guid, title, url, is_read, feed_id_2, feed_url, feed_title, feed_is_subscribed
+FROM ranked
+WHERE rn = 1
+ORDER BY id DESC
LIMIT 100;
-- name: GetReadArticles :many
-SELECT
- a.id, a.feed_id, a.guid, a.title, a.url, a.is_read,
- f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed
-FROM articles AS a
-INNER JOIN feeds AS f ON a.feed_id = f.id
-WHERE a.is_read = 1 AND f.is_subscribed = 1 AND f.user_id = ?
-ORDER BY a.id DESC
+WITH ranked AS (
+ SELECT
+ a.id, a.feed_id, a.guid, a.title, a.url, a.is_read,
+ f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed,
+ ROW_NUMBER() OVER (PARTITION BY a.guid ORDER BY a.id) as rn
+ FROM articles AS a
+ INNER JOIN feeds AS f ON a.feed_id = f.id
+ WHERE a.is_read = 1 AND f.is_subscribed = 1 AND f.user_id = ?
+)
+SELECT id, feed_id, guid, title, url, is_read, feed_id_2, feed_url, feed_title, feed_is_subscribed
+FROM ranked
+WHERE rn = 1
+ORDER BY id DESC
LIMIT 100;
-- name: GetArticlesByFeed :many
diff --git a/backend/db/schema.sql b/backend/db/schema.sql
index adf3b37..07ac72d 100644
--- a/backend/db/schema.sql
+++ b/backend/db/schema.sql
@@ -34,4 +34,6 @@ CREATE INDEX IF NOT EXISTS idx_articles_feed_guid ON articles(feed_id, guid);
CREATE INDEX IF NOT EXISTS idx_articles_is_read ON articles(is_read);
+CREATE INDEX IF NOT EXISTS idx_articles_guid ON articles(guid);
+
CREATE INDEX IF NOT EXISTS idx_feeds_user_id ON feeds(user_id);