From 8524487824f7332223b24e75ab327bf6ec5eccc9 Mon Sep 17 00:00:00 2001
From: Claude
Date: Mon, 12 Jan 2026 08:58:04 +0000
Subject: refactor: deduplicate articles at insertion time instead of query time

Change deduplication strategy from query-time (ROW_NUMBER window function)
to insertion-time (global guid check before insert).

Benefits:
- Simpler queries without CTE/window functions
- Consistent read state (no duplicate articles to manage)
- Better query performance (no per-query deduplication overhead)

Changes:
- Add CheckArticleExistsByGUID query for global guid lookup
- Add migration to remove existing duplicate articles
- Modify fetchOneFeed and AddFeed to skip duplicates on insert
- Revert GetUnreadArticles/GetReadArticles to simple queries
---
 backend/db/queries/articles.sql | 50 ++++++++++++++++++++---------------------
 1 file changed, 24 insertions(+), 26 deletions(-)

(limited to 'backend/db/queries/articles.sql')

diff --git a/backend/db/queries/articles.sql b/backend/db/queries/articles.sql
index bd4cd91..134b967 100644
--- a/backend/db/queries/articles.sql
+++ b/backend/db/queries/articles.sql
@@ -7,35 +7,23 @@ INNER JOIN feeds AS f ON a.feed_id = f.id
 WHERE a.id = ?;
 
 -- name: GetUnreadArticles :many
-WITH ranked AS (
-    SELECT
-        a.id, a.feed_id, a.guid, a.title, a.url, a.is_read,
-        f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed,
-        ROW_NUMBER() OVER (PARTITION BY a.guid ORDER BY a.id) as rn
-    FROM articles AS a
-    INNER JOIN feeds AS f ON a.feed_id = f.id
-    WHERE a.is_read = 0 AND f.is_subscribed = 1 AND f.user_id = ?
-)
-SELECT id, feed_id, guid, title, url, is_read, feed_id_2, feed_url, feed_title, feed_is_subscribed
-FROM ranked
-WHERE rn = 1
-ORDER BY id DESC
+SELECT
+    a.id, a.feed_id, a.guid, a.title, a.url, a.is_read,
+    f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed
+FROM articles AS a
+INNER JOIN feeds AS f ON a.feed_id = f.id
+WHERE a.is_read = 0 AND f.is_subscribed = 1 AND f.user_id = ?
+ORDER BY a.id DESC
 LIMIT 100;
 
 -- name: GetReadArticles :many
-WITH ranked AS (
-    SELECT
-        a.id, a.feed_id, a.guid, a.title, a.url, a.is_read,
-        f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed,
-        ROW_NUMBER() OVER (PARTITION BY a.guid ORDER BY a.id) as rn
-    FROM articles AS a
-    INNER JOIN feeds AS f ON a.feed_id = f.id
-    WHERE a.is_read = 1 AND f.is_subscribed = 1 AND f.user_id = ?
-)
-SELECT id, feed_id, guid, title, url, is_read, feed_id_2, feed_url, feed_title, feed_is_subscribed
-FROM ranked
-WHERE rn = 1
-ORDER BY id DESC
+SELECT
+    a.id, a.feed_id, a.guid, a.title, a.url, a.is_read,
+    f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed
+FROM articles AS a
+INNER JOIN feeds AS f ON a.feed_id = f.id
+WHERE a.is_read = 1 AND f.is_subscribed = 1 AND f.user_id = ?
+ORDER BY a.id DESC
 LIMIT 100;
 
 -- name: GetArticlesByFeed :many
@@ -83,3 +71,13 @@ SELECT EXISTS(
     SELECT 1 FROM articles
     WHERE feed_id = ? AND guid = ?
 ) as article_exists;
+
+-- name: CheckArticleExistsByGUID :one
+SELECT EXISTS(
+    SELECT 1 FROM articles
+    WHERE guid = ?
+) as article_exists;
+
+-- name: GetAllArticleGUIDs :many
+SELECT DISTINCT guid
+FROM articles;
-- 
cgit v1.2.3-70-g09d2