From 8524487824f7332223b24e75ab327bf6ec5eccc9 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 12 Jan 2026 08:58:04 +0000 Subject: refactor: deduplicate articles at insertion time instead of query time Change deduplication strategy from query-time (ROW_NUMBER window function) to insertion-time (global guid check before insert). Benefits: - Simpler queries without CTE/window functions - Consistent read state (no duplicate articles to manage) - Better query performance (no per-query deduplication overhead) Changes: - Add CheckArticleExistsByGUID query for global guid lookup - Add migration to remove existing duplicate articles - Modify fetchOneFeed and AddFeed to skip duplicates on insert - Revert GetUnreadArticles/GetReadArticles to simple queries --- backend/db/migrations/005_add_guid_index.sql | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'backend/db/migrations') diff --git a/backend/db/migrations/005_add_guid_index.sql b/backend/db/migrations/005_add_guid_index.sql index a653d79..e3625ee 100644 --- a/backend/db/migrations/005_add_guid_index.sql +++ b/backend/db/migrations/005_add_guid_index.sql @@ -1,2 +1,10 @@ --- Add index on guid for deduplication queries +-- Add index on guid for deduplication CREATE INDEX IF NOT EXISTS idx_articles_guid ON articles(guid); + +-- Remove duplicate articles by guid, keeping only the one with the smallest id +DELETE FROM articles +WHERE id NOT IN ( + SELECT MIN(id) + FROM articles + GROUP BY guid +); -- cgit v1.2.3-70-g09d2