From 8524487824f7332223b24e75ab327bf6ec5eccc9 Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 12 Jan 2026 08:58:04 +0000 Subject: refactor: deduplicate articles at insertion time instead of query time Change deduplication strategy from query-time (ROW_NUMBER window function) to insertion-time (global guid check before insert). Benefits: - Simpler queries without CTE/window functions - Consistent read state (no duplicate articles to manage) - Better query performance (no per-query deduplication overhead) Changes: - Add CheckArticleExistsByGUID query for global guid lookup - Add migration to remove existing duplicate articles - Modify fetchOneFeed and AddFeed to skip duplicates on insert - Revert GetUnreadArticles/GetReadArticles to simple queries --- backend/cmd/serve.go | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'backend/cmd') diff --git a/backend/cmd/serve.go b/backend/cmd/serve.go index 28b1282..30d0702 100644 --- a/backend/cmd/serve.go +++ b/backend/cmd/serve.go @@ -47,16 +47,18 @@ func fetchOneFeed(feedID int64, url string, ctx context.Context, queries *db.Que if err != nil { return err } + // Get GUIDs for this feed (for updating existing articles) guids, err := queries.GetArticleGUIDsByFeed(ctx, feedID) if err != nil { return err } - existingArticleGUIDs := make(map[string]bool) + existingFeedGUIDs := make(map[string]bool) for _, guid := range guids { - existingArticleGUIDs[guid] = true + existingFeedGUIDs[guid] = true } for _, item := range feed.Items { - if existingArticleGUIDs[item.GUID] { + if existingFeedGUIDs[item.GUID] { + // Article exists in this feed, update it err := queries.UpdateArticle(ctx, db.UpdateArticleParams{ Title: item.Title, Url: item.Link, @@ -67,7 +69,17 @@ func fetchOneFeed(feedID int64, url string, ctx context.Context, queries *db.Que return err } } else { - _, err := queries.CreateArticle(ctx, db.CreateArticleParams{ + // Check if article with same GUID exists globally (in any feed) + exists, err := queries.CheckArticleExistsByGUID(ctx, item.GUID) + if err != nil { + return err + } + if exists == 1 { + // Article already exists in another feed, skip + continue + } + // Create new article + _, err = queries.CreateArticle(ctx, db.CreateArticleParams{ FeedID: feedID, Guid: item.GUID, Title: item.Title, -- cgit v1.2.3-70-g09d2