diff options
| author | Claude <noreply@anthropic.com> | 2026-01-12 08:58:04 +0000 |
|---|---|---|
| committer | Claude <noreply@anthropic.com> | 2026-01-12 08:58:04 +0000 |
| commit | 8524487824f7332223b24e75ab327bf6ec5eccc9 (patch) | |
| tree | c44fb10d82e34d55479cefc62496517c749db3b6 /backend/cmd | |
| parent | 485486c7ff986712ecb09e92217236d276d317c4 (diff) | |
| download | feedaka-8524487824f7332223b24e75ab327bf6ec5eccc9.tar.gz feedaka-8524487824f7332223b24e75ab327bf6ec5eccc9.tar.zst feedaka-8524487824f7332223b24e75ab327bf6ec5eccc9.zip | |
refactor: deduplicate articles at insertion time instead of query time
Change deduplication strategy from query-time (ROW_NUMBER window function)
to insertion-time (global GUID check before insert).
Benefits:
- Simpler queries without CTE/window functions
- Consistent read state (no duplicate articles to manage)
- Better query performance (no per-query deduplication overhead)
Changes:
- Add CheckArticleExistsByGUID query for global GUID lookup
- Add migration to remove existing duplicate articles
- Modify fetchOneFeed and AddFeed to skip duplicates on insert
- Revert GetUnreadArticles/GetReadArticles to simple queries
Diffstat (limited to 'backend/cmd')
| -rw-r--r-- | backend/cmd/serve.go | 20 |
1 file changed, 16 insertions, 4 deletions
```diff
diff --git a/backend/cmd/serve.go b/backend/cmd/serve.go
index 28b1282..30d0702 100644
--- a/backend/cmd/serve.go
+++ b/backend/cmd/serve.go
@@ -47,16 +47,18 @@ func fetchOneFeed(feedID int64, url string, ctx context.Context, queries *db.Que
 	if err != nil {
 		return err
 	}
+	// Get GUIDs for this feed (for updating existing articles)
 	guids, err := queries.GetArticleGUIDsByFeed(ctx, feedID)
 	if err != nil {
 		return err
 	}
-	existingArticleGUIDs := make(map[string]bool)
+	existingFeedGUIDs := make(map[string]bool)
 	for _, guid := range guids {
-		existingArticleGUIDs[guid] = true
+		existingFeedGUIDs[guid] = true
 	}
 	for _, item := range feed.Items {
-		if existingArticleGUIDs[item.GUID] {
+		if existingFeedGUIDs[item.GUID] {
+			// Article exists in this feed, update it
 			err := queries.UpdateArticle(ctx, db.UpdateArticleParams{
 				Title: item.Title,
 				Url: item.Link,
@@ -67,7 +69,17 @@ func fetchOneFeed(feedID int64, url string, ctx context.Context, queries *db.Que
 				return err
 			}
 		} else {
-			_, err := queries.CreateArticle(ctx, db.CreateArticleParams{
+			// Check if article with same GUID exists globally (in any feed)
+			exists, err := queries.CheckArticleExistsByGUID(ctx, item.GUID)
+			if err != nil {
+				return err
+			}
+			if exists == 1 {
+				// Article already exists in another feed, skip
+				continue
+			}
+			// Create new article
+			_, err = queries.CreateArticle(ctx, db.CreateArticleParams{
 				FeedID: feedID,
 				Guid: item.GUID,
 				Title: item.Title,
```
