diff options
| author | nsfisis <54318333+nsfisis@users.noreply.github.com> | 2026-01-12 18:13:30 +0900 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2026-01-12 18:13:30 +0900 |
| commit | 8ca7887979bb045eea6331360b2bedc6de7f3d34 (patch) | |
| tree | fbf632ed9c179dd109afa6e1749a446f540a45c4 | |
| parent | 67bfaf8f103295c5562db02a0044e311e3504518 (diff) | |
| parent | 86254f8eb7f812915464c8733f7017a6e0a3dd4d (diff) | |
| download | feedaka-8ca7887979bb045eea6331360b2bedc6de7f3d34.tar.gz feedaka-8ca7887979bb045eea6331360b2bedc6de7f3d34.tar.zst feedaka-8ca7887979bb045eea6331360b2bedc6de7f3d34.zip | |
Prevent duplicate feed items with same GUID
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | backend/cmd/serve.go | 20 |
| -rw-r--r-- | backend/db/articles.sql.go | 14 |
| -rw-r--r-- | backend/db/migrations/005_add_guid_index.sql | 10 |
| -rw-r--r-- | backend/db/queries/articles.sql | 6 |
| -rw-r--r-- | backend/db/schema.sql | 2 |
| -rw-r--r-- | backend/graphql/resolver/schema.resolvers.go | 12 |

6 files changed, 59 insertions, 5 deletions
```diff
diff --git a/backend/cmd/serve.go b/backend/cmd/serve.go
index 28b1282..30d0702 100644
--- a/backend/cmd/serve.go
+++ b/backend/cmd/serve.go
@@ -47,16 +47,18 @@ func fetchOneFeed(feedID int64, url string, ctx context.Context, queries *db.Que
 	if err != nil {
 		return err
 	}
+	// Get GUIDs for this feed (for updating existing articles)
 	guids, err := queries.GetArticleGUIDsByFeed(ctx, feedID)
 	if err != nil {
 		return err
 	}
-	existingArticleGUIDs := make(map[string]bool)
+	existingFeedGUIDs := make(map[string]bool)
 	for _, guid := range guids {
-		existingArticleGUIDs[guid] = true
+		existingFeedGUIDs[guid] = true
 	}
 	for _, item := range feed.Items {
-		if existingArticleGUIDs[item.GUID] {
+		if existingFeedGUIDs[item.GUID] {
+			// Article exists in this feed, update it
 			err := queries.UpdateArticle(ctx, db.UpdateArticleParams{
 				Title: item.Title,
 				Url:   item.Link,
@@ -67,7 +69,17 @@ func fetchOneFeed(feedID int64, url string, ctx context.Context, queries *db.Que
 				return err
 			}
 		} else {
-			_, err := queries.CreateArticle(ctx, db.CreateArticleParams{
+			// Check if article with same GUID exists globally (in any feed)
+			exists, err := queries.CheckArticleExistsByGUID(ctx, item.GUID)
+			if err != nil {
+				return err
+			}
+			if exists == 1 {
+				// Article already exists in another feed, skip
+				continue
+			}
+			// Create new article
+			_, err = queries.CreateArticle(ctx, db.CreateArticleParams{
 				FeedID: feedID,
 				Guid:   item.GUID,
 				Title:  item.Title,
diff --git a/backend/db/articles.sql.go b/backend/db/articles.sql.go
index bf6ea1d..7f6400b 100644
--- a/backend/db/articles.sql.go
+++ b/backend/db/articles.sql.go
@@ -28,6 +28,20 @@ func (q *Queries) CheckArticleExists(ctx context.Context, arg CheckArticleExists
 	return article_exists, err
 }
 
+const checkArticleExistsByGUID = `-- name: CheckArticleExistsByGUID :one
+SELECT EXISTS(
+    SELECT 1 FROM articles
+    WHERE guid = ?
+) as article_exists
+`
+
+func (q *Queries) CheckArticleExistsByGUID(ctx context.Context, guid string) (int64, error) {
+	row := q.db.QueryRowContext(ctx, checkArticleExistsByGUID, guid)
+	var article_exists int64
+	err := row.Scan(&article_exists)
+	return article_exists, err
+}
+
 const createArticle = `-- name: CreateArticle :one
 INSERT INTO articles (feed_id, guid, title, url, is_read)
 VALUES (?, ?, ?, ?, ?)
diff --git a/backend/db/migrations/005_add_guid_index.sql b/backend/db/migrations/005_add_guid_index.sql
new file mode 100644
index 0000000..e3625ee
--- /dev/null
+++ b/backend/db/migrations/005_add_guid_index.sql
@@ -0,0 +1,10 @@
+-- Add index on guid for deduplication
+CREATE INDEX IF NOT EXISTS idx_articles_guid ON articles(guid);
+
+-- Remove duplicate articles by guid, keeping only the one with the smallest id
+DELETE FROM articles
+WHERE id NOT IN (
+    SELECT MIN(id)
+    FROM articles
+    GROUP BY guid
+);
diff --git a/backend/db/queries/articles.sql b/backend/db/queries/articles.sql
index 5acdada..2c00678 100644
--- a/backend/db/queries/articles.sql
+++ b/backend/db/queries/articles.sql
@@ -71,3 +71,9 @@ SELECT EXISTS(
     SELECT 1 FROM articles
     WHERE feed_id = ? AND guid = ?
 ) as article_exists;
+
+-- name: CheckArticleExistsByGUID :one
+SELECT EXISTS(
+    SELECT 1 FROM articles
+    WHERE guid = ?
+) as article_exists;
diff --git a/backend/db/schema.sql b/backend/db/schema.sql
index adf3b37..07ac72d 100644
--- a/backend/db/schema.sql
+++ b/backend/db/schema.sql
@@ -34,4 +34,6 @@ CREATE INDEX IF NOT EXISTS idx_articles_feed_guid ON articles(feed_id, guid);
 
 CREATE INDEX IF NOT EXISTS idx_articles_is_read ON articles(is_read);
 
+CREATE INDEX IF NOT EXISTS idx_articles_guid ON articles(guid);
+
 CREATE INDEX IF NOT EXISTS idx_feeds_user_id ON feeds(user_id);
diff --git a/backend/graphql/resolver/schema.resolvers.go b/backend/graphql/resolver/schema.resolvers.go
index 46c39e7..c3f6f0a 100644
--- a/backend/graphql/resolver/schema.resolvers.go
+++ b/backend/graphql/resolver/schema.resolvers.go
@@ -43,8 +43,18 @@ func (r *mutationResolver) AddFeed(ctx context.Context, url string) (*model.Feed
 		return nil, fmt.Errorf("failed to insert feed: %w", err)
 	}
 
-	// Insert articles from the feed
+	// Insert articles from the feed (skip duplicates by guid)
 	for _, item := range feed.Items {
+		// Check if article with same GUID already exists globally
+		exists, err := r.Queries.CheckArticleExistsByGUID(ctx, item.GUID)
+		if err != nil {
+			fmt.Printf("Failed to check article existence: %v\n", err)
+			continue
+		}
+		if exists == 1 {
+			// Article already exists, skip
+			continue
+		}
 		_, err = r.Queries.CreateArticle(ctx, db.CreateArticleParams{
 			FeedID: dbFeed.ID,
 			Guid:   item.GUID,
```
