aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorClaude <noreply@anthropic.com>2026-01-12 08:58:04 +0000
committerClaude <noreply@anthropic.com>2026-01-12 08:58:04 +0000
commit8524487824f7332223b24e75ab327bf6ec5eccc9 (patch)
treec44fb10d82e34d55479cefc62496517c749db3b6
parent485486c7ff986712ecb09e92217236d276d317c4 (diff)
downloadfeedaka-8524487824f7332223b24e75ab327bf6ec5eccc9.tar.gz
feedaka-8524487824f7332223b24e75ab327bf6ec5eccc9.tar.zst
feedaka-8524487824f7332223b24e75ab327bf6ec5eccc9.zip
refactor: deduplicate articles at insertion time instead of query time
Change deduplication strategy from query-time (ROW_NUMBER window function) to insertion-time (global guid check before insert). Benefits: - Simpler queries without CTE/window functions - Consistent read state (no duplicate articles to manage) - Better query performance (no per-query deduplication overhead) Changes: - Add CheckArticleExistsByGUID query for global guid lookup - Add migration to remove existing duplicate articles - Modify fetchOneFeed and AddFeed to skip duplicates on insert - Revert GetUnreadArticles/GetReadArticles to simple queries
-rw-r--r--backend/cmd/serve.go20
-rw-r--r--backend/db/articles.sql.go82
-rw-r--r--backend/db/migrations/005_add_guid_index.sql10
-rw-r--r--backend/db/queries/articles.sql50
-rw-r--r--backend/graphql/resolver/schema.resolvers.go12
5 files changed, 116 insertions, 58 deletions
diff --git a/backend/cmd/serve.go b/backend/cmd/serve.go
index 28b1282..30d0702 100644
--- a/backend/cmd/serve.go
+++ b/backend/cmd/serve.go
@@ -47,16 +47,18 @@ func fetchOneFeed(feedID int64, url string, ctx context.Context, queries *db.Que
if err != nil {
return err
}
+ // Get GUIDs for this feed (for updating existing articles)
guids, err := queries.GetArticleGUIDsByFeed(ctx, feedID)
if err != nil {
return err
}
- existingArticleGUIDs := make(map[string]bool)
+ existingFeedGUIDs := make(map[string]bool)
for _, guid := range guids {
- existingArticleGUIDs[guid] = true
+ existingFeedGUIDs[guid] = true
}
for _, item := range feed.Items {
- if existingArticleGUIDs[item.GUID] {
+ if existingFeedGUIDs[item.GUID] {
+ // Article exists in this feed, update it
err := queries.UpdateArticle(ctx, db.UpdateArticleParams{
Title: item.Title,
Url: item.Link,
@@ -67,7 +69,17 @@ func fetchOneFeed(feedID int64, url string, ctx context.Context, queries *db.Que
return err
}
} else {
- _, err := queries.CreateArticle(ctx, db.CreateArticleParams{
+ // Check if article with same GUID exists globally (in any feed)
+ exists, err := queries.CheckArticleExistsByGUID(ctx, item.GUID)
+ if err != nil {
+ return err
+ }
+ if exists == 1 {
+ // Article already exists in another feed, skip
+ continue
+ }
+ // Create new article
+ _, err = queries.CreateArticle(ctx, db.CreateArticleParams{
FeedID: feedID,
Guid: item.GUID,
Title: item.Title,
diff --git a/backend/db/articles.sql.go b/backend/db/articles.sql.go
index 2f6f80d..41c3112 100644
--- a/backend/db/articles.sql.go
+++ b/backend/db/articles.sql.go
@@ -28,6 +28,48 @@ func (q *Queries) CheckArticleExists(ctx context.Context, arg CheckArticleExists
return article_exists, err
}
+const checkArticleExistsByGUID = `-- name: CheckArticleExistsByGUID :one
+SELECT EXISTS(
+ SELECT 1 FROM articles
+ WHERE guid = ?
+) as article_exists
+`
+
+func (q *Queries) CheckArticleExistsByGUID(ctx context.Context, guid string) (int64, error) {
+ row := q.db.QueryRowContext(ctx, checkArticleExistsByGUID, guid)
+ var article_exists int64
+ err := row.Scan(&article_exists)
+ return article_exists, err
+}
+
+const getAllArticleGUIDs = `-- name: GetAllArticleGUIDs :many
+SELECT DISTINCT guid
+FROM articles
+`
+
+func (q *Queries) GetAllArticleGUIDs(ctx context.Context) ([]string, error) {
+ rows, err := q.db.QueryContext(ctx, getAllArticleGUIDs)
+ if err != nil {
+ return nil, err
+ }
+ defer rows.Close()
+ items := []string{}
+ for rows.Next() {
+ var guid string
+ if err := rows.Scan(&guid); err != nil {
+ return nil, err
+ }
+ items = append(items, guid)
+ }
+ if err := rows.Close(); err != nil {
+ return nil, err
+ }
+ if err := rows.Err(); err != nil {
+ return nil, err
+ }
+ return items, nil
+}
+
const createArticle = `-- name: CreateArticle :one
INSERT INTO articles (feed_id, guid, title, url, is_read)
VALUES (?, ?, ?, ?, ?)
@@ -179,19 +221,13 @@ func (q *Queries) GetArticlesByFeed(ctx context.Context, feedID int64) ([]Articl
}
const getReadArticles = `-- name: GetReadArticles :many
-WITH ranked AS (
- SELECT
- a.id, a.feed_id, a.guid, a.title, a.url, a.is_read,
- f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed,
- ROW_NUMBER() OVER (PARTITION BY a.guid ORDER BY a.id) as rn
- FROM articles AS a
- INNER JOIN feeds AS f ON a.feed_id = f.id
- WHERE a.is_read = 1 AND f.is_subscribed = 1 AND f.user_id = ?
-)
-SELECT id, feed_id, guid, title, url, is_read, feed_id_2, feed_url, feed_title, feed_is_subscribed
-FROM ranked
-WHERE rn = 1
-ORDER BY id DESC
+SELECT
+ a.id, a.feed_id, a.guid, a.title, a.url, a.is_read,
+ f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed
+FROM articles AS a
+INNER JOIN feeds AS f ON a.feed_id = f.id
+WHERE a.is_read = 1 AND f.is_subscribed = 1 AND f.user_id = ?
+ORDER BY a.id DESC
LIMIT 100
`
@@ -243,19 +279,13 @@ func (q *Queries) GetReadArticles(ctx context.Context, userID int64) ([]GetReadA
}
const getUnreadArticles = `-- name: GetUnreadArticles :many
-WITH ranked AS (
- SELECT
- a.id, a.feed_id, a.guid, a.title, a.url, a.is_read,
- f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed,
- ROW_NUMBER() OVER (PARTITION BY a.guid ORDER BY a.id) as rn
- FROM articles AS a
- INNER JOIN feeds AS f ON a.feed_id = f.id
- WHERE a.is_read = 0 AND f.is_subscribed = 1 AND f.user_id = ?
-)
-SELECT id, feed_id, guid, title, url, is_read, feed_id_2, feed_url, feed_title, feed_is_subscribed
-FROM ranked
-WHERE rn = 1
-ORDER BY id DESC
+SELECT
+ a.id, a.feed_id, a.guid, a.title, a.url, a.is_read,
+ f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed
+FROM articles AS a
+INNER JOIN feeds AS f ON a.feed_id = f.id
+WHERE a.is_read = 0 AND f.is_subscribed = 1 AND f.user_id = ?
+ORDER BY a.id DESC
LIMIT 100
`
diff --git a/backend/db/migrations/005_add_guid_index.sql b/backend/db/migrations/005_add_guid_index.sql
index a653d79..e3625ee 100644
--- a/backend/db/migrations/005_add_guid_index.sql
+++ b/backend/db/migrations/005_add_guid_index.sql
@@ -1,2 +1,10 @@
--- Add index on guid for deduplication queries
+-- Add index on guid for deduplication
CREATE INDEX IF NOT EXISTS idx_articles_guid ON articles(guid);
+
+-- Remove duplicate articles by guid, keeping only the one with the smallest id
+DELETE FROM articles
+WHERE id NOT IN (
+ SELECT MIN(id)
+ FROM articles
+ GROUP BY guid
+);
diff --git a/backend/db/queries/articles.sql b/backend/db/queries/articles.sql
index bd4cd91..134b967 100644
--- a/backend/db/queries/articles.sql
+++ b/backend/db/queries/articles.sql
@@ -7,35 +7,23 @@ INNER JOIN feeds AS f ON a.feed_id = f.id
WHERE a.id = ?;
-- name: GetUnreadArticles :many
-WITH ranked AS (
- SELECT
- a.id, a.feed_id, a.guid, a.title, a.url, a.is_read,
- f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed,
- ROW_NUMBER() OVER (PARTITION BY a.guid ORDER BY a.id) as rn
- FROM articles AS a
- INNER JOIN feeds AS f ON a.feed_id = f.id
- WHERE a.is_read = 0 AND f.is_subscribed = 1 AND f.user_id = ?
-)
-SELECT id, feed_id, guid, title, url, is_read, feed_id_2, feed_url, feed_title, feed_is_subscribed
-FROM ranked
-WHERE rn = 1
-ORDER BY id DESC
+SELECT
+ a.id, a.feed_id, a.guid, a.title, a.url, a.is_read,
+ f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed
+FROM articles AS a
+INNER JOIN feeds AS f ON a.feed_id = f.id
+WHERE a.is_read = 0 AND f.is_subscribed = 1 AND f.user_id = ?
+ORDER BY a.id DESC
LIMIT 100;
-- name: GetReadArticles :many
-WITH ranked AS (
- SELECT
- a.id, a.feed_id, a.guid, a.title, a.url, a.is_read,
- f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed,
- ROW_NUMBER() OVER (PARTITION BY a.guid ORDER BY a.id) as rn
- FROM articles AS a
- INNER JOIN feeds AS f ON a.feed_id = f.id
- WHERE a.is_read = 1 AND f.is_subscribed = 1 AND f.user_id = ?
-)
-SELECT id, feed_id, guid, title, url, is_read, feed_id_2, feed_url, feed_title, feed_is_subscribed
-FROM ranked
-WHERE rn = 1
-ORDER BY id DESC
+SELECT
+ a.id, a.feed_id, a.guid, a.title, a.url, a.is_read,
+ f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed
+FROM articles AS a
+INNER JOIN feeds AS f ON a.feed_id = f.id
+WHERE a.is_read = 1 AND f.is_subscribed = 1 AND f.user_id = ?
+ORDER BY a.id DESC
LIMIT 100;
-- name: GetArticlesByFeed :many
@@ -83,3 +71,13 @@ SELECT EXISTS(
SELECT 1 FROM articles
WHERE feed_id = ? AND guid = ?
) as article_exists;
+
+-- name: CheckArticleExistsByGUID :one
+SELECT EXISTS(
+ SELECT 1 FROM articles
+ WHERE guid = ?
+) as article_exists;
+
+-- name: GetAllArticleGUIDs :many
+SELECT DISTINCT guid
+FROM articles;
diff --git a/backend/graphql/resolver/schema.resolvers.go b/backend/graphql/resolver/schema.resolvers.go
index 46c39e7..c3f6f0a 100644
--- a/backend/graphql/resolver/schema.resolvers.go
+++ b/backend/graphql/resolver/schema.resolvers.go
@@ -43,8 +43,18 @@ func (r *mutationResolver) AddFeed(ctx context.Context, url string) (*model.Feed
return nil, fmt.Errorf("failed to insert feed: %w", err)
}
- // Insert articles from the feed
+ // Insert articles from the feed (skip duplicates by guid)
for _, item := range feed.Items {
+ // Check if article with same GUID already exists globally
+ exists, err := r.Queries.CheckArticleExistsByGUID(ctx, item.GUID)
+ if err != nil {
+ fmt.Printf("Failed to check article existence: %v\n", err)
+ continue
+ }
+ if exists == 1 {
+ // Article already exists, skip
+ continue
+ }
_, err = r.Queries.CreateArticle(ctx, db.CreateArticleParams{
FeedID: dbFeed.ID,
Guid: item.GUID,