diff options
| author | Claude <noreply@anthropic.com> | 2026-01-12 08:58:04 +0000 |
|---|---|---|
| committer | Claude <noreply@anthropic.com> | 2026-01-12 08:58:04 +0000 |
| commit | 8524487824f7332223b24e75ab327bf6ec5eccc9 (patch) | |
| tree | c44fb10d82e34d55479cefc62496517c749db3b6 /backend/db | |
| parent | 485486c7ff986712ecb09e92217236d276d317c4 (diff) | |
| download | feedaka-8524487824f7332223b24e75ab327bf6ec5eccc9.tar.gz feedaka-8524487824f7332223b24e75ab327bf6ec5eccc9.tar.zst feedaka-8524487824f7332223b24e75ab327bf6ec5eccc9.zip | |
refactor: deduplicate articles at insertion time instead of query time
Change deduplication strategy from query-time (ROW_NUMBER window function)
to insertion-time (global guid check before insert).
Benefits:
- Simpler queries without CTE/window functions
- Consistent read state (no duplicate articles to manage)
- Better query performance (no per-query deduplication overhead)
Changes:
- Add CheckArticleExistsByGUID query for global guid lookup
- Add migration to remove existing duplicate articles
- Modify fetchOneFeed and AddFeed to skip duplicates on insert
- Revert GetUnreadArticles/GetReadArticles to simple queries
Diffstat (limited to 'backend/db')
| -rw-r--r-- | backend/db/articles.sql.go | 82 | ||||
| -rw-r--r-- | backend/db/migrations/005_add_guid_index.sql | 10 | ||||
| -rw-r--r-- | backend/db/queries/articles.sql | 50 |
3 files changed, 89 insertions, 53 deletions
diff --git a/backend/db/articles.sql.go b/backend/db/articles.sql.go index 2f6f80d..41c3112 100644 --- a/backend/db/articles.sql.go +++ b/backend/db/articles.sql.go @@ -28,6 +28,48 @@ func (q *Queries) CheckArticleExists(ctx context.Context, arg CheckArticleExists return article_exists, err } +const checkArticleExistsByGUID = `-- name: CheckArticleExistsByGUID :one +SELECT EXISTS( + SELECT 1 FROM articles + WHERE guid = ? +) as article_exists +` + +func (q *Queries) CheckArticleExistsByGUID(ctx context.Context, guid string) (int64, error) { + row := q.db.QueryRowContext(ctx, checkArticleExistsByGUID, guid) + var article_exists int64 + err := row.Scan(&article_exists) + return article_exists, err +} + +const getAllArticleGUIDs = `-- name: GetAllArticleGUIDs :many +SELECT DISTINCT guid +FROM articles +` + +func (q *Queries) GetAllArticleGUIDs(ctx context.Context) ([]string, error) { + rows, err := q.db.QueryContext(ctx, getAllArticleGUIDs) + if err != nil { + return nil, err + } + defer rows.Close() + items := []string{} + for rows.Next() { + var guid string + if err := rows.Scan(&guid); err != nil { + return nil, err + } + items = append(items, guid) + } + if err := rows.Close(); err != nil { + return nil, err + } + if err := rows.Err(); err != nil { + return nil, err + } + return items, nil +} + const createArticle = `-- name: CreateArticle :one INSERT INTO articles (feed_id, guid, title, url, is_read) VALUES (?, ?, ?, ?, ?) @@ -179,19 +221,13 @@ func (q *Queries) GetArticlesByFeed(ctx context.Context, feedID int64) ([]Articl } const getReadArticles = `-- name: GetReadArticles :many -WITH ranked AS ( - SELECT - a.id, a.feed_id, a.guid, a.title, a.url, a.is_read, - f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed, - ROW_NUMBER() OVER (PARTITION BY a.guid ORDER BY a.id) as rn - FROM articles AS a - INNER JOIN feeds AS f ON a.feed_id = f.id - WHERE a.is_read = 1 AND f.is_subscribed = 1 AND f.user_id = ? -) -SELECT id, feed_id, guid, title, url, is_read, feed_id_2, feed_url, feed_title, feed_is_subscribed -FROM ranked -WHERE rn = 1 -ORDER BY id DESC +SELECT + a.id, a.feed_id, a.guid, a.title, a.url, a.is_read, + f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed +FROM articles AS a +INNER JOIN feeds AS f ON a.feed_id = f.id +WHERE a.is_read = 1 AND f.is_subscribed = 1 AND f.user_id = ? +ORDER BY a.id DESC LIMIT 100 ` @@ -243,19 +279,13 @@ func (q *Queries) GetReadArticles(ctx context.Context, userID int64) ([]GetReadA } const getUnreadArticles = `-- name: GetUnreadArticles :many -WITH ranked AS ( - SELECT - a.id, a.feed_id, a.guid, a.title, a.url, a.is_read, - f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed, - ROW_NUMBER() OVER (PARTITION BY a.guid ORDER BY a.id) as rn - FROM articles AS a - INNER JOIN feeds AS f ON a.feed_id = f.id - WHERE a.is_read = 0 AND f.is_subscribed = 1 AND f.user_id = ? -) -SELECT id, feed_id, guid, title, url, is_read, feed_id_2, feed_url, feed_title, feed_is_subscribed -FROM ranked -WHERE rn = 1 -ORDER BY id DESC +SELECT + a.id, a.feed_id, a.guid, a.title, a.url, a.is_read, + f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed +FROM articles AS a +INNER JOIN feeds AS f ON a.feed_id = f.id +WHERE a.is_read = 0 AND f.is_subscribed = 1 AND f.user_id = ? +ORDER BY a.id DESC LIMIT 100 ` diff --git a/backend/db/migrations/005_add_guid_index.sql b/backend/db/migrations/005_add_guid_index.sql index a653d79..e3625ee 100644 --- a/backend/db/migrations/005_add_guid_index.sql +++ b/backend/db/migrations/005_add_guid_index.sql @@ -1,2 +1,10 @@ --- Add index on guid for deduplication queries +-- Add index on guid for deduplication CREATE INDEX IF NOT EXISTS idx_articles_guid ON articles(guid); + +-- Remove duplicate articles by guid, keeping only the one with the smallest id +DELETE FROM articles +WHERE id NOT IN ( + SELECT MIN(id) + FROM articles + GROUP BY guid +); diff --git a/backend/db/queries/articles.sql b/backend/db/queries/articles.sql index bd4cd91..134b967 100644 --- a/backend/db/queries/articles.sql +++ b/backend/db/queries/articles.sql @@ -7,35 +7,23 @@ INNER JOIN feeds AS f ON a.feed_id = f.id WHERE a.id = ?; -- name: GetUnreadArticles :many -WITH ranked AS ( - SELECT - a.id, a.feed_id, a.guid, a.title, a.url, a.is_read, - f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed, - ROW_NUMBER() OVER (PARTITION BY a.guid ORDER BY a.id) as rn - FROM articles AS a - INNER JOIN feeds AS f ON a.feed_id = f.id - WHERE a.is_read = 0 AND f.is_subscribed = 1 AND f.user_id = ? -) -SELECT id, feed_id, guid, title, url, is_read, feed_id_2, feed_url, feed_title, feed_is_subscribed -FROM ranked -WHERE rn = 1 -ORDER BY id DESC +SELECT + a.id, a.feed_id, a.guid, a.title, a.url, a.is_read, + f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed +FROM articles AS a +INNER JOIN feeds AS f ON a.feed_id = f.id +WHERE a.is_read = 0 AND f.is_subscribed = 1 AND f.user_id = ? +ORDER BY a.id DESC LIMIT 100; -- name: GetReadArticles :many -WITH ranked AS ( - SELECT - a.id, a.feed_id, a.guid, a.title, a.url, a.is_read, - f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed, - ROW_NUMBER() OVER (PARTITION BY a.guid ORDER BY a.id) as rn - FROM articles AS a - INNER JOIN feeds AS f ON a.feed_id = f.id - WHERE a.is_read = 1 AND f.is_subscribed = 1 AND f.user_id = ? -) -SELECT id, feed_id, guid, title, url, is_read, feed_id_2, feed_url, feed_title, feed_is_subscribed -FROM ranked -WHERE rn = 1 -ORDER BY id DESC +SELECT + a.id, a.feed_id, a.guid, a.title, a.url, a.is_read, + f.id as feed_id_2, f.url as feed_url, f.title as feed_title, f.is_subscribed as feed_is_subscribed +FROM articles AS a +INNER JOIN feeds AS f ON a.feed_id = f.id +WHERE a.is_read = 1 AND f.is_subscribed = 1 AND f.user_id = ? +ORDER BY a.id DESC LIMIT 100; -- name: GetArticlesByFeed :many @@ -83,3 +71,13 @@ SELECT EXISTS( SELECT 1 FROM articles WHERE feed_id = ? AND guid = ? ) as article_exists; + +-- name: CheckArticleExistsByGUID :one +SELECT EXISTS( + SELECT 1 FROM articles + WHERE guid = ? +) as article_exists; + +-- name: GetAllArticleGUIDs :many +SELECT DISTINCT guid +FROM articles; |
