From 042fcb5c4eac16f18fc051f55a6c63ca9e97306b Mon Sep 17 00:00:00 2001 From: nsfisis Date: Sat, 14 Feb 2026 12:20:31 +0900 Subject: feat(feed): auto-discover feed URLs from HTML pages When an HTML page is provided instead of a direct feed URL, parse <link> tags to find RSS/Atom feeds. Atom is preferred over RSS when both are present. Co-Authored-By: Claude Opus 4.6 --- backend/feed/feed.go | 63 ++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 59 insertions(+), 4 deletions(-) (limited to 'backend/feed/feed.go') diff --git a/backend/feed/feed.go b/backend/feed/feed.go index 4349d1e..2d84798 100644 --- a/backend/feed/feed.go +++ b/backend/feed/feed.go @@ -2,7 +2,10 @@ package feed import ( "context" + "errors" "fmt" + "net/http" + "net/url" "time" "github.com/mmcdole/gofeed" @@ -10,15 +13,67 @@ import ( "undef.ninja/x/feedaka/db" ) -func Fetch(ctx context.Context, url string) (*gofeed.Feed, error) { +// FetchResult holds the result of fetching a feed, including the resolved URL. 
+type FetchResult struct { + Feed *gofeed.Feed + URL string +} + +func Fetch(ctx context.Context, rawURL string) (*FetchResult, error) { fp := gofeed.NewParser() ctx, cancel := context.WithTimeout(ctx, 10*time.Second) defer cancel() - feed, err := fp.ParseURLWithContext(url, ctx) + + f, err := fp.ParseURLWithContext(rawURL, ctx) + if err == nil { + return &FetchResult{Feed: f, URL: rawURL}, nil + } + + if !errors.Is(err, gofeed.ErrFeedTypeNotDetected) { + return nil, fmt.Errorf("failed to fetch %s: %w", rawURL, err) + } + + discoveredURL, discoverErr := discoverFeedURL(ctx, rawURL) + if discoverErr != nil { + return nil, fmt.Errorf("failed to fetch %s: not a feed and auto-discovery failed: %w", rawURL, discoverErr) + } + + f, err = fp.ParseURLWithContext(discoveredURL, ctx) if err != nil { - return nil, fmt.Errorf("failed to fetch %s: %w", url, err) + return nil, fmt.Errorf("failed to fetch discovered feed %s: %w", discoveredURL, err) } - return feed, nil + + return &FetchResult{Feed: f, URL: discoveredURL}, nil +} + +func discoverFeedURL(ctx context.Context, rawURL string) (string, error) { + req, err := http.NewRequestWithContext(ctx, "GET", rawURL, nil) + if err != nil { + return "", err + } + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + + if resp.StatusCode < 200 || resp.StatusCode >= 300 { + return "", fmt.Errorf("HTTP %d", resp.StatusCode) + } + + base, err := url.Parse(rawURL) + if err != nil { + return "", err + } + + links := discoverFeeds(resp.Body, base) + feedURL := selectFeed(links) + if feedURL == "" { + return "", fmt.Errorf("no feed links found in HTML") + } + + return feedURL, nil } func Sync(ctx context.Context, queries *db.Queries, feedID int64, f *gofeed.Feed) error { -- cgit v1.3-1-g0d28