diff options
| author | nsfisis <nsfisis@gmail.com> | 2026-02-14 12:20:31 +0900 |
|---|---|---|
| committer | nsfisis <nsfisis@gmail.com> | 2026-02-14 12:20:31 +0900 |
| commit | 042fcb5c4eac16f18fc051f55a6c63ca9e97306b (patch) | |
| tree | 1a61d1f7690e933a8d1e452e744ac02db14af042 /backend/feed/feed.go | |
| parent | fffd36268a216044523c3f5227c3d375608c36dc (diff) | |
| download | feedaka-042fcb5c4eac16f18fc051f55a6c63ca9e97306b.tar.gz feedaka-042fcb5c4eac16f18fc051f55a6c63ca9e97306b.tar.zst feedaka-042fcb5c4eac16f18fc051f55a6c63ca9e97306b.zip | |
feat(feed): auto-discover feed URLs from HTML pages
When an HTML page is provided instead of a direct feed URL, parse
<link rel="alternate"> tags to find RSS/Atom feeds. Atom is preferred
over RSS when both are present.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat (limited to 'backend/feed/feed.go')
| -rw-r--r-- | backend/feed/feed.go | 63 |
1 file changed, 59 insertions, 4 deletions
```diff
diff --git a/backend/feed/feed.go b/backend/feed/feed.go
index 4349d1e..2d84798 100644
--- a/backend/feed/feed.go
+++ b/backend/feed/feed.go
@@ -2,7 +2,10 @@ package feed
 
 import (
 	"context"
+	"errors"
 	"fmt"
+	"net/http"
+	"net/url"
 	"time"
 
 	"github.com/mmcdole/gofeed"
@@ -10,15 +13,67 @@ import (
 	"undef.ninja/x/feedaka/db"
 )
 
-func Fetch(ctx context.Context, url string) (*gofeed.Feed, error) {
+// FetchResult holds the result of fetching a feed, including the resolved URL.
+type FetchResult struct {
+	Feed *gofeed.Feed
+	URL  string
+}
+
+func Fetch(ctx context.Context, rawURL string) (*FetchResult, error) {
 	fp := gofeed.NewParser()
 	ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
 	defer cancel()
-	feed, err := fp.ParseURLWithContext(url, ctx)
+
+	f, err := fp.ParseURLWithContext(rawURL, ctx)
+	if err == nil {
+		return &FetchResult{Feed: f, URL: rawURL}, nil
+	}
+
+	if !errors.Is(err, gofeed.ErrFeedTypeNotDetected) {
+		return nil, fmt.Errorf("failed to fetch %s: %w", rawURL, err)
+	}
+
+	discoveredURL, discoverErr := discoverFeedURL(ctx, rawURL)
+	if discoverErr != nil {
+		return nil, fmt.Errorf("failed to fetch %s: not a feed and auto-discovery failed: %w", rawURL, discoverErr)
+	}
+
+	f, err = fp.ParseURLWithContext(discoveredURL, ctx)
 	if err != nil {
-		return nil, fmt.Errorf("failed to fetch %s: %w", url, err)
+		return nil, fmt.Errorf("failed to fetch discovered feed %s: %w", discoveredURL, err)
 	}
-	return feed, nil
+
+	return &FetchResult{Feed: f, URL: discoveredURL}, nil
+}
+
+func discoverFeedURL(ctx context.Context, rawURL string) (string, error) {
+	req, err := http.NewRequestWithContext(ctx, "GET", rawURL, nil)
+	if err != nil {
+		return "", err
+	}
+
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return "", err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+		return "", fmt.Errorf("HTTP %d", resp.StatusCode)
+	}
+
+	base, err := url.Parse(rawURL)
+	if err != nil {
+		return "", err
+	}
+
+	links := discoverFeeds(resp.Body, base)
+	feedURL := selectFeed(links)
+	if feedURL == "" {
+		return "", fmt.Errorf("no feed links found in HTML")
+	}
+
+	return feedURL, nil
 }
 
 func Sync(ctx context.Context, queries *db.Queries, feedID int64, f *gofeed.Feed) error {
```
