diff options
| author | nsfisis <nsfisis@gmail.com> | 2026-02-14 12:20:31 +0900 |
|---|---|---|
| committer | nsfisis <nsfisis@gmail.com> | 2026-02-14 12:20:31 +0900 |
| commit | 042fcb5c4eac16f18fc051f55a6c63ca9e97306b (patch) | |
| tree | 1a61d1f7690e933a8d1e452e744ac02db14af042 /backend/feed/discover.go | |
| parent | fffd36268a216044523c3f5227c3d375608c36dc (diff) | |
| download | feedaka-042fcb5c4eac16f18fc051f55a6c63ca9e97306b.tar.gz feedaka-042fcb5c4eac16f18fc051f55a6c63ca9e97306b.tar.zst feedaka-042fcb5c4eac16f18fc051f55a6c63ca9e97306b.zip | |
feat(feed): auto-discover feed URLs from HTML pages
When an HTML page is provided instead of a direct feed URL, parse
<link rel="alternate"> tags to find RSS/Atom feeds. Atom is preferred
over RSS when both are present.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Diffstat (limited to 'backend/feed/discover.go')
| -rw-r--r-- | backend/feed/discover.go | 94 |
1 files changed, 94 insertions, 0 deletions
diff --git a/backend/feed/discover.go b/backend/feed/discover.go new file mode 100644 index 0000000..962cbcd --- /dev/null +++ b/backend/feed/discover.go @@ -0,0 +1,94 @@ +package feed + +import ( + "io" + "net/url" + "strings" + + "golang.org/x/net/html" +) + +type feedLink struct { + URL string + Type string // "atom" or "rss" +} + +// discoverFeeds parses an HTML document and extracts feed URLs from +// <link rel="alternate"> tags. It resolves relative URLs against baseURL. +func discoverFeeds(body io.Reader, baseURL *url.URL) []feedLink { + var links []feedLink + z := html.NewTokenizer(body) + for { + tt := z.Next() + switch tt { + case html.ErrorToken: + return links + case html.StartTagToken, html.SelfClosingTagToken: + tn, _ := z.TagName() + tagName := string(tn) + + if tagName == "body" { + return links + } + if tagName != "link" { + continue + } + + attrs := tokenAttrs(z) + rel := strings.ToLower(attrs["rel"]) + typ := strings.ToLower(attrs["type"]) + href := attrs["href"] + + if rel != "alternate" || href == "" { + continue + } + + var feedType string + switch typ { + case "application/atom+xml": + feedType = "atom" + case "application/rss+xml": + feedType = "rss" + default: + continue + } + + ref, err := url.Parse(href) + if err != nil { + continue + } + resolved := baseURL.ResolveReference(ref).String() + links = append(links, feedLink{URL: resolved, Type: feedType}) + } + } +} + +// selectFeed picks the best feed URL from discovered links. +// Prefers Atom over RSS. +func selectFeed(links []feedLink) string { + for _, l := range links { + if l.Type == "atom" { + return l.URL + } + } + for _, l := range links { + if l.Type == "rss" { + return l.URL + } + } + return "" +} + +func tokenAttrs(z *html.Tokenizer) map[string]string { + attrs := make(map[string]string) + for { + key, val, more := z.TagAttr() + if len(key) > 0 { + attrs[string(key)] = string(val) + } + if !more { + break + } + } + return attrs +} |
