diff options
Diffstat (limited to 'backend/feed/discover.go')
| -rw-r--r-- | backend/feed/discover.go | 94 |
1 files changed, 94 insertions, 0 deletions
diff --git a/backend/feed/discover.go b/backend/feed/discover.go new file mode 100644 index 0000000..962cbcd --- /dev/null +++ b/backend/feed/discover.go @@ -0,0 +1,94 @@ +package feed + +import ( + "io" + "net/url" + "strings" + + "golang.org/x/net/html" +) + +type feedLink struct { + URL string + Type string // "atom" or "rss" +} + +// discoverFeeds parses an HTML document and extracts feed URLs from +// <link rel="alternate"> tags. It resolves relative URLs against baseURL. +func discoverFeeds(body io.Reader, baseURL *url.URL) []feedLink { + var links []feedLink + z := html.NewTokenizer(body) + for { + tt := z.Next() + switch tt { + case html.ErrorToken: + return links + case html.StartTagToken, html.SelfClosingTagToken: + tn, _ := z.TagName() + tagName := string(tn) + + if tagName == "body" { + return links + } + if tagName != "link" { + continue + } + + attrs := tokenAttrs(z) + rel := strings.ToLower(attrs["rel"]) + typ := strings.ToLower(attrs["type"]) + href := attrs["href"] + + if rel != "alternate" || href == "" { + continue + } + + var feedType string + switch typ { + case "application/atom+xml": + feedType = "atom" + case "application/rss+xml": + feedType = "rss" + default: + continue + } + + ref, err := url.Parse(href) + if err != nil { + continue + } + resolved := baseURL.ResolveReference(ref).String() + links = append(links, feedLink{URL: resolved, Type: feedType}) + } + } +} + +// selectFeed picks the best feed URL from discovered links. +// Prefers Atom over RSS. +func selectFeed(links []feedLink) string { + for _, l := range links { + if l.Type == "atom" { + return l.URL + } + } + for _, l := range links { + if l.Type == "rss" { + return l.URL + } + } + return "" +} + +func tokenAttrs(z *html.Tokenizer) map[string]string { + attrs := make(map[string]string) + for { + key, val, more := z.TagAttr() + if len(key) > 0 { + attrs[string(key)] = string(val) + } + if !more { + break + } + } + return attrs +} |
