From 042fcb5c4eac16f18fc051f55a6c63ca9e97306b Mon Sep 17 00:00:00 2001 From: nsfisis Date: Sat, 14 Feb 2026 12:20:31 +0900 Subject: feat(feed): auto-discover feed URLs from HTML pages When an HTML page is provided instead of a direct feed URL, parse <link> tags to find RSS/Atom feeds. Atom is preferred over RSS when both are present. Co-Authored-By: Claude Opus 4.6 --- backend/feed/discover_test.go | 129 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) create mode 100644 backend/feed/discover_test.go (limited to 'backend/feed/discover_test.go') diff --git a/backend/feed/discover_test.go b/backend/feed/discover_test.go new file mode 100644 index 0000000..4a1315e --- /dev/null +++ b/backend/feed/discover_test.go @@ -0,0 +1,129 @@ +package feed + +import ( + "net/url" + "strings" + "testing" +) + +func mustParseURL(s string) *url.URL { + u, err := url.Parse(s) + if err != nil { + panic(err) + } + return u +} + +func TestDiscoverFeeds_AtomAndRSS(t *testing.T) { + html := ` + + + + + + +` + + base := mustParseURL("https://example.com/blog") + links := discoverFeeds(strings.NewReader(html), base) + + if len(links) != 2 { + t.Fatalf("expected 2 links, got %d", len(links)) + } + if links[0].URL != "https://example.com/feed.atom" || links[0].Type != "atom" { + t.Errorf("unexpected first link: %+v", links[0]) + } + if links[1].URL != "https://example.com/feed.rss" || links[1].Type != "rss" { + t.Errorf("unexpected second link: %+v", links[1]) + } +} + +func TestDiscoverFeeds_AbsoluteURL(t *testing.T) { + html := ` + +` + + base := mustParseURL("https://example.com") + links := discoverFeeds(strings.NewReader(html), base) + + if len(links) != 1 { + t.Fatalf("expected 1 link, got %d", len(links)) + } + if links[0].URL != "https://other.com/feed.xml" { + t.Errorf("expected absolute URL preserved, got %s", links[0].URL) + } +} + +func TestDiscoverFeeds_NoFeeds(t *testing.T) { + html := `No feeds` + links := discoverFeeds(strings.NewReader(html), 
mustParseURL("https://example.com")) + if len(links) != 0 { + t.Fatalf("expected 0 links, got %d", len(links)) + } +} + +func TestDiscoverFeeds_IgnoresNonAlternate(t *testing.T) { + html := ` + + +` + + links := discoverFeeds(strings.NewReader(html), mustParseURL("https://example.com")) + if len(links) != 1 { + t.Fatalf("expected 1 link, got %d", len(links)) + } +} + +func TestDiscoverFeeds_IgnoresUnknownTypes(t *testing.T) { + html := ` + + +` + + links := discoverFeeds(strings.NewReader(html), mustParseURL("https://example.com")) + if len(links) != 1 { + t.Fatalf("expected 1 link, got %d", len(links)) + } + if links[0].Type != "rss" { + t.Errorf("expected rss, got %s", links[0].Type) + } +} + +func TestDiscoverFeeds_StopsAtBody(t *testing.T) { + html := ` + +` + + links := discoverFeeds(strings.NewReader(html), mustParseURL("https://example.com")) + if len(links) != 0 { + t.Fatalf("expected 0 links (should stop at body), got %d", len(links)) + } +} + +func TestSelectFeed_PrefersAtom(t *testing.T) { + links := []feedLink{ + {URL: "https://example.com/rss", Type: "rss"}, + {URL: "https://example.com/atom", Type: "atom"}, + } + got := selectFeed(links) + if got != "https://example.com/atom" { + t.Errorf("expected Atom URL, got %s", got) + } +} + +func TestSelectFeed_FallsBackToRSS(t *testing.T) { + links := []feedLink{ + {URL: "https://example.com/rss", Type: "rss"}, + } + got := selectFeed(links) + if got != "https://example.com/rss" { + t.Errorf("expected RSS URL, got %s", got) + } +} + +func TestSelectFeed_EmptyList(t *testing.T) { + got := selectFeed(nil) + if got != "" { + t.Errorf("expected empty string, got %s", got) + } +} -- cgit v1.3-1-g0d28