diff --git a/internal/linkcheck/meta.go b/internal/linkcheck/meta.go
new file mode 100644
index 0000000..6217844
--- /dev/null
+++ b/internal/linkcheck/meta.go
@@ -0,0 +1,66 @@
+package linkcheck
+
+import (
+ "bytes"
+ "errors"
+ "fmt"
+ "io"
+ "regexp"
+
+ "golang.org/x/net/html"
+)
+
+var (
+	// errNoMetaRedir is the sentinel returned by resolveMetaRedirect when
+	// the document does not yield a redirect target.
+	errNoMetaRedir = errors.New("no meta-redir found")
+
+	// metaRedirContent matches the content attribute of a meta-refresh tag
+	// in the form "<delay>; url=<target>" and captures <target>. The match
+	// is case-insensitive as documents commonly spell the key "URL=".
+	metaRedirContent = regexp.MustCompile(`(?i)^[0-9]+;\s*url=(.*)$`)
+)
+
+// resolveMetaRedirect scans the HTML document in body for meta-refresh tags
+// (<meta http-equiv="refresh" content="<delay>; url=<target>">) and returns
+// the target of the last such tag carrying a non-empty target.
+//
+// Plain (<meta ...>) as well as self-closing (<meta ... />) tags are
+// inspected and the http-equiv value is compared case-insensitively.
+// errNoMetaRedir is returned when no target was found, any tokenizer error
+// other than io.EOF is returned wrapped.
+//
+//nolint:gocognit // Makes no sense to split
+func resolveMetaRedirect(body []byte) (redir string, err error) {
+	tok := html.NewTokenizer(bytes.NewReader(body))
+
+tokenLoop:
+	for {
+		switch tok.Next() {
+		case html.ErrorToken:
+			if errors.Is(tok.Err(), io.EOF) {
+				// End of document: stop scanning and evaluate the result
+				break tokenLoop
+			}
+			return "", fmt.Errorf("scanning tokens: %w", tok.Err())
+
+		case html.StartTagToken, html.SelfClosingTagToken:
+			// XHTML-style "<meta ... />" is reported by the tokenizer as
+			// self-closing tag instead of a start tag, so accept both
+			t := tok.Token()
+			if t.Data != "meta" {
+				continue tokenLoop
+			}
+
+			var (
+				content    string
+				isRedirect bool
+			)
+
+			for _, attr := range t.Attr {
+				switch attr.Key {
+				case "http-equiv":
+					// The tokenizer lower-cases keys but leaves values
+					// untouched: "Refresh" and "REFRESH" are valid too
+					isRedirect = isRedirect || bytes.EqualFold([]byte(attr.Val), []byte("refresh"))
+
+				case "content":
+					content = attr.Val
+				}
+			}
+
+			if !isRedirect {
+				continue tokenLoop
+			}
+
+			// It is a redirect, get the content and parse it. An empty
+			// target must not wipe a target found in an earlier tag.
+			if matches := metaRedirContent.FindStringSubmatch(content); len(matches) > 1 && matches[1] != "" {
+				redir = matches[1]
+			}
+		}
+	}
+
+	if redir == "" {
+		// We did not find anything
+		return "", errNoMetaRedir
+	}
+
+	return redir, nil
+}
diff --git a/internal/linkcheck/meta_test.go b/internal/linkcheck/meta_test.go
new file mode 100644
index 0000000..4edc30b
--- /dev/null
+++ b/internal/linkcheck/meta_test.go
@@ -0,0 +1,41 @@
+package linkcheck
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+ "github.com/stretchr/testify/require"
+)
+
+// TestResolveMetaRedir feeds small HTML documents into resolveMetaRedirect
+// and checks that a meta-refresh target is extracted when present and that
+// errNoMetaRedir is reported when the document contains none.
+func TestResolveMetaRedir(t *testing.T) {
+	for _, tc := range []struct {
+		name     string
+		doc      string
+		expRedir string
+		expErr   error
+	}{
+		{
+			name: "document with meta refresh",
+			doc: `
+<!DOCTYPE html>
+<html>
+  <head>
+    <meta charset="utf-8">
+    <meta http-equiv="refresh" content="0; url=https://github.com/Luzifer/twitch-bot">
+  </head>
+  <body>
+    <p>Redirecting...</p>
+  </body>
+</html>
+`,
+			expRedir: "https://github.com/Luzifer/twitch-bot",
+		},
+		{
+			name: "document without meta refresh",
+			doc: `
+<!DOCTYPE html>
+<html>
+  <head>
+    <meta charset="utf-8">
+  </head>
+  <body>
+    <p>Nothing to see here</p>
+  </body>
+</html>
+`,
+			expErr: errNoMetaRedir,
+		},
+	} {
+		t.Run(tc.name, func(t *testing.T) {
+			redir, err := resolveMetaRedirect([]byte(tc.doc))
+
+			if tc.expErr != nil {
+				require.ErrorIs(t, err, tc.expErr)
+			} else {
+				require.NoError(t, err)
+			}
+
+			assert.Equal(t, tc.expRedir, redir)
+		})
+	}
+}
diff --git a/internal/linkcheck/resolver.go b/internal/linkcheck/resolver.go
index 310980d..e854027 100644
--- a/internal/linkcheck/resolver.go
+++ b/internal/linkcheck/resolver.go
@@ -4,6 +4,7 @@ import (
"context"
"crypto/rand"
_ "embed"
+ "io"
"math/big"
"net/http"
"net/http/cookiejar"
@@ -13,7 +14,6 @@ import (
"sync"
"time"
- "github.com/Luzifer/go_helpers/v2/str"
"github.com/sirupsen/logrus"
)
@@ -88,12 +88,12 @@ func (resolver) getJar() *cookiejar.Jar {
// that link after all redirects were followed
//
//nolint:gocyclo
-func (r resolver) resolveFinal(link string, cookieJar *cookiejar.Jar, callStack []string, userAgent string) string {
+func (r resolver) resolveFinal(link string, cookieJar *cookiejar.Jar, callStack *stack, userAgent string) string {
if !linkTest.MatchString(link) && !r.skipValidation {
return ""
}
- if str.StringInSlice(link, callStack) || len(callStack) == maxRedirects {
+ if callStack.Count(link) > 2 || callStack.Height() == maxRedirects {
// We got ourselves a loop: Yay!
return link
}
@@ -155,10 +155,35 @@ func (r resolver) resolveFinal(link string, cookieJar *cookiejar.Jar, callStack
return ""
}
target := r.resolveReference(u, tu)
- return r.resolveFinal(target, cookieJar, append(callStack, link), userAgent)
+ callStack.Visit(link)
+ return r.resolveFinal(target, cookieJar, callStack, userAgent)
}
- // We got a response, it's no redirect, we count this as a success
+ // We got a response and it is not a redirect, so let's check for in-document redirects
+ docBody, err := io.ReadAll(resp.Body)
+ if err != nil {
+ return ""
+ }
+
+ if metaRedir, err := resolveMetaRedirect(docBody); err == nil {
+ // Meta-Redirect found
+ tu, err := url.Parse(metaRedir)
+ if err != nil {
+ return ""
+ }
+ target := r.resolveReference(u, tu)
+ callStack.Visit(link)
+ return r.resolveFinal(target, cookieJar, callStack, userAgent)
+ }
+
+ if resp.Header.Get("Set-Cookie") != "" {
+ // A new cookie was set, let's refresh the page once to see if stuff
+ // changes with that new cookie
+ callStack.Visit(link)
+ return r.resolveFinal(u.String(), cookieJar, callStack, userAgent)
+ }
+
+ // We had no in-document redirects: we count this as a success
return u.String()
}
@@ -201,7 +226,7 @@ func (resolver) resolveReference(origin *url.URL, loc *url.URL) string {
func (r resolver) runResolver() {
for qe := range r.resolverC {
- if link := r.resolveFinal(qe.Link, r.getJar(), nil, r.userAgent()); link != "" {
+ if link := r.resolveFinal(qe.Link, r.getJar(), &stack{}, r.userAgent()); link != "" {
qe.Callback(link)
}
qe.WaitGroup.Done()
diff --git a/internal/linkcheck/stack.go b/internal/linkcheck/stack.go
new file mode 100644
index 0000000..6074a2e
--- /dev/null
+++ b/internal/linkcheck/stack.go
@@ -0,0 +1,27 @@
+package linkcheck
+
+import "strings"
+
+type (
+	// stack records every URL handed to Visit in the order of the calls.
+	// The zero value is an empty stack ready for use.
+	stack struct {
+		visits []string
+	}
+)
+
+// Count returns how many recorded visits equal url, compared using
+// strings.EqualFold. A nil stack has no visits and yields 0.
+func (s *stack) Count(url string) (n int) {
+	if s == nil {
+		return 0
+	}
+
+	for _, v := range s.visits {
+		if strings.EqualFold(v, url) {
+			n++
+		}
+	}
+
+	return n
+}
+
+// Height returns the total number of recorded visits. A nil stack yields 0.
+func (s *stack) Height() int {
+	if s == nil {
+		return 0
+	}
+
+	return len(s.visits)
+}
+
+// Visit appends url to the recorded visits. The receiver must not be nil.
+func (s *stack) Visit(url string) {
+	s.visits = append(s.visits, url)
+}