2023-04-07 22:41:00 +00:00
|
|
|
package linkcheck
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"net/http"
|
|
|
|
"net/http/httptest"
|
|
|
|
"sort"
|
|
|
|
"strconv"
|
|
|
|
"testing"
|
|
|
|
|
2023-12-23 22:41:58 +00:00
|
|
|
"github.com/Luzifer/go_helpers/v2/str"
|
2023-04-07 22:41:00 +00:00
|
|
|
"github.com/gorilla/mux"
|
|
|
|
"github.com/stretchr/testify/assert"
|
|
|
|
)
|
|
|
|
|
|
|
|
func TestInfiniteRedirect(t *testing.T) {
|
|
|
|
hdl := http.NewServeMux()
|
|
|
|
hdl.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { http.Redirect(w, r, "/test", http.StatusFound) })
|
|
|
|
hdl.HandleFunc("/test", func(w http.ResponseWriter, r *http.Request) { http.Redirect(w, r, "/", http.StatusFound) })
|
|
|
|
|
|
|
|
var (
|
2023-12-06 08:17:32 +00:00
|
|
|
c = New(withResolver(newResolver(1, withSkipVerify())))
|
2023-04-07 22:41:00 +00:00
|
|
|
ts = httptest.NewServer(hdl)
|
|
|
|
)
|
|
|
|
t.Cleanup(ts.Close)
|
|
|
|
|
|
|
|
msg := fmt.Sprintf("Here have a redirect loop: %s", ts.URL)
|
|
|
|
|
|
|
|
// We expect /test to be the first repeat as the callstack will look like this:
|
|
|
|
// ":12345", ":12345/test", ":12345/", ":12345/test" (which is the duplicate)
|
|
|
|
assert.Equal(t, []string{fmt.Sprintf("%s/test", ts.URL)}, c.ScanForLinks(msg))
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestMaxRedirects(t *testing.T) {
|
|
|
|
hdl := mux.NewRouter()
|
|
|
|
hdl.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { http.Redirect(w, r, "/1", http.StatusFound) })
|
|
|
|
hdl.HandleFunc("/{num}", func(w http.ResponseWriter, r *http.Request) {
|
|
|
|
tn, _ := strconv.Atoi(mux.Vars(r)["num"])
|
|
|
|
http.Redirect(w, r, fmt.Sprintf("/%d", tn+1), http.StatusFound)
|
|
|
|
})
|
|
|
|
|
|
|
|
var (
|
2023-12-06 08:17:32 +00:00
|
|
|
c = New(withResolver(newResolver(1, withSkipVerify())))
|
2023-04-07 22:41:00 +00:00
|
|
|
ts = httptest.NewServer(hdl)
|
|
|
|
)
|
|
|
|
t.Cleanup(ts.Close)
|
|
|
|
|
|
|
|
msg := fmt.Sprintf("Here have a redirect loop: %s", ts.URL)
|
|
|
|
|
|
|
|
// We expect the call to `/N` to have N previous entries and therefore be the break-point
|
|
|
|
assert.Equal(t, []string{fmt.Sprintf("%s/%d", ts.URL, maxRedirects)}, c.ScanForLinks(msg))
|
|
|
|
}
|
|
|
|
|
2023-07-24 21:27:25 +00:00
|
|
|
//nolint:funlen
|
2023-04-07 22:41:00 +00:00
|
|
|
func TestScanForLinks(t *testing.T) {
|
|
|
|
if testing.Short() {
|
|
|
|
t.SkipNow()
|
|
|
|
}
|
|
|
|
|
|
|
|
c := New()
|
|
|
|
|
|
|
|
for _, testCase := range []struct {
|
2023-12-23 22:41:58 +00:00
|
|
|
Heuristic bool
|
|
|
|
Message string
|
|
|
|
ExpectedLinks []string
|
|
|
|
ExpectedContains bool
|
2023-04-07 22:41:00 +00:00
|
|
|
}{
|
|
|
|
// Case: full URL is present in the message
|
|
|
|
{
|
2023-07-24 21:27:25 +00:00
|
|
|
Heuristic: false,
|
|
|
|
Message: "https://example.com",
|
2023-04-07 22:41:00 +00:00
|
|
|
ExpectedLinks: []string{
|
|
|
|
"https://example.com",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
// Case: full bitly link is present in the message
|
|
|
|
{
|
2023-07-24 21:27:25 +00:00
|
|
|
Heuristic: false,
|
|
|
|
Message: "https://bit.ly/438obkJ",
|
2023-04-07 22:41:00 +00:00
|
|
|
ExpectedLinks: []string{
|
|
|
|
"https://example.com/",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
// Case: link is present just without the protocol
|
|
|
|
{
|
2023-07-24 21:27:25 +00:00
|
|
|
Heuristic: false,
|
|
|
|
Message: "Here, take a look at this: bit.ly/438obkJ",
|
2023-04-07 22:41:00 +00:00
|
|
|
ExpectedLinks: []string{
|
|
|
|
"https://example.com/",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
// Case: message with vk.cc shortener
|
|
|
|
{
|
2023-07-24 21:27:25 +00:00
|
|
|
Heuristic: false,
|
|
|
|
Message: "See more here: vk.cc/ckGZN2",
|
2023-04-07 22:41:00 +00:00
|
|
|
ExpectedLinks: []string{
|
|
|
|
"https://vk.com/club206261664",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
// Case: link is obfuscated using space
|
|
|
|
{
|
2023-07-24 21:27:25 +00:00
|
|
|
Heuristic: true,
|
2023-08-25 21:44:08 +00:00
|
|
|
Message: "Take a look on example. com",
|
2023-04-07 22:41:00 +00:00
|
|
|
ExpectedLinks: []string{
|
|
|
|
"http://example.com",
|
|
|
|
},
|
2024-01-24 19:36:14 +00:00
|
|
|
ExpectedContains: true,
|
2023-04-07 22:41:00 +00:00
|
|
|
},
|
|
|
|
// Case: link is obfuscated using space and braces
|
|
|
|
{
|
2023-07-24 21:27:25 +00:00
|
|
|
Heuristic: true,
|
2023-08-25 21:44:08 +00:00
|
|
|
Message: "Take a look on example. (com)",
|
2023-04-07 22:41:00 +00:00
|
|
|
ExpectedLinks: []string{
|
|
|
|
"http://example.com",
|
|
|
|
},
|
2024-01-24 19:36:14 +00:00
|
|
|
ExpectedContains: true,
|
2023-04-07 22:41:00 +00:00
|
|
|
},
|
|
|
|
// Case: multiple links in one message
|
|
|
|
{
|
2023-07-24 21:27:25 +00:00
|
|
|
Heuristic: false,
|
|
|
|
Message: "https://clips.twitch.tv/WrongEnchantingMinkFutureMan-EKlDjYkvDeurO9XT https://bit.ly/438obkJ",
|
2023-04-07 22:41:00 +00:00
|
|
|
ExpectedLinks: []string{
|
|
|
|
"https://clips.twitch.tv/WrongEnchantingMinkFutureMan-EKlDjYkvDeurO9XT",
|
|
|
|
"https://example.com/",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
// Case: obfuscation with "dot"
|
|
|
|
{
|
2023-07-24 21:27:25 +00:00
|
|
|
Heuristic: true,
|
|
|
|
Message: "I'm live now on twitch dot tv/twitch",
|
2023-04-07 22:41:00 +00:00
|
|
|
ExpectedLinks: []string{
|
|
|
|
"https://www.twitch.tv/twitch",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
// Case: enhanced "dot" obfuscation
|
|
|
|
{
|
2023-07-24 21:27:25 +00:00
|
|
|
Heuristic: true,
|
|
|
|
Message: "You can visit Archive(Dot) org in your browser",
|
2023-04-07 22:41:00 +00:00
|
|
|
ExpectedLinks: []string{
|
|
|
|
"http://Archive.org",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
// Case: Youtube does weird stuff
|
|
|
|
{
|
2023-07-24 21:27:25 +00:00
|
|
|
Heuristic: false,
|
2023-07-27 11:48:44 +00:00
|
|
|
Message: "https://knut.in/youtube",
|
2023-04-07 22:41:00 +00:00
|
|
|
ExpectedLinks: []string{
|
|
|
|
"https://www.youtube.com/channel/UCjsRmaAQ0IHR2CNEBqfNOSQ",
|
|
|
|
},
|
|
|
|
},
|
|
|
|
// Case: Instagram also does weird things
|
|
|
|
{
|
2023-07-24 21:27:25 +00:00
|
|
|
Heuristic: false,
|
|
|
|
Message: "https://bit.ly/3KHpJuy",
|
2023-04-07 22:41:00 +00:00
|
|
|
ExpectedLinks: []string{
|
|
|
|
"https://www.instagram.com/instagram/",
|
|
|
|
},
|
|
|
|
},
|
2023-07-24 21:27:25 +00:00
|
|
|
// Case: Heuristic enabled with a German sentence accidentally
|
|
|
|
// forming a valid link to a spanish site (btw.es) - known and
|
|
|
|
// valid false-positive
|
|
|
|
{
|
|
|
|
Heuristic: true,
|
|
|
|
Message: "Hey btw. es kann sein, dass",
|
|
|
|
ExpectedLinks: []string{"https://trusted.evo-media.eu/btw.es"},
|
|
|
|
},
|
|
|
|
// Case: Multiple spaces in the link
|
|
|
|
{
|
2024-01-24 19:36:14 +00:00
|
|
|
Heuristic: true,
|
|
|
|
Message: "Hey there, see my new project on exa mpl e. com! Get it fast now!",
|
|
|
|
ExpectedLinks: []string{"http://example.com"},
|
|
|
|
ExpectedContains: true,
|
2023-07-24 21:27:25 +00:00
|
|
|
},
|
2023-12-05 17:58:23 +00:00
|
|
|
// Case: Dot in the end of the link with space
|
|
|
|
{
|
|
|
|
Heuristic: true,
|
|
|
|
Message: "See example com. Nice testing stuff there!",
|
|
|
|
ExpectedLinks: []string{"http://example.com"},
|
|
|
|
},
|
2023-04-07 22:41:00 +00:00
|
|
|
// Case: false positives
|
2023-07-24 21:27:25 +00:00
|
|
|
{Heuristic: true, Message: "game dot exe has stopped working", ExpectedLinks: nil},
|
2023-12-05 17:58:23 +00:00
|
|
|
{Heuristic: false, Message: "You're following since 12.12.2020 DogChamp", ExpectedLinks: nil},
|
|
|
|
{Heuristic: true, Message: "You're following since 12.12.2020 DogChamp", ExpectedLinks: []string{"http://You.re"}},
|
2023-07-24 21:27:25 +00:00
|
|
|
{Heuristic: false, Message: "Hey btw. es kann sein, dass", ExpectedLinks: nil},
|
2023-04-07 22:41:00 +00:00
|
|
|
} {
|
2023-07-24 21:27:25 +00:00
|
|
|
t.Run(fmt.Sprintf("h:%v lc:%d m:%s", testCase.Heuristic, len(testCase.ExpectedLinks), testCase.Message), func(t *testing.T) {
|
|
|
|
var linksFound []string
|
|
|
|
if testCase.Heuristic {
|
|
|
|
linksFound = c.HeuristicScanForLinks(testCase.Message)
|
|
|
|
} else {
|
|
|
|
linksFound = c.ScanForLinks(testCase.Message)
|
|
|
|
}
|
2023-04-07 22:41:00 +00:00
|
|
|
sort.Strings(linksFound)
|
|
|
|
|
2023-12-23 22:41:58 +00:00
|
|
|
if testCase.ExpectedContains {
|
|
|
|
for _, expLnk := range testCase.ExpectedLinks {
|
|
|
|
assert.Contains(t, linksFound, expLnk)
|
|
|
|
}
|
|
|
|
|
|
|
|
var extraLinks []string
|
|
|
|
for _, link := range linksFound {
|
|
|
|
if !str.StringInSlice(link, testCase.ExpectedLinks) {
|
|
|
|
extraLinks = append(extraLinks, link)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
t.Logf("extra links found: %v", extraLinks)
|
|
|
|
} else {
|
|
|
|
assert.Equal(t, testCase.ExpectedLinks, linksFound)
|
|
|
|
}
|
2023-04-07 22:41:00 +00:00
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestUserAgentListNotEmpty(t *testing.T) {
|
|
|
|
if len(defaultUserAgents) == 0 {
|
|
|
|
t.Fatal("found empty user-agent list")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestUserAgentRandomizer(t *testing.T) {
|
2023-12-06 08:17:32 +00:00
|
|
|
uas := map[string]int{}
|
2023-04-07 22:41:00 +00:00
|
|
|
|
|
|
|
for i := 0; i < 10; i++ {
|
2023-12-06 08:17:32 +00:00
|
|
|
uas[defaultResolver.userAgent()]++
|
2023-04-07 22:41:00 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
for _, c := range uas {
|
|
|
|
assert.Less(t, c, 10)
|
|
|
|
}
|
|
|
|
|
|
|
|
assert.Equal(t, 0, uas[""]) // there should be no empty UA
|
|
|
|
}
|