From d92a451ebc6e88b0c1076faa5c593a9d3a67a87e Mon Sep 17 00:00:00 2001 From: Knut Ahlers Date: Sat, 8 Apr 2023 00:41:00 +0200 Subject: [PATCH] [linkprotect] Add Link-, Clip-Detector and Link-Protection actor (#42) --- Makefile | 6 + internal/actors/clipdetector/actor.go | 77 ++++++ internal/actors/linkdetector/actor.go | 40 +++ internal/actors/linkprotect/actor.go | 338 ++++++++++++++++++++++++++ internal/linkcheck/linkcheck.go | 226 +++++++++++++++++ internal/linkcheck/linkcheck_test.go | 180 ++++++++++++++ internal/linkcheck/user-agents.txt | 43 ++++ pkg/twitch/clips.go | 65 +++++ plugins/fieldcollection.go | 58 +++-- plugins_core.go | 6 + wiki/Actors.md | 59 +++++ 11 files changed, 1082 insertions(+), 16 deletions(-) create mode 100644 internal/actors/clipdetector/actor.go create mode 100644 internal/actors/linkdetector/actor.go create mode 100644 internal/actors/linkprotect/actor.go create mode 100644 internal/linkcheck/linkcheck.go create mode 100644 internal/linkcheck/linkcheck_test.go create mode 100644 internal/linkcheck/user-agents.txt create mode 100644 pkg/twitch/clips.go diff --git a/Makefile b/Makefile index 219f35a..6542457 100644 --- a/Makefile +++ b/Makefile @@ -34,3 +34,9 @@ pull_wiki: push_wiki: git subtree push --prefix=wiki https://github.com/Luzifer/twitch-bot.wiki.git master + +# --- Tools + +update_ua_list: + # User-Agents provided by https://www.useragents.me/ + curl -sSf https://www.useragents.me/api | jq -r '.data[].ua' | grep -v 'Trident' >internal/linkcheck/user-agents.txt diff --git a/internal/actors/clipdetector/actor.go b/internal/actors/clipdetector/actor.go new file mode 100644 index 0000000..b18e5e1 --- /dev/null +++ b/internal/actors/clipdetector/actor.go @@ -0,0 +1,77 @@ +package clipdetector + +import ( + "context" + "regexp" + + "github.com/go-irc/irc" + "github.com/pkg/errors" + + "github.com/Luzifer/twitch-bot/v3/internal/actors/linkdetector" + "github.com/Luzifer/twitch-bot/v3/pkg/twitch" + 
"github.com/Luzifer/twitch-bot/v3/plugins" +) + +const actorName = "clipdetector" + +var ( + botTwitchClient *twitch.Client + clipIDScanner = regexp.MustCompile(`(?:clips\.twitch\.tv|www\.twitch\.tv/[^/]*/clip)/([A-Za-z0-9_-]+)`) +) + +func Register(args plugins.RegistrationArguments) error { + botTwitchClient = args.GetTwitchClient() + + args.RegisterActor(actorName, func() plugins.Actor { return &Actor{} }) + + args.RegisterActorDocumentation(plugins.ActionDocumentation{ + Description: `Scans for clip-links in the message and adds the "clips" field to the event data`, + Name: "Scan for Clips", + Type: actorName, + }) + + return nil +} + +type Actor struct{} + +func (Actor) Execute(c *irc.Client, m *irc.Message, r *plugins.Rule, eventData *plugins.FieldCollection, attrs *plugins.FieldCollection) (preventCooldown bool, err error) { + if eventData.HasAll("clips") { + // We already detected clips, lets not do it again + return false, nil + } + + // In case the link detector did not run before, lets run it now + if preventCooldown, err = (linkdetector.Actor{}).Execute(c, m, r, eventData, attrs); err != nil { + return preventCooldown, errors.Wrap(err, "detecting links") + } + + links, err := eventData.StringSlice("links") + if err != nil { + return false, errors.Wrap(err, "getting links data") + } + + var clips []twitch.ClipInfo + for _, link := range links { + clipIDMatch := clipIDScanner.FindStringSubmatch(link) + if clipIDMatch == nil { + continue + } + + clipInfo, err := botTwitchClient.GetClipByID(context.Background(), clipIDMatch[1]) + if err != nil { + return false, errors.Wrap(err, "getting clip info") + } + + clips = append(clips, clipInfo) + } + + eventData.Set("clips", clips) + return false, nil +} + +func (Actor) IsAsync() bool { return false } + +func (Actor) Name() string { return actorName } + +func (Actor) Validate(plugins.TemplateValidatorFunc, *plugins.FieldCollection) error { return nil } diff --git a/internal/actors/linkdetector/actor.go 
b/internal/actors/linkdetector/actor.go new file mode 100644 index 0000000..e70ae7f --- /dev/null +++ b/internal/actors/linkdetector/actor.go @@ -0,0 +1,40 @@ +package linkdetector + +import ( + "github.com/go-irc/irc" + + "github.com/Luzifer/twitch-bot/v3/internal/linkcheck" + "github.com/Luzifer/twitch-bot/v3/plugins" +) + +const actorName = "linkdetector" + +func Register(args plugins.RegistrationArguments) error { + args.RegisterActor(actorName, func() plugins.Actor { return &Actor{} }) + + args.RegisterActorDocumentation(plugins.ActionDocumentation{ + Description: `Scans for links in the message and adds the "links" field to the event data`, + Name: "Scan for Links", + Type: actorName, + }) + + return nil +} + +type Actor struct{} + +func (Actor) Execute(_ *irc.Client, m *irc.Message, _ *plugins.Rule, eventData *plugins.FieldCollection, _ *plugins.FieldCollection) (preventCooldown bool, err error) { + if eventData.HasAll("links") { + // We already detected links, lets not do it again + return false, nil + } + + eventData.Set("links", linkcheck.New().ScanForLinks(m.Trailing())) + return false, nil +} + +func (Actor) IsAsync() bool { return false } + +func (Actor) Name() string { return actorName } + +func (Actor) Validate(plugins.TemplateValidatorFunc, *plugins.FieldCollection) error { return nil } diff --git a/internal/actors/linkprotect/actor.go b/internal/actors/linkprotect/actor.go new file mode 100644 index 0000000..ccd8e76 --- /dev/null +++ b/internal/actors/linkprotect/actor.go @@ -0,0 +1,338 @@ +package linkprotect + +import ( + "regexp" + "strings" + "time" + + "github.com/go-irc/irc" + "github.com/pkg/errors" + + "github.com/Luzifer/twitch-bot/v3/internal/actors/clipdetector" + "github.com/Luzifer/twitch-bot/v3/pkg/twitch" + "github.com/Luzifer/twitch-bot/v3/plugins" +) + +const actorName = "linkprotect" + +var ( + botTwitchClient *twitch.Client + clipLink = regexp.MustCompile(`.*(?:clips\.twitch\.tv|www\.twitch\.tv/[^/]*/clip)/.*`) + ptrBoolFalse = 
func(v bool) *bool { return &v }(false) + ptrStringEmpty = func(v string) *string { return &v }("") +) + +func Register(args plugins.RegistrationArguments) error { + botTwitchClient = args.GetTwitchClient() + + args.RegisterActor(actorName, func() plugins.Actor { return &actor{} }) + + args.RegisterActorDocumentation(plugins.ActionDocumentation{ + Description: `Uses link- and clip-scanner to detect links / clips and applies link protection as defined`, + Name: "Enforce Link-Protection", + Type: actorName, + + Fields: []plugins.ActionDocumentationField{ + { + Default: "", + Description: "Allowed links (if any is specified all non matching links will cause enforcement action, link must contain any of these strings)", + Key: "allowed_links", + Name: "Allowed Links", + Optional: true, + SupportTemplate: false, + Type: plugins.ActionDocumentationFieldTypeStringSlice, + }, + { + Default: "", + Description: "Disallowed links (if any is specified all non matching links will not cause enforcement action, link must contain any of these strings)", + Key: "disallowed_links", + Name: "Disallowed Links", + Optional: true, + SupportTemplate: false, + Type: plugins.ActionDocumentationFieldTypeStringSlice, + }, + { + Default: "", + Description: "Allowed clip channels (if any is specified clips of all other channels will cause enforcement action, clip-links will be ignored in link-protection when this is used)", + Key: "allowed_clip_channels", + Name: "Allowed Clip Channels", + Optional: true, + SupportTemplate: false, + Type: plugins.ActionDocumentationFieldTypeStringSlice, + }, + { + Default: "", + Description: "Disallowed clip channels (if any is specified clips of all other channels will not cause enforcement action, clip-links will be ignored in link-protection when this is used)", + Key: "disallowed_clip_channels", + Name: "Disallowed Clip Channels", + Optional: true, + SupportTemplate: false, + Type: plugins.ActionDocumentationFieldTypeStringSlice, + }, + { + Default: "", + 
Description: "Enforcement action to take when disallowed link / clip is detected (ban, delete, duration-value i.e. 1m)", + Key: "action", + Name: "Action", + Optional: false, + SupportTemplate: false, + Type: plugins.ActionDocumentationFieldTypeString, + }, + { + Default: "", + Description: "Reason why the enforcement action was taken", + Key: "reason", + Name: "Reason", + Optional: false, + SupportTemplate: false, + Type: plugins.ActionDocumentationFieldTypeString, + }, + { + Default: "false", + Description: "Stop rule execution when action is applied (i.e. not to post a message after a ban for spam links)", + Key: "stop_on_action", + Name: "Stop on Action", + Optional: true, + SupportTemplate: false, + Type: plugins.ActionDocumentationFieldTypeBool, + }, + { + Default: "false", + Description: "Stop rule execution when no action is applied (i.e. not to post a message when no enforcement action is taken)", + Key: "stop_on_no_action", + Name: "Stop on no Action", + Optional: true, + SupportTemplate: false, + Type: plugins.ActionDocumentationFieldTypeBool, + }, + }, + }) + + return nil +} + +type ( + actor struct{} + + verdict uint +) + +const ( + verdictAllFine verdict = iota + verdictMisbehave +) + +//nolint:gocyclo // Minimum over the limit, makes no sense to split +func (a actor) Execute(c *irc.Client, m *irc.Message, r *plugins.Rule, eventData *plugins.FieldCollection, attrs *plugins.FieldCollection) (preventCooldown bool, err error) { + // In case the clip detector did not run before, lets run it now + if preventCooldown, err = (clipdetector.Actor{}).Execute(c, m, r, eventData, attrs); err != nil { + return preventCooldown, errors.Wrap(err, "detecting links / clips") + } + + links, err := eventData.StringSlice("links") + if err != nil { + return preventCooldown, errors.Wrap(err, "getting links from event") + } + + if len(links) == 0 { + // If there are no links there is nothing to protect and there + // are also no clips as they are parsed from the links + 
return false, nil + } + + clipsInterface, err := eventData.Any("clips") + if err != nil { + return preventCooldown, errors.Wrap(err, "getting clips from event") + } + clips, ok := clipsInterface.([]twitch.ClipInfo) + if !ok { + return preventCooldown, errors.New("invalid data-type in clips") + } + + if a.check(links, clips, attrs) == verdictAllFine { + if attrs.MustBool("stop_on_no_action", ptrBoolFalse) { + return false, plugins.ErrStopRuleExecution + } + return false, nil + } + + // That message misbehaved so we need to punish them + switch lt := attrs.MustString("action", ptrStringEmpty); lt { + case "ban": + if err = botTwitchClient.BanUser( + plugins.DeriveChannel(m, eventData), + strings.TrimLeft(plugins.DeriveUser(m, eventData), "@"), + 0, + attrs.MustString("reason", ptrStringEmpty), + ); err != nil { + return false, errors.Wrap(err, "executing user ban") + } + + case "delete": + msgID, ok := m.Tags.GetTag("id") + if !ok || msgID == "" { + return false, errors.New("found no mesage id") + } + + if err = botTwitchClient.DeleteMessage( + plugins.DeriveChannel(m, eventData), + msgID, + ); err != nil { + return false, errors.Wrap(err, "deleting message") + } + + default: + to, err := time.ParseDuration(lt) + if err != nil { + return false, errors.Wrap(err, "parsing punishment level") + } + + if err = botTwitchClient.BanUser( + plugins.DeriveChannel(m, eventData), + strings.TrimLeft(plugins.DeriveUser(m, eventData), "@"), + to, + attrs.MustString("reason", ptrStringEmpty), + ); err != nil { + return false, errors.Wrap(err, "executing user ban") + } + } + + if attrs.MustBool("stop_on_action", ptrBoolFalse) { + return false, plugins.ErrStopRuleExecution + } + + return false, nil +} + +func (actor) IsAsync() bool { return false } + +func (actor) Name() string { return actorName } + +func (actor) Validate(_ plugins.TemplateValidatorFunc, attrs *plugins.FieldCollection) error { + if v, err := attrs.String("action"); err != nil || v == "" { + return errors.New("action 
must be non-empty string") + } + + if v, err := attrs.String("reason"); err != nil || v == "" { + return errors.New("reason must be non-empty string") + } + + if len(attrs.MustStringSlice("allowed_links"))+ + len(attrs.MustStringSlice("disallowed_links"))+ + len(attrs.MustStringSlice("allowed_clip_channels"))+ + len(attrs.MustStringSlice("disallowed_clip_channels")) == 0 { + return errors.New("no conditions are provided") + } + + return nil +} + +func (a actor) check(links []string, clips []twitch.ClipInfo, attrs *plugins.FieldCollection) (v verdict) { + hasClipDefinition := len(attrs.MustStringSlice("allowed_clip_channels"))+len(attrs.MustStringSlice("disallowed_clip_channels")) > 0 + + if v = a.checkLinkDenied(attrs.MustStringSlice("disallowed_links"), links, hasClipDefinition); v == verdictMisbehave { + return verdictMisbehave + } + + if v = a.checkAllLinksAllowed(attrs.MustStringSlice("allowed_links"), links, hasClipDefinition); v == verdictMisbehave { + return verdictMisbehave + } + + if v = a.checkClipChannelDenied(attrs.MustStringSlice("disallowed_clip_channels"), clips); v == verdictMisbehave { + return verdictMisbehave + } + + if v = a.checkAllClipChannelsAllowed(attrs.MustStringSlice("allowed_clip_channels"), clips); v == verdictMisbehave { + return verdictMisbehave + } + + return verdictAllFine +} + +func (actor) checkAllClipChannelsAllowed(allowList []string, clips []twitch.ClipInfo) verdict { + if len(allowList) == 0 { + // We're not explicitly allowing clip-channels, this method is a no-op + return verdictAllFine + } + + allAllowed := true + for _, clip := range clips { + clipAllowed := false + for _, allowed := range allowList { + if strings.EqualFold(clip.BroadcasterName, allowed) { + clipAllowed = true + } + } + + allAllowed = allAllowed && clipAllowed + } + + if allAllowed { + // All clips are fine + return verdictAllFine + } + + // Some clips are not fine + return verdictMisbehave +} + +func (actor) checkClipChannelDenied(denyList []string, clips 
[]twitch.ClipInfo) verdict { + for _, clip := range clips { + for _, denied := range denyList { + if strings.EqualFold(clip.BroadcasterName, denied) { + return verdictMisbehave + } + } + } + + return verdictAllFine +} + +func (actor) checkAllLinksAllowed(allowList, links []string, autoAllowClipLinks bool) verdict { + if len(allowList) == 0 { + // We're not explicitly allowing links, this method is a no-op + return verdictAllFine + } + + allAllowed := true + for _, link := range links { + if autoAllowClipLinks && clipLink.MatchString(link) { + // The default is "true", so we don't change that in this case + // as the expression would be `allowList && true` which is BS + continue + } + + var linkAllowed bool + for _, allowed := range allowList { + linkAllowed = linkAllowed || strings.Contains(strings.ToLower(link), strings.ToLower(allowed)) + } + + allAllowed = allAllowed && linkAllowed + } + + if allAllowed { + // All links are fine + return verdictAllFine + } + + // Some links are not fine + return verdictMisbehave +} + +func (actor) checkLinkDenied(denyList, links []string, ignoreClipLinks bool) verdict { + for _, link := range links { + if ignoreClipLinks && clipLink.MatchString(link) { + // We have special directives for clips so we ignore clip-links + continue + } + + for _, denied := range denyList { + if strings.Contains(strings.ToLower(link), strings.ToLower(denied)) { + // Well, that link is definitely not allowed + return verdictMisbehave + } + } + } + + return verdictAllFine +} diff --git a/internal/linkcheck/linkcheck.go b/internal/linkcheck/linkcheck.go new file mode 100644 index 0000000..cc41832 --- /dev/null +++ b/internal/linkcheck/linkcheck.go @@ -0,0 +1,226 @@ +package linkcheck + +import ( + "context" + "crypto/rand" + _ "embed" + "math/big" + "net/http" + "net/http/cookiejar" + "net/url" + "regexp" + "strings" + "time" + + "github.com/Luzifer/go_helpers/v2/str" +) + +const ( + // DefaultCheckTimeout defines the default time the request to a site 
+ // may take to answer + DefaultCheckTimeout = 10 * time.Second + + maxRedirects = 50 +) + +type ( + // Checker contains logic to detect and resolve links in a message + Checker struct { + checkTimeout time.Duration + userAgents []string + + skipValidation bool // Only for tests, not settable from the outside + } +) + +var ( + defaultUserAgents = []string{} + dropSet = regexp.MustCompile(`[^a-zA-Z0-9.:/\s_-]`) + linkTest = regexp.MustCompile(`(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]`) + numericHost = regexp.MustCompile(`^(?:[0-9]+\.)*[0-9]+(?::[0-9]+)?$`) + + //go:embed user-agents.txt + uaList string +) + +func init() { + defaultUserAgents = strings.Split(strings.TrimSpace(uaList), "\n") +} + +// New creates a new Checker instance with default settings +func New() *Checker { + return &Checker{ + checkTimeout: DefaultCheckTimeout, + userAgents: defaultUserAgents, + } +} + +// ScanForLinks takes a message and tries to find links within that +// message. Common methods like putting spaces into links are tried +// to circumvent. +func (c Checker) ScanForLinks(message string) (links []string) { + for _, scanner := range []func(string) []string{ + c.scanPlainNoObfuscate, + c.scanObfuscateSpace, + c.scanObfuscateSpecialCharsAndSpaces, + c.scanDotObfuscation, + } { + if links = scanner(message); links != nil { + return links + } + } + + return links +} + +// resolveFinal takes a link and looks up the final destination of +// that link after all redirects were followed +func (c Checker) resolveFinal(link string, cookieJar *cookiejar.Jar, callStack []string, userAgent string) string { + if !linkTest.MatchString(link) && !c.skipValidation { + return "" + } + + if str.StringInSlice(link, callStack) || len(callStack) == maxRedirects { + // We got ourselves a loop: Yay! 
+ return link + } + + client := &http.Client{ + CheckRedirect: func(req *http.Request, via []*http.Request) error { + return http.ErrUseLastResponse + }, + Jar: cookieJar, + } + + ctx, cancel := context.WithTimeout(context.Background(), c.checkTimeout) + defer cancel() + + u, err := url.Parse(link) + if err != nil { + return "" + } + + if u.Scheme == "" { + // We have no scheme and the url is in the path, lets add the + // scheme and re-parse the URL to avoid some confusion + u.Scheme = "http" + u, err = url.Parse(u.String()) + if err != nil { + return "" + } + } + + if numericHost.MatchString(u.Host) && !c.skipValidation { + // Host is fully numeric: We don't support scanning that + return "" + } + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil) + if err != nil { + return "" + } + + req.Header.Set("User-Agent", userAgent) + + resp, err := client.Do(req) + if err != nil { + return "" + } + defer resp.Body.Close() + + if resp.StatusCode > 299 && resp.StatusCode < 400 { + // We got a redirect + tu, err := url.Parse(resp.Header.Get("location")) + if err != nil { + return "" + } + target := c.resolveReference(u, tu) + return c.resolveFinal(target, cookieJar, append(callStack, link), userAgent) + } + + // We got a response, it's no redirect, we count this as a success + return u.String() +} + +func (Checker) resolveReference(origin *url.URL, loc *url.URL) string { + // Special Case: vkontakte used as shortener / obfuscation + if loc.Path == "/away.php" && loc.Query().Has("to") { + // VK is doing HTML / JS redirect magic so we take that from them + // and execute the redirect directly here in code + return loc.Query().Get("to") + } + + if loc.Host == "consent.youtube.com" && loc.Query().Has("continue") { + // Youtube links end up in consent page but we want the real + // target so we use the continue parameter where we strip the + // cbrd query parameters as that one causes an infinite loop. 
+ + contTarget, err := url.Parse(loc.Query().Get("continue")) + if err == nil { + v := contTarget.Query() + v.Del("cbrd") + + contTarget.RawQuery = v.Encode() + return contTarget.String() + } + + return loc.Query().Get("continue") + } + + if loc.Host == "www.instagram.com" && loc.Query().Has("next") { + // Instagram likes its login page, we on the other side don't + // care about the sign-in or even the content. Therefore we + // just take their redirect target and use that as the next + // URL + return loc.Query().Get("next") + } + + // Default fallback behavior: Do a normal resolve + return origin.ResolveReference(loc).String() +} + +func (Checker) getJar() *cookiejar.Jar { + jar, _ := cookiejar.New(nil) + return jar +} + +func (c Checker) scanDotObfuscation(message string) (links []string) { + message = regexp.MustCompile(`(?i)\s*\(?dot\)?\s*`).ReplaceAllString(message, ".") + return c.scanPlainNoObfuscate(message) +} + +func (c Checker) scanObfuscateSpace(message string) (links []string) { + // Spammers use spaces in their links to prevent link protection matches + parts := regexp.MustCompile(`\s+`).Split(message, -1) + + for i := 0; i < len(parts)-1; i++ { + if link := c.resolveFinal(strings.Join(parts[i:i+2], ""), c.getJar(), nil, c.userAgent()); link != "" { + links = append(links, link) + } + } + + return links +} + +func (c Checker) scanObfuscateSpecialCharsAndSpaces(message string) (links []string) { + // First clean URL from all characters not acceptable in Domains (plus some extra chars) + message = dropSet.ReplaceAllString(message, "") + return c.scanObfuscateSpace(message) +} + +func (c Checker) scanPlainNoObfuscate(message string) (links []string) { + parts := regexp.MustCompile(`\s+`).Split(message, -1) + + for _, part := range parts { + if link := c.resolveFinal(part, c.getJar(), nil, c.userAgent()); link != "" { + links = append(links, link) + } + } + + return links +} + +func (c Checker) userAgent() string { + n, _ := rand.Int(rand.Reader, 
big.NewInt(int64(len(c.userAgents)))) + return c.userAgents[n.Int64()] +} diff --git a/internal/linkcheck/linkcheck_test.go b/internal/linkcheck/linkcheck_test.go new file mode 100644 index 0000000..1728556 --- /dev/null +++ b/internal/linkcheck/linkcheck_test.go @@ -0,0 +1,180 @@ +package linkcheck + +import ( + "fmt" + "net/http" + "net/http/httptest" + "sort" + "strconv" + "testing" + + "github.com/gorilla/mux" + "github.com/stretchr/testify/assert" +) + +func TestInfiniteRedirect(t *testing.T) { + hdl := http.NewServeMux() + hdl.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { http.Redirect(w, r, "/test", http.StatusFound) }) + hdl.HandleFunc("/test", func(w http.ResponseWriter, r *http.Request) { http.Redirect(w, r, "/", http.StatusFound) }) + + var ( + c = New() + ts = httptest.NewServer(hdl) + ) + t.Cleanup(ts.Close) + + c.skipValidation = true + + msg := fmt.Sprintf("Here have a redirect loop: %s", ts.URL) + + // We expect /test to be the first repeat as the callstack will look like this: + // ":12345", ":12345/test", ":12345/", ":12345/test" (which is the duplicate) + assert.Equal(t, []string{fmt.Sprintf("%s/test", ts.URL)}, c.ScanForLinks(msg)) +} + +func TestMaxRedirects(t *testing.T) { + hdl := mux.NewRouter() + hdl.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { http.Redirect(w, r, "/1", http.StatusFound) }) + hdl.HandleFunc("/{num}", func(w http.ResponseWriter, r *http.Request) { + tn, _ := strconv.Atoi(mux.Vars(r)["num"]) + http.Redirect(w, r, fmt.Sprintf("/%d", tn+1), http.StatusFound) + }) + + var ( + c = New() + ts = httptest.NewServer(hdl) + ) + t.Cleanup(ts.Close) + + c.skipValidation = true + + msg := fmt.Sprintf("Here have a redirect loop: %s", ts.URL) + + // We expect the call to `/N` to have N previous entries and therefore be the break-point + assert.Equal(t, []string{fmt.Sprintf("%s/%d", ts.URL, maxRedirects)}, c.ScanForLinks(msg)) +} + +func TestScanForLinks(t *testing.T) { + if testing.Short() { + t.SkipNow() 
+ } + + c := New() + + for _, testCase := range []struct { + Message string + ExpectedLinks []string + }{ + // Case: full URL is present in the message + { + Message: "https://example.com", + ExpectedLinks: []string{ + "https://example.com", + }, + }, + // Case: full bitly link is present in the message + { + Message: "https://bit.ly/438obkJ", + ExpectedLinks: []string{ + "https://example.com/", + }, + }, + // Case: link is present just without the protocol + { + Message: "Here, take a look at this: bit.ly/438obkJ", + ExpectedLinks: []string{ + "https://example.com/", + }, + }, + // Case: message with vk.cc shortener + { + Message: "See more here: vk.cc/ckGZN2", + ExpectedLinks: []string{ + "https://vk.com/club206261664", + }, + }, + // Case: link is obfuscated using space + { + Message: "Take a look at example. com", + ExpectedLinks: []string{ + "http://example.com", + }, + }, + // Case: link is obfuscated using space and braces + { + Message: "Take a look at example. (com)", + ExpectedLinks: []string{ + "http://example.com", + }, + }, + // Case: multiple links in one message + { + Message: "https://clips.twitch.tv/WrongEnchantingMinkFutureMan-EKlDjYkvDeurO9XT https://bit.ly/438obkJ", + ExpectedLinks: []string{ + "https://clips.twitch.tv/WrongEnchantingMinkFutureMan-EKlDjYkvDeurO9XT", + "https://example.com/", + }, + }, + // Case: obfuscation with "dot" + { + Message: "I'm live now on twitch dot tv/twitch", + ExpectedLinks: []string{ + "https://www.twitch.tv/twitch", + }, + }, + // Case: enhanced "dot" obfuscation + { + Message: "You can visit Archive(Dot) org in your browser", + ExpectedLinks: []string{ + "http://Archive.org", + }, + }, + // Case: Youtube does weird stuff + { + Message: "https://luziferus.tv/youtube", + ExpectedLinks: []string{ + "https://www.youtube.com/channel/UCjsRmaAQ0IHR2CNEBqfNOSQ", + }, + }, + // Case: Instagram also does weird things + { + Message: "https://bit.ly/3KHpJuy", + ExpectedLinks: []string{ + 
"https://www.instagram.com/instagram/", + }, + }, + // Case: false positives + {Message: "game dot exe has stopped working", ExpectedLinks: nil}, + {Message: "You're following since 12.12.2020 DogChamp", ExpectedLinks: nil}, + } { + t.Run(testCase.Message, func(t *testing.T) { + linksFound := c.ScanForLinks(testCase.Message) + sort.Strings(linksFound) + + assert.Equal(t, testCase.ExpectedLinks, linksFound, "links from message %q", testCase.Message) + }) + } +} + +func TestUserAgentListNotEmpty(t *testing.T) { + if len(defaultUserAgents) == 0 { + t.Fatal("found empty user-agent list") + } +} + +func TestUserAgentRandomizer(t *testing.T) { + var ( + c = New() + uas = map[string]int{} + ) + + for i := 0; i < 10; i++ { + uas[c.userAgent()]++ + } + + for _, c := range uas { + assert.Less(t, c, 10) + } + + assert.Equal(t, 0, uas[""]) // there should be no empty UA +} diff --git a/internal/linkcheck/user-agents.txt b/internal/linkcheck/user-agents.txt new file mode 100644 index 0000000..43e1e57 --- /dev/null +++ b/internal/linkcheck/user-agents.txt @@ -0,0 +1,43 @@ +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.63 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 +Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.57 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 +Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Safari/605.1.15 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 
Safari/537.36 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 OPR/95.0.0.0 +Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 +Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/110.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.41 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.56 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.53 Safari/537.36 Edg/103.0.1264.37 +Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36 Edg/90.0.818.46 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.50 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Whale/3.19.166.16 Safari/537.36 +Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/110.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.76 +Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.46 +Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0 +Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0 +Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Core/1.94.192.400 QQBrowser/11.5.5250.400 +Mozilla/5.0 (Windows NT 10.0; Win64; 
x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 Edg/109.0.1518.78 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36 +Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 OPR/95.0.0.0 +Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.63 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36 Edg/92.0.902.67 +Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:108.0) Gecko/20100101 Firefox/108.0 +Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36 +Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/18.17763 +Mozilla/5.0 (X11; Linux x86_64; rv:108.0) Gecko/20100101 Firefox/108.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 +Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.63 +Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.54 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 Edg/109.0.1518.61 +Mozilla/5.0 (Windows NT 10.0; rv:109.0) Gecko/20100101 Firefox/110.0 +Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 Edg/109.0.1518.70 diff --git 
a/pkg/twitch/clips.go b/pkg/twitch/clips.go new file mode 100644 index 0000000..263f476 --- /dev/null +++ b/pkg/twitch/clips.go @@ -0,0 +1,65 @@ +package twitch + +import ( + "context" + "fmt" + "net/http" + "time" + + "github.com/pkg/errors" +) + +const clipCacheTimeout = 10 * time.Minute // Clips do not change that fast + +type ( + ClipInfo struct { + ID string `json:"id"` + URL string `json:"url"` + EmbedURL string `json:"embed_url"` + BroadcasterID string `json:"broadcaster_id"` + BroadcasterName string `json:"broadcaster_name"` + CreatorID string `json:"creator_id"` + CreatorName string `json:"creator_name"` + VideoID string `json:"video_id"` + GameID string `json:"game_id"` + Language string `json:"language"` + Title string `json:"title"` + ViewCount int64 `json:"view_count"` + CreatedAt time.Time `json:"created_at"` + ThumbnailURL string `json:"thumbnail_url"` + Duration float64 `json:"duration"` + VodOffset int64 `json:"vod_offset"` + } +) + +// GetClipByID gets a video clip that were captured from streams by +// its ID (slug in the URL) +func (c *Client) GetClipByID(ctx context.Context, clipID string) (ClipInfo, error) { + cacheKey := []string{"getClipByID", clipID} + if clip := c.apiCache.Get(cacheKey); clip != nil { + return clip.(ClipInfo), nil + } + + var payload struct { + Data []ClipInfo + } + + if err := c.request(clientRequestOpts{ + AuthType: authTypeAppAccessToken, + Context: ctx, + Method: http.MethodGet, + OKStatus: http.StatusOK, + Out: &payload, + URL: fmt.Sprintf("https://api.twitch.tv/helix/clips?id=%s", clipID), + }); err != nil { + return ClipInfo{}, errors.Wrap(err, "getting clip info") + } + + if l := len(payload.Data); l != 1 { + return ClipInfo{}, errors.Errorf("unexpected number of clip info returned: %d", l) + } + + c.apiCache.Set(cacheKey, clipCacheTimeout, payload.Data[0]) + + return payload.Data[0], nil +} diff --git a/plugins/fieldcollection.go b/plugins/fieldcollection.go index a65c44a..645d6d9 100644 --- 
a/plugins/fieldcollection.go +++ b/plugins/fieldcollection.go @@ -17,17 +17,17 @@ var ( ) type FieldCollection struct { - data map[string]interface{} + data map[string]any lock sync.RWMutex } // NewFieldCollection creates a new FieldCollection with empty data store func NewFieldCollection() *FieldCollection { - return &FieldCollection{data: make(map[string]interface{})} + return &FieldCollection{data: make(map[string]any)} } // FieldCollectionFromData is a wrapper around NewFieldCollection and SetFromData -func FieldCollectionFromData(data map[string]interface{}) *FieldCollection { +func FieldCollectionFromData(data map[string]any) *FieldCollection { o := NewFieldCollection() o.SetFromData(data) return o @@ -65,7 +65,7 @@ func (f *FieldCollection) Clone() *FieldCollection { } // Data creates a map-copy of the data stored inside the FieldCollection -func (f *FieldCollection) Data() map[string]interface{} { +func (f *FieldCollection) Data() map[string]any { if f == nil { return nil } @@ -73,7 +73,7 @@ func (f *FieldCollection) Data() map[string]interface{} { f.lock.RLock() defer f.lock.RUnlock() - out := make(map[string]interface{}) + out := make(map[string]any) for k := range f.data { out[k] = f.data[k] } @@ -162,6 +162,32 @@ func (f *FieldCollection) MustString(name string, defVal *string) string { return v } +// MustStringSlice is a wrapper around StringSlice and returns nil in case StringSlice returns any error +func (f *FieldCollection) MustStringSlice(name string) []string { + v, err := f.StringSlice(name) + if err != nil { + return nil + } + return v +} + +// Any returns the value stored for key name as-is (no type conversion) and a nil value together with an error if it cannot be read +func (f *FieldCollection) Any(name string) (any, error) { + if f == nil || f.data == nil { + return nil, errors.New("uninitialized field collection") + } + + f.lock.RLock() + defer f.lock.RUnlock() + + v, ok := f.data[name] + if !ok { + return nil, ErrValueNotSet + } + + return v, nil +} + // Bool tries to read key name as bool func (f *FieldCollection) Bool(name
string) (bool, error) { if f == nil || f.data == nil { @@ -236,7 +262,7 @@ func (f *FieldCollection) Int64(name string) (int64, error) { } // Set sets a single key to specified value -func (f *FieldCollection) Set(key string, value interface{}) { +func (f *FieldCollection) Set(key string, value any) { if f == nil { f = NewFieldCollection() } @@ -245,14 +271,14 @@ func (f *FieldCollection) Set(key string, value interface{}) { defer f.lock.Unlock() if f.data == nil { - f.data = make(map[string]interface{}) + f.data = make(map[string]any) } f.data[key] = value } // SetFromData takes a map of data and copies all data into the FieldCollection -func (f *FieldCollection) SetFromData(data map[string]interface{}) { +func (f *FieldCollection) SetFromData(data map[string]any) { if f == nil { f = NewFieldCollection() } @@ -261,7 +287,7 @@ func (f *FieldCollection) SetFromData(data map[string]interface{}) { defer f.lock.Unlock() if f.data == nil { - f.data = make(map[string]interface{}) + f.data = make(map[string]any) } for key, value := range data { @@ -312,7 +338,7 @@ func (f *FieldCollection) StringSlice(name string) ([]string, error) { case []string: return v, nil - case []interface{}: + case []any: var out []string for _, iv := range v { @@ -329,7 +355,7 @@ func (f *FieldCollection) StringSlice(name string) ([]string, error) { return nil, ErrValueMismatch } -// Implement JSON marshalling to plain underlying map[string]interface{} +// Implement JSON marshalling to plain underlying map[string]any func (f *FieldCollection) MarshalJSON() ([]byte, error) { if f == nil || f.data == nil { @@ -343,7 +369,7 @@ func (f *FieldCollection) MarshalJSON() ([]byte, error) { } func (f *FieldCollection) UnmarshalJSON(raw []byte) error { - data := make(map[string]interface{}) + data := make(map[string]any) if err := json.Unmarshal(raw, &data); err != nil { return errors.Wrap(err, "unmarshalling from JSON") } @@ -352,14 +378,14 @@ func (f *FieldCollection) UnmarshalJSON(raw []byte) error { 
return nil } -// Implement YAML marshalling to plain underlying map[string]interface{} +// Implement YAML marshalling to plain underlying map[string]any -func (f *FieldCollection) MarshalYAML() (interface{}, error) { +func (f *FieldCollection) MarshalYAML() (any, error) { return f.Data(), nil } -func (f *FieldCollection) UnmarshalYAML(unmarshal func(interface{}) error) error { - data := make(map[string]interface{}) +func (f *FieldCollection) UnmarshalYAML(unmarshal func(any) error) error { + data := make(map[string]any) if err := unmarshal(&data); err != nil { return errors.Wrap(err, "unmarshalling from YAML") } diff --git a/plugins_core.go b/plugins_core.go index 81db751..26c1654 100644 --- a/plugins_core.go +++ b/plugins_core.go @@ -12,12 +12,15 @@ import ( "github.com/Luzifer/go_helpers/v2/str" "github.com/Luzifer/twitch-bot/v3/internal/actors/announce" "github.com/Luzifer/twitch-bot/v3/internal/actors/ban" + "github.com/Luzifer/twitch-bot/v3/internal/actors/clipdetector" "github.com/Luzifer/twitch-bot/v3/internal/actors/commercial" "github.com/Luzifer/twitch-bot/v3/internal/actors/counter" "github.com/Luzifer/twitch-bot/v3/internal/actors/delay" deleteactor "github.com/Luzifer/twitch-bot/v3/internal/actors/delete" "github.com/Luzifer/twitch-bot/v3/internal/actors/eventmod" "github.com/Luzifer/twitch-bot/v3/internal/actors/filesay" + "github.com/Luzifer/twitch-bot/v3/internal/actors/linkdetector" + "github.com/Luzifer/twitch-bot/v3/internal/actors/linkprotect" logActor "github.com/Luzifer/twitch-bot/v3/internal/actors/log" "github.com/Luzifer/twitch-bot/v3/internal/actors/modchannel" "github.com/Luzifer/twitch-bot/v3/internal/actors/nuke" @@ -52,12 +55,15 @@ var ( // Actors announce.Register, ban.Register, + clipdetector.Register, commercial.Register, counter.Register, delay.Register, deleteactor.Register, eventmod.Register, filesay.Register, + linkdetector.Register, + linkprotect.Register, logActor.Register, modchannel.Register, nuke.Register, diff --git 
a/wiki/Actors.md b/wiki/Actors.md index abe85ae..5cda458 100644 --- a/wiki/Actors.md +++ b/wiki/Actors.md @@ -100,6 +100,47 @@ Delete message which caused the rule to be executed # Does not have configuration attributes ``` +## Enforce Link-Protection + +Uses link- and clip-scanner to detect links / clips and applies link protection as defined + +```yaml +- type: linkprotect + attributes: + # Allowed links (if any is specified all non matching links will cause enforcement action, link must contain any of these strings) + # Optional: true + # Type: array of strings + allowed_links: [] + # Disallowed links (if any is specified all non matching links will not cause enforcement action, link must contain any of these strings) + # Optional: true + # Type: array of strings + disallowed_links: [] + # Allowed clip channels (if any is specified clips of all other channels will cause enforcement action, clip-links will be ignored in link-protection when this is used) + # Optional: true + # Type: array of strings + allowed_clip_channels: [] + # Disallowed clip channels (if any is specified clips of all other channels will not cause enforcement action, clip-links will be ignored in link-protection when this is used) + # Optional: true + # Type: array of strings + disallowed_clip_channels: [] + # Enforcement action to take when disallowed link / clip is detected (ban, delete, duration-value i.e. 1m) + # Optional: false + # Type: string + action: "" + # Reason why the enforcement action was taken + # Optional: false + # Type: string + reason: "" + # Stop rule execution when action is applied (i.e. not to post a message after a ban for spam links) + # Optional: true + # Type: bool + stop_on_action: false + # Stop rule execution when no action is applied (i.e. 
not to post a message when no enforcement action is taken) + # Optional: true + # Type: bool + stop_on_no_action: false +``` + ## Execute Script / Command Execute external script / command @@ -340,6 +381,24 @@ Respond to message with a new message to_channel: "" ``` +## Scan for Clips + +Scans for clip-links in the message and adds the "clips" field to the event data + +```yaml +- type: clipdetector + # Does not have configuration attributes +``` + +## Scan for Links + +Scans for links in the message and adds the "links" field to the event data + +```yaml +- type: linkdetector + # Does not have configuration attributes +``` + ## Send RAW Message Send raw IRC message