[linkprotect] Add Link-, Clip-Detector and Link-Protection actor (#42)

This commit is contained in:
Knut Ahlers 2023-04-08 00:41:00 +02:00 committed by GitHub
parent 4f12b5c206
commit d92a451ebc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 1082 additions and 16 deletions

View file

@ -34,3 +34,9 @@ pull_wiki:
push_wiki: push_wiki:
git subtree push --prefix=wiki https://github.com/Luzifer/twitch-bot.wiki.git master git subtree push --prefix=wiki https://github.com/Luzifer/twitch-bot.wiki.git master
# --- Tools
update_ua_list:
# User-Agents provided by https://www.useragents.me/
curl -sSf https://www.useragents.me/api | jq -r '.data[].ua' | grep -v 'Trident' >internal/linkcheck/user-agents.txt

View file

@ -0,0 +1,77 @@
package clipdetector
import (
"context"
"regexp"
"github.com/go-irc/irc"
"github.com/pkg/errors"
"github.com/Luzifer/twitch-bot/v3/internal/actors/linkdetector"
"github.com/Luzifer/twitch-bot/v3/pkg/twitch"
"github.com/Luzifer/twitch-bot/v3/plugins"
)
// actorName is the type identifier under which the clip detector actor
// is registered and which is returned by Actor.Name.
const actorName = "clipdetector"
var (
	// botTwitchClient is the Twitch client taken from the registration
	// arguments in Register; Execute uses it to look up clip information.
	botTwitchClient *twitch.Client
	// clipIDScanner captures the clip ID (first sub-match) from links
	// containing `clips.twitch.tv/<id>` or `www.twitch.tv/<channel>/clip/<id>`.
	clipIDScanner = regexp.MustCompile(`(?:clips\.twitch\.tv|www\.twitch\.tv/[^/]*/clip)/([A-Za-z0-9_-]+)`)
)
// Register stores the Twitch client for later clip lookups and
// registers the clip detector actor together with its documentation.
// It always returns nil.
func Register(args plugins.RegistrationArguments) error {
	botTwitchClient = args.GetTwitchClient()

	newActor := func() plugins.Actor { return &Actor{} }
	args.RegisterActor(actorName, newActor)

	documentation := plugins.ActionDocumentation{
		Description: `Scans for clip-links in the message and adds the "clips" field to the event data`,
		Name:        "Scan for Clips",
		Type:        actorName,
	}
	args.RegisterActorDocumentation(documentation)

	return nil
}
// Actor is the clip detector actor. It holds no state; it is exported
// so that other actors can run it directly via (Actor{}).Execute.
type Actor struct{}
// Execute makes sure the "clips" field is present in the event data.
//
// When the field already exists nothing is done. Otherwise the link
// detector is executed (it is a no-op if links were already detected),
// every detected link is matched against clipIDScanner and for each
// clip ID found the clip information is fetched through the Twitch
// client. The collected clip information is stored as "clips".
//
// The returned preventCooldown is false except when the link detector
// fails, in which case its value is passed through. Any failure to
// detect links, read the "links" field or fetch a clip is returned as
// a wrapped error.
func (Actor) Execute(c *irc.Client, m *irc.Message, r *plugins.Rule, eventData *plugins.FieldCollection, attrs *plugins.FieldCollection) (preventCooldown bool, err error) {
	// Clips were collected by an earlier run: nothing left to do
	if eventData.HasAll("clips") {
		return false, nil
	}

	// The clips are derived from the links, so the links need to be there
	preventCooldown, err = (linkdetector.Actor{}).Execute(c, m, r, eventData, attrs)
	if err != nil {
		return preventCooldown, errors.Wrap(err, "detecting links")
	}

	links, err := eventData.StringSlice("links")
	if err != nil {
		return false, errors.Wrap(err, "getting links data")
	}

	var clips []twitch.ClipInfo
	for idx := range links {
		match := clipIDScanner.FindStringSubmatch(links[idx])
		if len(match) == 0 {
			// Not a clip link
			continue
		}

		info, fetchErr := botTwitchClient.GetClipByID(context.Background(), match[1])
		if fetchErr != nil {
			return false, errors.Wrap(fetchErr, "getting clip info")
		}

		clips = append(clips, info)
	}

	eventData.Set("clips", clips)
	return false, nil
}
// IsAsync reports false for the clip detector.
func (Actor) IsAsync() bool {
	return false
}

// Name returns the identifier the actor is registered with.
func (Actor) Name() string {
	return actorName
}

// Validate never fails: no attribute is inspected.
func (Actor) Validate(plugins.TemplateValidatorFunc, *plugins.FieldCollection) error {
	return nil
}

View file

@ -0,0 +1,40 @@
package linkdetector
import (
"github.com/go-irc/irc"
"github.com/Luzifer/twitch-bot/v3/internal/linkcheck"
"github.com/Luzifer/twitch-bot/v3/plugins"
)
// actorName is the type identifier under which the link detector actor
// is registered and which is returned by Actor.Name.
const actorName = "linkdetector"
// Register registers the link detector actor together with its
// documentation. It always returns nil.
func Register(args plugins.RegistrationArguments) error {
	newActor := func() plugins.Actor { return &Actor{} }
	args.RegisterActor(actorName, newActor)

	documentation := plugins.ActionDocumentation{
		Description: `Scans for links in the message and adds the "links" field to the event data`,
		Name:        "Scan for Links",
		Type:        actorName,
	}
	args.RegisterActorDocumentation(documentation)

	return nil
}
// Actor is the link detector actor. It holds no state; it is exported
// so that other actors can run it directly via (Actor{}).Execute.
type Actor struct{}
// Execute makes sure the "links" field is present in the event data.
// If it is missing, the trailing part of the message is scanned with a
// default link checker and the result is stored as "links". The method
// always returns (false, nil).
func (Actor) Execute(_ *irc.Client, m *irc.Message, _ *plugins.Rule, eventData *plugins.FieldCollection, _ *plugins.FieldCollection) (preventCooldown bool, err error) {
	if !eventData.HasAll("links") {
		// No earlier run stored links for this event, so scan now
		detected := linkcheck.New().ScanForLinks(m.Trailing())
		eventData.Set("links", detected)
	}

	return false, nil
}
// IsAsync reports false for the link detector.
func (Actor) IsAsync() bool {
	return false
}

// Name returns the identifier the actor is registered with.
func (Actor) Name() string {
	return actorName
}

// Validate never fails: no attribute is inspected.
func (Actor) Validate(plugins.TemplateValidatorFunc, *plugins.FieldCollection) error {
	return nil
}

View file

@ -0,0 +1,338 @@
package linkprotect
import (
"regexp"
"strings"
"time"
"github.com/go-irc/irc"
"github.com/pkg/errors"
"github.com/Luzifer/twitch-bot/v3/internal/actors/clipdetector"
"github.com/Luzifer/twitch-bot/v3/pkg/twitch"
"github.com/Luzifer/twitch-bot/v3/plugins"
)
// actorName is the type identifier under which the link-protection
// actor is registered and which is returned by actor.Name.
const actorName = "linkprotect"
var (
	// botTwitchClient is the Twitch client taken from the registration
	// arguments in Register; Execute uses it to ban users and delete messages.
	botTwitchClient *twitch.Client
	// clipLink matches any link containing `clips.twitch.tv/` or
	// `www.twitch.tv/<channel>/clip/`; it is used to exclude clip links
	// from the plain link checks when clip rules are configured.
	clipLink = regexp.MustCompile(`.*(?:clips\.twitch\.tv|www\.twitch\.tv/[^/]*/clip)/.*`)
	// ptrBoolFalse is the default passed to MustBool for optional flags.
	ptrBoolFalse = func(v bool) *bool { return &v }(false)
	// ptrStringEmpty is the default passed to MustString for string attributes.
	ptrStringEmpty = func(v string) *string { return &v }("")
)
// Register stores the Twitch client used for enforcement actions and
// registers the link-protection actor together with the documentation
// of all attributes it reads: the four allow / deny lists, the
// enforcement action and reason and the two stop flags. It always
// returns nil.
func Register(args plugins.RegistrationArguments) error {
	botTwitchClient = args.GetTwitchClient()
	args.RegisterActor(actorName, func() plugins.Actor { return &actor{} })
	args.RegisterActorDocumentation(plugins.ActionDocumentation{
		Description: `Uses link- and clip-scanner to detect links / clips and applies link protection as defined`,
		Name: "Enforce Link-Protection",
		Type: actorName,
		Fields: []plugins.ActionDocumentationField{
			// Link allow-list, matched by substring in checkAllLinksAllowed
			{
				Default: "",
				Description: "Allowed links (if any is specified all non matching links will cause enforcement action, link must contain any of these strings)",
				Key: "allowed_links",
				Name: "Allowed Links",
				Optional: true,
				SupportTemplate: false,
				Type: plugins.ActionDocumentationFieldTypeStringSlice,
			},
			// Link deny-list, matched by substring in checkLinkDenied
			{
				Default: "",
				Description: "Disallowed links (if any is specified all non matching links will not cause enforcement action, link must contain any of these strings)",
				Key: "disallowed_links",
				Name: "Disallowed Links",
				Optional: true,
				SupportTemplate: false,
				Type: plugins.ActionDocumentationFieldTypeStringSlice,
			},
			// Clip channel allow-list, compared in checkAllClipChannelsAllowed
			{
				Default: "",
				Description: "Allowed clip channels (if any is specified clips of all other channels will cause enforcement action, clip-links will be ignored in link-protection when this is used)",
				Key: "allowed_clip_channels",
				Name: "Allowed Clip Channels",
				Optional: true,
				SupportTemplate: false,
				Type: plugins.ActionDocumentationFieldTypeStringSlice,
			},
			// Clip channel deny-list, compared in checkClipChannelDenied
			{
				Default: "",
				Description: "Disallowed clip channels (if any is specified clips of all other channels will not cause enforcement action, clip-links will be ignored in link-protection when this is used)",
				Key: "disallowed_clip_channels",
				Name: "Disallowed Clip Channels",
				Optional: true,
				SupportTemplate: false,
				Type: plugins.ActionDocumentationFieldTypeStringSlice,
			},
			// Enforcement action evaluated in Execute
			{
				Default: "",
				Description: "Enforcement action to take when disallowed link / clip is detected (ban, delete, duration-value i.e. 1m)",
				Key: "action",
				Name: "Action",
				Optional: false,
				SupportTemplate: false,
				Type: plugins.ActionDocumentationFieldTypeString,
			},
			// Reason handed to the ban / timeout call
			{
				Default: "",
				Description: "Reason why the enforcement action was taken",
				Key: "reason",
				Name: "Reason",
				Optional: false,
				SupportTemplate: false,
				Type: plugins.ActionDocumentationFieldTypeString,
			},
			// Flags controlling whether Execute returns ErrStopRuleExecution
			{
				Default: "false",
				Description: "Stop rule execution when action is applied (i.e. not to post a message after a ban for spam links)",
				Key: "stop_on_action",
				Name: "Stop on Action",
				Optional: true,
				SupportTemplate: false,
				Type: plugins.ActionDocumentationFieldTypeBool,
			},
			{
				Default: "false",
				Description: "Stop rule execution when no action is applied (i.e. not to post a message when no enforcement action is taken)",
				Key: "stop_on_no_action",
				Name: "Stop on no Action",
				Optional: true,
				SupportTemplate: false,
				Type: plugins.ActionDocumentationFieldTypeBool,
			},
		},
	})
	return nil
}
type (
	// actor is the link-protection actor; it holds no state.
	actor struct{}
	// verdict is the result of checking the links / clips of a message
	// against the configured allow and deny lists.
	verdict uint
)
const (
	// verdictAllFine signals that no configured condition was violated.
	verdictAllFine verdict = iota
	// verdictMisbehave signals that at least one link or clip violated
	// the configured conditions and the enforcement action is to be taken.
	verdictMisbehave
)
// Execute checks the links and clips of the message against the
// configured conditions and applies the enforcement action on violation.
//
// The clip detector is executed first (it is a no-op if clips were
// already detected) so the "links" and "clips" fields are available in
// the event data. Messages without links are ignored. If the check
// yields verdictAllFine nothing is enforced and, when
// "stop_on_no_action" is set, plugins.ErrStopRuleExecution is returned.
//
// On violation the "action" attribute selects the enforcement:
//   - "delete" removes the message (requires the "id" message tag),
//   - "ban" calls BanUser with a zero duration,
//   - everything else is parsed as a duration and passed to BanUser.
//
// A parsed duration which is zero or negative is rejected: the plain
// "ban" action is the one handing a zero duration to BanUser, so a
// value like "0s" would otherwise be enforced exactly like "ban"
// (presumably a permanent ban - confirm against BanUser).
//
// After a successful enforcement plugins.ErrStopRuleExecution is
// returned when "stop_on_action" is set. All other errors are wrapped
// with the step that failed.
//
//nolint:gocyclo // Minimum over the limit, makes no sense to split
func (a actor) Execute(c *irc.Client, m *irc.Message, r *plugins.Rule, eventData *plugins.FieldCollection, attrs *plugins.FieldCollection) (preventCooldown bool, err error) {
	// In case the clip detector did not run before, lets run it now
	if preventCooldown, err = (clipdetector.Actor{}).Execute(c, m, r, eventData, attrs); err != nil {
		return preventCooldown, errors.Wrap(err, "detecting links / clips")
	}

	links, err := eventData.StringSlice("links")
	if err != nil {
		return preventCooldown, errors.Wrap(err, "getting links from event")
	}

	if len(links) == 0 {
		// If there are no links there is nothing to protect and there
		// are also no clips as they are parsed from the links
		return false, nil
	}

	clipsInterface, err := eventData.Any("clips")
	if err != nil {
		return preventCooldown, errors.Wrap(err, "getting clips from event")
	}

	clips, ok := clipsInterface.([]twitch.ClipInfo)
	if !ok {
		return preventCooldown, errors.New("invalid data-type in clips")
	}

	if a.check(links, clips, attrs) == verdictAllFine {
		if attrs.MustBool("stop_on_no_action", ptrBoolFalse) {
			return false, plugins.ErrStopRuleExecution
		}
		return false, nil
	}

	// That message misbehaved so we need to punish them
	switch action := attrs.MustString("action", ptrStringEmpty); action {
	case "delete":
		msgID, ok := m.Tags.GetTag("id")
		if !ok || msgID == "" {
			return false, errors.New("found no message id")
		}

		if err = botTwitchClient.DeleteMessage(
			plugins.DeriveChannel(m, eventData),
			msgID,
		); err != nil {
			return false, errors.Wrap(err, "deleting message")
		}

	default:
		// "ban" keeps the zero duration, every other value must be a
		// valid, positive duration describing the timeout
		var banDuration time.Duration
		if action != "ban" {
			if banDuration, err = time.ParseDuration(action); err != nil {
				return false, errors.Wrap(err, "parsing punishment level")
			}

			if banDuration <= 0 {
				return false, errors.Errorf("punishment duration must be positive, got %s", banDuration)
			}
		}

		if err = botTwitchClient.BanUser(
			plugins.DeriveChannel(m, eventData),
			strings.TrimLeft(plugins.DeriveUser(m, eventData), "@"),
			banDuration,
			attrs.MustString("reason", ptrStringEmpty),
		); err != nil {
			return false, errors.Wrap(err, "executing user ban")
		}
	}

	if attrs.MustBool("stop_on_action", ptrBoolFalse) {
		return false, plugins.ErrStopRuleExecution
	}

	return false, nil
}
// IsAsync reports false for the link-protection actor.
func (actor) IsAsync() bool {
	return false
}

// Name returns the identifier the actor is registered with.
func (actor) Name() string {
	return actorName
}
// Validate checks the attributes of a link-protection action:
//   - "action" must be "ban", "delete" or a positive duration (i.e. "1m"),
//   - "reason" must be a non-empty string,
//   - at least one of the allow / deny lists must contain an entry.
//
// The action is checked here so a mistyped value is reported when the
// configuration is validated instead of when the first violating
// message needs to be punished.
func (actor) Validate(_ plugins.TemplateValidatorFunc, attrs *plugins.FieldCollection) error {
	action, err := attrs.String("action")
	if err != nil || action == "" {
		return errors.New("action must be non-empty string")
	}

	switch action {
	case "ban", "delete":
		// Fixed keywords, nothing more to check

	default:
		// Everything else is used as timeout duration by Execute
		if d, err := time.ParseDuration(action); err != nil || d <= 0 {
			return errors.New("action must be ban, delete or a positive duration")
		}
	}

	if v, err := attrs.String("reason"); err != nil || v == "" {
		return errors.New("reason must be non-empty string")
	}

	if len(attrs.MustStringSlice("allowed_links"))+
		len(attrs.MustStringSlice("disallowed_links"))+
		len(attrs.MustStringSlice("allowed_clip_channels"))+
		len(attrs.MustStringSlice("disallowed_clip_channels")) == 0 {
		return errors.New("no conditions are provided")
	}

	return nil
}
// check runs the four list checks in fixed order (denied links,
// allowed links, denied clip channels, allowed clip channels) and
// yields verdictMisbehave as soon as one of them does. Clip links are
// excluded from the plain link checks when any clip channel list is
// configured.
func (a actor) check(links []string, clips []twitch.ClipInfo, attrs *plugins.FieldCollection) (v verdict) {
	clipRuleCount := len(attrs.MustStringSlice("allowed_clip_channels")) + len(attrs.MustStringSlice("disallowed_clip_channels"))
	hasClipDefinition := clipRuleCount > 0

	// The || chain short-circuits, so later checks are skipped once a
	// violation was found
	misbehaved := a.checkLinkDenied(attrs.MustStringSlice("disallowed_links"), links, hasClipDefinition) == verdictMisbehave ||
		a.checkAllLinksAllowed(attrs.MustStringSlice("allowed_links"), links, hasClipDefinition) == verdictMisbehave ||
		a.checkClipChannelDenied(attrs.MustStringSlice("disallowed_clip_channels"), clips) == verdictMisbehave ||
		a.checkAllClipChannelsAllowed(attrs.MustStringSlice("allowed_clip_channels"), clips) == verdictMisbehave

	if misbehaved {
		return verdictMisbehave
	}

	return verdictAllFine
}
// checkAllClipChannelsAllowed yields verdictMisbehave when an allow
// list is given and at least one clip belongs to a broadcaster not on
// that list (names are compared case-insensitively). Without an allow
// list the check is a no-op.
func (actor) checkAllClipChannelsAllowed(allowList []string, clips []twitch.ClipInfo) verdict {
	if len(allowList) == 0 {
		// No explicit allow list: nothing to enforce here
		return verdictAllFine
	}

	onAllowList := func(broadcaster string) bool {
		for i := range allowList {
			if strings.EqualFold(broadcaster, allowList[i]) {
				return true
			}
		}
		return false
	}

	for i := range clips {
		if !onAllowList(clips[i].BroadcasterName) {
			// One clip outside the list is enough
			return verdictMisbehave
		}
	}

	// Every clip is from an allowed channel
	return verdictAllFine
}
// checkClipChannelDenied yields verdictMisbehave when the broadcaster
// of any clip is on the deny list (case-insensitive comparison).
func (actor) checkClipChannelDenied(denyList []string, clips []twitch.ClipInfo) verdict {
	for clipIdx := range clips {
		broadcaster := clips[clipIdx].BroadcasterName

		for denyIdx := range denyList {
			if strings.EqualFold(broadcaster, denyList[denyIdx]) {
				return verdictMisbehave
			}
		}
	}

	return verdictAllFine
}
// checkAllLinksAllowed yields verdictMisbehave when an allow list is
// given and at least one link contains none of its entries
// (case-insensitive substring match). With autoAllowClipLinks set, clip
// links are skipped as they are judged by the clip channel rules.
// Without an allow list the check is a no-op.
func (actor) checkAllLinksAllowed(allowList, links []string, autoAllowClipLinks bool) verdict {
	if len(allowList) == 0 {
		// No explicit allow list: nothing to enforce here
		return verdictAllFine
	}

	for _, candidate := range links {
		if autoAllowClipLinks && clipLink.MatchString(candidate) {
			// Clip links don't count against the allow list
			continue
		}

		matched := false
		for _, entry := range allowList {
			if strings.Contains(strings.ToLower(candidate), strings.ToLower(entry)) {
				matched = true
				break
			}
		}

		if !matched {
			// One link outside the list is enough
			return verdictMisbehave
		}
	}

	// Every remaining link matched the allow list
	return verdictAllFine
}
// checkLinkDenied yields verdictMisbehave when any link contains an
// entry of the deny list (case-insensitive substring match). With
// ignoreClipLinks set, clip links are skipped as they are judged by the
// clip channel rules.
func (actor) checkLinkDenied(denyList, links []string, ignoreClipLinks bool) verdict {
	for i := 0; i < len(links); i++ {
		candidate := links[i]

		if ignoreClipLinks && clipLink.MatchString(candidate) {
			// Clips have their own directives
			continue
		}

		lowered := strings.ToLower(candidate)
		for _, denied := range denyList {
			if strings.Contains(lowered, strings.ToLower(denied)) {
				// Explicitly denied link found
				return verdictMisbehave
			}
		}
	}

	return verdictAllFine
}

View file

@ -0,0 +1,226 @@
package linkcheck
import (
"context"
"crypto/rand"
_ "embed"
"math/big"
"net/http"
"net/http/cookiejar"
"net/url"
"regexp"
"strings"
"time"
"github.com/Luzifer/go_helpers/v2/str"
)
const (
	// DefaultCheckTimeout defines the default time the request to a site
	// may take to answer
	DefaultCheckTimeout = 10 * time.Second
	// maxRedirects is the number of already visited links at which
	// resolveFinal stops following further redirects
	maxRedirects = 50
)
type (
	// Checker contains logic to detect and resolve links in a message
	Checker struct {
		// checkTimeout limits each single HTTP request made while resolving a link
		checkTimeout time.Duration
		// userAgents is the pool from which a random User-Agent header is picked
		userAgents []string
		skipValidation bool // Only for tests, not settable from the outside
	}
)
var (
	// defaultUserAgents is replaced in init with the lines of the
	// embedded user-agent list and used by New
	defaultUserAgents = []string{}
	// dropSet matches every character except ASCII letters, digits,
	// `.`, `:`, `/`, `_`, `-` and whitespace
	dropSet = regexp.MustCompile(`[^a-zA-Z0-9.:/\s_-]`)
	// linkTest matches (anywhere in the input) one or more dot-terminated
	// labels of lowercase letters, digits and hyphens followed by a
	// final label of at least two such characters
	linkTest = regexp.MustCompile(`(?:[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?\.)+[a-z0-9][a-z0-9-]{0,61}[a-z0-9]`)
	// numericHost matches hosts consisting only of dot-separated digit
	// groups with an optional numeric port
	numericHost = regexp.MustCompile(`^(?:[0-9]+\.)*[0-9]+(?::[0-9]+)?$`)
	// uaList holds the embedded content of user-agents.txt
	//go:embed user-agents.txt
	uaList string
)
// init fills defaultUserAgents with one entry per line of the embedded
// user-agent list (surrounding whitespace of the list is trimmed first).
func init() {
	trimmedList := strings.TrimSpace(uaList)
	defaultUserAgents = strings.Split(trimmedList, "\n")
}
// New returns a Checker configured with DefaultCheckTimeout and the
// default user-agent list.
func New() *Checker {
	checker := new(Checker)
	checker.checkTimeout = DefaultCheckTimeout
	checker.userAgents = defaultUserAgents

	return checker
}
// ScanForLinks searches the message for links. The scanners are tried
// in order from the plain message to increasingly aggressive
// de-obfuscation (joined words, stripped special characters, "dot"
// replaced by "."); the result of the first scanner which finds
// anything is returned. If no scanner finds a link the result is nil.
func (c Checker) ScanForLinks(message string) (links []string) {
	scanners := [...]func(string) []string{
		c.scanPlainNoObfuscate,
		c.scanObfuscateSpace,
		c.scanObfuscateSpecialCharsAndSpaces,
		c.scanDotObfuscation,
	}

	for _, scan := range scanners {
		links = scan(message)
		if links != nil {
			return links
		}
	}

	return links
}
// resolveFinal takes a link and looks up the final destination of
// that link after all redirects were followed.
//
// link is the candidate to request, cookieJar is shared by all
// requests of one redirect chain, callStack contains the links already
// visited within the chain and userAgent is sent as User-Agent header.
//
// The empty string is returned when link does not look like a link,
// cannot be parsed, has a fully numeric host or the request fails.
// When link was already visited or maxRedirects links were visited,
// link itself is returned. Otherwise the URL of the first non-redirect
// response is returned.
//
// Redirects are followed by recursion, therefore the response body and
// the timeout context are released before recursing: deferring them
// would keep one open body and one pending timer per hop until the
// whole chain is resolved.
func (c Checker) resolveFinal(link string, cookieJar *cookiejar.Jar, callStack []string, userAgent string) string {
	if !linkTest.MatchString(link) && !c.skipValidation {
		return ""
	}

	if str.StringInSlice(link, callStack) || len(callStack) == maxRedirects {
		// We got ourselves a loop: Yay!
		return link
	}

	client := &http.Client{
		// Redirects are followed manually to apply the special cases
		// in resolveReference and to track the visited links
		CheckRedirect: func(req *http.Request, via []*http.Request) error {
			return http.ErrUseLastResponse
		},
		Jar: cookieJar,
	}

	ctx, cancel := context.WithTimeout(context.Background(), c.checkTimeout)
	defer cancel() // Covers all early returns, calling cancel twice is fine

	u, err := url.Parse(link)
	if err != nil {
		return ""
	}

	if u.Scheme == "" {
		// We have no scheme and the url is in the path, lets add the
		// scheme and re-parse the URL to avoid some confusion
		u.Scheme = "http"
		u, err = url.Parse(u.String())
		if err != nil {
			return ""
		}
	}

	if numericHost.MatchString(u.Host) && !c.skipValidation {
		// Host is fully numeric: We don't support scanning that
		return ""
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil)
	if err != nil {
		return ""
	}
	req.Header.Set("User-Agent", userAgent)

	resp, err := client.Do(req)
	if err != nil {
		return ""
	}

	// Only status and Location header are of interest: take them and
	// release body and context right away instead of at function exit
	status, location := resp.StatusCode, resp.Header.Get("location")
	_ = resp.Body.Close() // Nothing sensible to do on a close error
	cancel()

	if status > 299 && status < 400 {
		// We got a redirect
		tu, err := url.Parse(location)
		if err != nil {
			return ""
		}
		target := c.resolveReference(u, tu)
		return c.resolveFinal(target, cookieJar, append(callStack, link), userAgent)
	}

	// We got a response, it's no redirect, we count this as a success
	return u.String()
}
// resolveReference determines the next URL to visit from the redirect
// location loc received while requesting origin. Some sites wrap the
// real target into a query parameter of an intermediate page; for
// those the parameter is taken directly. Everything else is resolved
// as a normal (possibly relative) reference against origin.
func (Checker) resolveReference(origin *url.URL, loc *url.URL) string {
	query := loc.Query()

	switch {
	case loc.Path == "/away.php" && query.Has("to"):
		// vkontakte used as shortener / obfuscation: their page does
		// an HTML / JS redirect, so the target is taken from "to"
		return query.Get("to")

	case loc.Host == "consent.youtube.com" && query.Has("continue"):
		// Youtube redirects to its consent page, the real target is
		// in "continue". The cbrd parameter is removed from it as it
		// causes an infinite loop.
		continueTarget := query.Get("continue")

		parsed, err := url.Parse(continueTarget)
		if err != nil {
			return continueTarget
		}

		params := parsed.Query()
		params.Del("cbrd")
		parsed.RawQuery = params.Encode()

		return parsed.String()

	case loc.Host == "www.instagram.com" && query.Has("next"):
		// Instagram redirects to its login page, the real target is
		// in "next"
		return query.Get("next")
	}

	// No special case: plain reference resolution
	return origin.ResolveReference(loc).String()
}
// getJar returns a new, empty cookie jar created with default options.
// The error of the constructor is ignored.
func (Checker) getJar() *cookiejar.Jar {
	freshJar, _ := cookiejar.New(nil)

	return freshJar
}
var (
	// dotObfuscationExpr matches the word "dot" (any case), optionally
	// wrapped in parentheses, together with the surrounding whitespace
	dotObfuscationExpr = regexp.MustCompile(`(?i)\s*\(?dot\)?\s*`)
	// whitespaceExpr matches runs of whitespace and is used to split a
	// message into its parts
	whitespaceExpr = regexp.MustCompile(`\s+`)
)

// scanDotObfuscation replaces every written-out "dot" (see
// dotObfuscationExpr) with a real "." and scans the result like a
// plain message.
func (c Checker) scanDotObfuscation(message string) (links []string) {
	message = dotObfuscationExpr.ReplaceAllString(message, ".")
	return c.scanPlainNoObfuscate(message)
}

// scanObfuscateSpace joins each pair of neighbouring message parts and
// tries to resolve the joined text as a link. Every pair which
// resolves adds its final destination to links.
func (c Checker) scanObfuscateSpace(message string) (links []string) {
	// Spammers use spaces in their links to prevent link protection matches
	parts := whitespaceExpr.Split(message, -1)

	for i := 0; i < len(parts)-1; i++ {
		if link := c.resolveFinal(strings.Join(parts[i:i+2], ""), c.getJar(), nil, c.userAgent()); link != "" {
			links = append(links, link)
		}
	}

	return links
}

// scanObfuscateSpecialCharsAndSpaces removes all characters matched by
// dropSet from the message and scans the result with scanObfuscateSpace.
func (c Checker) scanObfuscateSpecialCharsAndSpaces(message string) (links []string) {
	// First clean URL from all characters not acceptable in Domains (plus some extra chars)
	message = dropSet.ReplaceAllString(message, "")
	return c.scanObfuscateSpace(message)
}

// scanPlainNoObfuscate splits the message at whitespace and tries to
// resolve each part as a link. Every part which resolves adds its
// final destination to links.
func (c Checker) scanPlainNoObfuscate(message string) (links []string) {
	parts := whitespaceExpr.Split(message, -1)

	for _, part := range parts {
		if link := c.resolveFinal(part, c.getJar(), nil, c.userAgent()); link != "" {
			links = append(links, link)
		}
	}

	return links
}
// userAgent returns a randomly picked entry of the configured
// user-agent list.
//
// An empty list yields the empty string: rand.Int panics for a
// non-positive upper bound, so it must not be called in that case. If
// the random source fails the first entry is used instead of
// dereferencing the nil result.
func (c Checker) userAgent() string {
	if len(c.userAgents) == 0 {
		return ""
	}

	n, err := rand.Int(rand.Reader, big.NewInt(int64(len(c.userAgents))))
	if err != nil {
		// No randomness available: any entry is better than none
		return c.userAgents[0]
	}

	return c.userAgents[n.Int64()]
}

View file

@ -0,0 +1,180 @@
package linkcheck
import (
"fmt"
"net/http"
"net/http/httptest"
"sort"
"strconv"
"testing"
"github.com/gorilla/mux"
"github.com/stretchr/testify/assert"
)
// TestInfiniteRedirect serves two paths redirecting to each other and
// expects the scan to stop at the first link seen twice.
func TestInfiniteRedirect(t *testing.T) {
	redirectTo := func(target string) http.HandlerFunc {
		return func(w http.ResponseWriter, r *http.Request) {
			http.Redirect(w, r, target, http.StatusFound)
		}
	}

	router := http.NewServeMux()
	router.HandleFunc("/", redirectTo("/test"))
	router.HandleFunc("/test", redirectTo("/"))

	checker := New()
	checker.skipValidation = true

	server := httptest.NewServer(router)
	t.Cleanup(server.Close)

	message := fmt.Sprintf("Here have a redirect loop: %s", server.URL)

	// We expect /test to be the first repeat as the callstack will look like this:
	// ":12345", ":12345/test", ":12345/", ":12345/test" (which is the duplicate)
	expected := []string{fmt.Sprintf("%s/test", server.URL)}
	assert.Equal(t, expected, checker.ScanForLinks(message))
}
// TestMaxRedirects serves an endless chain of redirects (/1, /2, ...)
// and expects the scan to stop once maxRedirects links were visited.
func TestMaxRedirects(t *testing.T) {
	router := mux.NewRouter()
	router.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		http.Redirect(w, r, "/1", http.StatusFound)
	})
	router.HandleFunc("/{num}", func(w http.ResponseWriter, r *http.Request) {
		current, _ := strconv.Atoi(mux.Vars(r)["num"])
		next := fmt.Sprintf("/%d", current+1)
		http.Redirect(w, r, next, http.StatusFound)
	})

	checker := New()
	checker.skipValidation = true

	server := httptest.NewServer(router)
	t.Cleanup(server.Close)

	message := fmt.Sprintf("Here have a redirect loop: %s", server.URL)

	// We expect the call to `/N` to have N previous entries and therefore be the break-point
	expected := []string{fmt.Sprintf("%s/%d", server.URL, maxRedirects)}
	assert.Equal(t, expected, checker.ScanForLinks(message))
}
// TestScanForLinks runs the scanner against real sites and is
// therefore skipped in short mode. Each case is executed as sub-test
// named after its message; the found links are sorted before being
// compared to the expectation.
func TestScanForLinks(t *testing.T) {
	if testing.Short() {
		t.SkipNow()
	}

	type scanCase struct {
		Message       string
		ExpectedLinks []string
	}

	cases := []scanCase{
		// Case: full URL is present in the message
		{
			Message:       "https://example.com",
			ExpectedLinks: []string{"https://example.com"},
		},
		// Case: full bitly link is present in the message
		{
			Message:       "https://bit.ly/438obkJ",
			ExpectedLinks: []string{"https://example.com/"},
		},
		// Case: link is present just without the protocol
		{
			Message:       "Here, take a look at this: bit.ly/438obkJ",
			ExpectedLinks: []string{"https://example.com/"},
		},
		// Case: message with vk.cc shortener
		{
			Message:       "See more here: vk.cc/ckGZN2",
			ExpectedLinks: []string{"https://vk.com/club206261664"},
		},
		// Case: link is obfuscated using space
		{
			Message:       "Take a look at example. com",
			ExpectedLinks: []string{"http://example.com"},
		},
		// Case: link is obfuscated using space and braces
		{
			Message:       "Take a look at example. (com)",
			ExpectedLinks: []string{"http://example.com"},
		},
		// Case: multiple links in one message
		{
			Message: "https://clips.twitch.tv/WrongEnchantingMinkFutureMan-EKlDjYkvDeurO9XT https://bit.ly/438obkJ",
			ExpectedLinks: []string{
				"https://clips.twitch.tv/WrongEnchantingMinkFutureMan-EKlDjYkvDeurO9XT",
				"https://example.com/",
			},
		},
		// Case: obfuscation with "dot"
		{
			Message:       "I'm live now on twitch dot tv/twitch",
			ExpectedLinks: []string{"https://www.twitch.tv/twitch"},
		},
		// Case: enhanced "dot" obfuscation
		{
			Message:       "You can visit Archive(Dot) org in your browser",
			ExpectedLinks: []string{"http://Archive.org"},
		},
		// Case: Youtube does weird stuff
		{
			Message:       "https://luziferus.tv/youtube",
			ExpectedLinks: []string{"https://www.youtube.com/channel/UCjsRmaAQ0IHR2CNEBqfNOSQ"},
		},
		// Case: Instagram also does weird things
		{
			Message:       "https://bit.ly/3KHpJuy",
			ExpectedLinks: []string{"https://www.instagram.com/instagram/"},
		},
		// Case: false positives
		{Message: "game dot exe has stopped working", ExpectedLinks: nil},
		{Message: "You're following since 12.12.2020 DogChamp", ExpectedLinks: nil},
	}

	checker := New()

	for i := range cases {
		tc := cases[i]

		t.Run(tc.Message, func(t *testing.T) {
			found := checker.ScanForLinks(tc.Message)
			sort.Strings(found)
			assert.Equal(t, tc.ExpectedLinks, found, "links from message %q", tc.Message)
		})
	}
}
// TestUserAgentListNotEmpty ensures the embedded user-agent list
// contains at least one entry.
func TestUserAgentListNotEmpty(t *testing.T) {
	if len(defaultUserAgents) > 0 {
		return
	}

	t.Fatal("found empty user-agent list")
}
// TestUserAgentRandomizer draws ten user-agents and expects that not
// all draws yield the same entry and that none of them is empty.
func TestUserAgentRandomizer(t *testing.T) {
	const draws = 10

	checker := New()
	seen := make(map[string]int)

	for n := 0; n < draws; n++ {
		ua := checker.userAgent()
		seen[ua]++
	}

	for _, count := range seen {
		assert.Less(t, count, draws)
	}

	assert.Equal(t, 0, seen[""]) // there should be no empty UA
}

View file

@ -0,0 +1,43 @@
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.63
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.57
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36
Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Safari/605.1.15
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 OPR/95.0.0.0
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36
Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/110.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.41
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.56
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.53 Safari/537.36 Edg/103.0.1264.37
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.85 Safari/537.36 Edg/90.0.818.46
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.50
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Whale/3.19.166.16 Safari/537.36
Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/110.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.76
Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.46
Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0
Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:109.0) Gecko/20100101 Firefox/110.0
Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Core/1.94.192.400 QQBrowser/11.5.5250.400
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 Edg/109.0.1518.78
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36
Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 OPR/95.0.0.0
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.63
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36 Edg/92.0.902.67
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:108.0) Gecko/20100101 Firefox/108.0
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36
Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/18.17763
Mozilla/5.0 (X11; Linux x86_64; rv:108.0) Gecko/20100101 Firefox/108.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36
Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.63
Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/111.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.54
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 Edg/109.0.1518.61
Mozilla/5.0 (Windows NT 10.0; rv:109.0) Gecko/20100101 Firefox/110.0
Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36 Edg/109.0.1518.70

65
pkg/twitch/clips.go Normal file
View file

@ -0,0 +1,65 @@
package twitch
import (
	"context"
	"fmt"
	"net/http"
	"net/url"
	"time"

	"github.com/pkg/errors"
)
// clipCacheTimeout is the duration passed to the API cache when
// GetClipByID stores a fetched clip.
const clipCacheTimeout = 10 * time.Minute // Clips do not change that fast
type (
	// ClipInfo is one element of the `data` array GetClipByID decodes
	// from the Helix clips endpoint; the JSON tags name the fields read
	// from the response.
	ClipInfo struct {
		ID string `json:"id"`
		URL string `json:"url"`
		EmbedURL string `json:"embed_url"`
		// BroadcasterName is what the link-protection actor compares
		// against its clip channel lists
		BroadcasterID string `json:"broadcaster_id"`
		BroadcasterName string `json:"broadcaster_name"`
		CreatorID string `json:"creator_id"`
		CreatorName string `json:"creator_name"`
		VideoID string `json:"video_id"`
		GameID string `json:"game_id"`
		Language string `json:"language"`
		Title string `json:"title"`
		ViewCount int64 `json:"view_count"`
		CreatedAt time.Time `json:"created_at"`
		ThumbnailURL string `json:"thumbnail_url"`
		// NOTE(review): units of Duration and VodOffset are not visible
		// here (presumably seconds) - confirm against the API docs
		Duration float64 `json:"duration"`
		VodOffset int64 `json:"vod_offset"`
	}
)
// GetClipByID gets the information of a clip captured from a stream
// by its ID (slug in the URL).
//
// Results are served from the API cache when available and stored
// there for clipCacheTimeout after a successful request. A cache entry
// of an unexpected type is ignored and the clip is fetched again.
//
// The clip ID is query-escaped before being put into the request URL
// so an ID containing reserved characters cannot alter the query.
//
// An error is returned when the request fails or the API does not
// return exactly one clip.
func (c *Client) GetClipByID(ctx context.Context, clipID string) (ClipInfo, error) {
	cacheKey := []string{"getClipByID", clipID}
	if cached := c.apiCache.Get(cacheKey); cached != nil {
		if clip, ok := cached.(ClipInfo); ok {
			return clip, nil
		}
	}

	var payload struct {
		Data []ClipInfo
	}

	if err := c.request(clientRequestOpts{
		AuthType: authTypeAppAccessToken,
		Context:  ctx,
		Method:   http.MethodGet,
		OKStatus: http.StatusOK,
		Out:      &payload,
		URL:      fmt.Sprintf("https://api.twitch.tv/helix/clips?id=%s", url.QueryEscape(clipID)),
	}); err != nil {
		return ClipInfo{}, errors.Wrap(err, "getting clip info")
	}

	if l := len(payload.Data); l != 1 {
		return ClipInfo{}, errors.Errorf("unexpected number of clip info returned: %d", l)
	}

	c.apiCache.Set(cacheKey, clipCacheTimeout, payload.Data[0])

	return payload.Data[0], nil
}

View file

@ -17,17 +17,17 @@ var (
) )
type FieldCollection struct { type FieldCollection struct {
data map[string]interface{} data map[string]any
lock sync.RWMutex lock sync.RWMutex
} }
// NewFieldCollection creates a new FieldCollection with empty data store // NewFieldCollection creates a new FieldCollection with empty data store
func NewFieldCollection() *FieldCollection { func NewFieldCollection() *FieldCollection {
return &FieldCollection{data: make(map[string]interface{})} return &FieldCollection{data: make(map[string]any)}
} }
// FieldCollectionFromData is a wrapper around NewFieldCollection and SetFromData // FieldCollectionFromData is a wrapper around NewFieldCollection and SetFromData
func FieldCollectionFromData(data map[string]interface{}) *FieldCollection { func FieldCollectionFromData(data map[string]any) *FieldCollection {
o := NewFieldCollection() o := NewFieldCollection()
o.SetFromData(data) o.SetFromData(data)
return o return o
@ -65,7 +65,7 @@ func (f *FieldCollection) Clone() *FieldCollection {
} }
// Data creates a map-copy of the data stored inside the FieldCollection // Data creates a map-copy of the data stored inside the FieldCollection
func (f *FieldCollection) Data() map[string]interface{} { func (f *FieldCollection) Data() map[string]any {
if f == nil { if f == nil {
return nil return nil
} }
@ -73,7 +73,7 @@ func (f *FieldCollection) Data() map[string]interface{} {
f.lock.RLock() f.lock.RLock()
defer f.lock.RUnlock() defer f.lock.RUnlock()
out := make(map[string]interface{}) out := make(map[string]any)
for k := range f.data { for k := range f.data {
out[k] = f.data[k] out[k] = f.data[k]
} }
@ -162,6 +162,32 @@ func (f *FieldCollection) MustString(name string, defVal *string) string {
return v return v
} }
// MustStringSlice reads name through StringSlice and yields nil
// whenever StringSlice reports an error (for example when name is not set).
func (f *FieldCollection) MustStringSlice(name string) []string {
	if values, err := f.StringSlice(name); err == nil {
		return values
	}

	return nil
}
// Any reads key name without any type conversion.
//
// It returns an error when the collection is not initialized and
// ErrValueNotSet when name is not present. In both cases the returned
// value is nil: the value of an any-typed getter carries no meaning on
// error and must not look like a stored boolean false.
func (f *FieldCollection) Any(name string) (any, error) {
	if f == nil || f.data == nil {
		return nil, errors.New("uninitialized field collection")
	}

	f.lock.RLock()
	defer f.lock.RUnlock()

	v, ok := f.data[name]
	if !ok {
		return nil, ErrValueNotSet
	}

	return v, nil
}
// Bool tries to read key name as bool // Bool tries to read key name as bool
func (f *FieldCollection) Bool(name string) (bool, error) { func (f *FieldCollection) Bool(name string) (bool, error) {
if f == nil || f.data == nil { if f == nil || f.data == nil {
@ -236,7 +262,7 @@ func (f *FieldCollection) Int64(name string) (int64, error) {
} }
// Set sets a single key to specified value // Set sets a single key to specified value
func (f *FieldCollection) Set(key string, value interface{}) { func (f *FieldCollection) Set(key string, value any) {
if f == nil { if f == nil {
f = NewFieldCollection() f = NewFieldCollection()
} }
@ -245,14 +271,14 @@ func (f *FieldCollection) Set(key string, value interface{}) {
defer f.lock.Unlock() defer f.lock.Unlock()
if f.data == nil { if f.data == nil {
f.data = make(map[string]interface{}) f.data = make(map[string]any)
} }
f.data[key] = value f.data[key] = value
} }
// SetFromData takes a map of data and copies all data into the FieldCollection // SetFromData takes a map of data and copies all data into the FieldCollection
func (f *FieldCollection) SetFromData(data map[string]interface{}) { func (f *FieldCollection) SetFromData(data map[string]any) {
if f == nil { if f == nil {
f = NewFieldCollection() f = NewFieldCollection()
} }
@ -261,7 +287,7 @@ func (f *FieldCollection) SetFromData(data map[string]interface{}) {
defer f.lock.Unlock() defer f.lock.Unlock()
if f.data == nil { if f.data == nil {
f.data = make(map[string]interface{}) f.data = make(map[string]any)
} }
for key, value := range data { for key, value := range data {
@ -312,7 +338,7 @@ func (f *FieldCollection) StringSlice(name string) ([]string, error) {
case []string: case []string:
return v, nil return v, nil
case []interface{}: case []any:
var out []string var out []string
for _, iv := range v { for _, iv := range v {
@ -329,7 +355,7 @@ func (f *FieldCollection) StringSlice(name string) ([]string, error) {
return nil, ErrValueMismatch return nil, ErrValueMismatch
} }
// Implement JSON marshalling to plain underlying map[string]interface{} // Implement JSON marshalling to plain underlying map[string]any
func (f *FieldCollection) MarshalJSON() ([]byte, error) { func (f *FieldCollection) MarshalJSON() ([]byte, error) {
if f == nil || f.data == nil { if f == nil || f.data == nil {
@ -343,7 +369,7 @@ func (f *FieldCollection) MarshalJSON() ([]byte, error) {
} }
func (f *FieldCollection) UnmarshalJSON(raw []byte) error { func (f *FieldCollection) UnmarshalJSON(raw []byte) error {
data := make(map[string]interface{}) data := make(map[string]any)
if err := json.Unmarshal(raw, &data); err != nil { if err := json.Unmarshal(raw, &data); err != nil {
return errors.Wrap(err, "unmarshalling from JSON") return errors.Wrap(err, "unmarshalling from JSON")
} }
@ -352,14 +378,14 @@ func (f *FieldCollection) UnmarshalJSON(raw []byte) error {
return nil return nil
} }
// Implement YAML marshalling to plain underlying map[string]interface{} // Implement YAML marshalling to plain underlying map[string]any
func (f *FieldCollection) MarshalYAML() (interface{}, error) { func (f *FieldCollection) MarshalYAML() (any, error) {
return f.Data(), nil return f.Data(), nil
} }
func (f *FieldCollection) UnmarshalYAML(unmarshal func(interface{}) error) error { func (f *FieldCollection) UnmarshalYAML(unmarshal func(any) error) error {
data := make(map[string]interface{}) data := make(map[string]any)
if err := unmarshal(&data); err != nil { if err := unmarshal(&data); err != nil {
return errors.Wrap(err, "unmarshalling from YAML") return errors.Wrap(err, "unmarshalling from YAML")
} }

View file

@ -12,12 +12,15 @@ import (
"github.com/Luzifer/go_helpers/v2/str" "github.com/Luzifer/go_helpers/v2/str"
"github.com/Luzifer/twitch-bot/v3/internal/actors/announce" "github.com/Luzifer/twitch-bot/v3/internal/actors/announce"
"github.com/Luzifer/twitch-bot/v3/internal/actors/ban" "github.com/Luzifer/twitch-bot/v3/internal/actors/ban"
"github.com/Luzifer/twitch-bot/v3/internal/actors/clipdetector"
"github.com/Luzifer/twitch-bot/v3/internal/actors/commercial" "github.com/Luzifer/twitch-bot/v3/internal/actors/commercial"
"github.com/Luzifer/twitch-bot/v3/internal/actors/counter" "github.com/Luzifer/twitch-bot/v3/internal/actors/counter"
"github.com/Luzifer/twitch-bot/v3/internal/actors/delay" "github.com/Luzifer/twitch-bot/v3/internal/actors/delay"
deleteactor "github.com/Luzifer/twitch-bot/v3/internal/actors/delete" deleteactor "github.com/Luzifer/twitch-bot/v3/internal/actors/delete"
"github.com/Luzifer/twitch-bot/v3/internal/actors/eventmod" "github.com/Luzifer/twitch-bot/v3/internal/actors/eventmod"
"github.com/Luzifer/twitch-bot/v3/internal/actors/filesay" "github.com/Luzifer/twitch-bot/v3/internal/actors/filesay"
"github.com/Luzifer/twitch-bot/v3/internal/actors/linkdetector"
"github.com/Luzifer/twitch-bot/v3/internal/actors/linkprotect"
logActor "github.com/Luzifer/twitch-bot/v3/internal/actors/log" logActor "github.com/Luzifer/twitch-bot/v3/internal/actors/log"
"github.com/Luzifer/twitch-bot/v3/internal/actors/modchannel" "github.com/Luzifer/twitch-bot/v3/internal/actors/modchannel"
"github.com/Luzifer/twitch-bot/v3/internal/actors/nuke" "github.com/Luzifer/twitch-bot/v3/internal/actors/nuke"
@ -52,12 +55,15 @@ var (
// Actors // Actors
announce.Register, announce.Register,
ban.Register, ban.Register,
clipdetector.Register,
commercial.Register, commercial.Register,
counter.Register, counter.Register,
delay.Register, delay.Register,
deleteactor.Register, deleteactor.Register,
eventmod.Register, eventmod.Register,
filesay.Register, filesay.Register,
linkdetector.Register,
linkprotect.Register,
logActor.Register, logActor.Register,
modchannel.Register, modchannel.Register,
nuke.Register, nuke.Register,

View file

@ -100,6 +100,47 @@ Delete message which caused the rule to be executed
# Does not have configuration attributes # Does not have configuration attributes
``` ```
## Enforce Link-Protection
Uses the link- and clip-scanner to detect links / clips and applies link protection as configured
```yaml
- type: linkprotect
attributes:
# Allowed links (if any are specified, all non-matching links will cause enforcement action; a link matches when it contains any of these strings)
# Optional: true
# Type: array of strings
allowed_links: []
# Disallowed links (if any are specified, all non-matching links will not cause enforcement action; a link matches when it contains any of these strings)
# Optional: true
# Type: array of strings
disallowed_links: []
# Allowed clip channels (if any is specified clips of all other channels will cause enforcement action, clip-links will be ignored in link-protection when this is used)
# Optional: true
# Type: array of strings
allowed_clip_channels: []
# Disallowed clip channels (if any is specified clips of all other channels will not cause enforcement action, clip-links will be ignored in link-protection when this is used)
# Optional: true
# Type: array of strings
disallowed_clip_channels: []
# Enforcement action to take when disallowed link / clip is detected (ban, delete, or a duration-value, e.g. 1m)
# Optional: false
# Type: string
action: ""
# Reason why the enforcement action was taken
# Optional: false
# Type: string
reason: ""
# Stop rule execution when action is applied (e.g. to avoid posting a message after a ban for spam links)
# Optional: true
# Type: bool
stop_on_action: false
# Stop rule execution when no action is applied (e.g. to avoid posting a message when no enforcement action is taken)
# Optional: true
# Type: bool
stop_on_no_action: false
```
## Execute Script / Command
Execute external script / command
@ -340,6 +381,24 @@ Respond to message with a new message
to_channel: "" to_channel: ""
``` ```
## Scan for Clips
Scans for clip-links in the message and adds the "clips" field to the event data
```yaml
- type: clipdetector
# Does not have configuration attributes
```
## Scan for Links
Scans for links in the message and adds the "links" field to the event data
```yaml
- type: linkdetector
# Does not have configuration attributes
```
## Send RAW Message
Send raw IRC message