2024-03-31 12:44:43 +00:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"net/http"
|
|
|
|
"net/url"
|
|
|
|
"os"
|
|
|
|
"os/exec"
|
2024-04-20 12:04:57 +00:00
|
|
|
"sync"
|
2024-03-31 12:44:43 +00:00
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/sirupsen/logrus"
|
|
|
|
"golang.org/x/net/context"
|
|
|
|
"gopkg.in/yaml.v3"
|
|
|
|
)
|
|
|
|
|
|
|
|
const (
	// checkKillWait is the pause used while waiting for a killed check
	// command to go away.
	checkKillWait = 100 * time.Millisecond

	// remoteConfigFetchTimeout bounds the HTTP request which fetches the
	// check definitions from a remote location.
	remoteConfigFetchTimeout = 2 * time.Second
)
|
|
|
|
|
|
|
|
type (
	// checkCommand is a single check definition as decoded from the YAML
	// check definitions.
	checkCommand struct {
		// Name is the value of the "name" key of the definition.
		Name string `yaml:"name"`
		// Command is the shell command line executed for the check.
		Command string `yaml:"command"`
		// WarnOnly is the value of the "warn_only" key of the definition.
		WarnOnly bool `yaml:"warn_only"`
	}

	// checkResult holds the most recent outcome of a check together with
	// the number of consecutive executions which had that same outcome.
	checkResult struct {
		// Check is the definition the result belongs to.
		Check checkCommand
		// IsSuccess reports whether the last execution succeeded.
		IsSuccess bool
		// Streak counts consecutive executions with the IsSuccess outcome.
		Streak int64
	}
)
|
|
|
|
|
|
|
|
func executeAndRegisterCheck(ctx context.Context, checkID string) {
|
|
|
|
var (
|
|
|
|
check = checks[checkID]
|
|
|
|
logger = logrus.WithField("check_id", checkID)
|
2024-04-28 10:47:46 +00:00
|
|
|
stderr = logger.WithField("stream", "STDERR").Writer()
|
|
|
|
stdout = logger.WithField("stream", "STDOUT").Writer()
|
2024-03-31 12:44:43 +00:00
|
|
|
)
|
|
|
|
|
2024-04-28 10:47:46 +00:00
|
|
|
defer func() {
|
|
|
|
for _, c := range []io.Closer{stderr, stdout} {
|
|
|
|
if err := c.Close(); err != nil {
|
|
|
|
logrus.WithError(err).Error("closing check log-writer (leaked mem)")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
2024-03-31 12:44:43 +00:00
|
|
|
cmd := exec.Command("/bin/bash", "-e", "-o", "pipefail", "-c", check.Command) //#nosec G204 // Intended to run an user-defined command
|
2024-04-28 10:47:46 +00:00
|
|
|
cmd.Stderr = stderr
|
2024-03-31 12:44:43 +00:00
|
|
|
if cfg.Verbose {
|
2024-04-28 10:47:46 +00:00
|
|
|
cmd.Stdout = stdout
|
2024-03-31 12:44:43 +00:00
|
|
|
}
|
|
|
|
err := cmd.Start()
|
|
|
|
|
|
|
|
if err == nil {
|
|
|
|
cmdDone := make(chan error)
|
|
|
|
go func(cmdDone chan error, cmd *exec.Cmd) { cmdDone <- cmd.Wait() }(cmdDone, cmd)
|
|
|
|
loop := true
|
|
|
|
for loop {
|
|
|
|
select {
|
|
|
|
case err = <-cmdDone:
|
|
|
|
loop = false
|
|
|
|
case <-ctx.Done():
|
|
|
|
logger.Error("execution of check will be killed through context timeout")
|
|
|
|
if err := cmd.Process.Kill(); err != nil {
|
|
|
|
logger.WithError(err).Error("killing check command")
|
|
|
|
}
|
|
|
|
time.Sleep(checkKillWait)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
success := err == nil
|
|
|
|
|
|
|
|
checkResultsLock.Lock()
|
|
|
|
|
|
|
|
if _, ok := checkResults[checkID]; !ok {
|
|
|
|
checkResults[checkID] = &checkResult{
|
|
|
|
Check: check,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if success == checkResults[checkID].IsSuccess {
|
|
|
|
checkResults[checkID].Streak++
|
|
|
|
} else {
|
|
|
|
checkResults[checkID].IsSuccess = success
|
|
|
|
checkResults[checkID].Streak = 1
|
|
|
|
}
|
|
|
|
|
|
|
|
if !success {
|
|
|
|
logger.WithError(err).WithField("streak", checkResults[checkID].Streak).Warn("check failed, streak increased")
|
|
|
|
}
|
|
|
|
|
|
|
|
lastResultRegistered = time.Now()
|
|
|
|
|
|
|
|
checkResultsLock.Unlock()
|
|
|
|
}
|
|
|
|
|
|
|
|
func loadChecks() error {
|
|
|
|
var rawChecks io.Reader
|
|
|
|
|
|
|
|
if _, err := os.Stat(cfg.CheckDefinitionsFile); err == nil {
|
|
|
|
// We got a local file, read it
|
|
|
|
f, err := os.Open(cfg.CheckDefinitionsFile)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("opening file: %w", err)
|
|
|
|
}
|
|
|
|
defer func() {
|
|
|
|
if err := f.Close(); err != nil {
|
|
|
|
logrus.WithError(err).Error("closing configfile (leaked fd)")
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
rawChecks = f
|
|
|
|
} else {
|
|
|
|
// Check whether we got an URL
|
|
|
|
if _, err := url.Parse(cfg.CheckDefinitionsFile); err != nil {
|
|
|
|
return errors.New("definitions file is neither a local file nor a URL")
|
|
|
|
}
|
|
|
|
|
|
|
|
// We got an URL, fetch and read it
|
|
|
|
ctx, cancel := context.WithTimeout(context.TODO(), remoteConfigFetchTimeout)
|
|
|
|
defer cancel()
|
|
|
|
|
|
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, cfg.CheckDefinitionsFile, nil)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("creating request: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
resp, err := http.DefaultClient.Do(req)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("executing request: %w", err)
|
|
|
|
}
|
|
|
|
defer func() {
|
|
|
|
if err := resp.Body.Close(); err != nil {
|
|
|
|
logrus.WithError(err).Error("closing configfile request body (leaked fd)")
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
rawChecks = resp.Body
|
|
|
|
}
|
|
|
|
|
|
|
|
tmpResult := map[string]checkCommand{}
|
|
|
|
if err := yaml.NewDecoder(rawChecks).Decode(&tmpResult); err != nil {
|
|
|
|
return fmt.Errorf("decoding checks file: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
checks = tmpResult
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func spawnChecks() {
|
2024-04-20 12:04:57 +00:00
|
|
|
var wg sync.WaitGroup
|
|
|
|
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), cfg.CheckInterval-time.Second)
|
|
|
|
|
|
|
|
wg.Add(len(checks))
|
|
|
|
go func() {
|
|
|
|
// Do not block the execution function but cleanup the context after
|
|
|
|
// all checks are done (or cancelled)
|
|
|
|
wg.Wait()
|
|
|
|
cancel()
|
|
|
|
}()
|
2024-03-31 12:44:43 +00:00
|
|
|
|
|
|
|
for id := range checks {
|
2024-04-20 12:04:57 +00:00
|
|
|
go func(ctx context.Context, id string) {
|
|
|
|
defer wg.Done()
|
|
|
|
executeAndRegisterCheck(ctx, id)
|
|
|
|
}(ctx, id)
|
2024-03-31 12:44:43 +00:00
|
|
|
}
|
|
|
|
}
|