2016-06-03 10:55:19 +00:00
package main
import (
"bytes"
"fmt"
"io"
"net/http"
"os"
"strconv"
"sync"
"time"
2022-05-02 14:10:12 +00:00
"github.com/robfig/cron/v3"
2024-03-31 12:44:43 +00:00
"github.com/sirupsen/logrus"
2022-05-02 14:10:12 +00:00
"github.com/Luzifer/rconfig/v2"
2016-06-03 10:55:19 +00:00
)
var (
cfg = struct {
2016-07-22 09:22:29 +00:00
CheckDefinitionsFile string ` flag:"check-definitions-file,c" default:"/etc/elb-instance-status.yml" description:"File or URL containing checks to perform for instance health" `
2016-06-03 10:55:19 +00:00
UnhealthyThreshold int64 ` flag:"unhealthy-threshold" default:"5" description:"How often does a check have to fail to mark the machine unhealthy" `
2016-07-22 11:18:10 +00:00
CheckInterval time . Duration ` flag:"check-interval" default:"1m" description:"How often to execute checks (do not set below 10s!)" `
ConfigRefreshInterval time . Duration ` flag:"config-refresh" default:"10m" description:"How often to update checks from definitions file / url" `
2024-03-31 12:44:43 +00:00
Verbose bool ` flag:"verbose,v" default:"false" description:"Attach stdout of the executed commands" `
LogLevel string ` flag:"log-level" default:"info" description:"Log level (debug, info, warn, error, fatal)" `
2016-08-04 10:15:44 +00:00
2016-07-22 11:18:10 +00:00
Listen string ` flag:"listen" default:":3000" description:"IP/Port to listen on for ELB health checks" `
VersionAndExit bool ` flag:"version" default:"false" description:"Print version and exit" `
2016-06-03 10:55:19 +00:00
} { }
version = "dev"
2016-07-22 09:22:29 +00:00
checks map [ string ] checkCommand
2016-06-03 10:55:19 +00:00
checkResults = map [ string ] * checkResult { }
checkResultsLock sync . RWMutex
lastResultRegistered time . Time
)
2024-03-31 12:44:43 +00:00
func initApp ( ) ( err error ) {
rconfig . AutoEnv ( true )
if err = rconfig . Parse ( & cfg ) ; err != nil {
return fmt . Errorf ( "parsing CLI options: %w" , err )
2016-06-03 10:55:19 +00:00
}
2016-07-22 09:22:29 +00:00
2024-03-31 12:44:43 +00:00
l , err := logrus . ParseLevel ( cfg . LogLevel )
if err != nil {
return fmt . Errorf ( "parsing log-level: %w" , err )
2018-04-27 09:55:43 +00:00
}
2024-03-31 12:44:43 +00:00
logrus . SetLevel ( l )
2016-07-22 09:22:29 +00:00
2018-04-27 09:55:43 +00:00
return nil
2016-06-03 10:55:19 +00:00
}
func main ( ) {
2024-03-31 12:44:43 +00:00
var err error
if err = initApp ( ) ; err != nil {
logrus . WithError ( err ) . Fatal ( "initializing app" )
2016-06-03 10:55:19 +00:00
}
2024-03-31 12:44:43 +00:00
if cfg . VersionAndExit {
fmt . Printf ( "elb-instance-status %s\n" , version ) //nolint:forbidigo
os . Exit ( 0 )
2022-05-02 14:10:12 +00:00
}
2016-07-22 09:35:24 +00:00
2024-03-31 12:44:43 +00:00
if err = loadChecks ( ) ; err != nil {
logrus . WithError ( err ) . Fatal ( "reading definitions file" )
2016-06-03 10:55:19 +00:00
}
2024-03-31 12:44:43 +00:00
c := cron . New ( )
2016-06-03 10:55:19 +00:00
2024-03-31 12:44:43 +00:00
if _ , err = c . AddFunc ( fmt . Sprintf ( "@every %s" , cfg . CheckInterval ) , spawnChecks ) ; err != nil {
logrus . WithError ( err ) . Fatal ( "registering spawn function" )
2016-08-04 10:15:44 +00:00
}
2024-03-31 12:44:43 +00:00
if _ , err = c . AddFunc ( fmt . Sprintf ( "@every %s" , cfg . ConfigRefreshInterval ) , func ( ) {
if err := loadChecks ( ) ; err != nil {
logrus . WithError ( err ) . Error ( "refreshing checks" )
2016-07-22 09:35:24 +00:00
}
2024-03-31 12:44:43 +00:00
} ) ; err != nil {
logrus . WithError ( err ) . Fatal ( "registering config-refresh function" )
2016-07-22 09:35:24 +00:00
}
2016-06-03 10:55:19 +00:00
2024-03-31 12:44:43 +00:00
c . Start ( )
2016-06-03 10:55:19 +00:00
2024-03-31 12:44:43 +00:00
spawnChecks ( )
2016-06-03 10:55:19 +00:00
2024-03-31 12:44:43 +00:00
http . HandleFunc ( "/status" , handleELBHealthCheck )
2016-06-03 10:55:19 +00:00
2024-03-31 12:44:43 +00:00
server := & http . Server {
Addr : cfg . Listen ,
Handler : http . DefaultServeMux ,
ReadHeaderTimeout : time . Second ,
2016-06-03 10:55:19 +00:00
}
2024-03-31 12:44:43 +00:00
if err = server . ListenAndServe ( ) ; err != nil {
logrus . WithError ( err ) . Fatal ( "listening for HTTP traffic" )
2016-11-29 11:15:58 +00:00
}
2016-06-03 10:55:19 +00:00
}
2024-03-31 12:44:43 +00:00
func handleELBHealthCheck ( w http . ResponseWriter , _ * http . Request ) {
var (
healthy = true
start = time . Now ( )
buf = new ( bytes . Buffer )
)
2016-06-03 10:55:19 +00:00
checkResultsLock . RLock ( )
2016-06-06 14:33:45 +00:00
for _ , cr := range checkResults {
2016-06-03 10:55:19 +00:00
state := ""
switch {
case cr . IsSuccess :
state = "PASS"
case ! cr . IsSuccess && cr . Check . WarnOnly :
state = "WARN"
case ! cr . IsSuccess && ! cr . Check . WarnOnly && cr . Streak < cfg . UnhealthyThreshold :
state = "CRIT"
case ! cr . IsSuccess && ! cr . Check . WarnOnly && cr . Streak >= cfg . UnhealthyThreshold :
state = "CRIT"
healthy = false
}
2016-06-06 14:33:45 +00:00
fmt . Fprintf ( buf , "[%s] %s\n" , state , cr . Check . Name )
2016-06-03 10:55:19 +00:00
}
checkResultsLock . RUnlock ( )
2024-03-31 12:44:43 +00:00
w . Header ( ) . Set ( "X-Collection-Parsed-In" , strconv . FormatInt ( time . Since ( start ) . Nanoseconds ( ) / int64 ( time . Microsecond ) , 10 ) + "ms" )
w . Header ( ) . Set ( "X-Last-Result-Registered-At" , lastResultRegistered . Format ( time . RFC1123 ) )
2016-06-03 10:55:19 +00:00
if healthy {
2024-03-31 12:44:43 +00:00
w . WriteHeader ( http . StatusOK )
2016-06-03 10:55:19 +00:00
} else {
2024-03-31 12:44:43 +00:00
w . WriteHeader ( http . StatusInternalServerError )
2016-06-03 10:55:19 +00:00
}
2024-03-31 12:44:43 +00:00
if _ , err := io . Copy ( w , buf ) ; err != nil {
logrus . WithError ( err ) . Error ( "writing HTTP response body" )
}
2016-06-03 10:55:19 +00:00
}