From 18fb20e1c96a223235fdb4671b3953dc6579fc13 Mon Sep 17 00:00:00 2001 From: Craig Peterson Date: Wed, 3 Feb 2016 10:30:10 -0700 Subject: [PATCH] Convert to more incident-centric state model from redis. --- build/validate.sh | 2 +- cmd/bosun/conf/conf.go | 20 +- cmd/bosun/conf/notify.go | 11 +- cmd/bosun/database/database.go | 2 +- cmd/bosun/database/incident_data.go | 128 --- cmd/bosun/database/state_data.go | 397 +++++++++ cmd/bosun/database/test/database_test.go | 2 +- cmd/bosun/database/test/incidents_test.go | 65 -- cmd/bosun/database/test/testSetup.go | 8 +- cmd/bosun/expr/elastic.go | 57 +- cmd/bosun/expr/expr.go | 49 +- cmd/bosun/expr/funcs.go | 155 ++-- cmd/bosun/expr/influx.go | 11 +- cmd/bosun/expr/logstash.go | 9 +- cmd/bosun/expr/parse/node.go | 30 +- cmd/bosun/expr/parse/parse.go | 38 +- cmd/bosun/expr/parse/parse_test.go | 18 +- cmd/bosun/sched/bolt.go | 291 +++++-- cmd/bosun/sched/check.go | 400 +++++---- cmd/bosun/sched/check_test.go | 134 ++- cmd/bosun/sched/depends_test.go | 87 +- cmd/bosun/sched/filter.go | 37 +- cmd/bosun/sched/grouping_test.go | 7 +- cmd/bosun/sched/host.go | 24 +- cmd/bosun/sched/notification_test.go | 34 +- cmd/bosun/sched/notify.go | 76 +- cmd/bosun/sched/sched.go | 460 ++-------- cmd/bosun/sched/sched_test.go | 121 +-- cmd/bosun/sched/silence.go | 37 +- cmd/bosun/sched/template.go | 68 +- cmd/bosun/search/search_test.go | 2 +- cmd/bosun/web/chart.go | 4 +- cmd/bosun/web/expr.go | 51 +- cmd/bosun/web/relay_test.go | 2 +- cmd/bosun/web/static.go | 794 +++++++++--------- cmd/bosun/web/static/js/bosun.js | 21 +- cmd/bosun/web/static/js/history.ts | 13 +- cmd/bosun/web/static/js/incident.ts | 4 +- cmd/bosun/web/static/js/state.ts | 4 +- cmd/bosun/web/static/partials/alertstate.html | 4 +- cmd/bosun/web/web.go | 112 ++- models/incidents.go | 228 ++++- slog/slog.go | 27 + 43 files changed, 2099 insertions(+), 1945 deletions(-) delete mode 100644 cmd/bosun/database/incident_data.go create mode 100644 
cmd/bosun/database/state_data.go delete mode 100644 cmd/bosun/database/test/incidents_test.go diff --git a/build/validate.sh b/build/validate.sh index 035df237f3..cd7404bea4 100755 --- a/build/validate.sh +++ b/build/validate.sh @@ -61,7 +61,7 @@ if [ "$GOGENERATEDIFF" != '' ]; then fi echo -e "\nRunning go test bosun.org/..." -go test bosun.org/... +go test -v bosun.org/... GOTESTRESULT=$? if [ "$GOTESTRESULT" != 0 ]; then BUILDMSG="${BUILDMSG}tests fail." diff --git a/cmd/bosun/conf/conf.go b/cmd/bosun/conf/conf.go index 4165f2ed96..193f53c48e 100644 --- a/cmd/bosun/conf/conf.go +++ b/cmd/bosun/conf/conf.go @@ -244,7 +244,7 @@ type Alert struct { UnjoinedOK bool `json:",omitempty"` Log bool RunEvery int - returnType eparse.FuncType + returnType models.FuncType template string squelch []string @@ -934,7 +934,7 @@ func (c *Conf) loadAlert(s *parse.SectionNode) { c.errorf("neither crit or warn specified") } var tags eparse.Tags - var ret eparse.FuncType + var ret models.FuncType if a.Crit != nil { ctags, err := a.Crit.Root.Tags() if err != nil { @@ -1188,7 +1188,7 @@ func (c *Conf) NewExpr(s string) *expr.Expr { c.error(err) } switch exp.Root.Return() { - case eparse.TypeNumberSet, eparse.TypeScalar: + case models.TypeNumberSet, models.TypeScalar: break default: c.errorf("expression must return a number") @@ -1306,7 +1306,7 @@ func (c *Conf) Funcs() map[string]eparse.Func { if err != nil { return nil, err } - if a.returnType != eparse.TypeNumberSet { + if a.returnType != models.TypeNumberSet { return nil, fmt.Errorf("alert requires a number-returning expression (got %v)", a.returnType) } return e.Root.Tags() @@ -1314,20 +1314,20 @@ func (c *Conf) Funcs() map[string]eparse.Func { funcs := map[string]eparse.Func{ "alert": { - Args: []eparse.FuncType{eparse.TypeString, eparse.TypeString}, - Return: eparse.TypeNumberSet, + Args: []models.FuncType{models.TypeString, models.TypeString}, + Return: models.TypeNumberSet, Tags: tagAlert, F: c.alert, }, "lookup": { - Args: 
[]eparse.FuncType{eparse.TypeString, eparse.TypeString}, - Return: eparse.TypeNumberSet, + Args: []models.FuncType{models.TypeString, models.TypeString}, + Return: models.TypeNumberSet, Tags: lookupTags, F: lookup, }, "lookupSeries": { - Args: []eparse.FuncType{eparse.TypeSeriesSet, eparse.TypeString, eparse.TypeString}, - Return: eparse.TypeNumberSet, + Args: []models.FuncType{models.TypeSeriesSet, models.TypeString, models.TypeString}, + Return: models.TypeNumberSet, Tags: lookupSeriesTags, F: lookupSeries, }, diff --git a/cmd/bosun/conf/notify.go b/cmd/bosun/conf/notify.go index bbf4476faf..6fbd6ad65c 100644 --- a/cmd/bosun/conf/notify.go +++ b/cmd/bosun/conf/notify.go @@ -12,6 +12,7 @@ import ( "bosun.org/_third_party/github.com/jordan-wright/email" "bosun.org/collect" "bosun.org/metadata" + "bosun.org/models" "bosun.org/slog" "bosun.org/util" ) @@ -25,7 +26,7 @@ func init() { "The number of email notifications that Bosun failed to send.") } -func (n *Notification) Notify(subject, body string, emailsubject, emailbody []byte, c *Conf, ak string, attachments ...*Attachment) { +func (n *Notification) Notify(subject, body string, emailsubject, emailbody []byte, c *Conf, ak string, attachments ...*models.Attachment) { if len(n.Email) > 0 { go n.DoEmail(emailsubject, emailbody, c, ak, attachments...) 
} @@ -93,13 +94,7 @@ func (n *Notification) DoGet(ak string) { } } -type Attachment struct { - Data []byte - Filename string - ContentType string -} - -func (n *Notification) DoEmail(subject, body []byte, c *Conf, ak string, attachments ...*Attachment) { +func (n *Notification) DoEmail(subject, body []byte, c *Conf, ak string, attachments ...*models.Attachment) { e := email.NewEmail() e.From = c.EmailFrom for _, a := range n.Email { diff --git a/cmd/bosun/database/database.go b/cmd/bosun/database/database.go index 43702bb70b..1dba0d2cad 100644 --- a/cmd/bosun/database/database.go +++ b/cmd/bosun/database/database.go @@ -20,8 +20,8 @@ type DataAccess interface { Metadata() MetadataDataAccess Search() SearchDataAccess Errors() ErrorDataAccess + State() StateDataAccess Silence() SilenceDataAccess - Incidents() IncidentDataAccess } type MetadataDataAccess interface { diff --git a/cmd/bosun/database/incident_data.go b/cmd/bosun/database/incident_data.go deleted file mode 100644 index a07f59bcd7..0000000000 --- a/cmd/bosun/database/incident_data.go +++ /dev/null @@ -1,128 +0,0 @@ -package database - -import ( - "encoding/json" - "time" - - "bosun.org/_third_party/github.com/garyburd/redigo/redis" - "bosun.org/collect" - "bosun.org/models" - "bosun.org/opentsdb" -) - -/* - -incidents: hash of {id} -> json of incident -maxIncidentId: counter. Increment to get next id. 
-incidentsByStart: sorted set by start date - -*/ - -type IncidentDataAccess interface { - GetIncident(id uint64) (*models.Incident, error) - CreateIncident(ak models.AlertKey, start time.Time) (*models.Incident, error) - UpdateIncident(id uint64, i *models.Incident) error - - GetIncidentsStartingInRange(start, end time.Time) ([]*models.Incident, error) - - // should only be used by initial import - SetMaxId(id uint64) error -} - -func (d *dataAccess) Incidents() IncidentDataAccess { - return d -} - -func (d *dataAccess) GetIncident(id uint64) (*models.Incident, error) { - defer collect.StartTimer("redis", opentsdb.TagSet{"op": "GetIncident"})() - conn := d.GetConnection() - defer conn.Close() - raw, err := redis.Bytes(conn.Do("HGET", "incidents", id)) - if err != nil { - return nil, err - } - incident := &models.Incident{} - if err = json.Unmarshal(raw, incident); err != nil { - return nil, err - } - return incident, nil -} - -func (d *dataAccess) CreateIncident(ak models.AlertKey, start time.Time) (*models.Incident, error) { - defer collect.StartTimer("redis", opentsdb.TagSet{"op": "CreateIncident"})() - conn := d.GetConnection() - defer conn.Close() - id, err := redis.Int64(conn.Do("INCR", "maxIncidentId")) - if err != nil { - return nil, err - } - incident := &models.Incident{ - Id: uint64(id), - Start: start, - AlertKey: ak, - } - err = saveIncident(incident.Id, incident, conn) - if err != nil { - return nil, err - } - return incident, nil -} - -func saveIncident(id uint64, i *models.Incident, conn redis.Conn) error { - raw, err := json.Marshal(i) - if err != nil { - return err - } - if _, err = conn.Do("HSET", "incidents", id, raw); err != nil { - return err - } - if _, err = conn.Do("ZADD", "incidentsByStart", i.Start.UTC().Unix(), id); err != nil { - return err - } - return nil -} - -func (d *dataAccess) GetIncidentsStartingInRange(start, end time.Time) ([]*models.Incident, error) { - defer collect.StartTimer("redis", opentsdb.TagSet{"op": 
"GetIncidentsStartingInRange"})() - conn := d.GetConnection() - defer conn.Close() - - ids, err := redis.Ints(conn.Do("ZRANGEBYSCORE", "incidentsByStart", start.UTC().Unix(), end.UTC().Unix())) - if err != nil { - return nil, err - } - args := make([]interface{}, len(ids)+1) - args[0] = "incidents" - for i := range ids { - args[i+1] = ids[i] - } - jsons, err := redis.Strings(conn.Do("HMGET", args...)) - if err != nil { - return nil, err - } - incidents := make([]*models.Incident, len(jsons)) - for i := range jsons { - inc := &models.Incident{} - if err = json.Unmarshal([]byte(jsons[i]), inc); err != nil { - return nil, err - } - incidents[i] = inc - } - return incidents, nil -} - -func (d *dataAccess) UpdateIncident(id uint64, i *models.Incident) error { - defer collect.StartTimer("redis", opentsdb.TagSet{"op": "UpdateIncident"})() - conn := d.GetConnection() - defer conn.Close() - return saveIncident(id, i, conn) -} - -func (d *dataAccess) SetMaxId(id uint64) error { - defer collect.StartTimer("redis", opentsdb.TagSet{"op": "SetMaxId"})() - conn := d.GetConnection() - defer conn.Close() - - _, err := conn.Do("SET", "maxIncidentId", id) - return err -} diff --git a/cmd/bosun/database/state_data.go b/cmd/bosun/database/state_data.go new file mode 100644 index 0000000000..09ed4bac01 --- /dev/null +++ b/cmd/bosun/database/state_data.go @@ -0,0 +1,397 @@ +package database + +import ( + "encoding/json" + "fmt" + "time" + + "bosun.org/_third_party/github.com/garyburd/redigo/redis" + "bosun.org/collect" + "bosun.org/models" + "bosun.org/opentsdb" + "bosun.org/slog" +) + +/* +incidentById:{id} - json encoded state. Authoritative source. + +lastTouched:{alert} - ZSET of alert key to last touched time stamp +unknown:{alert} - Set of unknown alert keys for alert +unevel:{alert} - Set of unevaluated alert keys for alert + +openIncidents - Hash of open incident Ids. 
Alert Key -> incident id +incidents:{ak} - List of incidents for alert key + +allIncidents - List of all incidents ever. Value is "incidentId:timestamp:ak" +*/ + +const ( + statesOpenIncidentsKey = "openIncidents" +) + +func statesLastTouchedKey(alert string) string { + return fmt.Sprintf("lastTouched:%s", alert) +} +func statesUnknownKey(alert string) string { + return fmt.Sprintf("unknown:%s", alert) +} +func statesUnevalKey(alert string) string { + return fmt.Sprintf("uneval:%s", alert) +} +func incidentStateKey(id int64) string { + return fmt.Sprintf("incidentById:%d", id) +} +func incidentsForAlertKeyKey(ak models.AlertKey) string { + return fmt.Sprintf("incidents:%s", ak) +} + +type StateDataAccess interface { + TouchAlertKey(ak models.AlertKey, t time.Time) error + GetUntouchedSince(alert string, time int64) ([]models.AlertKey, error) + + GetOpenIncident(ak models.AlertKey) (*models.IncidentState, error) + GetLatestIncident(ak models.AlertKey) (*models.IncidentState, error) + GetAllOpenIncidents() ([]*models.IncidentState, error) + GetIncidentState(incidentId int64) (*models.IncidentState, error) + GetAllIncidents(ak models.AlertKey) ([]*models.IncidentState, error) + + UpdateIncidentState(s *models.IncidentState) error + ImportIncidentState(s *models.IncidentState) error + + Forget(ak models.AlertKey) error + SetUnevaluated(ak models.AlertKey, uneval bool) error + GetUnknownAndUnevalAlertKeys(alert string) ([]models.AlertKey, []models.AlertKey, error) +} + +func (d *dataAccess) State() StateDataAccess { + return d +} + +func (d *dataAccess) TouchAlertKey(ak models.AlertKey, t time.Time) error { + defer collect.StartTimer("redis", opentsdb.TagSet{"op": "TouchAlertKey"})() + conn := d.GetConnection() + defer conn.Close() + + _, err := conn.Do("ZADD", statesLastTouchedKey(ak.Name()), t.UTC().Unix(), string(ak)) + return slog.Wrap(err) +} + +func (d *dataAccess) GetUntouchedSince(alert string, time int64) ([]models.AlertKey, error) { + defer 
collect.StartTimer("redis", opentsdb.TagSet{"op": "GetUntouchedSince"})() + conn := d.GetConnection() + defer conn.Close() + + results, err := redis.Strings(conn.Do("ZRANGEBYSCORE", statesLastTouchedKey(alert), "-inf", time)) + if err != nil { + return nil, slog.Wrap(err) + } + aks := make([]models.AlertKey, len(results)) + for i := range results { + aks[i] = models.AlertKey(results[i]) + } + return aks, nil +} + +func (d *dataAccess) GetOpenIncident(ak models.AlertKey) (*models.IncidentState, error) { + defer collect.StartTimer("redis", opentsdb.TagSet{"op": "GetOpenIncident"})() + conn := d.GetConnection() + defer conn.Close() + + inc, err := d.getLatestIncident(ak, conn) + if err != nil { + return nil, slog.Wrap(err) + } + if inc == nil { + return nil, nil + } + if inc.Open { + return inc, nil + } + return nil, nil +} + +func (d *dataAccess) getLatestIncident(ak models.AlertKey, conn redis.Conn) (*models.IncidentState, error) { + id, err := redis.Int64(conn.Do("LINDEX", incidentsForAlertKeyKey(ak), 0)) + if err != nil { + if err == redis.ErrNil { + return nil, nil + } + return nil, slog.Wrap(err) + } + inc, err := d.getIncident(id, conn) + if err != nil { + return nil, slog.Wrap(err) + } + return inc, nil +} + +func (d *dataAccess) GetLatestIncident(ak models.AlertKey) (*models.IncidentState, error) { + defer collect.StartTimer("redis", opentsdb.TagSet{"op": "GetLatestIncident"})() + conn := d.GetConnection() + defer conn.Close() + + return d.getLatestIncident(ak, conn) +} + +func (d *dataAccess) GetAllOpenIncidents() ([]*models.IncidentState, error) { + defer collect.StartTimer("redis", opentsdb.TagSet{"op": "GetAllOpenIncidents"})() + conn := d.GetConnection() + defer conn.Close() + + // get open ids + ids, err := int64s(conn.Do("HVALS", statesOpenIncidentsKey)) + if err != nil { + return nil, slog.Wrap(err) + } + return d.incidentMultiGet(conn, ids) +} + +func (d *dataAccess) GetAllIncidents(ak models.AlertKey) ([]*models.IncidentState, error) { + defer 
collect.StartTimer("redis", opentsdb.TagSet{"op": "GetAllIncidents"})() + conn := d.GetConnection() + defer conn.Close() + + ids, err := int64s(conn.Do("LRANGE", incidentsForAlertKeyKey(ak), 0, -1)) + if err != nil { + return nil, slog.Wrap(err) + } + return d.incidentMultiGet(conn, ids) +} + +func (d *dataAccess) incidentMultiGet(conn redis.Conn, ids []int64) ([]*models.IncidentState, error) { + if len(ids) == 0 { + return nil, nil + } + // get all incident json keys + args := make([]interface{}, 0, len(ids)) + for _, id := range ids { + args = append(args, incidentStateKey(id)) + } + jsons, err := redis.Strings(conn.Do("MGET", args...)) + if err != nil { + return nil, slog.Wrap(err) + } + results := make([]*models.IncidentState, 0, len(jsons)) + for _, j := range jsons { + state := &models.IncidentState{} + if err = json.Unmarshal([]byte(j), state); err != nil { + return nil, slog.Wrap(err) + } + results = append(results, state) + } + return results, nil +} + +func (d *dataAccess) getIncident(incidentId int64, conn redis.Conn) (*models.IncidentState, error) { + b, err := redis.Bytes(conn.Do("GET", incidentStateKey(incidentId))) + if err != nil { + return nil, slog.Wrap(err) + } + state := &models.IncidentState{} + if err = json.Unmarshal(b, state); err != nil { + return nil, slog.Wrap(err) + } + return state, nil +} + +func (d *dataAccess) GetIncidentState(incidentId int64) (*models.IncidentState, error) { + defer collect.StartTimer("redis", opentsdb.TagSet{"op": "GetIncident"})() + conn := d.GetConnection() + defer conn.Close() + return d.getIncident(incidentId, conn) +} + +func (d *dataAccess) UpdateIncidentState(s *models.IncidentState) error { + return d.save(s, false) +} + +func (d *dataAccess) ImportIncidentState(s *models.IncidentState) error { + return d.save(s, true) +} + +func (d *dataAccess) save(s *models.IncidentState, isImport bool) error { + defer collect.StartTimer("redis", opentsdb.TagSet{"op": "UpdateIncident"})() + conn := d.GetConnection() + 
defer conn.Close() + + isNew := false + //if id is still zero, assign new id. + if s.Id == 0 { + id, err := redis.Int64(conn.Do("INCR", "maxIncidentId")) + if err != nil { + return slog.Wrap(err) + } + s.Id = id + isNew = true + } else if isImport { + max, err := redis.Int64(conn.Do("GET", "maxIncidentId")) + if err != nil { + max = 0 + } + if max < s.Id { + if _, err = conn.Do("SET", "maxIncidentId", s.Id); err != nil { + return slog.Wrap(err) + } + } + isNew = true + } + return d.transact(conn, func() error { + if isNew { + // add to list for alert key + if _, err := conn.Do("LPUSH", incidentsForAlertKeyKey(s.AlertKey), s.Id); err != nil { + return slog.Wrap(err) + } + dat := fmt.Sprintf("%d:%d:%s", s.Id, s.Start.UTC().Unix(), s.AlertKey) + if _, err := conn.Do("LPUSH", "allIncidents", dat); err != nil { + return slog.Wrap(err) + } + } + // store the incident json (the authoritative copy); a failed write aborts the save + data, err := json.Marshal(s) + if err != nil { + return slog.Wrap(err) + } + if _, err = conn.Do("SET", incidentStateKey(s.Id), data); err != nil { + return slog.Wrap(err) + } + addRem := func(b bool) string { + if b { + return "SADD" + } + return "SREM" + } + // appropriately add or remove it from the "open" set + if s.Open { + if _, err = conn.Do("HSET", statesOpenIncidentsKey, s.AlertKey, s.Id); err != nil { + return slog.Wrap(err) + } + } else { + if _, err = conn.Do("HDEL", statesOpenIncidentsKey, s.AlertKey); err != nil { + return slog.Wrap(err) + } + } + + //appropriately add or remove from unknown and uneval sets + if _, err = conn.Do(addRem(s.CurrentStatus == models.StUnknown), statesUnknownKey(s.Alert), s.AlertKey); err != nil { + return slog.Wrap(err) + } + if _, err = conn.Do(addRem(s.Unevaluated), statesUnevalKey(s.Alert), s.AlertKey); err != nil { + return slog.Wrap(err) + } + return nil + }) +} + +func (d *dataAccess) SetUnevaluated(ak models.AlertKey, uneval bool) error { + defer collect.StartTimer("redis", opentsdb.TagSet{"op": "SetUnevaluated"})() + conn := d.GetConnection() + defer conn.Close() + + op := "SREM" + if uneval 
{ + op = "SADD" + } + _, err := conn.Do(op, statesUnevalKey(ak.Name()), ak) + return slog.Wrap(err) +} + +// Forget is the nuclear option: it deletes everything stored for this alert key: +// its last-touched entry, unknown/uneval membership, open-incident entry and +// every stored incident state. +func (d *dataAccess) Forget(ak models.AlertKey) error { + defer collect.StartTimer("redis", opentsdb.TagSet{"op": "Forget"})() + conn := d.GetConnection() + defer conn.Close() + + alert := ak.Name() + // Read the incident ids before the transaction starts: inside MULTI redis only + // replies QUEUED, so the list could not be decoded there. + ids, err := int64s(conn.Do("LRANGE", incidentsForAlertKeyKey(ak), 0, -1)) + if err != nil { + return slog.Wrap(err) + } + return d.transact(conn, func() error { + // last touched: the key is a ZSET (written with ZADD), so remove with ZREM. + if _, err := conn.Do("ZREM", statesLastTouchedKey(alert), ak); err != nil { + return slog.Wrap(err) + } + // unknown/uneval sets + if _, err := conn.Do("SREM", statesUnknownKey(alert), ak); err != nil { + return slog.Wrap(err) + } + if _, err := conn.Do("SREM", statesUnevalKey(alert), ak); err != nil { + return slog.Wrap(err) + } + //open set + if _, err := conn.Do("HDEL", statesOpenIncidentsKey, ak); err != nil { + return slog.Wrap(err) + } + //all incidents + for _, id := range ids { + if _, err = conn.Do("DEL", incidentStateKey(id)); err != nil { + return slog.Wrap(err) + } + } + if _, err := conn.Do(d.LCLEAR(), incidentsForAlertKeyKey(ak)); err != nil { + return slog.Wrap(err) + } + return nil + }) +} + +func (d *dataAccess) GetUnknownAndUnevalAlertKeys(alert string) ([]models.AlertKey, []models.AlertKey, error) { + defer collect.StartTimer("redis", opentsdb.TagSet{"op": "GetUnknownAndUnevalAlertKeys"})() + conn := d.GetConnection() + defer conn.Close() + + unknownS, err := redis.Strings(conn.Do("SMEMBERS", statesUnknownKey(alert))) + if err != nil { + return nil, nil, slog.Wrap(err) + } + unknown := make([]models.AlertKey, len(unknownS)) + for i, u := range unknownS { + unknown[i] = models.AlertKey(u) + } + + unEvals, err := redis.Strings(conn.Do("SMEMBERS", statesUnevalKey(alert))) + if err != nil { + return nil, 
nil, slog.Wrap(err) + } + unevals := make([]models.AlertKey, len(unEvals)) + for i, u := range unEvals { + unevals[i] = models.AlertKey(u) + } + + return unknown, unevals, nil +} + +func int64s(reply interface{}, err error) ([]int64, error) { + if err != nil { + return nil, slog.Wrap(err) + } + ints := []int64{} + values, err := redis.Values(reply, err) + if err != nil { + return ints, slog.Wrap(err) + } + if err := redis.ScanSlice(values, &ints); err != nil { + return ints, slog.Wrap(err) + } + return ints, nil +} + +func (d *dataAccess) transact(conn redis.Conn, f func() error) error { + if !d.isRedis { + return f() + } + if _, err := conn.Do("MULTI"); err != nil { + return slog.Wrap(err) + } + if err := f(); err != nil { + return slog.Wrap(err) + } + if _, err := conn.Do("EXEC"); err != nil { + return slog.Wrap(err) + } + return nil +} diff --git a/cmd/bosun/database/test/database_test.go b/cmd/bosun/database/test/database_test.go index bf0f6764dc..adebb3b5de 100644 --- a/cmd/bosun/database/test/database_test.go +++ b/cmd/bosun/database/test/database_test.go @@ -18,7 +18,7 @@ var testData database.DataAccess func TestMain(m *testing.M) { rand.Seed(time.Now().UnixNano()) var closeF func() - testData, closeF = StartTestRedis() + testData, closeF = StartTestRedis(9993) status := m.Run() closeF() os.Exit(status) diff --git a/cmd/bosun/database/test/incidents_test.go b/cmd/bosun/database/test/incidents_test.go deleted file mode 100644 index ce52efe597..0000000000 --- a/cmd/bosun/database/test/incidents_test.go +++ /dev/null @@ -1,65 +0,0 @@ -package dbtest - -import ( - "testing" - "time" -) - -func TestIncidents_RoundTrip(t *testing.T) { - inc := testData.Incidents() - - i, err := inc.CreateIncident("foo{host=3}", time.Now().UTC()) - check(t, err) - - i2, err := inc.CreateIncident("foo{host=3}", time.Now().UTC()) - check(t, err) - - if i.Id >= i2.Id { - t.Fatal("Expect ids to be ascending") - } - - readBack, err := inc.GetIncident(i.Id) - check(t, err) - if 
readBack.AlertKey != i.AlertKey { - t.Fatal("Alert key's don't match") - } - - tm := time.Now().UTC().Add(42 * time.Hour) - i.End = &tm - check(t, inc.UpdateIncident(i.Id, i)) - - readBack, err = inc.GetIncident(i.Id) - check(t, err) - if *readBack.End != tm { - t.Fatal("End times don't match") - } -} - -func TestIncidentSearch(t *testing.T) { - inc := testData.Incidents() - - startTime := time.Now().Add(-5000 * time.Hour).UTC() - - i, err := inc.CreateIncident("foo2{host=3}", startTime) - check(t, err) - - i2, err := inc.CreateIncident("foo2{host=4}", startTime) - check(t, err) - - _, err = inc.CreateIncident("foo2{host=4}", startTime.Add(500*time.Hour)) - check(t, err) - _, err = inc.CreateIncident("foo2{host=4}", startTime.Add(-500*time.Hour)) - check(t, err) - i3, err := inc.CreateIncident("BAR{host=4}", startTime) - check(t, err) - - results, err := inc.GetIncidentsStartingInRange(startTime.Add(-1*time.Hour), startTime.Add(time.Hour)) - check(t, err) - - if len(results) != 3 { - t.Fatal("Wrong number of results") - } - if results[0].Id != i.Id || results[1].Id != i2.Id || results[2].Id != i3.Id { - t.Fatal("Ids don't match", results, i.Id, i2.Id, i3.Id) - } -} diff --git a/cmd/bosun/database/test/testSetup.go b/cmd/bosun/database/test/testSetup.go index 64635c92ec..5222d711d3 100644 --- a/cmd/bosun/database/test/testSetup.go +++ b/cmd/bosun/database/test/testSetup.go @@ -14,7 +14,7 @@ import ( var flagReddisHost = flag.String("redis", "", "redis server to test against") var flagFlushRedis = flag.Bool("flush", false, "flush database before tests. DANGER!") -func StartTestRedis() (database.DataAccess, func()) { +func StartTestRedis(port int) (database.DataAccess, func()) { flag.Parse() // For redis tests we just point at an external server. if *flagReddisHost != "" { @@ -31,9 +31,9 @@ func StartTestRedis() (database.DataAccess, func()) { return testData, func() {} } // To test ledis, start a local instance in a new tmp dir. 
We will attempt to delete it when we're done. - addr := "127.0.0.1:9876" - testPath := filepath.Join(os.TempDir(), "bosun_ledis_test", fmt.Sprint(time.Now().Unix())) - log.Println(testPath) + addr := fmt.Sprintf("127.0.0.1:%d", port) + testPath := filepath.Join(os.TempDir(), "bosun_ledis_test", fmt.Sprintf("%d-%d", time.Now().UnixNano(), port)) + log.Println("Test ledis at", testPath, addr) stop, err := database.StartLedis(testPath, addr) if err != nil { log.Fatal(err) diff --git a/cmd/bosun/expr/elastic.go b/cmd/bosun/expr/elastic.go index 6eae3570ad..b845c62174 100644 --- a/cmd/bosun/expr/elastic.go +++ b/cmd/bosun/expr/elastic.go @@ -9,6 +9,7 @@ import ( "bosun.org/_third_party/github.com/MiniProfiler/go/miniprofiler" elastic "bosun.org/_third_party/gopkg.in/olivere/elastic.v3" "bosun.org/cmd/bosun/expr/parse" + "bosun.org/models" "bosun.org/opentsdb" ) @@ -29,85 +30,85 @@ func elasticTagQuery(args []parse.Node) (parse.Tags, error) { var Elastic = map[string]parse.Func{ // Funcs for querying elastic "escount": { - Args: []parse.FuncType{parse.TypeESIndexer, parse.TypeString, parse.TypeESQuery, parse.TypeString, parse.TypeString, parse.TypeString}, - Return: parse.TypeSeriesSet, + Args: []models.FuncType{models.TypeESIndexer, models.TypeString, models.TypeESQuery, models.TypeString, models.TypeString, models.TypeString}, + Return: models.TypeSeriesSet, Tags: elasticTagQuery, F: ESCount, }, "esstat": { - Args: []parse.FuncType{parse.TypeESIndexer, parse.TypeString, parse.TypeESQuery, parse.TypeString, parse.TypeString, parse.TypeString, parse.TypeString, parse.TypeString}, - Return: parse.TypeSeriesSet, + Args: []models.FuncType{models.TypeESIndexer, models.TypeString, models.TypeESQuery, models.TypeString, models.TypeString, models.TypeString, models.TypeString, models.TypeString}, + Return: models.TypeSeriesSet, Tags: elasticTagQuery, F: ESStat, }, // Funcs to create elastic index names (ESIndexer type) "esindices": { - Args: []parse.FuncType{parse.TypeString, 
parse.TypeString}, + Args: []models.FuncType{models.TypeString, models.TypeString}, VArgs: true, VArgsPos: 1, - Return: parse.TypeESIndexer, + Return: models.TypeESIndexer, F: ESIndicies, }, "esdaily": { - Args: []parse.FuncType{parse.TypeString, parse.TypeString, parse.TypeString}, + Args: []models.FuncType{models.TypeString, models.TypeString, models.TypeString}, VArgs: true, VArgsPos: 1, - Return: parse.TypeESIndexer, + Return: models.TypeESIndexer, F: ESDaily, }, "esls": { - Args: []parse.FuncType{parse.TypeString}, - Return: parse.TypeESIndexer, + Args: []models.FuncType{models.TypeString}, + Return: models.TypeESIndexer, F: ESLS, }, // Funcs for generate elastic queries (ESQuery Type) to further filter results "esall": { - Args: []parse.FuncType{}, - Return: parse.TypeESQuery, + Args: []models.FuncType{}, + Return: models.TypeESQuery, F: ESAll, }, "esregexp": { - Args: []parse.FuncType{parse.TypeString, parse.TypeString}, - Return: parse.TypeESQuery, + Args: []models.FuncType{models.TypeString, models.TypeString}, + Return: models.TypeESQuery, F: ESRegexp, }, "esquery": { - Args: []parse.FuncType{parse.TypeString, parse.TypeString}, - Return: parse.TypeESQuery, + Args: []models.FuncType{models.TypeString, models.TypeString}, + Return: models.TypeESQuery, F: ESQueryString, }, "esand": { - Args: []parse.FuncType{parse.TypeESQuery}, + Args: []models.FuncType{models.TypeESQuery}, VArgs: true, - Return: parse.TypeESQuery, + Return: models.TypeESQuery, F: ESAnd, }, "esor": { - Args: []parse.FuncType{parse.TypeESQuery}, + Args: []models.FuncType{models.TypeESQuery}, VArgs: true, - Return: parse.TypeESQuery, + Return: models.TypeESQuery, F: ESOr, }, "esgt": { - Args: []parse.FuncType{parse.TypeString, parse.TypeScalar}, - Return: parse.TypeESQuery, + Args: []models.FuncType{models.TypeString, models.TypeScalar}, + Return: models.TypeESQuery, F: ESGT, }, "esgte": { - Args: []parse.FuncType{parse.TypeString, parse.TypeScalar}, - Return: parse.TypeESQuery, + Args: 
[]models.FuncType{models.TypeString, models.TypeScalar}, + Return: models.TypeESQuery, F: ESGTE, }, "eslt": { - Args: []parse.FuncType{parse.TypeString, parse.TypeScalar}, - Return: parse.TypeESQuery, + Args: []models.FuncType{models.TypeString, models.TypeScalar}, + Return: models.TypeESQuery, F: ESLT, }, "eslte": { - Args: []parse.FuncType{parse.TypeString, parse.TypeScalar}, - Return: parse.TypeESQuery, + Args: []models.FuncType{models.TypeString, models.TypeScalar}, + Return: models.TypeESQuery, F: ESLTE, }, } diff --git a/cmd/bosun/expr/expr.go b/cmd/bosun/expr/expr.go index f845ac84df..86e31c177c 100644 --- a/cmd/bosun/expr/expr.go +++ b/cmd/bosun/expr/expr.go @@ -140,11 +140,6 @@ func errRecover(errp *error) { } } -type Value interface { - Type() parse.FuncType - Value() interface{} -} - func marshalFloat(n float64) ([]byte, error) { if math.IsNaN(n) { return json.Marshal("NaN") @@ -156,23 +151,28 @@ func marshalFloat(n float64) ([]byte, error) { return json.Marshal(n) } +type Value interface { + Type() models.FuncType + Value() interface{} +} + type Number float64 -func (n Number) Type() parse.FuncType { return parse.TypeNumberSet } +func (n Number) Type() models.FuncType { return models.TypeNumberSet } func (n Number) Value() interface{} { return n } func (n Number) MarshalJSON() ([]byte, error) { return marshalFloat(float64(n)) } type Scalar float64 -func (s Scalar) Type() parse.FuncType { return parse.TypeScalar } +func (s Scalar) Type() models.FuncType { return models.TypeScalar } func (s Scalar) Value() interface{} { return s } func (s Scalar) MarshalJSON() ([]byte, error) { return marshalFloat(float64(s)) } // Series is the standard form within bosun to represent timeseries data. 
type Series map[time.Time]float64 -func (s Series) Type() parse.FuncType { return parse.TypeSeriesSet } -func (s Series) Value() interface{} { return s } +func (s Series) Type() models.FuncType { return models.TypeSeriesSet } +func (s Series) Value() interface{} { return s } func (s Series) MarshalJSON() ([]byte, error) { r := make(map[string]interface{}, len(s)) @@ -186,8 +186,8 @@ type ESQuery struct { Query elastic.Query } -func (e ESQuery) Type() parse.FuncType { return parse.TypeESQuery } -func (e ESQuery) Value() interface{} { return e } +func (e ESQuery) Type() models.FuncType { return models.TypeESQuery } +func (e ESQuery) Value() interface{} { return e } func (e ESQuery) MarshalJSON() ([]byte, error) { source, err := e.Query.Source() if err != nil { @@ -201,8 +201,8 @@ type ESIndexer struct { Generate func(startDuration, endDuration *time.Time) ([]string, error) } -func (e ESIndexer) Type() parse.FuncType { return parse.TypeESIndexer } -func (e ESIndexer) Value() interface{} { return e } +func (e ESIndexer) Type() models.FuncType { return models.TypeESIndexer } +func (e ESIndexer) Value() interface{} { return e } func (e ESIndexer) MarshalJSON() ([]byte, error) { return json.Marshal("ESGenerator") } @@ -231,7 +231,7 @@ func NewSortedSeries(dps Series) SortableSeries { } type Result struct { - Computations + models.Computations Value Group opentsdb.TagSet } @@ -289,22 +289,15 @@ func (r ResultSliceByGroup) Len() int { return len(r) } func (r ResultSliceByGroup) Swap(i, j int) { r[i], r[j] = r[j], r[i] } func (r ResultSliceByGroup) Less(i, j int) bool { return r[i].Group.String() < r[j].Group.String() } -type Computations []Computation - -type Computation struct { - Text string - Value interface{} -} - func (e *State) AddComputation(r *Result, text string, value interface{}) { if !e.enableComputations { return } - r.Computations = append(r.Computations, Computation{opentsdb.ReplaceTags(text, r.Group), value}) + r.Computations = append(r.Computations, 
models.Computation{Text: opentsdb.ReplaceTags(text, r.Group), Value: value}) } type Union struct { - Computations + models.Computations A, B Value Group opentsdb.TagSet } @@ -636,7 +629,7 @@ func (e *State) walkFunc(node *parse.FuncNode, T miniprofiler.Timer) *Results { default: panic(fmt.Errorf("expr: unknown func arg type")) } - if f, ok := v.(float64); ok && node.F.Args[i] == parse.TypeNumberSet { + if f, ok := v.(float64); ok && node.F.Args[i] == models.TypeNumberSet { v = fromScalar(f) } in = append(in, reflect.ValueOf(v)) @@ -650,7 +643,7 @@ func (e *State) walkFunc(node *parse.FuncNode, T miniprofiler.Timer) *Results { panic(err) } } - if node.Return() == parse.TypeNumberSet { + if node.Return() == models.TypeNumberSet { for _, r := range res.Results { e.AddComputation(r, node.String(), r.Value.(Number)) } @@ -661,13 +654,13 @@ func (e *State) walkFunc(node *parse.FuncNode, T miniprofiler.Timer) *Results { // extract will return a float64 if res contains exactly one scalar or a ESQuery if that is the type func extract(res *Results) interface{} { - if len(res.Results) == 1 && res.Results[0].Type() == parse.TypeScalar { + if len(res.Results) == 1 && res.Results[0].Type() == models.TypeScalar { return float64(res.Results[0].Value.Value().(Scalar)) } - if len(res.Results) == 1 && res.Results[0].Type() == parse.TypeESQuery { + if len(res.Results) == 1 && res.Results[0].Type() == models.TypeESQuery { return res.Results[0].Value.Value() } - if len(res.Results) == 1 && res.Results[0].Type() == parse.TypeESIndexer { + if len(res.Results) == 1 && res.Results[0].Type() == models.TypeESIndexer { return res.Results[0].Value.Value() } return res diff --git a/cmd/bosun/expr/funcs.go b/cmd/bosun/expr/funcs.go index f84d413ace..b8f29351c2 100644 --- a/cmd/bosun/expr/funcs.go +++ b/cmd/bosun/expr/funcs.go @@ -14,6 +14,7 @@ import ( "bosun.org/_third_party/github.com/MiniProfiler/go/miniprofiler" "bosun.org/cmd/bosun/expr/parse" "bosun.org/graphite" + "bosun.org/models" 
"bosun.org/opentsdb" "bosun.org/slog" ) @@ -79,14 +80,14 @@ func tagRename(args []parse.Node) (parse.Tags, error) { // Graphite defines functions for use with a Graphite backend. var Graphite = map[string]parse.Func{ "graphiteBand": { - Args: []parse.FuncType{parse.TypeString, parse.TypeString, parse.TypeString, parse.TypeString, parse.TypeScalar}, - Return: parse.TypeSeriesSet, + Args: []models.FuncType{models.TypeString, models.TypeString, models.TypeString, models.TypeString, models.TypeScalar}, + Return: models.TypeSeriesSet, Tags: graphiteTagQuery, F: GraphiteBand, }, "graphite": { - Args: []parse.FuncType{parse.TypeString, parse.TypeString, parse.TypeString, parse.TypeString}, - Return: parse.TypeSeriesSet, + Args: []models.FuncType{models.TypeString, models.TypeString, models.TypeString, models.TypeString}, + Return: models.TypeSeriesSet, Tags: graphiteTagQuery, F: GraphiteQuery, }, @@ -95,31 +96,31 @@ var Graphite = map[string]parse.Func{ // TSDB defines functions for use with an OpenTSDB backend. 
var TSDB = map[string]parse.Func{ "band": { - Args: []parse.FuncType{parse.TypeString, parse.TypeString, parse.TypeString, parse.TypeScalar}, - Return: parse.TypeSeriesSet, + Args: []models.FuncType{models.TypeString, models.TypeString, models.TypeString, models.TypeScalar}, + Return: models.TypeSeriesSet, Tags: tagQuery, F: Band, }, "change": { - Args: []parse.FuncType{parse.TypeString, parse.TypeString, parse.TypeString}, - Return: parse.TypeNumberSet, + Args: []models.FuncType{models.TypeString, models.TypeString, models.TypeString}, + Return: models.TypeNumberSet, Tags: tagQuery, F: Change, }, "count": { - Args: []parse.FuncType{parse.TypeString, parse.TypeString, parse.TypeString}, - Return: parse.TypeScalar, + Args: []models.FuncType{models.TypeString, models.TypeString, models.TypeString}, + Return: models.TypeScalar, F: Count, }, "q": { - Args: []parse.FuncType{parse.TypeString, parse.TypeString, parse.TypeString}, - Return: parse.TypeSeriesSet, + Args: []models.FuncType{models.TypeString, models.TypeString, models.TypeString}, + Return: models.TypeSeriesSet, Tags: tagQuery, F: Query, }, "window": { - Args: []parse.FuncType{parse.TypeString, parse.TypeString, parse.TypeString, parse.TypeScalar, parse.TypeString}, - Return: parse.TypeSeriesSet, + Args: []models.FuncType{models.TypeString, models.TypeString, models.TypeString, models.TypeScalar, models.TypeString}, + Return: models.TypeSeriesSet, Tags: tagQuery, F: Window, Check: windowCheck, @@ -130,191 +131,191 @@ var builtins = map[string]parse.Func{ // Reduction functions "avg": { - Args: []parse.FuncType{parse.TypeSeriesSet}, - Return: parse.TypeNumberSet, + Args: []models.FuncType{models.TypeSeriesSet}, + Return: models.TypeNumberSet, Tags: tagFirst, F: Avg, }, "cCount": { - Args: []parse.FuncType{parse.TypeSeriesSet}, - Return: parse.TypeNumberSet, + Args: []models.FuncType{models.TypeSeriesSet}, + Return: models.TypeNumberSet, Tags: tagFirst, F: CCount, }, "dev": { - Args: 
[]parse.FuncType{parse.TypeSeriesSet}, - Return: parse.TypeNumberSet, + Args: []models.FuncType{models.TypeSeriesSet}, + Return: models.TypeNumberSet, Tags: tagFirst, F: Dev, }, "diff": { - Args: []parse.FuncType{parse.TypeSeriesSet}, - Return: parse.TypeNumberSet, + Args: []models.FuncType{models.TypeSeriesSet}, + Return: models.TypeNumberSet, Tags: tagFirst, F: Diff, }, "first": { - Args: []parse.FuncType{parse.TypeSeriesSet}, - Return: parse.TypeNumberSet, + Args: []models.FuncType{models.TypeSeriesSet}, + Return: models.TypeNumberSet, Tags: tagFirst, F: First, }, "forecastlr": { - Args: []parse.FuncType{parse.TypeSeriesSet, parse.TypeNumberSet}, - Return: parse.TypeNumberSet, + Args: []models.FuncType{models.TypeSeriesSet, models.TypeNumberSet}, + Return: models.TypeNumberSet, Tags: tagFirst, F: Forecast_lr, }, "last": { - Args: []parse.FuncType{parse.TypeSeriesSet}, - Return: parse.TypeNumberSet, + Args: []models.FuncType{models.TypeSeriesSet}, + Return: models.TypeNumberSet, Tags: tagFirst, F: Last, }, "len": { - Args: []parse.FuncType{parse.TypeSeriesSet}, - Return: parse.TypeNumberSet, + Args: []models.FuncType{models.TypeSeriesSet}, + Return: models.TypeNumberSet, Tags: tagFirst, F: Length, }, "max": { - Args: []parse.FuncType{parse.TypeSeriesSet}, - Return: parse.TypeNumberSet, + Args: []models.FuncType{models.TypeSeriesSet}, + Return: models.TypeNumberSet, Tags: tagFirst, F: Max, }, "median": { - Args: []parse.FuncType{parse.TypeSeriesSet}, - Return: parse.TypeNumberSet, + Args: []models.FuncType{models.TypeSeriesSet}, + Return: models.TypeNumberSet, Tags: tagFirst, F: Median, }, "min": { - Args: []parse.FuncType{parse.TypeSeriesSet}, - Return: parse.TypeNumberSet, + Args: []models.FuncType{models.TypeSeriesSet}, + Return: models.TypeNumberSet, Tags: tagFirst, F: Min, }, "percentile": { - Args: []parse.FuncType{parse.TypeSeriesSet, parse.TypeNumberSet}, - Return: parse.TypeNumberSet, + Args: []models.FuncType{models.TypeSeriesSet, models.TypeNumberSet}, 
+ Return: models.TypeNumberSet, Tags: tagFirst, F: Percentile, }, "since": { - Args: []parse.FuncType{parse.TypeSeriesSet}, - Return: parse.TypeNumberSet, + Args: []models.FuncType{models.TypeSeriesSet}, + Return: models.TypeNumberSet, Tags: tagFirst, F: Since, }, "sum": { - Args: []parse.FuncType{parse.TypeSeriesSet}, - Return: parse.TypeNumberSet, + Args: []models.FuncType{models.TypeSeriesSet}, + Return: models.TypeNumberSet, Tags: tagFirst, F: Sum, }, "streak": { - Args: []parse.FuncType{parse.TypeSeriesSet}, - Return: parse.TypeNumberSet, + Args: []models.FuncType{models.TypeSeriesSet}, + Return: models.TypeNumberSet, Tags: tagFirst, F: Streak, }, // Group functions "rename": { - Args: []parse.FuncType{parse.TypeSeriesSet, parse.TypeString}, - Return: parse.TypeSeriesSet, + Args: []models.FuncType{models.TypeSeriesSet, models.TypeString}, + Return: models.TypeSeriesSet, Tags: tagRename, F: Rename, }, "t": { - Args: []parse.FuncType{parse.TypeNumberSet, parse.TypeString}, - Return: parse.TypeSeriesSet, + Args: []models.FuncType{models.TypeNumberSet, models.TypeString}, + Return: models.TypeSeriesSet, Tags: tagTranspose, F: Transpose, }, "ungroup": { - Args: []parse.FuncType{parse.TypeNumberSet}, - Return: parse.TypeScalar, + Args: []models.FuncType{models.TypeNumberSet}, + Return: models.TypeScalar, F: Ungroup, }, // Other functions "abs": { - Args: []parse.FuncType{parse.TypeNumberSet}, - Return: parse.TypeNumberSet, + Args: []models.FuncType{models.TypeNumberSet}, + Return: models.TypeNumberSet, Tags: tagFirst, F: Abs, }, "d": { - Args: []parse.FuncType{parse.TypeString}, - Return: parse.TypeScalar, + Args: []models.FuncType{models.TypeString}, + Return: models.TypeScalar, F: Duration, }, "des": { - Args: []parse.FuncType{parse.TypeSeriesSet, parse.TypeScalar, parse.TypeScalar}, - Return: parse.TypeSeriesSet, + Args: []models.FuncType{models.TypeSeriesSet, models.TypeScalar, models.TypeScalar}, + Return: models.TypeSeriesSet, Tags: tagFirst, F: Des, }, 
"dropge": { - Args: []parse.FuncType{parse.TypeSeriesSet, parse.TypeNumberSet}, - Return: parse.TypeSeriesSet, + Args: []models.FuncType{models.TypeSeriesSet, models.TypeNumberSet}, + Return: models.TypeSeriesSet, Tags: tagFirst, F: DropGe, }, "dropg": { - Args: []parse.FuncType{parse.TypeSeriesSet, parse.TypeNumberSet}, - Return: parse.TypeSeriesSet, + Args: []models.FuncType{models.TypeSeriesSet, models.TypeNumberSet}, + Return: models.TypeSeriesSet, Tags: tagFirst, F: DropG, }, "drople": { - Args: []parse.FuncType{parse.TypeSeriesSet, parse.TypeNumberSet}, - Return: parse.TypeSeriesSet, + Args: []models.FuncType{models.TypeSeriesSet, models.TypeNumberSet}, + Return: models.TypeSeriesSet, Tags: tagFirst, F: DropLe, }, "dropl": { - Args: []parse.FuncType{parse.TypeSeriesSet, parse.TypeNumberSet}, - Return: parse.TypeSeriesSet, + Args: []models.FuncType{models.TypeSeriesSet, models.TypeNumberSet}, + Return: models.TypeSeriesSet, Tags: tagFirst, F: DropL, }, "dropna": { - Args: []parse.FuncType{parse.TypeSeriesSet}, - Return: parse.TypeSeriesSet, + Args: []models.FuncType{models.TypeSeriesSet}, + Return: models.TypeSeriesSet, Tags: tagFirst, F: DropNA, }, "epoch": { - Args: []parse.FuncType{}, - Return: parse.TypeScalar, + Args: []models.FuncType{}, + Return: models.TypeScalar, F: Epoch, }, "filter": { - Args: []parse.FuncType{parse.TypeSeriesSet, parse.TypeNumberSet}, - Return: parse.TypeSeriesSet, + Args: []models.FuncType{models.TypeSeriesSet, models.TypeNumberSet}, + Return: models.TypeSeriesSet, Tags: tagFirst, F: Filter, }, "limit": { - Args: []parse.FuncType{parse.TypeNumberSet, parse.TypeScalar}, - Return: parse.TypeNumberSet, + Args: []models.FuncType{models.TypeNumberSet, models.TypeScalar}, + Return: models.TypeNumberSet, Tags: tagFirst, F: Limit, }, "nv": { - Args: []parse.FuncType{parse.TypeNumberSet, parse.TypeScalar}, - Return: parse.TypeNumberSet, + Args: []models.FuncType{models.TypeNumberSet, models.TypeScalar}, + Return: models.TypeNumberSet, 
Tags: tagFirst, F: NV, }, "sort": { - Args: []parse.FuncType{parse.TypeNumberSet, parse.TypeString}, - Return: parse.TypeNumberSet, + Args: []models.FuncType{models.TypeNumberSet, models.TypeString}, + Return: models.TypeNumberSet, Tags: tagFirst, F: Sort, }, @@ -687,7 +688,7 @@ func windowCheck(t *parse.Tree, f *parse.FuncNode) error { if !ok { return fmt.Errorf("expr: Window: unknown function %v", name) } - if len(v.Args) != 1 || v.Args[0] != parse.TypeSeriesSet || v.Return != parse.TypeNumberSet { + if len(v.Args) != 1 || v.Args[0] != models.TypeSeriesSet || v.Return != models.TypeNumberSet { return fmt.Errorf("expr: Window: %v is not a reduction function", name) } return nil diff --git a/cmd/bosun/expr/influx.go b/cmd/bosun/expr/influx.go index ff2d9661e0..a9df1fe1a4 100644 --- a/cmd/bosun/expr/influx.go +++ b/cmd/bosun/expr/influx.go @@ -9,16 +9,17 @@ import ( "bosun.org/_third_party/github.com/MiniProfiler/go/miniprofiler" "bosun.org/_third_party/github.com/influxdb/influxdb/client" "bosun.org/_third_party/github.com/influxdb/influxdb/influxql" - "bosun.org/_third_party/github.com/influxdb/influxdb/models" + influxModels "bosun.org/_third_party/github.com/influxdb/influxdb/models" "bosun.org/cmd/bosun/expr/parse" + "bosun.org/models" "bosun.org/opentsdb" ) // Influx is a map of functions to query InfluxDB. 
var Influx = map[string]parse.Func{ "influx": { - Args: []parse.FuncType{parse.TypeString, parse.TypeString, parse.TypeString, parse.TypeString, parse.TypeString}, - Return: parse.TypeSeriesSet, + Args: []models.FuncType{models.TypeString, models.TypeString, models.TypeString, models.TypeString, models.TypeString}, + Return: models.TypeSeriesSet, Tags: influxTag, F: InfluxQuery, }, @@ -186,7 +187,7 @@ func influxQueryDuration(now time.Time, query, start, end, groupByInterval strin return s.String(), nil } -func timeInfluxRequest(e *State, T miniprofiler.Timer, db, query, startDuration, endDuration, groupByInterval string) (s []models.Row, err error) { +func timeInfluxRequest(e *State, T miniprofiler.Timer, db, query, startDuration, endDuration, groupByInterval string) (s []influxModels.Row, err error) { q, err := influxQueryDuration(e.now, query, startDuration, endDuration, groupByInterval) if err != nil { return nil, err @@ -216,7 +217,7 @@ func timeInfluxRequest(e *State, T miniprofiler.Timer, db, query, startDuration, var val interface{} var ok bool val, err = e.cache.Get(q, getFn) - if s, ok = val.([]models.Row); !ok { + if s, ok = val.([]influxModels.Row); !ok { err = fmt.Errorf("influx: did not get a valid result from InfluxDB") } }) diff --git a/cmd/bosun/expr/logstash.go b/cmd/bosun/expr/logstash.go index 48aabf3973..2873ebf6b5 100644 --- a/cmd/bosun/expr/logstash.go +++ b/cmd/bosun/expr/logstash.go @@ -10,6 +10,7 @@ import ( "bosun.org/_third_party/github.com/MiniProfiler/go/miniprofiler" "bosun.org/_third_party/github.com/olivere/elastic" "bosun.org/cmd/bosun/expr/parse" + "bosun.org/models" "bosun.org/opentsdb" ) @@ -20,14 +21,14 @@ var lsClient *elastic.Client // logstash. 
They are only loaded when the elastic hosts are set in the config file var LogstashElastic = map[string]parse.Func{ "lscount": { - Args: []parse.FuncType{parse.TypeString, parse.TypeString, parse.TypeString, parse.TypeString, parse.TypeString, parse.TypeString}, - Return: parse.TypeSeriesSet, + Args: []models.FuncType{models.TypeString, models.TypeString, models.TypeString, models.TypeString, models.TypeString, models.TypeString}, + Return: models.TypeSeriesSet, Tags: logstashTagQuery, F: LSCount, }, "lsstat": { - Args: []parse.FuncType{parse.TypeString, parse.TypeString, parse.TypeString, parse.TypeString, parse.TypeString, parse.TypeString, parse.TypeString, parse.TypeString}, - Return: parse.TypeSeriesSet, + Args: []models.FuncType{models.TypeString, models.TypeString, models.TypeString, models.TypeString, models.TypeString, models.TypeString, models.TypeString, models.TypeString}, + Return: models.TypeSeriesSet, Tags: logstashTagQuery, F: LSStat, }, diff --git a/cmd/bosun/expr/parse/node.go b/cmd/bosun/expr/parse/node.go index 631c991999..4c2a185088 100644 --- a/cmd/bosun/expr/parse/node.go +++ b/cmd/bosun/expr/parse/node.go @@ -9,6 +9,8 @@ package parse import ( "fmt" "strconv" + + "bosun.org/models" ) var textFormat = "%s" // Changed to "%q" in tests for better error messages. @@ -22,7 +24,7 @@ type Node interface { StringAST() string Position() Pos // byte position of start of node in full original input string Check(*Tree) error // performs type checking for itself and sub-nodes - Return() FuncType + Return() models.FuncType Tags() (Tags, error) // Make sure only functions in this package can create Nodes. 
unexported() @@ -116,14 +118,14 @@ func (f *FuncNode) Check(t *Tree) error { } } for i, arg := range f.Args { - var funcType FuncType + var funcType models.FuncType if f.F.VArgs && i >= f.F.VArgsPos { funcType = f.F.Args[f.F.VArgsPos] } else { funcType = f.F.Args[i] } argType := arg.Return() - if funcType == TypeNumberSet && argType == TypeScalar { + if funcType == models.TypeNumberSet && argType == models.TypeScalar { // Scalars are promoted to NumberSets during execution. } else if funcType != argType { return fmt.Errorf("parse: expected %v, got %v for argument %v (%v)", funcType, argType, i, arg.String()) @@ -138,7 +140,7 @@ func (f *FuncNode) Check(t *Tree) error { return nil } -func (f *FuncNode) Return() FuncType { +func (f *FuncNode) Return() models.FuncType { return f.F.Return } @@ -204,8 +206,8 @@ func (n *NumberNode) Check(*Tree) error { return nil } -func (n *NumberNode) Return() FuncType { - return TypeScalar +func (n *NumberNode) Return() models.FuncType { + return models.TypeScalar } func (n *NumberNode) Tags() (Tags, error) { @@ -236,8 +238,8 @@ func (s *StringNode) Check(*Tree) error { return nil } -func (s *StringNode) Return() FuncType { - return TypeString +func (s *StringNode) Return() models.FuncType { + return models.TypeString } func (s *StringNode) Tags() (Tags, error) { @@ -268,14 +270,14 @@ func (b *BinaryNode) StringAST() string { func (b *BinaryNode) Check(t *Tree) error { t1 := b.Args[0].Return() t2 := b.Args[1].Return() - if t1 == TypeSeriesSet && t2 == TypeSeriesSet { + if t1 == models.TypeSeriesSet && t2 == models.TypeSeriesSet { return fmt.Errorf("parse: type error in %s: at least one side must be a number", b) } check := t1 - if t1 == TypeSeriesSet { + if t1 == models.TypeSeriesSet { check = t2 } - if check != TypeNumberSet && check != TypeScalar { + if check != models.TypeNumberSet && check != models.TypeScalar { return fmt.Errorf("parse: type error in %s: expected a number", b) } if err := b.Args[0].Check(t); err != nil { @@ 
-298,7 +300,7 @@ func (b *BinaryNode) Check(t *Tree) error { return nil } -func (b *BinaryNode) Return() FuncType { +func (b *BinaryNode) Return() models.FuncType { t0 := b.Args[0].Return() t1 := b.Args[1].Return() if t1 > t0 { @@ -341,14 +343,14 @@ func (u *UnaryNode) StringAST() string { func (u *UnaryNode) Check(t *Tree) error { switch rt := u.Arg.Return(); rt { - case TypeNumberSet, TypeSeriesSet, TypeScalar: + case models.TypeNumberSet, models.TypeSeriesSet, models.TypeScalar: return u.Arg.Check(t) default: return fmt.Errorf("parse: type error in %s, expected %s, got %s", u, "number", rt) } } -func (u *UnaryNode) Return() FuncType { +func (u *UnaryNode) Return() models.FuncType { return u.Arg.Return() } diff --git a/cmd/bosun/expr/parse/parse.go b/cmd/bosun/expr/parse/parse.go index 9486f631c5..0ff71e5d90 100644 --- a/cmd/bosun/expr/parse/parse.go +++ b/cmd/bosun/expr/parse/parse.go @@ -13,6 +13,8 @@ import ( "sort" "strconv" "strings" + + "bosun.org/models" ) // Tree is the representation of a single parsed expression. 
@@ -29,8 +31,8 @@ type Tree struct { } type Func struct { - Args []FuncType - Return FuncType + Args []models.FuncType + Return models.FuncType Tags func([]Node) (Tags, error) F interface{} VArgs bool @@ -38,36 +40,6 @@ type Func struct { Check func(*Tree, *FuncNode) error } -type FuncType int - -func (f FuncType) String() string { - switch f { - case TypeNumberSet: - return "number" - case TypeString: - return "string" - case TypeSeriesSet: - return "series" - case TypeScalar: - return "scalar" - case TypeESQuery: - return "esquery" - case TypeESIndexer: - return "esindexer" - default: - return "unknown" - } -} - -const ( - TypeString FuncType = iota - TypeScalar - TypeNumberSet - TypeSeriesSet - TypeESQuery - TypeESIndexer -) - type Tags map[string]struct{} func (t Tags) String() string { @@ -214,7 +186,7 @@ func (t *Tree) startParse(funcs []map[string]Func, lex *lexer) { for _, funcMap := range funcs { for name, f := range funcMap { switch f.Return { - case TypeSeriesSet, TypeNumberSet: + case models.TypeSeriesSet, models.TypeNumberSet: if f.Tags == nil { panic(fmt.Errorf("%v: expected Tags definition: got nil", name)) } diff --git a/cmd/bosun/expr/parse/parse_test.go b/cmd/bosun/expr/parse/parse_test.go index 007a33fbed..ca8613d60b 100644 --- a/cmd/bosun/expr/parse/parse_test.go +++ b/cmd/bosun/expr/parse/parse_test.go @@ -8,6 +8,8 @@ import ( "flag" "fmt" "testing" + + "bosun.org/models" ) var debug = flag.Bool("debug", false, "show the errors produced by the main tests") @@ -150,8 +152,8 @@ func tagNil(args []Node) (Tags, error) { var builtins = map[string]Func{ "avg": { - []FuncType{TypeSeriesSet}, - TypeNumberSet, + []models.FuncType{models.TypeSeriesSet}, + models.TypeNumberSet, tagNil, nil, false, @@ -159,8 +161,8 @@ var builtins = map[string]Func{ nil, }, "band": { - []FuncType{TypeString, TypeString, TypeString, TypeScalar}, - TypeSeriesSet, + []models.FuncType{models.TypeString, models.TypeString, models.TypeString, models.TypeScalar}, + 
models.TypeSeriesSet, tagNil, nil, false, @@ -168,8 +170,8 @@ var builtins = map[string]Func{ nil, }, "q": { - []FuncType{TypeString, TypeString}, - TypeSeriesSet, + []models.FuncType{models.TypeString, models.TypeString}, + models.TypeSeriesSet, tagNil, nil, false, @@ -177,8 +179,8 @@ var builtins = map[string]Func{ nil, }, "forecastlr": { - []FuncType{TypeSeriesSet, TypeScalar}, - TypeNumberSet, + []models.FuncType{models.TypeSeriesSet, models.TypeScalar}, + models.TypeNumberSet, tagNil, nil, false, diff --git a/cmd/bosun/sched/bolt.go b/cmd/bosun/sched/bolt.go index 860e7e516c..c406f6ea73 100644 --- a/cmd/bosun/sched/bolt.go +++ b/cmd/bosun/sched/bolt.go @@ -15,6 +15,7 @@ import ( "bosun.org/_third_party/github.com/boltdb/bolt" "bosun.org/cmd/bosun/conf" "bosun.org/cmd/bosun/database" + "bosun.org/cmd/bosun/expr" "bosun.org/collect" "bosun.org/metadata" "bosun.org/models" @@ -44,7 +45,6 @@ const ( dbBucket = "bindata" dbConfigTextBucket = "configText" dbNotifications = "notifications" - dbStatus = "status" ) func (s *Schedule) save() { @@ -54,7 +54,6 @@ func (s *Schedule) save() { s.Lock("Save") store := map[string]interface{}{ dbNotifications: s.Notifications, - dbStatus: s.status, } tostore := make(map[string][]byte) for name, data := range store { @@ -141,63 +140,66 @@ func (s *Schedule) RestoreState() error { slog.Errorln(dbNotifications, err) } - status := make(States) - if err := decode(db, dbStatus, &status); err != nil { - slog.Errorln(dbStatus, err) - } - clear := func(r *Result) { - if r == nil { - return - } - r.Computations = nil - } - for ak, st := range status { - a, present := s.Conf.Alerts[ak.Name()] - if !present { - slog.Errorln("sched: alert no longer present, ignoring:", ak) - continue - } else if s.Conf.Squelched(a, st.Group) { - slog.Infoln("sched: alert now squelched:", ak) - continue - } else { - t := a.Unknown - if t == 0 { - t = s.Conf.CheckFrequency - } - if t == 0 && st.Last().Status == StUnknown { - st.Append(&Event{Status: StNormal, 
IncidentId: st.Last().IncidentId}) - } - } - clear(st.Result) - newHistory := []Event{} - for _, e := range st.History { - clear(e.Warn) - clear(e.Crit) - // Remove error events which no longer are a thing. - if e.Status <= StUnknown { - newHistory = append(newHistory, e) - } - } - st.History = newHistory - s.status[ak] = st - if a.Log && st.Open { - st.Open = false - slog.Infof("sched: alert %s is now log, closing, was %s", ak, st.Status()) - } - for name, t := range notifications[ak] { - n, present := s.Conf.Notifications[name] - if !present { - slog.Infoln("sched: notification not present during restore:", name) - continue - } - if a.Log { - slog.Infoln("sched: alert is now log, removing notification:", ak) - continue - } - s.AddNotification(ak, n, t) - } + //status := make(States) + // if err := decode(db, dbStatus, &status); err != nil { + // slog.Errorln(dbStatus, err) + // } + // clear := func(r *models.Result) { + // if r == nil { + // return + // } + // r.Computations = nil + //} + //TODO: ??? + // for ak, st := range status { + // a, present := s.Conf.Alerts[ak.Name()] + // if !present { + // slog.Errorln("sched: alert no longer present, ignoring:", ak) + // continue + // } else if s.Conf.Squelched(a, st.Group) { + // slog.Infoln("sched: alert now squelched:", ak) + // continue + // } else { + // t := a.Unknown + // if t == 0 { + // t = s.Conf.CheckFrequency + // } + // if t == 0 && st.Last().Status == StUnknown { + // st.Append(&Event{Status: StNormal, IncidentId: st.Last().IncidentId}) + // } + // } + // clear(st.Result) + // newHistory := []Event{} + // for _, e := range st.History { + // clear(e.Warn) + // clear(e.Crit) + // // Remove error events which no longer are a thing. 
+ // if e.Status <= StUnknown { + // newHistory = append(newHistory, e) + // } + // } + // st.History = newHistory + // s.status[ak] = st + // if a.Log && st.Open { + // st.Open = false + // slog.Infof("sched: alert %s is now log, closing, was %s", ak, st.Status()) + // } + // for name, t := range notifications[ak] { + // n, present := s.Conf.Notifications[name] + // if !present { + // slog.Infoln("sched: notification not present during restore:", name) + // continue + // } + // if a.Log { + // slog.Infoln("sched: alert is now log, removing notification:", ak) + // continue + // } + // s.AddNotification(ak, n, t) + // } + //} + if err := migrateOldDataToRedis(db, s.DataAccess); err != nil { + return err } - migrateOldDataToRedis(db, s.DataAccess) // delete metrictags if they exist. deleteKey(s.db, "metrictags") slog.Infoln("RestoreState done in", time.Since(start)) @@ -272,10 +274,10 @@ func migrateOldDataToRedis(db *bolt.DB, data database.DataAccess) error { if err := migrateSearch(db, data); err != nil { return err } - if err := migrateIncidents(db, data); err != nil { + if err := migrateSilence(db, data); err != nil { return err } - if err := migrateSilence(db, data); err != nil { + if err := migrateState(db, data); err != nil { return err } return nil @@ -408,60 +410,175 @@ func migrateSearch(db *bolt.DB, data database.DataAccess) error { return nil } -func migrateIncidents(db *bolt.DB, data database.DataAccess) error { - migrated, err := isMigrated(db, "incidents") +func migrateSilence(db *bolt.DB, data database.DataAccess) error { + migrated, err := isMigrated(db, "silence") if err != nil { return err } if migrated { return nil } - slog.Info("migrating incidents") - incidents := map[uint64]*models.Incident{} - if err := decode(db, "incidents", &incidents); err != nil { + slog.Info("migrating silence") + silence := map[string]*models.Silence{} + if err := decode(db, "silence", &silence); err != nil { return err } - max := uint64(0) - for k, v := range 
incidents { - data.Incidents().UpdateIncident(k, v) - if k > max { - max = k - } - } - - if err = data.Incidents().SetMaxId(max); err != nil { - return err + for _, v := range silence { + v.TagString = v.Tags.Tags() + data.Silence().AddSilence(v) } - if err = setMigrated(db, "incidents"); err != nil { + if err = setMigrated(db, "silence"); err != nil { return err } - return nil } -func migrateSilence(db *bolt.DB, data database.DataAccess) error { - migrated, err := isMigrated(db, "silence") +func migrateState(db *bolt.DB, data database.DataAccess) error { + migrated, err := isMigrated(db, "state") if err != nil { return err } if migrated { return nil } - slog.Info("migrating silence") - silence := map[string]*models.Silence{} - if err := decode(db, "silence", &silence); err != nil { + //redefine the structs as they were when we gob encoded them + type Result struct { + *expr.Result + Expr string + } + mResult := func(r *Result) *models.Result { + if r == nil || r.Result == nil { + return &models.Result{} + } + v, _ := valueToFloat(r.Result.Value) + return &models.Result{ + Computations: r.Result.Computations, + Value: models.Float(v), + Expr: r.Expr, + } + } + type Event struct { + Warn, Crit *Result + Status models.Status + Time time.Time + Unevaluated bool + IncidentId uint64 + } + type State struct { + *Result + History []Event + Actions []models.Action + Touched time.Time + Alert string + Tags string + Group opentsdb.TagSet + Subject string + Body string + EmailBody []byte + EmailSubject []byte + Attachments []*models.Attachment + NeedAck bool + Open bool + Forgotten bool + Unevaluated bool + LastLogTime time.Time + } + type OldStates map[models.AlertKey]*State + slog.Info("migrating state") + states := OldStates{} + if err := decode(db, "status", &states); err != nil { return err } - for _, v := range silence { - v.TagString = v.Tags.Tags() - data.Silence().AddSilence(v) + for ak, state := range states { + if len(state.History) == 0 { + continue + } + var 
thisId uint64 + events := []Event{} + addIncident := func(saveBody bool) error { + if thisId == 0 || len(events) == 0 || state == nil { + return nil + } + incident := NewIncident(ak) + incident.Expr = state.Expr + + incident.NeedAck = state.NeedAck + incident.Open = state.Open + incident.Result = mResult(state.Result) + incident.Unevaluated = state.Unevaluated + incident.Start = events[0].Time + incident.Id = int64(thisId) + incident.Subject = state.Subject + if saveBody { + incident.Body = state.Body + } + for _, ev := range events { + incident.CurrentStatus = ev.Status + mEvent := models.Event{ + Crit: mResult(ev.Crit), + Status: ev.Status, + Time: ev.Time, + Unevaluated: ev.Unevaluated, + Warn: mResult(ev.Warn), + } + incident.Events = append(incident.Events, mEvent) + if ev.Status > incident.WorstStatus { + incident.WorstStatus = ev.Status + } + if ev.Status > models.StNormal { + incident.LastAbnormalStatus = ev.Status + incident.LastAbnormalTime = ev.Time.UTC().Unix() + } + } + for _, ac := range state.Actions { + if ac.Time.Before(incident.Start) { + continue + } + incident.Actions = append(incident.Actions, ac) + if ac.Time.After(incident.Events[len(incident.Events)-1].Time) && ac.Type == models.ActionClose { + incident.End = &ac.Time + break + } + } + if err := data.State().ImportIncidentState(incident); err != nil { + return err + } + return nil + } + //essentially a rle algorithm to assign events to incidents + for _, e := range state.History { + if e.Status > models.StUnknown { + continue + } + if e.IncidentId == 0 { + //include all non-assigned incidents up to the next non-match + events = append(events, e) + continue + } + if thisId == 0 { + thisId = e.IncidentId + events = append(events, e) + } + if e.IncidentId != thisId { + if err := addIncident(false); err != nil { + return err + } + thisId = e.IncidentId + events = []Event{e} + + } else { + events = append(events, e) + } + } + if err := addIncident(true); err != nil { + return err + } } - if err = 
setMigrated(db, "silence"); err != nil { + if err = setMigrated(db, "state"); err != nil { return err } return nil } - func isMigrated(db *bolt.DB, name string) (bool, error) { found := false err := db.View(func(tx *bolt.Tx) error { diff --git a/cmd/bosun/sched/check.go b/cmd/bosun/sched/check.go index 6124f4f536..58f4767434 100644 --- a/cmd/bosun/sched/check.go +++ b/cmd/bosun/sched/check.go @@ -38,41 +38,14 @@ func init() { collect.AggregateMeta("bosun.template.render", metadata.MilliSecond, "The amount of time it takes to render the specified alert template.") } -func NewStatus(ak models.AlertKey) *State { - g := ak.Group() - return &State{ - Alert: ak.Name(), - Tags: g.Tags(), - Group: g, - } -} - -// Get a copy of the status for the specified alert key -func (s *Schedule) GetStatus(ak models.AlertKey) *State { - s.Lock("GetStatus") - state := s.status[ak] - if state != nil { - state = state.Copy() - } - s.Unlock() - return state -} - -func (s *Schedule) SetStatus(ak models.AlertKey, st *State) { - s.Lock("SetStatus") - s.status[ak] = st - s.Unlock() -} - -func (s *Schedule) GetOrCreateStatus(ak models.AlertKey) *State { - s.Lock("GetOrCreateStatus") - state := s.status[ak] - if state == nil { - state = NewStatus(ak) - s.status[ak] = state - } - s.Unlock() - return state +func NewIncident(ak models.AlertKey) *models.IncidentState { + s := &models.IncidentState{} + s.Start = time.Now() + s.AlertKey = ak + s.Alert = ak.Name() + s.Tags = ak.Group().Tags() + s.Result = &models.Result{} + return s } type RunHistory struct { @@ -84,7 +57,7 @@ type RunHistory struct { Logstash expr.LogstashElasticHosts Elastic expr.ElasticHosts - Events map[models.AlertKey]*Event + Events map[models.AlertKey]*models.Event schedule *Schedule } @@ -100,7 +73,7 @@ func (s *Schedule) NewRunHistory(start time.Time, cache *cache.Cache) *RunHistor return &RunHistory{ Cache: cache, Start: start, - Events: make(map[models.AlertKey]*Event), + Events: make(map[models.AlertKey]*models.Event), 
Context: s.Conf.TSDBContext(), GraphiteContext: s.Conf.GraphiteContext(), InfluxConfig: s.Conf.InfluxConfig, @@ -115,7 +88,11 @@ func (s *Schedule) RunHistory(r *RunHistory) { checkNotify := false silenced := s.Silenced() for ak, event := range r.Events { - checkNotify = s.runHistory(r, ak, event, silenced) || checkNotify + shouldNotify, err := s.runHistory(r, ak, event, silenced) + checkNotify = checkNotify || shouldNotify + if err != nil { + slog.Errorf("Error in runHistory for %s. %s.", ak, err) + } } if checkNotify && s.nc != nil { select { @@ -126,146 +103,148 @@ func (s *Schedule) RunHistory(r *RunHistory) { } // RunHistory for a single alert key. Returns true if notifications were altered. -func (s *Schedule) runHistory(r *RunHistory, ak models.AlertKey, event *Event, silenced map[models.AlertKey]models.Silence) bool { - checkNotify := false - // get existing state object for alert key. add to schedule status if doesn't already exist - state := s.GetStatus(ak) - if state == nil { - state = NewStatus(ak) - s.SetStatus(ak, state) - } - defer s.SetStatus(ak, state) - // make sure we always touch the state. - state.Touched = r.Start - // set state.Result according to event result - if event.Crit != nil { - state.Result = event.Crit - } else if event.Warn != nil { - state.Result = event.Warn - } - // if event is unevaluated, we are done. - state.Unevaluated = event.Unevaluated - if event.Unevaluated { - return checkNotify - } - // assign incident id to new event if applicable - prev := state.Last() - worst := StNormal +func (s *Schedule) runHistory(r *RunHistory, ak models.AlertKey, event *models.Event, silenced SilenceTester) (checkNotify bool, err error) { event.Time = r.Start - if prev.IncidentId != 0 { - // If last event has incident id and is not closed, we continue it. 
- incident, err := s.DataAccess.Incidents().GetIncident(prev.IncidentId) - if err != nil { - slog.Error(err) - } else if incident.End == nil { - event.IncidentId = prev.IncidentId - worst = state.WorstThisIncident() - } + data := s.DataAccess.State() + err = data.TouchAlertKey(ak, time.Now()) + if err != nil { + return } - if event.IncidentId == 0 && event.Status != StNormal { - incident, err := s.createIncident(ak, event.Time) - if err != nil { - slog.Error("Error creating incident", err) + // get existing open incident if exists + incident, err := data.GetOpenIncident(ak) + if err != nil { + return + } + defer func() { + // save unless incident is new and closed (log alert) + if incident != nil && (incident.Id != 0 || incident.Open) { + err = data.UpdateIncidentState(incident) } else { - event.IncidentId = incident.Id + err = data.SetUnevaluated(ak, event.Unevaluated) // if nothing to save, at least store the unevaluated state } + }() + // If nothing is out of the ordinary we are done + if event.Status <= models.StNormal && incident == nil { + return + } + + // if event is unevaluated, we are done also. 
+ if incident != nil { + incident.Unevaluated = event.Unevaluated + } + if event.Unevaluated { + return + } + + shouldNotify := false + if incident == nil { + incident = NewIncident(ak) + shouldNotify = true + } + // set state.Result according to event result + if event.Status == models.StCritical { + incident.Result = event.Crit + } else if event.Status == models.StWarning { + incident.Result = event.Warn + } + + if event.Status > models.StNormal { + incident.LastAbnormalStatus = event.Status + incident.LastAbnormalTime = event.Time.UTC().Unix() + } + if event.Status > incident.WorstStatus { + incident.WorstStatus = event.Status + shouldNotify = true } + if event.Status != incident.CurrentStatus { + incident.Events = append(incident.Events, *event) + } + incident.CurrentStatus = event.Status - state.Append(event) a := s.Conf.Alerts[ak.Name()] - // render templates and open alert key if abnormal - if event.Status > StNormal { - s.executeTemplates(state, event, a, r) - state.Open = true + //render templates and open alert key if abnormal + if event.Status > models.StNormal { + s.executeTemplates(incident, event, a, r) + incident.Open = true if a.Log { - worst = StNormal - state.Open = false + incident.Open = false } } + // On state increase, clear old notifications and notify current. - // If the old alert was not acknowledged, do nothing. // Do nothing if state did not change. notify := func(ns *conf.Notifications) { if a.Log { - lastLogTime := state.LastLogTime + lastLogTime := s.lastLogTimes[ak] now := time.Now() if now.Before(lastLogTime.Add(a.MaxLogFrequency)) { return } - state.LastLogTime = now + s.lastLogTimes[ak] = now } - nots := ns.Get(s.Conf, state.Group) + nots := ns.Get(s.Conf, incident.AlertKey.Group()) for _, n := range nots { - s.Notify(state, n) + s.Notify(incident, n) checkNotify = true } } + notifyCurrent := func() { - // Auto close ignoreUnknowns. 
- if a.IgnoreUnknown && event.Status == StUnknown { - state.Open = false - state.Forgotten = true - state.NeedAck = false - state.Action("bosun", "Auto close because alert has ignoreUnknown.", ActionClose, event.Time) - slog.Infof("auto close %s because alert has ignoreUnknown", ak) + si := silenced(ak) + //Auto close ignoreUnknowns for new incident. + if a.IgnoreUnknown && event.Status == models.StUnknown { + incident.Open = false return - } else if silenced[ak].Forget && event.Status == StUnknown { - state.Open = false - state.Forgotten = true - state.NeedAck = false - state.Action("bosun", "Auto close because alert is silenced and marked auto forget.", ActionClose, event.Time) - slog.Infof("auto close %s because alert is silenced and marked auto forget", ak) + } else if si != nil && si.Forget && event.Status == models.StUnknown { + incident.Open = false return } - state.NeedAck = true + incident.NeedAck = true switch event.Status { - case StCritical, StUnknown: + case models.StCritical, models.StUnknown: notify(a.CritNotification) - case StWarning: + case models.StWarning: notify(a.WarnNotification) } } clearOld := func() { - state.NeedAck = false + incident.NeedAck = false delete(s.Notifications, ak) } // lock while we change notifications. s.Lock("RunHistory") - if event.Status > worst { + if shouldNotify { clearOld() notifyCurrent() - } else if _, ok := silenced[ak]; ok && event.Status == StNormal { + } + + // finally close an open alert with silence once it goes back to normal. 
+ if si := silenced(ak); si != nil && event.Status == models.StNormal { go func(ak models.AlertKey) { slog.Infof("auto close %s because was silenced", ak) - err := s.Action("bosun", "Auto close because was silenced.", ActionClose, ak) + err := s.Action("bosun", "Auto close because was silenced.", models.ActionClose, ak) if err != nil { slog.Errorln(err) } }(ak) } - s.Unlock() - return checkNotify + return checkNotify, nil } -func (s *Schedule) executeTemplates(state *State, event *Event, a *conf.Alert, r *RunHistory) { - state.Subject = "" - state.Body = "" - state.EmailBody = nil - state.EmailSubject = nil - state.Attachments = nil - if event.Status != StUnknown { +func (s *Schedule) executeTemplates(state *models.IncidentState, event *models.Event, a *conf.Alert, r *RunHistory) { + if event.Status != models.StUnknown { var errs []error metric := "template.render" //Render subject endTiming := collect.StartTimer(metric, opentsdb.TagSet{"alert": a.Name, "type": "subject"}) subject, err := s.ExecuteSubject(r, a, state, false) if err != nil { - slog.Infof("%s: %v", state.AlertKey(), err) + slog.Infof("%s: %v", state.AlertKey, err) errs = append(errs, err) } else if subject == nil { - err = fmt.Errorf("Empty subject on %s", state.AlertKey()) + err = fmt.Errorf("Empty subject on %s", state.AlertKey) slog.Error(err) errs = append(errs, err) } @@ -275,10 +254,10 @@ func (s *Schedule) executeTemplates(state *State, event *Event, a *conf.Alert, r endTiming = collect.StartTimer(metric, opentsdb.TagSet{"alert": a.Name, "type": "body"}) body, _, err := s.ExecuteBody(r, a, state, false) if err != nil { - slog.Infof("%s: %v", state.AlertKey(), err) + slog.Infof("%s: %v", state.AlertKey, err) errs = append(errs, err) } else if subject == nil { - err = fmt.Errorf("Empty body on %s", state.AlertKey()) + err = fmt.Errorf("Empty body on %s", state.AlertKey) slog.Error(err) errs = append(errs, err) } @@ -288,10 +267,10 @@ func (s *Schedule) executeTemplates(state *State, event 
*Event, a *conf.Alert, r endTiming = collect.StartTimer(metric, opentsdb.TagSet{"alert": a.Name, "type": "emailbody"}) emailbody, attachments, err := s.ExecuteBody(r, a, state, true) if err != nil { - slog.Infof("%s: %v", state.AlertKey(), err) + slog.Infof("%s: %v", state.AlertKey, err) errs = append(errs, err) } else if subject == nil { - err = fmt.Errorf("Empty email body on %s", state.AlertKey()) + err = fmt.Errorf("Empty email body on %s", state.AlertKey) slog.Error(err) errs = append(errs, err) } @@ -301,10 +280,10 @@ func (s *Schedule) executeTemplates(state *State, event *Event, a *conf.Alert, r endTiming = collect.StartTimer(metric, opentsdb.TagSet{"alert": a.Name, "type": "emailsubject"}) emailsubject, err := s.ExecuteSubject(r, a, state, true) if err != nil { - slog.Infof("%s: %v", state.AlertKey(), err) + slog.Infof("%s: %v", state.AlertKey, err) errs = append(errs, err) } else if subject == nil { - err = fmt.Errorf("Empty email subject on %s", state.AlertKey()) + err = fmt.Errorf("Empty email subject on %s", state.AlertKey) slog.Error(err) errs = append(errs, err) } @@ -342,7 +321,7 @@ func (s *Schedule) CollectStates() { for _, alert := range s.Conf.Alerts { severityCounts[alert.Name] = make(map[string]int64) abnormalCounts[alert.Name] = make(map[string]int64) - var i Status + var i models.Status for i = 1; i.String() != "none"; i++ { severityCounts[alert.Name][i.String()] = 0 abnormalCounts[alert.Name][i.String()] = 0 @@ -360,36 +339,37 @@ func (s *Schedule) CollectStates() { ackByNotificationCounts[notificationName][false] = 0 ackByNotificationCounts[notificationName][true] = 0 } - for _, state := range s.status { - if !state.Open { - continue - } - name := state.AlertKey().Name() - alertDef := s.Conf.Alerts[name] - nots := make(map[string]bool) - for name := range alertDef.WarnNotification.Get(s.Conf, state.Group) { - nots[name] = true - } - for name := range alertDef.CritNotification.Get(s.Conf, state.Group) { - nots[name] = true - } - incident, 
err := s.GetIncident(state.Last().IncidentId) - if err != nil { - slog.Errorln(err) - } - for notificationName := range nots { - ackByNotificationCounts[notificationName][state.NeedAck]++ - if incident != nil && incident.Start.Before(unAckOldestByNotification[notificationName]) && state.NeedAck { - unAckOldestByNotification[notificationName] = incident.Start - } - } - severity := state.Status().String() - lastAbnormal := state.AbnormalStatus().String() - severityCounts[state.Alert][severity]++ - abnormalCounts[state.Alert][lastAbnormal]++ - ackStatusCounts[state.Alert][state.NeedAck]++ - activeStatusCounts[state.Alert][state.IsActive()]++ - } + //TODO: + // for _, state := range s.status { + // if !state.Open { + // continue + // } + // name := state.AlertKey.Name() + // alertDef := s.Conf.Alerts[name] + // nots := make(map[string]bool) + // for name := range alertDef.WarnNotification.Get(s.Conf, state.Group) { + // nots[name] = true + // } + // for name := range alertDef.CritNotification.Get(s.Conf, state.Group) { + // nots[name] = true + // } + // incident, err := s.GetIncident(state.Last().IncidentId) + // if err != nil { + // slog.Errorln(err) + // } + // for notificationName := range nots { + // ackByNotificationCounts[notificationName][state.NeedAck]++ + // if incident != nil && incident.Start.Before(unAckOldestByNotification[notificationName]) && state.NeedAck { + // unAckOldestByNotification[notificationName] = incident.Start + // } + // } + // severity := state.CurrentStatus.String() + // lastAbnormal := state.LastAbnormalStatus.String() + // severityCounts[state.Alert][severity]++ + // abnormalCounts[state.Alert][lastAbnormal]++ + // ackStatusCounts[state.Alert][state.NeedAck]++ + // activeStatusCounts[state.Alert][state.IsActive()]++ + // } for notification := range ackByNotificationCounts { ts := opentsdb.TagSet{"notification": notification} err := collect.Put("alerts.acknowledgement_status_by_notification", @@ -462,21 +442,12 @@ func (s *Schedule) 
CollectStates() { } } -func (r *RunHistory) GetUnknownAndUnevaluatedAlertKeys(alert string) (unknown, uneval []models.AlertKey) { - unknown = []models.AlertKey{} - uneval = []models.AlertKey{} - r.schedule.Lock("GetUnknownUneval") - for ak, st := range r.schedule.status { - if ak.Name() != alert { - continue - } - if st.Last().Status == StUnknown { - unknown = append(unknown, ak) - } else if st.Unevaluated { - uneval = append(uneval, ak) - } +func (s *Schedule) GetUnknownAndUnevaluatedAlertKeys(alert string) (unknown, uneval []models.AlertKey) { + unknown, uneval, err := s.DataAccess.State().GetUnknownAndUnevalAlertKeys(alert) + if err != nil { + slog.Errorf("Error getting unknown/unevaluated alert keys: %s", err) + return nil, nil } - r.schedule.Unlock() return unknown, uneval } @@ -487,23 +458,23 @@ func (s *Schedule) findUnknownAlerts(now time.Time, alert string) []models.Alert if time.Now().Sub(bosunStartupTime) < s.Conf.CheckFrequency { return keys } - s.Lock("FindUnknown") - for ak, st := range s.status { - name := ak.Name() - if name != alert || st.Forgotten || !s.AlertSuccessful(ak.Name()) { - continue - } - a := s.Conf.Alerts[name] - t := a.Unknown - if t == 0 { - t = s.Conf.CheckFrequency * 2 * time.Duration(a.RunEvery) - } - if now.Sub(st.Touched) < t { - continue - } + if !s.AlertSuccessful(alert) { + return keys + } + a := s.Conf.Alerts[alert] + t := a.Unknown + if t == 0 { + t = s.Conf.CheckFrequency * 2 * time.Duration(a.RunEvery) + } + maxTouched := now.UTC().Unix() - int64(t.Seconds()) + untouched, err := s.DataAccess.State().GetUntouchedSince(alert, maxTouched) + if err != nil { + slog.Errorf("Error finding unknown alerts for alert %s: %s.", alert, err) + return keys + } + for _, ak := range untouched { keys = append(keys, ak) } - s.Unlock() return keys } @@ -511,16 +482,16 @@ func (s *Schedule) CheckAlert(T miniprofiler.Timer, r *RunHistory, a *conf.Alert slog.Infof("check alert %v start", a.Name) start := time.Now() for _, ak := range 
s.findUnknownAlerts(r.Start, a.Name) { - r.Events[ak] = &Event{Status: StUnknown} + r.Events[ak] = &models.Event{Status: models.StUnknown} } var warns, crits models.AlertKeys d, err := s.executeExpr(T, r, a, a.Depends) var deps expr.ResultSlice if err == nil { deps = filterDependencyResults(d) - crits, err = s.CheckExpr(T, r, a, a.Crit, StCritical, nil) + crits, err = s.CheckExpr(T, r, a, a.Crit, models.StCritical, nil) if err == nil { - warns, err = s.CheckExpr(T, r, a, a.Warn, StWarning, crits) + warns, err = s.CheckExpr(T, r, a, a.Warn, models.StWarning, crits) } } unevalCount, unknownCount := markDependenciesUnevaluated(r.Events, deps, a.Name) @@ -535,9 +506,9 @@ func (s *Schedule) CheckAlert(T miniprofiler.Timer, r *RunHistory, a *conf.Alert slog.Infof("check alert %v done (%s): %v crits, %v warns, %v unevaluated, %v unknown", a.Name, time.Since(start), len(crits), len(warns), unevalCount, unknownCount) } -func removeUnknownEvents(evs map[models.AlertKey]*Event, alert string) { +func removeUnknownEvents(evs map[models.AlertKey]*models.Event, alert string) { for k, v := range evs { - if v.Status == StUnknown && k.Name() == alert { + if v.Status == models.StUnknown && k.Name() == alert { delete(evs, k) } } @@ -565,7 +536,7 @@ func filterDependencyResults(results *expr.Results) expr.ResultSlice { return filtered } -func markDependenciesUnevaluated(events map[models.AlertKey]*Event, deps expr.ResultSlice, alert string) (unevalCount, unknownCount int) { +func markDependenciesUnevaluated(events map[models.AlertKey]*models.Event, deps expr.ResultSlice, alert string) (unevalCount, unknownCount int) { for ak, ev := range events { if ak.Name() != alert { continue @@ -575,7 +546,7 @@ func markDependenciesUnevaluated(events map[models.AlertKey]*Event, deps expr.Re ev.Unevaluated = true unevalCount++ } - if ev.Status == StUnknown { + if ev.Status == models.StUnknown { unknownCount++ } } @@ -587,11 +558,11 @@ func (s *Schedule) executeExpr(T miniprofiler.Timer, rh 
*RunHistory, a *conf.Ale if e == nil { return nil, nil } - results, _, err := e.Execute(rh.Context, rh.GraphiteContext, rh.Logstash, rh.Elastic, rh.InfluxConfig, rh.Cache, T, rh.Start, 0, a.UnjoinedOK, s.Search, s.Conf.AlertSquelched(a), rh) + results, _, err := e.Execute(rh.Context, rh.GraphiteContext, rh.Logstash, rh.Elastic, rh.InfluxConfig, rh.Cache, T, rh.Start, 0, a.UnjoinedOK, s.Search, s.Conf.AlertSquelched(a), s) return results, err } -func (s *Schedule) CheckExpr(T miniprofiler.Timer, rh *RunHistory, a *conf.Alert, e *expr.Expr, checkStatus Status, ignore models.AlertKeys) (alerts models.AlertKeys, err error) { +func (s *Schedule) CheckExpr(T miniprofiler.Timer, rh *RunHistory, a *conf.Alert, e *expr.Expr, checkStatus models.Status, ignore models.AlertKeys) (alerts models.AlertKeys, err error) { if e == nil { return } @@ -618,37 +589,33 @@ Loop: } } var n float64 - switch v := r.Value.(type) { - case expr.Number: - n = float64(v) - case expr.Scalar: - n = float64(v) - default: - err = fmt.Errorf("expected number or scalar") + n, err = valueToFloat(r.Value) + if err != nil { return } event := rh.Events[ak] if event == nil { - event = new(Event) + event = new(models.Event) rh.Events[ak] = event } - result := &Result{ - Result: r, - Expr: e.String(), + result := &models.Result{ + Computations: r.Computations, + Value: models.Float(n), + Expr: e.String(), } switch checkStatus { - case StWarning: + case models.StWarning: event.Warn = result - case StCritical: + case models.StCritical: event.Crit = result } status := checkStatus if math.IsNaN(n) { status = checkStatus } else if n == 0 { - status = StNormal + status = models.StNormal } - if status != StNormal { + if status != models.StNormal { alerts = append(alerts, ak) } if status > rh.Events[ak].Status { @@ -657,3 +624,16 @@ Loop: } return } + +func valueToFloat(val expr.Value) (float64, error) { + var n float64 + switch v := val.(type) { + case expr.Number: + n = float64(v) + case expr.Scalar: + n = 
float64(v) + default: + return 0, fmt.Errorf("expected number or scalar") + } + return n, nil +} diff --git a/cmd/bosun/sched/check_test.go b/cmd/bosun/sched/check_test.go index 271218a61a..f9c68c57e9 100644 --- a/cmd/bosun/sched/check_test.go +++ b/cmd/bosun/sched/check_test.go @@ -15,7 +15,7 @@ import ( ) func TestCheckFlapping(t *testing.T) { - + defer setup()() c, err := conf.New("", ` template t { subject = 1 @@ -37,8 +37,8 @@ func TestCheckFlapping(t *testing.T) { s, _ := initSched(c) ak := models.NewAlertKey("a", nil) r := &RunHistory{ - Events: map[models.AlertKey]*Event{ - ak: {Status: StWarning}, + Events: map[models.AlertKey]*models.Event{ + ak: {Status: models.StWarning}, }, } hasNots := func() bool { @@ -58,17 +58,17 @@ func TestCheckFlapping(t *testing.T) { } type stateTransition struct { - S Status + S models.Status ExpectNots bool } transitions := []stateTransition{ - {StWarning, true}, - {StNormal, false}, - {StWarning, false}, - {StNormal, false}, - {StCritical, true}, - {StWarning, false}, - {StCritical, false}, + {models.StWarning, true}, + {models.StNormal, false}, + {models.StWarning, false}, + {models.StNormal, false}, + {models.StCritical, true}, + {models.StWarning, false}, + {models.StCritical, false}, } for i, trans := range transitions { @@ -81,13 +81,13 @@ func TestCheckFlapping(t *testing.T) { t.Fatalf("expected notifications for transition %d.", i) } } - r.Events[ak].Status = StNormal + r.Events[ak].Status = models.StNormal s.RunHistory(r) // Close the alert, so it should notify next time. 
- if err := s.Action("", "", ActionClose, ak); err != nil { + if err := s.Action("", "", models.ActionClose, ak); err != nil { t.Fatal(err) } - r.Events[ak].Status = StWarning + r.Events[ak].Status = models.StWarning s.RunHistory(r) if !hasNots() { t.Fatal("expected notification") @@ -95,7 +95,7 @@ func TestCheckFlapping(t *testing.T) { } func TestCheckSilence(t *testing.T) { - + defer setup()() done := make(chan bool, 1) ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { done <- true @@ -141,6 +141,7 @@ func TestCheckSilence(t *testing.T) { } func TestIncidentIds(t *testing.T) { + defer setup()() c, err := conf.New("", ` alert a { crit = 1 @@ -152,42 +153,43 @@ func TestIncidentIds(t *testing.T) { s, _ := initSched(c) ak := models.NewAlertKey("a", nil) r := &RunHistory{ - Events: map[models.AlertKey]*Event{ - ak: {Status: StWarning}, + Events: map[models.AlertKey]*models.Event{ + ak: {Status: models.StWarning}, }, } - expect := func(id uint64) { - if s.status[ak].Last().IncidentId != id { - t.Fatalf("Expeted incident id %d. Got %d.", id, s.status[ak].Last().IncidentId) + expect := func(id int64) { + incident, err := s.DataAccess.State().GetLatestIncident(ak) + if err != nil { + t.Fatal(err) + } + if incident.Id != id { + t.Fatalf("Expeted incident id %d. 
Got %d.", id, incident.Id) } } s.RunHistory(r) expect(1) - r.Events[ak].Status = StNormal - r.Events[ak].IncidentId = 0 + r.Events[ak].Status = models.StNormal s.RunHistory(r) expect(1) - r.Events[ak].Status = StWarning - r.Events[ak].IncidentId = 0 + r.Events[ak].Status = models.StWarning s.RunHistory(r) expect(1) - r.Events[ak].Status = StNormal - r.Events[ak].IncidentId = 0 + r.Events[ak].Status = models.StNormal s.RunHistory(r) - err = s.Action("", "", ActionClose, ak) + err = s.Action("", "", models.ActionClose, ak) if err != nil { t.Fatal(err) } - r.Events[ak].Status = StWarning - r.Events[ak].IncidentId = 0 + r.Events[ak].Status = models.StWarning s.RunHistory(r) expect(2) } func TestCheckNotify(t *testing.T) { + defer setup()() nc := make(chan string) ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { b, _ := ioutil.ReadAll(r.Body) @@ -231,6 +233,7 @@ func TestCheckNotify(t *testing.T) { } func TestCheckNotifyUnknown(t *testing.T) { + defer setup()() nc := make(chan string, 1) ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { b, _ := ioutil.ReadAll(r.Body) @@ -264,9 +267,9 @@ func TestCheckNotifyUnknown(t *testing.T) { t.Fatal(err) } r := &RunHistory{ - Events: map[models.AlertKey]*Event{ - models.NewAlertKey("a", opentsdb.TagSet{"h": "x"}): {Status: StUnknown}, - models.NewAlertKey("a", opentsdb.TagSet{"h": "y"}): {Status: StUnknown}, + Events: map[models.AlertKey]*models.Event{ + models.NewAlertKey("a", opentsdb.TagSet{"h": "x"}): {Status: models.StUnknown}, + models.NewAlertKey("a", opentsdb.TagSet{"h": "y"}): {Status: models.StUnknown}, }, } s.RunHistory(r) @@ -294,6 +297,7 @@ Loop: // TestCheckNotifyUnknownDefault tests the default unknownTemplate. 
func TestCheckNotifyUnknownDefault(t *testing.T) { + defer setup()() nc := make(chan string, 1) ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { b, _ := ioutil.ReadAll(r.Body) @@ -326,9 +330,9 @@ func TestCheckNotifyUnknownDefault(t *testing.T) { t.Fatal(err) } r := &RunHistory{ - Events: map[models.AlertKey]*Event{ - models.NewAlertKey("a", opentsdb.TagSet{"h": "x"}): {Status: StUnknown}, - models.NewAlertKey("a", opentsdb.TagSet{"h": "y"}): {Status: StUnknown}, + Events: map[models.AlertKey]*models.Event{ + models.NewAlertKey("a", opentsdb.TagSet{"h": "x"}): {Status: models.StUnknown}, + models.NewAlertKey("a", opentsdb.TagSet{"h": "y"}): {Status: models.StUnknown}, }, } s.RunHistory(r) @@ -355,6 +359,7 @@ Loop: } func TestCheckNotifyLog(t *testing.T) { + defer setup()() nc := make(chan string, 1) ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { b, _ := ioutil.ReadAll(r.Body) @@ -417,8 +422,12 @@ Loop: if !gotB { t.Errorf("didn't get expected b") } - for ak, st := range s.status { - switch ak { + status, err := s.DataAccess.State().GetAllOpenIncidents() + if err != nil { + t.Fatal(err) + } + for _, st := range status { + switch st.AlertKey { case "a{}": if !st.Open { t.Errorf("expected a to be open") @@ -428,56 +437,7 @@ Loop: t.Errorf("expected b to be closed") } default: - t.Errorf("unexpected alert key %s", ak) + t.Errorf("unexpected alert key %s", st.AlertKey) } } } - -// TestCheckCritUnknownEmpty checks that if an alert goes normal -> crit -> -// unknown, it's body and subject are empty. This is because we should not -// keep around the crit template renders if we are unknown. 
-func TestCheckCritUnknownEmpty(t *testing.T) { - c, err := conf.New("", ` - template t { - subject = 1 - body = 2 - } - alert a { - crit = 1 - template = t - } - `) - if err != nil { - t.Fatal(err) - } - s, _ := initSched(c) - ak := models.NewAlertKey("a", nil) - r := &RunHistory{ - Events: map[models.AlertKey]*Event{ - ak: {Status: StNormal}, - }, - } - verify := func(empty bool) { - st := s.GetStatus(ak) - if empty { - if st.Body != "" || st.Subject != "" { - t.Fatalf("expected empty body and subject") - } - } else { - if st.Body != "2" || st.Subject != "1" { - t.Fatalf("expected body and subject") - } - } - } - s.RunHistory(r) - verify(true) - r.Events[ak].Status = StCritical - s.RunHistory(r) - verify(false) - r.Events[ak].Status = StUnknown - s.RunHistory(r) - verify(true) - r.Events[ak].Status = StNormal - s.RunHistory(r) - verify(true) -} diff --git a/cmd/bosun/sched/depends_test.go b/cmd/bosun/sched/depends_test.go index 11e31a0262..01d5c79591 100644 --- a/cmd/bosun/sched/depends_test.go +++ b/cmd/bosun/sched/depends_test.go @@ -11,6 +11,7 @@ import ( // Crit returns {a=b},{a=c}, but {a=b} is ignored by dependency expression. // Result should be {a=c} only. func TestDependency_Simple(t *testing.T) { + defer setup()() testSched(t, &schedTest{ conf: `alert a { crit = avg(q("avg:c{a=*}", "5m", "")) > 0 @@ -50,6 +51,7 @@ func TestDependency_Simple(t *testing.T) { // Crit and depends don't have same tag sets. 
func TestDependency_Overlap(t *testing.T) { + defer setup()() testSched(t, &schedTest{ conf: `alert a { crit = avg(q("avg:c{a=*,b=*}", "5m", "")) > 0 @@ -88,6 +90,7 @@ func TestDependency_Overlap(t *testing.T) { } func TestDependency_OtherAlert(t *testing.T) { + defer setup()() testSched(t, &schedTest{ conf: `alert a { crit = avg(q("avg:a{host=*,cpu=*}", "5m", "")) > 0 @@ -129,9 +132,7 @@ func TestDependency_OtherAlert(t *testing.T) { } func TestDependency_OtherAlert_Unknown(t *testing.T) { - state := NewStatus("a{host=ny02}") - state.Touched = queryTime.Add(-10 * time.Minute) - state.Append(&Event{Status: StNormal, Time: state.Touched}) + defer setup()() testSched(t, &schedTest{ conf: `alert a { @@ -169,25 +170,18 @@ func TestDependency_OtherAlert_Unknown(t *testing.T) { schedState{"a{host=ny02}", "unknown"}: true, schedState{"os.cpu{host=ny01}", "warning"}: true, }, - previous: map[models.AlertKey]*State{ - "a{host=ny02}": state, + touched: map[models.AlertKey]time.Time{ + "a{host=ny02}": queryTime.Add(-10 * time.Minute), }, }) } func TestDependency_OtherAlert_UnknownChain(t *testing.T) { + defer setup()() ab := models.AlertKey("a{host=b}") bb := models.AlertKey("b{host=b}") cb := models.AlertKey("c{host=b}") - as := NewStatus(ab) - as.Touched = queryTime.Add(-time.Hour) - as.Append(&Event{Status: StNormal}) - bs := NewStatus(ab) - bs.Touched = queryTime - bs.Append(&Event{Status: StNormal}) - cs := NewStatus(ab) - cs.Touched = queryTime - cs.Append(&Event{Status: StNormal}) + s := testSched(t, &schedTest{ conf: ` alert a { @@ -215,28 +209,37 @@ func TestDependency_OtherAlert_UnknownChain(t *testing.T) { state: map[schedState]bool{ schedState{string(ab), "unknown"}: true, }, - previous: map[models.AlertKey]*State{ - ab: as, - bb: bs, - cb: cs, + touched: map[models.AlertKey]time.Time{ + ab: queryTime.Add(-time.Hour), + bb: queryTime, + cb: queryTime, }, }) - if s.status[ab].Unevaluated { - t.Errorf("should not be unevaluated: %s", ab) - } - if 
!s.status[bb].Unevaluated { - t.Errorf("should be unevaluated: %s", bb) - } - if !s.status[cb].Unevaluated { - t.Errorf("should be unevaluated: %s", cb) + check := func(ak models.AlertKey, expec bool) { + _, uneval, err := s.DataAccess.State().GetUnknownAndUnevalAlertKeys(ak.Name()) + if err != nil { + t.Fatal(err) + } + for _, ak2 := range uneval { + if ak2 == ak { + if !expec { + t.Fatalf("Should not be unevaluated: %s", ak) + } else { + return + } + } + } + if expec { + t.Fatalf("Should be unevaluated: %s", ak) + } } + check(ab, false) + check(bb, true) + check(cb, true) } func TestDependency_Blocks_Unknown(t *testing.T) { - state := NewStatus("a{host=ny01}") - state.Touched = queryTime.Add(-10 * time.Minute) - state.Append(&Event{Status: StNormal, Time: state.Touched}) - + defer setup()() testSched(t, &schedTest{ conf: `alert a { depends = avg(q("avg:b{host=*}", "5m", "")) > 0 @@ -255,24 +258,14 @@ func TestDependency_Blocks_Unknown(t *testing.T) { }, }, state: map[schedState]bool{}, - previous: map[models.AlertKey]*State{ - "a{host=ny01}": state, + touched: map[models.AlertKey]time.Time{ + "a{host=ny01}": queryTime.Add(-10 * time.Minute), }, }) } func TestDependency_AlertFunctionHasNoResults(t *testing.T) { - pingState := NewStatus("a{host=ny01,source=bosun01}") - pingState.Touched = queryTime.Add(-5 * time.Minute) - pingState.Append(&Event{Status: StNormal, Time: pingState.Touched}) - - scollState := NewStatus("b{host=ny01}") - scollState.Touched = queryTime.Add(-10 * time.Minute) - scollState.Append(&Event{Status: StNormal, Time: scollState.Touched}) - - cpuState := NewStatus("c{host=ny01}") - cpuState.Touched = queryTime.Add(-10 * time.Minute) - cpuState.Append(&Event{Status: StWarning, Time: cpuState.Touched}) + defer setup()() testSched(t, &schedTest{ conf: ` @@ -304,10 +297,10 @@ alert c { state: map[schedState]bool{ schedState{"a{host=ny01,source=bosun01}", "warning"}: true, }, - previous: map[models.AlertKey]*State{ - "a{host=ny01,source=bosun01}": 
pingState, - "b{host=ny01}": scollState, - "c{host=ny01}": cpuState, + touched: map[models.AlertKey]time.Time{ + "a{host=ny01,source=bosun01}": queryTime.Add(-5 * time.Minute), + "b{host=ny01}": queryTime.Add(-10 * time.Minute), + "c{host=ny01}": queryTime.Add(-10 * time.Minute), }, }) } diff --git a/cmd/bosun/sched/filter.go b/cmd/bosun/sched/filter.go index 8e2dc48610..c53b7da088 100644 --- a/cmd/bosun/sched/filter.go +++ b/cmd/bosun/sched/filter.go @@ -5,16 +5,17 @@ import ( "strings" "bosun.org/cmd/bosun/conf" + "bosun.org/models" ) -func makeFilter(filter string) (func(*conf.Conf, *conf.Alert, *State) bool, error) { +func makeFilter(filter string) (func(*conf.Conf, *conf.Alert, *models.IncidentState) bool, error) { fields := strings.Fields(filter) if len(fields) == 0 { - return func(c *conf.Conf, a *conf.Alert, s *State) bool { + return func(c *conf.Conf, a *conf.Alert, s *models.IncidentState) bool { return true }, nil } - fs := make(map[string][]func(c *conf.Conf, a *conf.Alert, s *State) bool) + fs := make(map[string][]func(c *conf.Conf, a *conf.Alert, s *models.IncidentState) bool) for _, f := range fields { negate := strings.HasPrefix(f, "!") if negate { @@ -29,8 +30,8 @@ func makeFilter(filter string) (func(*conf.Conf, *conf.Alert, *State) bool, erro if len(sp) == 1 { key = "" } - add := func(fn func(c *conf.Conf, a *conf.Alert, s *State) bool) { - fs[key] = append(fs[key], func(c *conf.Conf, a *conf.Alert, s *State) bool { + add := func(fn func(c *conf.Conf, a *conf.Alert, s *models.IncidentState) bool) { + fs[key] = append(fs[key], func(c *conf.Conf, a *conf.Alert, s *models.IncidentState) bool { v := fn(c, a, s) if negate { v = !v @@ -40,8 +41,8 @@ func makeFilter(filter string) (func(*conf.Conf, *conf.Alert, *State) bool, erro } switch key { case "": - add(func(c *conf.Conf, a *conf.Alert, s *State) bool { - ak := s.AlertKey() + add(func(c *conf.Conf, a *conf.Alert, s *models.IncidentState) bool { + ak := s.AlertKey return 
strings.Contains(string(ak), value) || strings.Contains(string(s.Subject), value) }) case "ack": @@ -54,14 +55,14 @@ func makeFilter(filter string) (func(*conf.Conf, *conf.Alert, *State) bool, erro default: return nil, fmt.Errorf("unknown %s value: %s", key, value) } - add(func(c *conf.Conf, a *conf.Alert, s *State) bool { + add(func(c *conf.Conf, a *conf.Alert, s *models.IncidentState) bool { return s.NeedAck != v }) case "notify": - add(func(c *conf.Conf, a *conf.Alert, s *State) bool { + add(func(c *conf.Conf, a *conf.Alert, s *models.IncidentState) bool { r := false f := func(ns *conf.Notifications) { - for k := range ns.Get(c, s.Group) { + for k := range ns.Get(c, s.AlertKey.Group()) { if strings.Contains(k, value) { r = true break @@ -73,27 +74,27 @@ func makeFilter(filter string) (func(*conf.Conf, *conf.Alert, *State) bool, erro return r }) case "status": - var v Status + var v models.Status switch value { case "normal": - v = StNormal + v = models.StNormal case "warning": - v = StWarning + v = models.StWarning case "critical": - v = StCritical + v = models.StCritical case "unknown": - v = StUnknown + v = models.StUnknown default: return nil, fmt.Errorf("unknown %s value: %s", key, value) } - add(func(c *conf.Conf, a *conf.Alert, s *State) bool { - return s.AbnormalStatus() == v + add(func(c *conf.Conf, a *conf.Alert, s *models.IncidentState) bool { + return s.LastAbnormalStatus == v }) default: return nil, fmt.Errorf("unknown filter key: %s", key) } } - return func(c *conf.Conf, a *conf.Alert, s *State) bool { + return func(c *conf.Conf, a *conf.Alert, s *models.IncidentState) bool { for _, ors := range fs { match := false for _, f := range ors { diff --git a/cmd/bosun/sched/grouping_test.go b/cmd/bosun/sched/grouping_test.go index 92f625c245..05e886ef37 100644 --- a/cmd/bosun/sched/grouping_test.go +++ b/cmd/bosun/sched/grouping_test.go @@ -8,7 +8,8 @@ import ( ) func TestGroupSets_Single(t *testing.T) { - states := States{"a{host=foo}": &State{Alert: "a", 
Group: opentsdb.TagSet{"host": "foo"}, Subject: "aaa"}} + ak := models.AlertKey("a{host=foo}") + states := States{ak: &models.IncidentState{AlertKey: ak, Alert: "a", Tags: opentsdb.TagSet{"host": "foo"}.Tags(), Subject: "aaa"}} groups := states.GroupSets(5) if len(groups) != 1 { t.Fatalf("Expected 1 group. Found %d.", len(groups)) @@ -31,7 +32,7 @@ func TestGroupSets_AboveAndBelow(t *testing.T) { if err != nil { t.Fatal(err) } - states[ak] = &State{Alert: ak.Name(), Group: ak.Group(), Subject: sub} + states[ak] = &models.IncidentState{AlertKey: models.AlertKey(a), Alert: ak.Name(), Tags: ak.Group().Tags(), Subject: sub} } groups := states.GroupSets(5) @@ -58,7 +59,7 @@ func TestGroupSets_ByAlert(t *testing.T) { if err != nil { t.Fatal(err) } - states[ak] = &State{Alert: ak.Name(), Group: ak.Group(), Subject: sub} + states[ak] = &models.IncidentState{AlertKey: models.AlertKey(a), Alert: ak.Name(), Tags: ak.Group().Tags(), Subject: sub} } groups := states.GroupSets(5) diff --git a/cmd/bosun/sched/host.go b/cmd/bosun/sched/host.go index 4cdafdd53d..8284a84e18 100644 --- a/cmd/bosun/sched/host.go +++ b/cmd/bosun/sched/host.go @@ -6,7 +6,6 @@ import ( "time" "bosun.org/metadata" - "bosun.org/models" "bosun.org/opentsdb" "bosun.org/slog" ) @@ -21,7 +20,10 @@ func (s *Schedule) Host(filter string) (map[string]*HostData, error) { for _, h := range allHosts { hosts[h] = newHostData() } - states := s.GetOpenStates() + states, err := s.GetOpenStates() + if err != nil { + return nil, err + } silences := s.Silenced() // These are all fetched by metric since that is how we store it in redis, // so this makes for the fastest response @@ -614,24 +616,24 @@ func statusString(val, goodVal int64, goodName, badName string) string { return badName } -func processHostIncidents(host *HostData, states States, silences map[models.AlertKey]models.Silence) { +func processHostIncidents(host *HostData, states States, silences SilenceTester) { for ak, state := range states { - if stateHost, ok 
:= state.Group["host"]; !ok { + if stateHost, ok := state.AlertKey.Group()["host"]; !ok { continue } else if stateHost != host.Name { continue } - _, silenced := silences[ak] + silenced := silences(ak) is := IncidentStatus{ - IncidentID: state.Last().IncidentId, + IncidentID: state.Id, Active: state.IsActive(), - AlertKey: state.AlertKey(), - Status: state.Status(), + AlertKey: state.AlertKey, + Status: state.CurrentStatus, StatusTime: state.Last().Time.Unix(), Subject: state.Subject, - Silenced: silenced, - LastAbnormalStatus: state.AbnormalStatus(), - LastAbnormalTime: state.AbnormalEvent().Time.Unix(), + Silenced: silenced != nil, + LastAbnormalStatus: state.LastAbnormalStatus, + LastAbnormalTime: state.LastAbnormalTime, NeedsAck: state.NeedAck, } host.OpenIncidents = append(host.OpenIncidents, is) diff --git a/cmd/bosun/sched/notification_test.go b/cmd/bosun/sched/notification_test.go index 086344a51d..0cf70e2610 100644 --- a/cmd/bosun/sched/notification_test.go +++ b/cmd/bosun/sched/notification_test.go @@ -7,7 +7,6 @@ import ( "bosun.org/cmd/bosun/conf" "bosun.org/models" - "bosun.org/opentsdb" ) func TestActionNotificationTemplates(t *testing.T) { @@ -18,15 +17,15 @@ func TestActionNotificationTemplates(t *testing.T) { } s, _ := initSched(c) data := &actionNotificationContext{} - data.ActionType = ActionAcknowledge + data.ActionType = models.ActionAcknowledge data.Message = "Bad things happened" data.User = "Batman" - data.States = []*State{ + data.States = []*models.IncidentState{ { - History: []Event{ + Id: 224, + Events: []models.Event{ { - Status: StCritical, - IncidentId: 224, + Status: models.StCritical, }, }, Alert: "xyz", @@ -53,6 +52,7 @@ func TestActionNotificationTemplates(t *testing.T) { } func TestActionNotificationGrouping(t *testing.T) { + defer setup()() c, err := conf.New("", ` template t{ subject = 2 @@ -114,14 +114,18 @@ func TestActionNotificationGrouping(t *testing.T) { bcrit := models.AlertKey("b{host=c}") cA := 
models.AlertKey("c{host=a}") cB := models.AlertKey("c{host=b}") - s.status[awarn] = &State{Alert: "a", Group: opentsdb.TagSet{"host": "w"}, History: []Event{{Status: StWarning}}} - s.status[acrit] = &State{Alert: "a", Group: opentsdb.TagSet{"host": "c"}, History: []Event{{Status: StCritical}}} - s.status[bwarn] = &State{Alert: "b", Group: opentsdb.TagSet{"host": "w"}, History: []Event{{Status: StWarning}}} - s.status[bcrit] = &State{Alert: "b", Group: opentsdb.TagSet{"host": "c"}, History: []Event{{Status: StCritical}}} - s.status[cA] = &State{Alert: "c", Group: opentsdb.TagSet{"host": "a"}, History: []Event{{Status: StWarning}}} - s.status[cB] = &State{Alert: "c", Group: opentsdb.TagSet{"host": "b"}, History: []Event{{Status: StWarning}}} + da := s.DataAccess.State() + da.UpdateIncidentState(&models.IncidentState{AlertKey: awarn, Alert: awarn.Name(), Tags: awarn.Group().Tags(), WorstStatus: models.StWarning, Events: []models.Event{{Status: models.StWarning}}}) + da.UpdateIncidentState(&models.IncidentState{AlertKey: acrit, Alert: acrit.Name(), Tags: acrit.Group().Tags(), WorstStatus: models.StCritical, Events: []models.Event{{Status: models.StCritical}}}) + da.UpdateIncidentState(&models.IncidentState{AlertKey: bwarn, Alert: bwarn.Name(), Tags: bwarn.Group().Tags(), WorstStatus: models.StWarning, Events: []models.Event{{Status: models.StWarning}}}) + da.UpdateIncidentState(&models.IncidentState{AlertKey: bcrit, Alert: bcrit.Name(), Tags: bcrit.Group().Tags(), WorstStatus: models.StCritical, Events: []models.Event{{Status: models.StCritical}}}) + da.UpdateIncidentState(&models.IncidentState{AlertKey: cA, Alert: cA.Name(), Tags: cA.Group().Tags(), WorstStatus: models.StWarning, Events: []models.Event{{Status: models.StWarning}}}) + da.UpdateIncidentState(&models.IncidentState{AlertKey: cB, Alert: cB.Name(), Tags: cB.Group().Tags(), WorstStatus: models.StWarning, Events: []models.Event{{Status: models.StWarning}}}) - groups := 
s.groupActionNotifications([]models.AlertKey{awarn, acrit, bwarn, bcrit, cA, cB}) + groups, err := s.groupActionNotifications([]models.AlertKey{awarn, acrit, bwarn, bcrit, cA, cB}) + if err != nil { + t.Fatal(err) + } expect := func(not string, aks ...models.AlertKey) { n := c.Notifications[not] actualAks, ok := groups[n] @@ -132,8 +136,8 @@ func TestActionNotificationGrouping(t *testing.T) { t.Fatalf("Count mismatch for grouping %s. %d != %d.", not, len(actualAks), len(aks)) } for i, ak := range aks { - if actualAks[i].AlertKey() != ak { - t.Fatalf("Alert key mismatch at index %d. %s != %s.", i, actualAks[i].AlertKey(), ak) + if actualAks[i].AlertKey != ak { + t.Fatalf("Alert key mismatch at index %d. %s != %s.", i, actualAks[i].AlertKey, ak) } } } diff --git a/cmd/bosun/sched/notify.go b/cmd/bosun/sched/notify.go index 9a0b330fb7..5107be0b3f 100644 --- a/cmd/bosun/sched/notify.go +++ b/cmd/bosun/sched/notify.go @@ -29,9 +29,9 @@ func (s *Schedule) dispatchNotifications() { } -func (s *Schedule) Notify(st *State, n *conf.Notification) { +func (s *Schedule) Notify(st *models.IncidentState, n *conf.Notification) { if s.pendingNotifications == nil { - s.pendingNotifications = make(map[*conf.Notification][]*State) + s.pendingNotifications = make(map[*conf.Notification][]*models.IncidentState) } s.pendingNotifications[n] = append(s.pendingNotifications[n], st) } @@ -45,7 +45,7 @@ func (s *Schedule) CheckNotifications() time.Duration { notifications := s.Notifications s.Notifications = nil for ak, ns := range notifications { - if _, present := silenced[ak]; present { + if si := silenced(ak); si != nil { slog.Infoln("silencing", ak) continue } @@ -59,16 +59,30 @@ func (s *Schedule) CheckNotifications() time.Duration { s.AddNotification(ak, n, t) continue } - st := s.status[ak] - if st == nil { - continue + + //If alert is currently unevaluated because of a dependency, + //simply requeue it until the dependency resolves itself. 
+ _, uneval := s.GetUnknownAndUnevaluatedAlertKeys(ak.Name()) + unevaluated := false + for _, un := range uneval { + if un == ak { + unevaluated = true + break + } } - // If alert is currently unevaluated because of a dependency, - // simply requeue it until the dependency resolves itself. - if st.Unevaluated { + if unevaluated { s.AddNotification(ak, n, t) continue } + st, err := s.DataAccess.State().GetLatestIncident(ak) + if err != nil { + slog.Error(err) + continue + } + if st == nil { + continue + } + s.Notify(st, n) } } @@ -91,16 +105,16 @@ func (s *Schedule) CheckNotifications() time.Duration { return timeout } -func (s *Schedule) sendNotifications(silenced map[models.AlertKey]models.Silence) { +func (s *Schedule) sendNotifications(silenced SilenceTester) { if s.Conf.Quiet { slog.Infoln("quiet mode prevented", len(s.pendingNotifications), "notifications") return } for n, states := range s.pendingNotifications { for _, st := range states { - ak := st.AlertKey() - _, silenced := silenced[ak] - if st.Last().Status == StUnknown { + ak := st.AlertKey + silenced := silenced(ak) != nil + if st.CurrentStatus == models.StUnknown { if silenced { slog.Infoln("silencing unknown", ak) continue @@ -124,7 +138,7 @@ func (s *Schedule) sendUnknownNotifications() { for n, states := range s.pendingUnknowns { ustates := make(States) for _, st := range states { - ustates[st.AlertKey()] = st + ustates[st.AlertKey] = st } var c int tHit := false @@ -148,7 +162,7 @@ func (s *Schedule) sendUnknownNotifications() { s.utnotify(oTSets, n) } } - s.pendingUnknowns = make(map[*conf.Notification][]*State) + s.pendingUnknowns = make(map[*conf.Notification][]*models.IncidentState) } var unknownMultiGroup = ttemplate.Must(ttemplate.New("unknownMultiGroup").Parse(` @@ -168,8 +182,8 @@ var unknownMultiGroup = ttemplate.Must(ttemplate.New("unknownMultiGroup").Parse( `)) -func (s *Schedule) notify(st *State, n *conf.Notification) { - n.Notify(st.Subject, st.Body, st.EmailSubject, st.EmailBody, 
s.Conf, string(st.AlertKey()), st.Attachments...) +func (s *Schedule) notify(st *models.IncidentState, n *conf.Notification) { + n.Notify(st.Subject, st.Body, st.EmailSubject, st.EmailBody, s.Conf, string(st.AlertKey), st.Attachments...) } // utnotify is single notification for N unknown groups into a single notification @@ -247,7 +261,7 @@ func init() { subject := `{{$first := index .States 0}}{{$count := len .States}} {{.User}} {{.ActionType}} {{if gt $count 1}} {{$count}} Alerts. -{{else}} Incident #{{$first.Last.IncidentId}} ({{$first.Subject}}) +{{else}} Incident #{{$first.Id}} ({{$first.Subject}}) {{end}}` body := `{{$count := len .States}}{{.User}} {{.ActionType}} {{$count}} alert{{if gt $count 1}}s{{end}}:
Message: {{.Message}}
@@ -255,7 +269,7 @@ func init() {