这是indexloc提供的服务,不要输入任何密码
Skip to content
This repository was archived by the owner on Feb 13, 2025. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions cmd/bosun/sched/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,10 @@ func init() {
metadata.AddMetricMeta(
"bosun.alerts.active_status", metadata.Gauge, metadata.Alert,
"The number of open alerts by active status.")
metadata.AddMetricMeta("alerts.acknowledgement_status_by_notification", metadata.Gauge, metadata.Alert,
"The number of alerts by acknowledgement status and notification. Does not reflect escalation chains.")
metadata.AddMetricMeta("alerts.oldest_unacked_by_notification", metadata.Gauge, metadata.Second,
"How old the oldest unacknowledged notification is by notification.. Does not reflect escalation chains.")
collect.AggregateMeta("bosun.template.render", metadata.MilliSecond, "The amount of time it takes to render the specified alert template.")
}

Expand Down Expand Up @@ -306,6 +310,8 @@ func (s *Schedule) CollectStates() {
severityCounts := make(map[string]map[string]int64)
abnormalCounts := make(map[string]map[string]int64)
ackStatusCounts := make(map[string]map[bool]int64)
ackByNotificationCounts := make(map[string]map[bool]int64)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The map[bool] here is still odd. I think I would rather see separate ackCounts and unackedCounts maps of type map[string]int.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe it follows the existing pattern though, no?

unAckOldestByNotification := make(map[string]time.Time)
activeStatusCounts := make(map[string]map[bool]int64)
// Initialize the Counts
for _, alert := range s.Conf.Alerts {
Expand All @@ -323,17 +329,70 @@ func (s *Schedule) CollectStates() {
ackStatusCounts[alert.Name][true] = 0
activeStatusCounts[alert.Name][true] = 0
}
for notificationName := range s.Conf.Notifications {
unAckOldestByNotification[notificationName] = time.Unix(1<<63-62135596801, 999999999)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wtf is this number?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Max value of time.Time, so I can loop and get the min. http://stackoverflow.com/a/32620397/107156

ackByNotificationCounts[notificationName] = make(map[bool]int64)
ackByNotificationCounts[notificationName][false] = 0
ackByNotificationCounts[notificationName][true] = 0
}
for _, state := range s.status {
if !state.Open {
continue
}
name := state.AlertKey().Name()
alertDef := s.Conf.Alerts[name]
nots := make(map[string]bool)
for name := range alertDef.WarnNotification.Get(s.Conf, state.Group) {
nots[name] = true
}
for name := range alertDef.CritNotification.Get(s.Conf, state.Group) {
nots[name] = true
}
incident, err := s.GetIncident(state.Last().IncidentId)
if err != nil {
slog.Errorln(err)
}
for notificationName := range nots {
ackByNotificationCounts[notificationName][state.NeedAck]++
if incident.Start.Before(unAckOldestByNotification[notificationName]) && state.NeedAck {
unAckOldestByNotification[notificationName] = incident.Start
}
}
severity := state.Status().String()
lastAbnormal := state.AbnormalStatus().String()
severityCounts[state.Alert][severity]++
abnormalCounts[state.Alert][lastAbnormal]++
ackStatusCounts[state.Alert][state.NeedAck]++
activeStatusCounts[state.Alert][state.IsActive()]++
}
for notification := range ackByNotificationCounts {
ts := opentsdb.TagSet{"notification": notification}
err := collect.Put("alerts.acknowledgement_status_by_notification",
ts.Copy().Merge(opentsdb.TagSet{"status": "unacknowledged"}),
ackByNotificationCounts[notification][true])
if err != nil {
slog.Errorln(err)
}
err = collect.Put("alerts.acknowledgement_status_by_notification",
ts.Copy().Merge(opentsdb.TagSet{"status": "acknowledged"}),
ackByNotificationCounts[notification][false])
if err != nil {
slog.Errorln(err)
}
}
for notification, timeStamp := range unAckOldestByNotification {
ts := opentsdb.TagSet{"notification": notification}
var ago time.Duration
if !timeStamp.Equal(time.Unix(1<<63-62135596801, 999999999)) {
ago = time.Now().UTC().Sub(timeStamp)
}
err := collect.Put("alerts.oldest_unacked_by_notification",
ts,
ago.Seconds())
if err != nil {
slog.Errorln(err)
}
}
for alertName := range severityCounts {
ts := opentsdb.TagSet{"alert": alertName}
// The tagset of the alert is not included because there is no way to
Expand Down
10 changes: 10 additions & 0 deletions cmd/bosun/sched/sched.go
Original file line number Diff line number Diff line change
Expand Up @@ -917,6 +917,16 @@ func (s *Schedule) GetIncidents(alert string, from, to time.Time) []*Incident {
return list
}

// GetIncident looks up a single incident by its id in s.Incidents.
//
// The lookup is performed while holding s.incidentLock. When an entry exists
// for id, the stored *Incident is returned with a nil error; otherwise the
// incident is nil and the error reports that the id was not found.
func (s *Schedule) GetIncident(id uint64) (*Incident, error) {
	s.incidentLock.Lock()
	defer s.incidentLock.Unlock()

	if found, present := s.Incidents[id]; present {
		return found, nil
	}
	return nil, fmt.Errorf("incident %d not found", id)
}

func (s *Schedule) GetIncidentEvents(id uint64) (*Incident, []Event, []Action, error) {
s.incidentLock.Lock()
incident, ok := s.Incidents[id]
Expand Down