From 43cbe7cf48885bdda593ad3ea93a20bad4a26367 Mon Sep 17 00:00:00 2001 From: Anders Rejdebrant Date: Wed, 19 Aug 2015 13:25:10 +0200 Subject: [PATCH 1/4] Scollector features: "-" patterns, httpunit time_total + Freq setting. Negative filter patterns ("-" prefix), Added time_total metric to httpunit collector, Freq setting for HTTPUnit collectors. --- cmd/scollector/collectors/collectors.go | 35 +++++++++++++++++++++---- cmd/scollector/collectors/httpunit.go | 20 ++++++++------ cmd/scollector/conf/conf.go | 1 + cmd/scollector/doc.go | 3 ++- cmd/scollector/main.go | 5 ++-- 5 files changed, 48 insertions(+), 16 deletions(-) diff --git a/cmd/scollector/collectors/collectors.go b/cmd/scollector/collectors/collectors.go index 450e5e180a..1e38d31eb6 100644 --- a/cmd/scollector/collectors/collectors.go +++ b/cmd/scollector/collectors/collectors.go @@ -55,9 +55,11 @@ const ( osNetDropped = "os.net.dropped" osNetErrors = "os.net.errs" osNetMulticast = "os.net.packets_multicast" + osNetOperStatus = "os.net.oper_status" osNetPackets = "os.net.packets" osNetPauseFrames = "os.net.pause_frames" osNetUnicast = "os.net.packets_unicast" + osServiceRunning = "os.service.running" osSystemUptime = "os.system.uptime" osNetMTU = "os.net.mtu" osNetAdminStatus = "os.net.admin_status" @@ -135,6 +137,29 @@ func now() (t int64) { return } +func matchPattern(s string, patterns []string) bool { + for _, p := range patterns { + if !strings.HasPrefix(p, "-") { + if strings.Contains(s, p) { + return true + } + } + } + return false +} + +func matchInvertPattern(s string, patterns []string) bool { + for _, p := range patterns { + if strings.HasPrefix(p, "-") { + var np = p[1:] + if strings.Contains(s, np) { + return true + } + } + } + return false +} + // Search returns all collectors matching the pattern s. func Search(s []string) []Collector { if len(s) == 0 { @@ -142,11 +167,11 @@ func Search(s []string) []Collector { } var r []Collector for _, c := range collectors { - for _, p := range s { - if strings.Contains(c.Name(), p) { - r = append(r, c) - break - } + if matchInvertPattern(c.Name(), s) { + continue + } + if matchPattern(c.Name(), s) { + r = append(r, c) } } return r diff --git a/cmd/scollector/collectors/httpunit.go b/cmd/scollector/collectors/httpunit.go index 1101b3439c..2e63aab652 100644 --- a/cmd/scollector/collectors/httpunit.go +++ b/cmd/scollector/collectors/httpunit.go @@ -10,33 +10,34 @@ import ( "github.com/StackExchange/httpunit" ) -func HTTPUnitTOML(filename string) error { +func HTTPUnitTOML(filename string, freq time.Duration) error { var plans httpunit.Plans if _, err := toml.DecodeFile(filename, &plans); err != nil { return err } - HTTPUnitPlans(filename, &plans) + HTTPUnitPlans(filename, &plans, freq) return nil } -func HTTPUnitHiera(filename string) error { +func HTTPUnitHiera(filename string, freq time.Duration) error { plans, err := httpunit.ExtractHiera(filename) if err != nil { return err } - HTTPUnitPlans(filename, &httpunit.Plans{ - Plans: plans, - }) + HTTPUnitPlans(filename, &httpunit.Plans{Plans: plans}, freq) return nil } -func HTTPUnitPlans(name string, plans *httpunit.Plans) { +func HTTPUnitPlans(name string, plans *httpunit.Plans, freq time.Duration) { + if freq < time.Second { + freq = time.Minute * 5 + } collectors = append(collectors, &IntervalCollector{ F: func() (opentsdb.MultiDataPoint, error) { return cHTTPUnit(plans) }, name: fmt.Sprintf("c_httpunit_%s", name), - Interval: time.Minute * 5, + Interval: freq, }) } @@ -53,8 +54,10 @@ func cHTTPUnit(plans *httpunit.Plans) (opentsdb.MultiDataPoint, error) { "url_host": r.Case.URL.Host, "hc_test_case": r.Plan.Label, } + ms := r.Result.TimeTotal / time.Millisecond Add(&md, "hu.error", r.Result.Result != nil, tags, metadata.Gauge, metadata.Bool, descHTTPUnitError) Add(&md, "hu.socket_connected", r.Result.Connected, tags, metadata.Gauge, metadata.Bool, descHTTPUnitSocketConnected) + Add(&md, "hu.time_total", ms, tags, metadata.Gauge, metadata.MilliSecond, descHTTPUnitTotalTime) switch r.Case.URL.Scheme { case "http", "https": Add(&md, "hu.http.got_expected_code", r.Result.GotCode, tags, metadata.Gauge, metadata.Bool, descHTTPUnitExpectedCode) @@ -79,4 +82,5 @@ const ( descHTTPUnitExpectedRegex = "1 if the response matched expected regex, else 0." descHTTPUnitCertValid = "1 if the SSL certificate is valid, else 0." descHTTPUnitCertExpires = "Unix epoch time of the certificate expiration." + descHTTPUnitTotalTime = "Total time consumed by test case." ) diff --git a/cmd/scollector/conf/conf.go b/cmd/scollector/conf/conf.go index 22d46a2c98..a5ec89fd97 100644 --- a/cmd/scollector/conf/conf.go +++ b/cmd/scollector/conf/conf.go @@ -157,6 +157,7 @@ type ProcessDotNet struct { type HTTPUnit struct { TOML string Hiera string + Freq int } type Riak struct { diff --git a/cmd/scollector/doc.go b/cmd/scollector/doc.go index 13d440c829..94349f2012 100644 --- a/cmd/scollector/doc.go +++ b/cmd/scollector/doc.go @@ -214,13 +214,14 @@ ProcessDotNet. HTTPUnit (array of table, keys are TOML, Hiera): httpunit TOML and Hiera files to read and monitor. See https://github.com/StackExchange/httpunit for documentation about the toml file. TOML and Hiera may both be specified, -or just one. +or just one. Freq is collector frequency in seconds. [[HTTPUnit]] TOML = "/path/to/httpunit.toml" Hiera = "/path/to/listeners.json" [[HTTPUnit]] TOML = "/some/other.toml" + Freq = 15 Riak (array of table, keys are URL): Riak hosts to poll. diff --git a/cmd/scollector/main.go b/cmd/scollector/main.go index bd93e08226..b9d0c7304b 100644 --- a/cmd/scollector/main.go +++ b/cmd/scollector/main.go @@ -124,11 +124,12 @@ func main() { check(collectors.AddProcessDotNetConfig(p)) } for _, h := range conf.HTTPUnit { + freq := time.Second * time.Duration(h.Freq) if h.TOML != "" { - check(collectors.HTTPUnitTOML(h.TOML)) + check(collectors.HTTPUnitTOML(h.TOML, freq)) } if h.Hiera != "" { - check(collectors.HTTPUnitHiera(h.Hiera)) + check(collectors.HTTPUnitHiera(h.Hiera, freq)) } } for _, r := range conf.Riak { From 39ebce9c30fb570135b9f5a3eceef196e926ea72 Mon Sep 17 00:00:00 2001 From: Greg Bray Date: Mon, 29 Feb 2016 17:19:34 -0700 Subject: [PATCH 2/4] cmd/scollector: sort constants in collectors.go to try and prevent merge conflicts --- cmd/scollector/collectors/collectors.go | 33 ++++++++++++------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/cmd/scollector/collectors/collectors.go b/cmd/scollector/collectors/collectors.go index 1e38d31eb6..79eb89d9e6 100644 --- a/cmd/scollector/collectors/collectors.go +++ b/cmd/scollector/collectors/collectors.go @@ -30,6 +30,7 @@ type Collector interface { ApplyTagOverrides(opentsdb.TagSet) } +//These should be in alphabetical order to help prevent merge conflicts const ( osCPU = "os.cpu" osCPUClock = "os.cpu.clock" @@ -41,34 +42,32 @@ const ( osMemPctFree = "os.mem.percent_free" osMemTotal = "os.mem.total" osMemUsed = "os.mem.used" + osNetAdminStatus = "os.net.admin_status" osNetBondBroadcast = "os.net.bond.packets_broadcast" osNetBondBytes = "os.net.bond.bytes" osNetBondDropped = "os.net.bond.dropped" osNetBondErrors = "os.net.bond.errs" + osNetBondIfSpeed = "os.net.bond.ifspeed" osNetBondMulticast = "os.net.bond.packets_multicast" osNetBondPackets = "os.net.bond.packets" osNetBondUnicast = "os.net.bond.packets_unicast" - osNetBondIfSpeed = "os.net.bond.ifspeed" - osNetIfSpeed = "os.net.ifspeed" osNetBroadcast = "os.net.packets_broadcast" osNetBytes = "os.net.bytes" osNetDropped = "os.net.dropped" osNetErrors = "os.net.errs" + osNetIfSpeed = "os.net.ifspeed" + osNetMTU = "os.net.mtu" osNetMulticast = "os.net.packets_multicast" osNetOperStatus = "os.net.oper_status" osNetPackets = "os.net.packets" osNetPauseFrames = "os.net.pause_frames" osNetUnicast = "os.net.packets_unicast" - osServiceRunning = "os.service.running" - osSystemUptime = "os.system.uptime" - osNetMTU = "os.net.mtu" - osNetAdminStatus = "os.net.admin_status" - osNetOperStatus = "os.net.oper_status" - osServiceRunning = "os.service.running" - osProcCPU = "os.proc.cpu" osProcCount = "os.proc.count" + osProcCPU = "os.proc.cpu" osProcMemReal = "os.proc.mem.real" osProcMemVirtual = "os.proc.mem.virtual" + osServiceRunning = "os.service.running" + osSystemUptime = "os.system.uptime" ) const ( @@ -81,24 +80,24 @@ const ( osMemPctFreeDesc = "The percent of free memory. In Linux free memory includes memory used by buffers and cache." osMemTotalDesc = "Total amount, in bytes, of physical memory available to the operating system." osMemUsedDesc = "The amount of used memory. In Linux this excludes memory used by buffers and cache." + osNetAdminStatusDesc = "The desired state of the interface. The testing(3) state indicates that no operational packets can be passed. When a managed system initializes, all interfaces start with ifAdminStatus in the down(2) state. As a result of either explicit management action or per configuration information retained by the managed system, ifAdminStatus is then changed to either the up(1) or testing(3) states (or remains in the down(2) state)." osNetBroadcastDesc = "The rate at which broadcast packets are sent or received on the network interface." osNetBytesDesc = "The rate at which bytes are sent or received over the network interface." osNetDroppedDesc = "The number of packets that were chosen to be discarded even though no errors had been detected to prevent transmission." osNetErrorsDesc = "The number of packets that could not be transmitted because of errors." - osNetMulticastDesc = "The rate at which multicast packets are sent or received on the network interface." - osNetPacketsDesc = "The rate at which packets are sent or received on the network interface." - osNetUnicastDesc = "The rate at which unicast packets are sent or received on the network interface." osNetIfSpeedDesc = "The total link speed of the network interface in Megabits per second." - osNetPauseFrameDesc = "The rate of pause frames sent or recieved on the network interface. An overwhelmed network element can send a pause frame, which halts the transmission of the sender for a specified period of time." - osSystemUptimeDesc = "Seconds since last reboot." osNetMTUDesc = "The maximum transmission unit for the ethernet frame." - osNetAdminStatusDesc = "The desired state of the interface. The testing(3) state indicates that no operational packets can be passed. When a managed system initializes, all interfaces start with ifAdminStatus in the down(2) state. As a result of either explicit management action or per configuration information retained by the managed system, ifAdminStatus is then changed to either the up(1) or testing(3) states (or remains in the down(2) state)." + osNetMulticastDesc = "The rate at which multicast packets are sent or received on the network interface." osNetOperStatusDesc = "The current operational state of the interface. The testing(3) state indicates that no operational packets can be passed. If ifAdminStatus is down(2) then ifOperStatus should be down(2). If ifAdminStatus is changed to up(1) then ifOperStatus should change to up(1) if the interface is ready to transmit and receive network traffic; it should change to dormant(5) if the interface is waiting for external actions (such as a serial line waiting for an incoming connection); it should remain in the down(2) state if and only if there is a fault that prevents it from going to the up(1) state; it should remain in the notPresent(6) state if the interface has missing (typically, hardware) components." - osServiceRunningDesc = "1: active, 0: inactive" - osProcCPUDesc = "The summed percentage of CPU time used by processes with this name (0-100)." + osNetPacketsDesc = "The rate at which packets are sent or received on the network interface." + osNetPauseFrameDesc = "The rate of pause frames sent or recieved on the network interface. An overwhelmed network element can send a pause frame, which halts the transmission of the sender for a specified period of time." + osNetUnicastDesc = "The rate at which unicast packets are sent or received on the network interface." osProcCountDesc = "The number of processes running with this name." + osProcCPUDesc = "The summed percentage of CPU time used by processes with this name (0-100)." osProcMemRealDesc = "The total amount of real memory used by the processes with this name. For Linux this is RSS and in Windows it is the private working set." osProcMemVirtualDesc = "The total amount of virtual memory used by the processes with this name." + osServiceRunningDesc = "1: active, 0: inactive" + osSystemUptimeDesc = "Seconds since last reboot." ) var ( From 03705181af08854280f8c5626d485888c2ace28b Mon Sep 17 00:00:00 2001 From: Greg Bray Date: Mon, 29 Feb 2016 18:50:03 -0700 Subject: [PATCH 3/4] cmd/scollector: Use * in -f to include all non-excluded collectors also add a log message when metrics are being filtered since I spent 20 minutes trying to figure out why my -p and -f tests were not working closes #1263 --- cmd/scollector/collectors/collectors.go | 6 ++++-- cmd/scollector/doc.go | 13 ++++++++----- cmd/scollector/main.go | 1 + 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/cmd/scollector/collectors/collectors.go b/cmd/scollector/collectors/collectors.go index 79eb89d9e6..ea6f1031df 100644 --- a/cmd/scollector/collectors/collectors.go +++ b/cmd/scollector/collectors/collectors.go @@ -165,11 +165,13 @@ func Search(s []string) []Collector { return collectors } var r []Collector + sort.Strings(s) + i := sort.SearchStrings(s, "*") + IncludeAll := i < len(s) && s[i] == "*" for _, c := range collectors { if matchInvertPattern(c.Name(), s) { continue - } - if matchPattern(c.Name(), s) { + } else if IncludeAll || matchPattern(c.Name(), s) { r = append(r, c) } } diff --git a/cmd/scollector/doc.go b/cmd/scollector/doc.go index 94349f2012..c90d0925e9 100644 --- a/cmd/scollector/doc.go +++ b/cmd/scollector/doc.go @@ -22,8 +22,9 @@ The flags are: -h="" OpenTSDB or Bosun host. Overrides Host in conf file. -f="" - Filters collectors matching these terms, separated by - comma. Overrides Filter in conf file. + Only include collectors matching these comma separated terms. Prefix + with - to invert match and exclude collectors matching those terms. Use + *,-term,-anotherterm to include all collectors except excluded terms. -b=0 OpenTSDB batch size. Default is 500. -conf="" @@ -102,10 +103,12 @@ Default is 500. MaxQueueLen (integer): is the number of metrics keept internally. Default is 200000. -Filter (array of string): filters collectors matching these terms. +Filter (array of string): Only include collectors matching these terms. Prefix +with - to invert match and exclude collectors matching those terms. Use +*,-term,-anotherterm to include all collectors except excluded terms. -MetricFilters (array of string): filters metrics matching these regular -expressions. +MetricFilters (array of string): only send metrics matching these regular +expressions. Example ['^(win\.cpu|win\.system\..*)$', 'free'] IfaceExpr (string): Replaces the default regular expression for interface name matching on Linux. diff --git a/cmd/scollector/main.go b/cmd/scollector/main.go index b9d0c7304b..dae46ed715 100644 --- a/cmd/scollector/main.go +++ b/cmd/scollector/main.go @@ -100,6 +100,7 @@ func main() { } collectors.Init(conf) for _, r := range conf.MetricFilters { + slog.Infof("Adding MetricFilter: %v\n", r) check(collectors.AddMetricFilters(r)) } for _, rmq := range conf.RabbitMQ { From 51a692981696936c0f44a5d71082723c3c819b25 Mon Sep 17 00:00:00 2001 From: Greg Bray Date: Tue, 1 Mar 2016 11:14:46 -0700 Subject: [PATCH 4/4] cmd/scollector: change HTTPUnit.Freq to duration string --- cmd/scollector/collectors/httpunit.go | 3 --- cmd/scollector/conf/conf.go | 2 +- cmd/scollector/doc.go | 4 ++-- cmd/scollector/main.go | 14 +++++++++++++- 4 files changed, 16 insertions(+), 7 deletions(-) diff --git a/cmd/scollector/collectors/httpunit.go b/cmd/scollector/collectors/httpunit.go index 2e63aab652..5f426520c1 100644 --- a/cmd/scollector/collectors/httpunit.go +++ b/cmd/scollector/collectors/httpunit.go @@ -29,9 +29,6 @@ func HTTPUnitHiera(filename string, freq time.Duration) error { } func HTTPUnitPlans(name string, plans *httpunit.Plans, freq time.Duration) { - if freq < time.Second { - freq = time.Minute * 5 - } collectors = append(collectors, &IntervalCollector{ F: func() (opentsdb.MultiDataPoint, error) { return cHTTPUnit(plans) diff --git a/cmd/scollector/conf/conf.go b/cmd/scollector/conf/conf.go index a5ec89fd97..74a6c4b6f7 100644 --- a/cmd/scollector/conf/conf.go +++ b/cmd/scollector/conf/conf.go @@ -157,7 +157,7 @@ type ProcessDotNet struct { type HTTPUnit struct { TOML string Hiera string - Freq int + Freq string } type Riak struct { diff --git a/cmd/scollector/doc.go b/cmd/scollector/doc.go index c90d0925e9..56f7d127d8 100644 --- a/cmd/scollector/doc.go +++ b/cmd/scollector/doc.go @@ -217,14 +217,14 @@ ProcessDotNet. HTTPUnit (array of table, keys are TOML, Hiera): httpunit TOML and Hiera files to read and monitor. See https://github.com/StackExchange/httpunit for documentation about the toml file. TOML and Hiera may both be specified, -or just one. Freq is collector frequency in seconds. +or just one. Freq is collector frequency as a duration string (default 5m). [[HTTPUnit]] TOML = "/path/to/httpunit.toml" Hiera = "/path/to/listeners.json" [[HTTPUnit]] TOML = "/some/other.toml" - Freq = 15 + Freq = "30s" Riak (array of table, keys are URL): Riak hosts to poll. diff --git a/cmd/scollector/main.go b/cmd/scollector/main.go index dae46ed715..8c050c120c 100644 --- a/cmd/scollector/main.go +++ b/cmd/scollector/main.go @@ -125,7 +125,19 @@ func main() { check(collectors.AddProcessDotNetConfig(p)) } for _, h := range conf.HTTPUnit { - freq := time.Second * time.Duration(h.Freq) + var freq time.Duration + var parseerr error + if h.Freq == "" { + freq = time.Minute * 5 + } else { + freq, parseerr = time.ParseDuration(h.Freq) + if parseerr != nil { + slog.Fatal(parseerr) + } + if freq < time.Second { + slog.Fatalf("Invalid HTTPUnit frequency %s, cannot be less than 1 second.", h.Freq) + } + } if h.TOML != "" { check(collectors.HTTPUnitTOML(h.TOML, freq)) }