Skip to content

Commit

Permalink
Merge pull request #242 from atc0005/i220-add-support-for-filtering-b…
Browse files Browse the repository at this point in the history
…y-entity-name

Add support for filtering via entity name
  • Loading branch information
atc0005 committed Jun 11, 2021
2 parents 92faf58 + a1952d9 commit 6d82171
Show file tree
Hide file tree
Showing 10 changed files with 400 additions and 55 deletions.
42 changes: 21 additions & 21 deletions README.md

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions cmd/check_vmware_alarms/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,8 @@ func main() {
triggeredAlarmFilters := vsphere.TriggeredAlarmFilters{
IncludedAlarmEntityTypes: cfg.IncludedAlarmEntityTypes,
ExcludedAlarmEntityTypes: cfg.ExcludedAlarmEntityTypes,
IncludedAlarmEntityNames: cfg.IncludedAlarmEntityNames,
ExcludedAlarmEntityNames: cfg.ExcludedAlarmEntityNames,
IncludedAlarmNames: cfg.IncludedAlarmNames,
ExcludedAlarmNames: cfg.ExcludedAlarmNames,
IncludedAlarmDescriptions: cfg.IncludedAlarmDescriptions,
Expand Down
261 changes: 259 additions & 2 deletions cmd/check_vmware_alarms/main_test.go

Large diffs are not rendered by default.

18 changes: 9 additions & 9 deletions contrib/nagios/etc/nagios-plugins/config/vmware-alarms.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,15 @@ define command{
# alarms which have been previously acknowledged.
define command{
command_name check_vmware_alarms_eval_acknowledged
command_line /usr/lib/nagios/plugins/check_vmware_alarms --server '$HOSTNAME$' --domain '$ARG1$' --username '$ARG2$' --password '$ARG3$' --include-type '$ARG4$' --eval-acknowledged --trust-cert --log-level info
command_line /usr/lib/nagios/plugins/check_vmware_alarms --server '$HOSTNAME$' --domain '$ARG1$' --username '$ARG2$' --password '$ARG3$' --include-entity-type '$ARG4$' --eval-acknowledged --trust-cert --log-level info
}

# Look at triggered alarms for specified managed object types (e.g., Datastore
# and VirtualMachine), across all detected datacenters, do not evaluate any
# triggered alarms which have been previously acknowledged.
define command{
command_name check_vmware_alarms_include_types
command_line /usr/lib/nagios/plugins/check_vmware_alarms --server '$HOSTNAME$' --domain '$ARG1$' --username '$ARG2$' --password '$ARG3$' --include-type '$ARG4$' --trust-cert --log-level info
command_line /usr/lib/nagios/plugins/check_vmware_alarms --server '$HOSTNAME$' --domain '$ARG1$' --username '$ARG2$' --password '$ARG3$' --include-entity-type '$ARG4$' --trust-cert --log-level info
}

# Look at triggered alarms for specified managed object types (e.g., Datastore
Expand All @@ -42,7 +42,7 @@ define command{
# resolved.
define command{
command_name check_vmware_alarms_include_types_eval_acknowledged
command_line /usr/lib/nagios/plugins/check_vmware_alarms --server '$HOSTNAME$' --domain '$ARG1$' --username '$ARG2$' --password '$ARG3$' --include-type '$ARG4$' --eval-acknowledged --trust-cert --log-level info
command_line /usr/lib/nagios/plugins/check_vmware_alarms --server '$HOSTNAME$' --domain '$ARG1$' --username '$ARG2$' --password '$ARG3$' --include-entity-type '$ARG4$' --eval-acknowledged --trust-cert --log-level info
}

# Look at triggered alarms associated with managed objects which do not match
Expand All @@ -51,7 +51,7 @@ define command{
# triggered alarms which have been previously acknowledged.
define command{
command_name check_vmware_alarms_exclude_types
command_line /usr/lib/nagios/plugins/check_vmware_alarms --server '$HOSTNAME$' --domain '$ARG1$' --username '$ARG2$' --password '$ARG3$' --exclude-type '$ARG4$' --trust-cert --log-level info
command_line /usr/lib/nagios/plugins/check_vmware_alarms --server '$HOSTNAME$' --domain '$ARG1$' --username '$ARG2$' --password '$ARG3$' --exclude-entity-type '$ARG4$' --trust-cert --log-level info
}

# Look at triggered alarms associated with managed objects which do not match
Expand All @@ -61,7 +61,7 @@ define command{
# resolved.
define command{
command_name check_vmware_alarms_exclude_types_eval_acknowledged
command_line /usr/lib/nagios/plugins/check_vmware_alarms --server '$HOSTNAME$' --domain '$ARG1$' --username '$ARG2$' --password '$ARG3$' --exclude-type '$ARG4$' --eval-acknowledged --trust-cert --log-level info
command_line /usr/lib/nagios/plugins/check_vmware_alarms --server '$HOSTNAME$' --domain '$ARG1$' --username '$ARG2$' --password '$ARG3$' --exclude-entity-type '$ARG4$' --eval-acknowledged --trust-cert --log-level info
}

# Look at triggered alarms whose name matches the specified list of alarm name
Expand Down Expand Up @@ -162,15 +162,15 @@ define command{
# triggered alarms which have been previously acknowledged.
define command{
command_name check_vmware_alarms_specific_dc_include_types
command_line /usr/lib/nagios/plugins/check_vmware_alarms --server '$HOSTNAME$' --domain '$ARG1$' --username '$ARG2$' --password '$ARG3$' --include-type '$ARG4$' --dc-name '$ARG5$' --trust-cert --log-level info
command_line /usr/lib/nagios/plugins/check_vmware_alarms --server '$HOSTNAME$' --domain '$ARG1$' --username '$ARG2$' --password '$ARG3$' --include-entity-type '$ARG4$' --dc-name '$ARG5$' --trust-cert --log-level info
}

# Look at triggered alarms for specified managed object types (e.g., Datastore
# and VirtualMachine), within specified datacenters. Evaluate any triggered
# alarms which have been previously acknowledged, but not yet resolved.
define command{
command_name check_vmware_alarms_specific_dc_include_types_eval_acknowledged
command_line /usr/lib/nagios/plugins/check_vmware_alarms --server '$HOSTNAME$' --domain '$ARG1$' --username '$ARG2$' --password '$ARG3$' --include-type '$ARG4$' --dc-name '$ARG5$' --eval-acknowledged --trust-cert --log-level info
command_line /usr/lib/nagios/plugins/check_vmware_alarms --server '$HOSTNAME$' --domain '$ARG1$' --username '$ARG2$' --password '$ARG3$' --include-entity-type '$ARG4$' --dc-name '$ARG5$' --eval-acknowledged --trust-cert --log-level info
}

# Look at triggered alarms associated with managed objects which do not match
Expand All @@ -179,7 +179,7 @@ define command{
# alarms which have been previously acknowledged.
define command{
command_name check_vmware_alarms_specific_dc_exclude_types
command_line /usr/lib/nagios/plugins/check_vmware_alarms --server '$HOSTNAME$' --domain '$ARG1$' --username '$ARG2$' --password '$ARG3$' --exclude-type '$ARG4$' --dc-name '$ARG5$' --trust-cert --log-level info
command_line /usr/lib/nagios/plugins/check_vmware_alarms --server '$HOSTNAME$' --domain '$ARG1$' --username '$ARG2$' --password '$ARG3$' --exclude-entity-type '$ARG4$' --dc-name '$ARG5$' --trust-cert --log-level info
}

# Look at triggered alarms associated with managed objects which do not match
Expand All @@ -188,7 +188,7 @@ define command{
# which have been previously acknowledged, but not yet resolved.
define command{
command_name check_vmware_alarms_specific_dc_exclude_types_eval_acknowledged
command_line /usr/lib/nagios/plugins/check_vmware_alarms --server '$HOSTNAME$' --domain '$ARG1$' --username '$ARG2$' --password '$ARG3$' --exclude-type '$ARG4$' --dc-name '$ARG5$' --eval-acknowledged --trust-cert --log-level info
command_line /usr/lib/nagios/plugins/check_vmware_alarms --server '$HOSTNAME$' --domain '$ARG1$' --username '$ARG2$' --password '$ARG3$' --exclude-entity-type '$ARG4$' --dc-name '$ARG5$' --eval-acknowledged --trust-cert --log-level info
}

# Look at triggered alarms whose name matches the specified list of alarm name
Expand Down
13 changes: 13 additions & 0 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,19 @@ type Config struct {
// inclusions.
ExcludedAlarmEntityTypes multiValueStringFlag

// IncludedAlarmEntityNames is a list of entity names for Alarms that will
// be explicitly included for evaluation. Unless included by later
// filtering logic, unmatched Triggered Alarms will be excluded from final
// evaluation. Explicitly included Triggered Alarms are still subject to
// permanent exclusion if an explicit exclusion match is made.
IncludedAlarmEntityNames multiValueStringFlag

// ExcludedAlarmEntityNames is a list of entity names for Alarms that will
// be explicitly excluded from further evaluation by other stages in the
// filtering pipeline. Explicit exclusions have precedence over explicit
// inclusions.
ExcludedAlarmEntityNames multiValueStringFlag

// IncludedAlarmNames is a list of names for defined Alarms that will be
// explicitly included for evaluation. Unless included by later filtering
// logic, unmatched Triggered Alarms will be excluded from final
Expand Down
2 changes: 2 additions & 0 deletions internal/config/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ const (
virtualHardwareDefaultIsMinimumFlagHelp string = "If specified, the host or cluster default virtual hardware version is the minimum hardware version allowed. Any Virtual Machine not meeting this minimum value is considered to be in a WARNING state."
includedAlarmEntityTypesFlagHelp string = "If specified, triggered alarms will only be evaluated if the associated entity type (e.g., Datastore) matches one of the provided values."
excludedAlarmEntityTypesFlagHelp string = "If specified, triggered alarms will only be evaluated if the associated entity type (e.g., Datastore) does NOT match one of the provided values."
includedAlarmEntityNamesFlagHelp string = "If specified, triggered alarms will only be evaluated if the associated entity name (e.g., \"node1.example.com\") matches one of the provided values."
excludedAlarmEntityNamesFlagHelp string = "If specified, triggered alarms will only be evaluated if the associated entity name (e.g., \"node1.example.com\") does NOT match one of the provided values."
evaluateAcknowledgedTriggeredAlarmFlagHelp string = "Toggles evaluation of acknowledged triggered alarms in addition to unacknowledged triggered alarms. Evaluation of acknowledged alarms is disabled by default."
includedAlarmNamesFlagHelp string = "If specified, triggered alarms will only be evaluated if the alarm name (e.g., \"Datastore usage on disk\") case-insensitively matches one of the provided substring values (e.g., \"datastore\" or \"datastore usage\") and is not explicitly excluded by another filter in the pipeline; while multiple explicit inclusions are allowed, explicit exclusions have precedence over explicit inclusions and will exclude the triggered alarm from further evaluation."
excludedAlarmNamesFlagHelp string = "If specified, triggered alarms will only be evaluated if the alarm name (e.g., \"Datastore usage on disk\") DOES NOT case-insensitively match one of the provided substring values (e.g., \"datastore\" or \"datastore usage\") and is not explicitly excluded by another filter in the pipeline; while multiple explicit inclusions are allowed, explicit exclusions have precedence over explicit inclusions and will exclude the triggered alarm from further evaluation."
Expand Down
7 changes: 5 additions & 2 deletions internal/config/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,8 @@ func (c *Config) handleFlagsConfig(pluginType PluginType) {
case pluginType.Alarms:

flag.Var(&c.DatacenterNames, "dc-name", datacenterNamesFlagHelp)
flag.Var(&c.IncludedAlarmEntityTypes, "include-type", includedAlarmEntityTypesFlagHelp)
flag.Var(&c.ExcludedAlarmEntityTypes, "exclude-type", excludedAlarmEntityTypesFlagHelp)
flag.Var(&c.IncludedAlarmEntityTypes, "include-entity-type", includedAlarmEntityTypesFlagHelp)
flag.Var(&c.ExcludedAlarmEntityTypes, "exclude-entity-type", excludedAlarmEntityTypesFlagHelp)

flag.BoolVar(&c.EvaluateAcknowledgedAlarms, "eval-acknowledged", defaultEvaluateAcknowledgedAlarms, evaluateAcknowledgedTriggeredAlarmFlagHelp)

Expand All @@ -140,6 +140,9 @@ func (c *Config) handleFlagsConfig(pluginType PluginType) {
flag.Var(&c.includedAlarmStatuses, "include-status", includedAlarmStatusesFlagHelp)
flag.Var(&c.excludedAlarmStatuses, "exclude-status", excludedAlarmStatusesFlagHelp)

flag.Var(&c.IncludedAlarmEntityNames, "include-entity-name", includedAlarmEntityNamesFlagHelp)
flag.Var(&c.ExcludedAlarmEntityNames, "exclude-entity-name", excludedAlarmEntityNamesFlagHelp)

case pluginType.DatastoresSize:

flag.StringVar(&c.DatacenterName, "dc-name", defaultDatacenterName, datacenterNameFlagHelp)
Expand Down
13 changes: 11 additions & 2 deletions internal/config/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -428,8 +428,17 @@ func (c Config) validate(pluginType PluginType) error {
if len(c.IncludedAlarmEntityTypes) > 0 && len(c.ExcludedAlarmEntityTypes) > 0 {
return fmt.Errorf(
"only one of %q or %q flags may be specified",
"include-type",
"exclude-type",
"include-entity-type",
"exclude-entity-type",
)
}

// only one of these options may be used
if len(c.IncludedAlarmEntityNames) > 0 && len(c.ExcludedAlarmEntityNames) > 0 {
return fmt.Errorf(
"only one of %q or %q flags may be specified",
"include-entity-name",
"exclude-entity-name",
)
}

Expand Down
89 changes: 70 additions & 19 deletions internal/vsphere/alarms.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ type TriggeredAlarms []TriggeredAlarm
type TriggeredAlarmFilters struct {
IncludedAlarmEntityTypes []string
ExcludedAlarmEntityTypes []string
IncludedAlarmEntityNames []string
ExcludedAlarmEntityNames []string
IncludedAlarmNames []string
ExcludedAlarmNames []string
IncludedAlarmDescriptions []string
Expand Down Expand Up @@ -529,7 +531,7 @@ func logTriggeredAlarmMarked(triggeredAlarm TriggeredAlarm, keep bool, explicit
switch {
case keep:
logger.Printf(
"Alarm (%s) for %q of type %q with name %q %s marked for inclusion",
"Alarm (%s) for entity name %q of type %q with alarm name %q %s marked for inclusion",
triggeredAlarm.OverallStatus,
triggeredAlarm.Entity.Name,
triggeredAlarm.Entity.MOID.Type,
Expand All @@ -539,7 +541,7 @@ func logTriggeredAlarmMarked(triggeredAlarm TriggeredAlarm, keep bool, explicit

default:
logger.Printf(
"Alarm (%s) for %q of type %q with name %q %s marked for exclusion",
"Alarm (%s) for entity name %q of type %q with alarm name %q %s marked for exclusion",
triggeredAlarm.OverallStatus,
triggeredAlarm.Entity.Name,
triggeredAlarm.Entity.MOID.Type,
Expand Down Expand Up @@ -678,6 +680,16 @@ func EntityStatusToNagiosState(entityStatus types.ManagedEntityStatus) (string,

}

// getSubstringFilterKeywords is a helper function that returns a map of all
// valid keywords used by the TriggeredAlarms.filterBySubstring method.
// func getSubstringFilterKeywords() map[string]struct{} {
// return map[string]struct{}{
// AlarmDescription: struct{}{},
// AlarmName: struct{}{},
// EntityName: struct{}{},
// }
// }

// Filter explicitly includes or excludes TriggeredAlarms based on specified
// filter settings.
func (tas *TriggeredAlarms) Filter(filters TriggeredAlarmFilters) {
Expand All @@ -689,14 +701,17 @@ func (tas *TriggeredAlarms) Filter(filters TriggeredAlarmFilters) {
tas.filterByEntityType(filters.IncludedAlarmEntityTypes, filters.ExcludedAlarmEntityTypes)

logger.Println("Filtering triggered alarms by name")
tas.filterBySubstring(false, filters.IncludedAlarmNames, filters.ExcludedAlarmNames)
tas.filterBySubstring(alarmName, filters.IncludedAlarmNames, filters.ExcludedAlarmNames)

logger.Println("Filtering triggered alarms by description")
tas.filterBySubstring(true, filters.IncludedAlarmDescriptions, filters.ExcludedAlarmDescriptions)
tas.filterBySubstring(alarmDescription, filters.IncludedAlarmDescriptions, filters.ExcludedAlarmDescriptions)

logger.Println("Filtering triggered alarms by status")
tas.filterByStatus(filters.IncludedAlarmStatuses, filters.ExcludedAlarmStatuses)

logger.Println("Filtering triggered alarms by entity name")
tas.filterBySubstring(entityName, filters.IncludedAlarmEntityNames, filters.ExcludedAlarmEntityNames)

}

// FilterByIncludedEntityType accepts a slice of entity type keywords to use
Expand Down Expand Up @@ -767,11 +782,13 @@ func (tas *TriggeredAlarms) filterByEntityType(include []string, exclude []strin
switch {
// if the collection of TriggeredAlarms is empty, skip filtering attempts.
case len(*tas) == 0:
logger.Println("Triggered Alarms list is empty, aborting")
return

// if we're not limiting TriggeredAlarms by entity type, skip filtering
// attempts.
case len(include) == 0 && len(exclude) == 0:
logger.Println("Triggered Alarms status inclusion and exclusion lists are empty, aborting")
return
}

Expand Down Expand Up @@ -902,7 +919,7 @@ func (tas *TriggeredAlarms) FilterByIncludedNameSubstring(include []string) {
)
}()

tas.filterBySubstring(false, include, []string{})
tas.filterBySubstring(alarmName, include, []string{})

}

Expand All @@ -923,7 +940,7 @@ func (tas *TriggeredAlarms) FilterByExcludedNameSubstring(exclude []string) {
)
}()

tas.filterBySubstring(false, []string{}, exclude)
tas.filterBySubstring(alarmName, []string{}, exclude)

}

Expand All @@ -946,7 +963,7 @@ func (tas *TriggeredAlarms) FilterByIncludedDescriptionSubstring(include []strin
)
}()

tas.filterBySubstring(true, include, []string{})
tas.filterBySubstring(alarmDescription, include, []string{})

}

Expand All @@ -967,19 +984,20 @@ func (tas *TriggeredAlarms) FilterByExcludedDescriptionSubstring(exclude []strin
)
}()

tas.filterBySubstring(true, []string{}, exclude)
tas.filterBySubstring(alarmDescription, []string{}, exclude)

}

// filterBySubstring accepts slices of substrings to use in comparisons
// against TriggeredAlarm names or descriptions in order to explicitly mark
// TriggeredAlarms for inclusion or exclusion in the final evaluation. A
// boolean value indicates whether the comparison should be against the
// defined Alarm's description or name field.
// filterBySubstring accepts a field keyword and slices of substrings to use
// in comparisons against TriggeredAlarm fields in order to explicitly mark
// TriggeredAlarms for inclusion or exclusion in the final evaluation. The
// provided field keyword indicates which field the comparison should be
// against. If an invalid field keyword is supplied the field comparison will
// default to using the alarm name.
//
// Flag evaluation logic prevents sysadmins from providing both an inclusion
// and exclusion list.
func (tas *TriggeredAlarms) filterBySubstring(useDescription bool, include []string, exclude []string) {
func (tas *TriggeredAlarms) filterBySubstring(fieldKeyword string, include []string, exclude []string) {

funcTimeStart := time.Now()

Expand All @@ -988,23 +1006,26 @@ func (tas *TriggeredAlarms) filterBySubstring(useDescription bool, include []str
// been applied in order to show the results of this filter.
nonExcludedStart := len(*tas) - tas.NumExcluded()

defer func(start *int) {
defer func(start *int, keyword string) {
logger.Printf(
"It took %v to execute filterBySubstring func (for %d non-excluded TriggeredAlarms, yielding %d non-excluded TriggeredAlarms)\n",
"It took %v to execute filterBySubstring func (for %d non-excluded TriggeredAlarms, using keyword %s, yielding %d non-excluded TriggeredAlarms)\n",
time.Since(funcTimeStart),
*start,
keyword,
len(*tas)-tas.NumExcluded(),
)
}(&nonExcludedStart)
}(&nonExcludedStart, fieldKeyword)

switch {
// if the collection of TriggeredAlarms is empty, skip filtering attempts.
case len(*tas) == 0:
logger.Println("Triggered Alarms list is empty, aborting")
return

// if neither inclusion nor exclusion substrings were provided, skip
// filtering attempts.
case len(include) == 0 && len(exclude) == 0:
logger.Println("Triggered Alarms status inclusion and exclusion lists are empty, aborting")
return
}

Expand All @@ -1022,13 +1043,27 @@ func (tas *TriggeredAlarms) filterBySubstring(useDescription bool, include []str
)
}

// validKeywords := getSubstringFilterKeywords()
// if _, ok := validKeywords[fieldKeyword]; !ok {
// logger.Printf("")
// }

logger.Printf("substring field keyword %q specified", fieldKeyword)
for i := range *tas {

var substrField string
switch {
case useDescription:
switch fieldKeyword {
case alarmDescription:
substrField = (*tas)[i].Description
case alarmName:
substrField = (*tas)[i].Name
case entityName:
substrField = (*tas)[i].Entity.Name
default:
logger.Printf(
"substring field %q not recognized, defaulting to alarm name",
fieldKeyword,
)
substrField = (*tas)[i].Name
}

Expand Down Expand Up @@ -1440,6 +1475,22 @@ func AlarmsReport(
nagios.CheckOutputEOL,
)

fmt.Fprintf(
&report,
"* Specified Triggered Alarm entity names to explicitly include (%d): [%v]%s",
len(triggeredAlarmFilters.IncludedAlarmEntityNames),
strings.Join(triggeredAlarmFilters.IncludedAlarmEntityNames, ", "),
nagios.CheckOutputEOL,
)

fmt.Fprintf(
&report,
"* Specified Triggered Alarm entity names to explicitly exclude (%d): [%v]%s",
len(triggeredAlarmFilters.ExcludedAlarmEntityNames),
strings.Join(triggeredAlarmFilters.ExcludedAlarmEntityNames, ", "),
nagios.CheckOutputEOL,
)

fmt.Fprintf(
&report,
"* Specified Triggered Alarm names to explicitly include (%d): [%v]%s",
Expand Down

0 comments on commit 6d82171

Please sign in to comment.