# Patch summary (free-text preamble before the first `diff --git` header).
#
# Scope: modules/integration_aws-vpn — adds a MAJOR severity rule to the
# `vpn_status` ("AWS VPN tunnel state") SignalFx detector.
#
# README.md
#   - Detector table: the "AWS VPN tunnel state" row now marks the Major
#     column with X in addition to Critical.
#
# detectors-gen.tf (resource "signalfx_detector" "vpn_status")
#   - program_text gains a second detect() published as 'MAJOR'. It fires when
#     signal < var.vpn_status_threshold_major AND the critical condition
#     (signal < var.vpn_status_threshold_critical) is NOT met, so the two
#     labels are mutually exclusive. Each condition only receives
#     lasting=/at_least= arguments when its *_lasting_duration_* variable is
#     non-null.
#   - New `rule` block with severity "Major" bound to detect_label "MAJOR";
#     its `disabled` flag resolves through
#     coalesce(vpn_status_disabled_major, vpn_status_disabled, detectors_disabled).
#   - Existing "Critical" rule: `disabled` now consults
#     var.vpn_status_disabled_critical first, before the previous
#     vpn_status_disabled / detectors_disabled fallbacks.
#
# variables-gen.tf
#   - New variables: vpn_status_disabled_major (bool, default null),
#     vpn_status_disabled_critical (bool, default null),
#     vpn_status_threshold_major (number, default 1),
#     vpn_status_lasting_duration_major (string, default null),
#     vpn_status_at_least_percentage_major (number, default 1).
#   - vpn_status_threshold_critical default changes from 1 to 0.5.
#
# NOTE(review): the critical default moving from 1 to 0.5 changes behaviour for
# callers that relied on the default — what previously raised CRIT at
# signal < 1 now raises MAJOR in the 0.5 <= signal < 1 range. Confirm this is
# intended and called out in the changelog.
# NOTE(review): presumably the signal is the per-VpnId mean of TunnelState, so
# values strictly between 0 and 1 mean "some but not all tunnels up" — verify
# against the AWS metric definition and the configured aggregation.
# NOTE(review): these are *-gen.tf files; presumably they are produced by a
# generator, so any follow-up change should be made in its input — TODO confirm.
diff --git a/modules/integration_aws-vpn/README.md b/modules/integration_aws-vpn/README.md index 40d8b8c99..af8ab25ad 100644 --- a/modules/integration_aws-vpn/README.md +++ b/modules/integration_aws-vpn/README.md @@ -76,7 +76,7 @@ This module creates the following SignalFx detectors which could contain one or |Detector|Critical|Major|Minor|Warning|Info| |---|---|---|---|---|---| |AWS VPN heartbeat|X|-|-|-|-| -|AWS VPN tunnel state|X|-|-|-|-| +|AWS VPN tunnel state|X|X|-|-|-| ## How to collect required metrics? diff --git a/modules/integration_aws-vpn/detectors-gen.tf b/modules/integration_aws-vpn/detectors-gen.tf index 5123b41cc..ff777c1e5 100644 --- a/modules/integration_aws-vpn/detectors-gen.tf +++ b/modules/integration_aws-vpn/detectors-gen.tf @@ -38,13 +38,26 @@ resource "signalfx_detector" "vpn_status" { base_filtering = filter('namespace', 'AWS/VPN') and filter('stat', 'mean') and filter('VpnId', '*') signal = data('TunnelState', filter=base_filtering and ${module.filtering.signalflow})${var.vpn_status_aggregation_function}${var.vpn_status_transformation_function}.publish('signal') detect(when(signal < ${var.vpn_status_threshold_critical}%{if var.vpn_status_lasting_duration_critical != null}, lasting='${var.vpn_status_lasting_duration_critical}', at_least=${var.vpn_status_at_least_percentage_critical}%{endif})).publish('CRIT') + detect(when(signal < ${var.vpn_status_threshold_major}%{if var.vpn_status_lasting_duration_major != null}, lasting='${var.vpn_status_lasting_duration_major}', at_least=${var.vpn_status_at_least_percentage_major}%{endif}) and (not when(signal < ${var.vpn_status_threshold_critical}%{if var.vpn_status_lasting_duration_critical != null}, lasting='${var.vpn_status_lasting_duration_critical}', at_least=${var.vpn_status_at_least_percentage_critical}%{endif} ))).publish('MAJOR') EOF + rule { + description = "is too low < ${var.vpn_status_threshold_major}" + severity = "Major" + detect_label = "MAJOR" + disabled = 
coalesce(var.vpn_status_disabled_major, var.vpn_status_disabled, var.detectors_disabled) + notifications = try(coalescelist(lookup(var.vpn_status_notifications, "major", []), var.notifications.major), null) + runbook_url = try(coalesce(var.vpn_status_runbook_url, var.runbook_url), "") + tip = var.vpn_status_tip + parameterized_subject = var.message_subject == "" ? local.rule_subject : var.message_subject + parameterized_body = var.message_body == "" ? local.rule_body : var.message_body + } + rule { description = "is too low < ${var.vpn_status_threshold_critical}" severity = "Critical" detect_label = "CRIT" - disabled = coalesce(var.vpn_status_disabled, var.detectors_disabled) + disabled = coalesce(var.vpn_status_disabled_critical, var.vpn_status_disabled, var.detectors_disabled) notifications = try(coalescelist(lookup(var.vpn_status_notifications, "critical", []), var.notifications.critical), null) runbook_url = try(coalesce(var.vpn_status_runbook_url, var.runbook_url), "") tip = var.vpn_status_tip diff --git a/modules/integration_aws-vpn/variables-gen.tf b/modules/integration_aws-vpn/variables-gen.tf index 3114b9a74..3f64f222a 100644 --- a/modules/integration_aws-vpn/variables-gen.tf +++ b/modules/integration_aws-vpn/variables-gen.tf @@ -92,10 +92,39 @@ variable "vpn_status_disabled" { default = null } +variable "vpn_status_disabled_major" { + description = "Disable major alerting rule for vpn_status detector" + type = bool + default = null +} + +variable "vpn_status_disabled_critical" { + description = "Disable critical alerting rule for vpn_status detector" + type = bool + default = null +} + +variable "vpn_status_threshold_major" { + description = "Major threshold for vpn_status detector" + type = number + default = 1 +} + +variable "vpn_status_lasting_duration_major" { + description = "Minimum duration that conditions must be true before raising alert" + type = string + default = null +} + +variable "vpn_status_at_least_percentage_major" { + description = 
"Percentage of lasting that conditions must be true before raising alert (>= 0.0 and <= 1.0)" + type = number + default = 1 +} variable "vpn_status_threshold_critical" { description = "Critical threshold for vpn_status detector" type = number - default = 0.5 } variable "vpn_status_lasting_duration_critical" {