From 0cf92857260860aa07cdef6f926e0d8d85694780 Mon Sep 17 00:00:00 2001 From: cmk-bonobo Date: Sun, 4 May 2025 13:10:23 +0200 Subject: [PATCH] added section udp_components --- agent_based/traefik_udp_components.py | 154 ++++++++++++++++++++++++++ checkman/traefik_info | 2 +- checkman/traefik_udp_components | 11 ++ graphing/graph_traefik.py | 28 ++++- rulesets/rs_traefik_http.py | 2 +- rulesets/rs_traefik_tcp.py | 2 +- rulesets/rs_traefik_udp.py | 98 ++++++++++++++++ 7 files changed, 293 insertions(+), 4 deletions(-) create mode 100644 agent_based/traefik_udp_components.py create mode 100644 checkman/traefik_udp_components create mode 100644 rulesets/rs_traefik_udp.py diff --git a/agent_based/traefik_udp_components.py b/agent_based/traefik_udp_components.py new file mode 100644 index 0000000..de26e2c --- /dev/null +++ b/agent_based/traefik_udp_components.py @@ -0,0 +1,154 @@ +#!/usr/bin/env python3 +# pylint: disable=missing-module-docstring, unused-argument, consider-using-f-string +# pylint: disable=missing-function-docstring, line-too-long + +from pprint import pprint + +from cmk.utils import debug + +# import necessary elements from API version 2 +from cmk.agent_based.v2 import ( + AgentSection, + CheckPlugin, + Service, + State, + Metric, + Result, + DiscoveryResult, + CheckResult, + check_levels, +) + + +def get_state_upper( + levels: tuple[int | float, int | float], value: int | float +) -> State: + """returns OK/WARN/CRIT depending on the given parameters""" + warn, crit = levels + if value >= crit: + return State.CRIT + if value >= warn: + return State.WARN + return State.OK + + +def parse_traefik_udp_components(string_table): + """the parse function""" + parsed_data = {} + for line in string_table: + if line[0] == "routers": + parsed_data["num_routers"] = int(line[1]) + parsed_data["num_routers_warn"] = int(line[2]) + parsed_data["num_routers_crit"] = int(line[3]) + elif line[0] == "services": + parsed_data["num_services"] = int(line[1]) + 
parsed_data["num_services_warn"] = int(line[2]) + parsed_data["num_services_crit"] = int(line[3]) + return parsed_data + + +def discover_traefik_udp_components(section) -> DiscoveryResult: + """the discover function""" + yield Service() + + +def check_traefik_udp_components(params, section) -> CheckResult: + """the check function""" + if debug.enabled(): + pprint(section) + _level_type, levels_percent_not_ok = params["levels_traefik_udp_components_not_ok"] + levels_min_routers = params["levels_traefik_min_udp_routers"] + levels_max_routers = params["levels_traefik_max_udp_routers"] + levels_min_services = params["levels_traefik_min_udp_services"] + levels_max_services = params["levels_traefik_max_udp_services"] + num_routers: int = section["num_routers"] + num_routers_warn: int = section["num_routers_warn"] + num_routers_crit: int = section["num_routers_crit"] + num_services: int = section["num_services"] + num_services_warn: int = section["num_services_warn"] + num_services_crit: int = section["num_services_crit"] + num_components: int = num_routers + num_services + components_warn: int = num_routers_warn + num_services_warn + components_crit: int = num_routers_crit + num_services_crit + components_percent_not_ok: float = 0.0 + if num_components > 0: + components_percent_not_ok = ( + (components_warn + components_crit) * 100 / num_components + ) + yield Metric( + name="traefik_percent_udp_components_not_ok", + value=components_percent_not_ok, + levels=levels_percent_not_ok, + ) + summary: str = f"Number of UDP routers/services: {num_routers}/{num_services}" + details_routers: str = ( + f"Routers WARN: {num_routers_warn}\nRouters CRIT: {num_routers_crit}" + ) + details_services: str = ( + f"Services WARN: {num_services_warn}\nServices CRIT: {num_services_crit}" + ) + details = f"{details_routers}\n\n{details_services}" + state: State = State.OK + if components_warn > 0: + state = State.WARN + if components_crit > 0: + state = State.CRIT + yield Result( + 
state=state, + summary=summary, + details=details, + ) + yield from check_levels( + metric_name="traefik_num_udp_routers", + value=num_routers, + levels_lower=levels_min_routers, + levels_upper=levels_max_routers, + label="Number of UDP routers", + notice_only=True, + render_func=lambda v: "%.0f" % v, + ) + yield from check_levels( + metric_name="traefik_num_udp_services", + value=num_services, + levels_lower=levels_min_services, + levels_upper=levels_max_services, + label="Number of UDP services", + notice_only=True, + render_func=lambda v: "%.0f" % v, + ) + + +# create the new agent section, must begin with "agent_section_" +# and must be an instance of "AgentSection" +agent_section_traefik_udp_components = AgentSection( + # "name" must exactly match the section name within the agent output + name="traefik_udp_components", + # define the parse function, name is arbitrary, a good choice is to choose + # "parse_" as prefix and append the section name + parse_function=parse_traefik_udp_components, +) + +# create the new check plugin, must begin with "check_plugin_" +# and must be an instance of "CheckPlugin" +check_plugin_traefik_udp_components = CheckPlugin( + # "name" should be the same as the corresponding section within the agent output + name="traefik_udp_components", + service_name="Traefik UDP components", + # define the discovery function, name is arbitrary, a good choice is to choose + # "discover_" as prefix and append the section name + discovery_function=discover_traefik_udp_components, + # define the check function, name is arbitrary, a good choice is to choose + # "check_" as prefix and append the section name + check_function=check_traefik_udp_components, + # define the default parameters + check_default_parameters={ + "levels_traefik_min_udp_routers": ("fixed", (0, 0)), + "levels_traefik_max_udp_routers": ("fixed", (25, 50)), + "levels_traefik_min_udp_services": ("fixed", (0, 0)), + "levels_traefik_max_udp_services": ("fixed", (25, 50)), + 
"levels_traefik_udp_components_not_ok": ("fixed", (0.5, 1.0)), + }, + # connect to the ruleset where parameters can be defined + # must match the name of the ruleset exactly + check_ruleset_name="traefik_udp_components", +) diff --git a/checkman/traefik_info b/checkman/traefik_info index d666d32..bb75f55 100644 --- a/checkman/traefik_info +++ b/checkman/traefik_info @@ -1,4 +1,4 @@ -title: Traefik: Various information +title: Traefik Various information agents: linux catalog: unsorted license: GPL diff --git a/checkman/traefik_udp_components b/checkman/traefik_udp_components new file mode 100644 index 0000000..3627a05 --- /dev/null +++ b/checkman/traefik_udp_components @@ -0,0 +1,11 @@ +title: Traefik UDP components +agents: linux +catalog: unsorted +license: GPL +distribution: check_mk +description: + Shows total number of UDP routers/services. + The check will raise WARN/CRIT if the min/max numbers of routers/services are below/above the configurable levels. + The check will raise WARN/CRIT if the overall number of components in not OK state (reported directly from the Traefik API) is above the configurable levels. 
+inventory: + one service is created (with several details) \ No newline at end of file diff --git a/graphing/graph_traefik.py b/graphing/graph_traefik.py index 425affa..7b1940b 100644 --- a/graphing/graph_traefik.py +++ b/graphing/graph_traefik.py @@ -7,6 +7,8 @@ from cmk.graphing.v1 import Title from cmk.graphing.v1.metrics import Color, DecimalNotation, Metric, Unit from cmk.graphing.v1.perfometers import Closed, FocusRange, Perfometer + +# info section metric_traefik_agent_execution_time = Metric( # "name" must be exactly the "metric_name" within the check function name="traefik_agent_execution_time", @@ -15,6 +17,7 @@ metric_traefik_agent_execution_time = Metric( color=Color.DARK_ORANGE, ) +# HTTP section metric_traefik_num_http_routers = Metric( name="traefik_num_http_routers", title=Title("Number of HTTP routers"), @@ -44,6 +47,7 @@ metric_traefik_percent_http_components_not_ok = Metric( color=Color.DARK_RED, ) +# TCP section metric_traefik_num_tcp_routers = Metric( name="traefik_num_tcp_routers", title=Title("Number of TCP routers"), @@ -65,7 +69,6 @@ metric_traefik_num_tcp_middlewares = Metric( color=Color.LIGHT_RED, ) - metric_traefik_percent_tcp_components_not_ok = Metric( name="traefik_percent_tcp_components_not_ok", title=Title("Percent of TCP components in not OK state"), @@ -73,7 +76,30 @@ metric_traefik_percent_tcp_components_not_ok = Metric( color=Color.DARK_RED, ) +# UDP section +metric_traefik_num_udp_routers = Metric( + name="traefik_num_udp_routers", + title=Title("Number of UDP routers"), + unit=Unit(DecimalNotation("")), + color=Color.LIGHT_GREEN, +) +metric_traefik_num_udp_services = Metric( + name="traefik_num_udp_services", + title=Title("Number of UDP services"), + unit=Unit(DecimalNotation("")), + color=Color.LIGHT_BLUE, +) + +metric_traefik_percent_udp_components_not_ok = Metric( + name="traefik_percent_udp_components_not_ok", + title=Title("Percent of UDP components in not OK state"), + unit=Unit(DecimalNotation("%")), + 
 color=Color.DARK_RED, +) + + +# Perfometers perfometer_traefik_agent_execution_time = Perfometer( name="traefik_agent_execution_time", focus_range=FocusRange(Closed(0), Closed(100)), diff --git a/rulesets/rs_traefik_http.py b/rulesets/rs_traefik_http.py index 3572ec3..10cab62 100644 --- a/rulesets/rs_traefik_http.py +++ b/rulesets/rs_traefik_http.py @@ -95,7 +95,7 @@ def _parameter_form(): parameter_form=SimpleLevels( title=Title("Levels for percentage of not OK HTTP components"), help_text=Help( - "Define the levels for the maximum number of HTTP routers in not OK state" + "Define the levels for the maximum number of HTTP components in not OK state" ), form_spec_template=Float(unit_symbol="%"), level_direction=LevelDirection.UPPER, diff --git a/rulesets/rs_traefik_tcp.py b/rulesets/rs_traefik_tcp.py index 0998a2e..bb14862 100644 --- a/rulesets/rs_traefik_tcp.py +++ b/rulesets/rs_traefik_tcp.py @@ -95,7 +95,7 @@ def _parameter_form(): parameter_form=SimpleLevels( title=Title("Levels for percentage of not OK TCP components"), help_text=Help( - "Define the levels for the maximum number of TCP routers in not OK state" + "Define the levels for the maximum number of TCP components in not OK state" ), form_spec_template=Float(unit_symbol="%"), level_direction=LevelDirection.UPPER, diff --git a/rulesets/rs_traefik_udp.py b/rulesets/rs_traefik_udp.py new file mode 100644 index 0000000..3cfe8e9 --- /dev/null +++ b/rulesets/rs_traefik_udp.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 +"""UDP components parameter form for Traefik""" + +from cmk.rulesets.v1 import Title, Help +from cmk.rulesets.v1.form_specs import ( + DictElement, + Dictionary, + SimpleLevels, + DefaultValue, + Integer, + LevelDirection, + Float, +) + +from cmk.rulesets.v1.rule_specs import CheckParameters, Topic, HostCondition + + +# function name should begin with an underscore to limit its visibility +def _parameter_form(): + return Dictionary( + elements={ + "levels_traefik_min_udp_routers": DictElement( 
+ parameter_form=SimpleLevels( + title=Title("Levels for minimum number of UDP routers"), + help_text=Help( + "Define the levels for the minimum number of UDP routers" + ), + form_spec_template=Integer(unit_symbol=""), + level_direction=LevelDirection.LOWER, + prefill_fixed_levels=DefaultValue(value=(0, 0)), + ), + required=True, + ), + "levels_traefik_max_udp_routers": DictElement( + parameter_form=SimpleLevels( + title=Title("Levels for maximum number of UDP routers"), + help_text=Help( + "Define the levels for the maximum number of UDP routers" + ), + form_spec_template=Integer(unit_symbol=""), + level_direction=LevelDirection.UPPER, + prefill_fixed_levels=DefaultValue(value=(25, 50)), + ), + required=True, + ), + "levels_traefik_min_udp_services": DictElement( + parameter_form=SimpleLevels( + title=Title("Levels for minimum number of UDP services"), + help_text=Help( + "Define the levels for the minimum number of UDP services" + ), + form_spec_template=Integer(unit_symbol=""), + level_direction=LevelDirection.LOWER, + prefill_fixed_levels=DefaultValue(value=(0, 0)), + ), + required=True, + ), + "levels_traefik_max_udp_services": DictElement( + parameter_form=SimpleLevels( + title=Title("Levels for maximum number of UDP services"), + help_text=Help( + "Define the levels for the maximum number of UDP services" + ), + form_spec_template=Integer(unit_symbol=""), + level_direction=LevelDirection.UPPER, + prefill_fixed_levels=DefaultValue(value=(25, 50)), + ), + required=True, + ), + "levels_traefik_udp_components_not_ok": DictElement( + parameter_form=SimpleLevels( + title=Title("Levels for percentage of not OK UDP components"), + help_text=Help( + "Define the levels for the maximum number of UDP components in not OK state" + ), + form_spec_template=Float(unit_symbol="%"), + level_direction=LevelDirection.UPPER, + prefill_fixed_levels=DefaultValue(value=(0.5, 1.0)), + ), + required=True, + ), + } + ) + + +# name must begin with "rule_spec_", should refer to the used 
check plugin +# must be an instance of "CheckParameters" +rule_spec_traefik_udp_components = CheckParameters( + # "name" should be the same as the check plugin + name="traefik_udp_components", + # the title is shown in the GUI + title=Title("Traefik UDP components parameters"), + # this ruleset can be found under Setup|Service monitoring rules|Applications... + topic=Topic.APPLICATIONS, + # define the name of the function which creates the GUI elements + parameter_form=_parameter_form, + condition=HostCondition(), +)