/**
 * collectd - src/utils_threshold.c
 * Copyright (C) 2007,2008  Florian octo Forster
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; only version 2 of the License is applicable.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
 *
 * Author:
 *   Florian octo Forster
 **/

#include "collectd.h"
#include "common.h"
#include "plugin.h"
#include "utils_avltree.h"
#include "utils_cache.h"

/* NOTE(review): the names of the two system headers were missing from this
 * copy of the file. <math.h> (NAN, isnan) and <pthread.h> (the mutex below)
 * are what the code in this file uses - confirm against the upstream file. */
#include <math.h>
#include <pthread.h>

/*
 * Private data structures
 * {{{ */
#define UT_FLAG_INVERT  0x01
#define UT_FLAG_PERSIST 0x02

/* One configured threshold. Thresholds that share the same identifier
 * (host, plugin, plugin instance, type, type instance) are chained through
 * `next'; only the head of such a chain is stored in the tree. */
typedef struct threshold_s
{
  char host[DATA_MAX_NAME_LEN];
  char plugin[DATA_MAX_NAME_LEN];
  char plugin_instance[DATA_MAX_NAME_LEN];
  char type[DATA_MAX_NAME_LEN];
  char type_instance[DATA_MAX_NAME_LEN];
  char data_source[DATA_MAX_NAME_LEN]; /* empty: applies to every source */
  gauge_t warning_min; /* NAN means "not configured" for all four limits */
  gauge_t warning_max;
  gauge_t failure_min;
  gauge_t failure_max;
  int flags; /* bitwise OR of the UT_FLAG_* values */
  struct threshold_s *next;
} threshold_t;
/* }}} */

/*
 * Private (static) variables
 * {{{ */
static c_avl_tree_t   *threshold_tree = NULL;
static pthread_mutex_t threshold_lock = PTHREAD_MUTEX_INITIALIZER;
/* }}} */

/*
 * Threshold management
 * ====================
 * The following functions add, delete, search, etc. configured thresholds to
 * the underlying AVL trees.
 * {{{ */

/*
 * threshold_t *threshold_get
 *
 * Builds the lookup key from the given identifier parts and returns the
 * first threshold stored under that key in `threshold_tree'. A NULL
 * `hostname', `plugin' or `type' is replaced by the empty string;
 * `plugin_instance' and `type_instance' are passed to format_name unchanged.
 * Returns NULL if the key cannot be built or if no entry exists.
 * This function does not acquire `threshold_lock' itself.
 */
static threshold_t *threshold_get (const char *hostname,
    const char *plugin, const char *plugin_instance,
    const char *type, const char *type_instance)
{
  char name[6 * DATA_MAX_NAME_LEN];
  threshold_t *th = NULL;
  int status;

  status = format_name (name, sizeof (name),
      (hostname == NULL) ? "" : hostname,
      (plugin == NULL) ? "" : plugin, plugin_instance,
      (type == NULL) ? "" : type, type_instance);
  /* Do not search with a key that was never (completely) written. */
  if (status != 0)
    return (NULL);
  name[sizeof (name) - 1] = '\0';

  if (c_avl_get (threshold_tree, name, (void *) &th) != 0)
    return (NULL);

  return (th);
} /* threshold_t *threshold_get */

/*
 * int ut_threshold_add
 *
 * Stores a heap-allocated copy of `th'. If no threshold with the same
 * identifier exists yet, the copy is inserted into `threshold_tree' under a
 * duplicated key; otherwise it is appended to the end of the existing chain.
 * Returns zero on success, -1 if the key cannot be built or memory cannot
 * be allocated, and the status of c_avl_insert if the insertion fails.
 */
static int ut_threshold_add (const threshold_t *th)
{
  char name[6 * DATA_MAX_NAME_LEN];
  char *name_copy;
  threshold_t *th_copy;
  threshold_t *th_ptr;
  int status = 0;

  if (format_name (name, sizeof (name), th->host,
        th->plugin, th->plugin_instance,
        th->type, th->type_instance) != 0)
  {
    ERROR ("ut_threshold_add: format_name failed.");
    return (-1);
  }

  name_copy = strdup (name);
  if (name_copy == NULL)
  {
    ERROR ("ut_threshold_add: strdup failed.");
    return (-1);
  }

  th_copy = malloc (sizeof (*th_copy));
  if (th_copy == NULL)
  {
    sfree (name_copy);
    ERROR ("ut_threshold_add: malloc failed.");
    return (-1);
  }
  memcpy (th_copy, th, sizeof (*th_copy));
  /* The copy always becomes the tail of a chain; never inherit whatever the
   * caller's (possibly stack allocated) structure had in `next'. */
  th_copy->next = NULL;

  DEBUG ("ut_threshold_add: Adding entry `%s'", name);

  pthread_mutex_lock (&threshold_lock);

  /* Find the last threshold already registered under this identifier. */
  th_ptr = threshold_get (th->host, th->plugin, th->plugin_instance,
      th->type, th->type_instance);
  while ((th_ptr != NULL) && (th_ptr->next != NULL))
    th_ptr = th_ptr->next;

  if (th_ptr == NULL) /* no such threshold yet */
  {
    status = c_avl_insert (threshold_tree, name_copy, th_copy);
  }
  else /* th_ptr points to the last threshold in the list */
  {
    th_ptr->next = th_copy;
    /* name_copy isn't needed */
    sfree (name_copy);
  }

  pthread_mutex_unlock (&threshold_lock);

  /* `status' can only be non-zero when the insertion above was attempted and
   * failed; in that case the tree took ownership of neither allocation. */
  if (status != 0)
  {
    ERROR ("ut_threshold_add: c_avl_insert (%s) failed.", name);
    sfree (name_copy);
    sfree (th_copy);
  }

  return (status);
} /* int ut_threshold_add */
/*
 * End of the threshold management functions
 * }}} */

/*
 * Configuration
 * =============
 * The following approximately two hundred functions are used to handle the
 * configuration and fill the threshold list.
* {{{ */

/* Handles the `DataSource' option of a `Type' block: copies its single
 * string argument into th->data_source. Returns 0 on success, -1 if the
 * option does not have exactly one string argument. */
static int ut_config_type_datasource (threshold_t *th, oconfig_item_t *ci)
{
  if ((ci->values_num != 1)
      || (ci->values[0].type != OCONFIG_TYPE_STRING))
  {
    WARNING ("threshold values: The `DataSource' option needs exactly one "
        "string argument.");
    return (-1);
  }

  sstrncpy (th->data_source, ci->values[0].value.string,
      sizeof (th->data_source));

  return (0);
} /* int ut_config_type_datasource */

/* Handles the `Instance' option of a `Type' block: copies its single string
 * argument into th->type_instance. Returns 0 on success, -1 if the option
 * does not have exactly one string argument. */
static int ut_config_type_instance (threshold_t *th, oconfig_item_t *ci)
{
  if ((ci->values_num != 1)
      || (ci->values[0].type != OCONFIG_TYPE_STRING))
  {
    WARNING ("threshold values: The `Instance' option needs exactly one "
        "string argument.");
    return (-1);
  }

  sstrncpy (th->type_instance, ci->values[0].value.string,
      sizeof (th->type_instance));

  return (0);
} /* int ut_config_type_instance */

/* Handles the upper limits. The key `WarningMax' (compared without regard
 * to case) sets th->warning_max; any other key sets th->failure_max, so the
 * caller must only route `WarningMax' and `FailureMax' here. Returns 0 on
 * success, -1 if the option does not have exactly one number argument. */
static int ut_config_type_max (threshold_t *th, oconfig_item_t *ci)
{
  if ((ci->values_num != 1)
      || (ci->values[0].type != OCONFIG_TYPE_NUMBER))
  {
    WARNING ("threshold values: The `%s' option needs exactly one "
        "number argument.", ci->key);
    return (-1);
  }

  if (strcasecmp (ci->key, "WarningMax") == 0)
    th->warning_max = ci->values[0].value.number;
  else
    th->failure_max = ci->values[0].value.number;

  return (0);
} /* int ut_config_type_max */

/* Handles the lower limits. The key `WarningMin' (compared without regard
 * to case) sets th->warning_min; any other key sets th->failure_min, so the
 * caller must only route `WarningMin' and `FailureMin' here. Returns 0 on
 * success, -1 if the option does not have exactly one number argument. */
static int ut_config_type_min (threshold_t *th, oconfig_item_t *ci)
{
  if ((ci->values_num != 1)
      || (ci->values[0].type != OCONFIG_TYPE_NUMBER))
  {
    WARNING ("threshold values: The `%s' option needs exactly one "
        "number argument.", ci->key);
    return (-1);
  }

  if (strcasecmp (ci->key, "WarningMin") == 0)
    th->warning_min = ci->values[0].value.number;
  else
    th->failure_min = ci->values[0].value.number;

  return (0);
} /* int ut_config_type_min */

/* Handles the `Invert' option: sets or clears UT_FLAG_INVERT in th->flags
 * according to its single boolean argument. Returns 0 on success, -1 if the
 * option does not have exactly one boolean argument. */
static int ut_config_type_invert (threshold_t *th, oconfig_item_t *ci)
{
  if ((ci->values_num != 1)
      || (ci->values[0].type != OCONFIG_TYPE_BOOLEAN))
  {
    WARNING ("threshold values: The `Invert' option needs exactly one "
        "boolean argument.");
    return (-1);
  }

  if (ci->values[0].value.boolean)
    th->flags |= UT_FLAG_INVERT;
  else
    th->flags &= ~UT_FLAG_INVERT;

  return (0);
} /* int ut_config_type_invert */

/* Handles the `Persist' option: sets or clears UT_FLAG_PERSIST in th->flags
 * according to its single boolean argument. Returns 0 on success, -1 if the
 * option does not have exactly one boolean argument. */
static int ut_config_type_persist (threshold_t *th, oconfig_item_t *ci)
{
  if ((ci->values_num != 1)
      || (ci->values[0].type != OCONFIG_TYPE_BOOLEAN))
  {
    WARNING ("threshold values: The `Persist' option needs exactly one "
        "boolean argument.");
    return (-1);
  }

  if (ci->values[0].value.boolean)
    th->flags |= UT_FLAG_PERSIST;
  else
    th->flags &= ~UT_FLAG_PERSIST;

  return (0);
} /* int ut_config_type_persist */

/* Handles one `Type' block. Starts from a copy of `th_orig' (the settings
 * inherited from the enclosing blocks), sets the type name, resets all four
 * limits to NAN, applies every option of the block and, if all of them were
 * accepted, registers the result with ut_threshold_add. Processing stops at
 * the first option that fails. Returns 0 on success, non-zero otherwise. */
static int ut_config_type (const threshold_t *th_orig, oconfig_item_t *ci)
{
  int i;
  threshold_t th;
  int status = 0;

  if ((ci->values_num != 1)
      || (ci->values[0].type != OCONFIG_TYPE_STRING))
  {
    WARNING ("threshold values: The `Type' block needs exactly one string "
        "argument.");
    return (-1);
  }

  if (ci->children_num < 1)
  {
    WARNING ("threshold values: The `Type' block needs at least one option.");
    return (-1);
  }

  memcpy (&th, th_orig, sizeof (th));
  sstrncpy (th.type, ci->values[0].value.string, sizeof (th.type));

  th.warning_min = NAN;
  th.warning_max = NAN;
  th.failure_min = NAN;
  th.failure_max = NAN;

  for (i = 0; i < ci->children_num; i++)
  {
    oconfig_item_t *option = ci->children + i;
    status = 0;

    if (strcasecmp ("Instance", option->key) == 0)
      status = ut_config_type_instance (&th, option);
    else if (strcasecmp ("DataSource", option->key) == 0)
      status = ut_config_type_datasource (&th, option);
    else if ((strcasecmp ("WarningMax", option->key) == 0)
        || (strcasecmp ("FailureMax", option->key) == 0))
      status = ut_config_type_max (&th, option);
    else if ((strcasecmp ("WarningMin", option->key) == 0)
        || (strcasecmp ("FailureMin", option->key) == 0))
      status = ut_config_type_min (&th, option);
    else if (strcasecmp ("Invert", option->key) == 0)
      status = ut_config_type_invert (&th, option);
    else if (strcasecmp ("Persist", option->key) == 0)
      status = ut_config_type_persist (&th, option);
    else
    {
      WARNING ("threshold values: Option `%s' not allowed inside a `Type' "
          "block.", option->key);
      status = -1;
    }

    if (status != 0)
      break;
  }

  if (status == 0)
  {
    status = ut_threshold_add (&th);
  }

  return (status);
} /* int ut_config_type */

/* Handles the `Instance' option of a `Plugin' block: copies its single
 * string argument into th->plugin_instance. Returns 0 on success, -1 if the
 * option does not have exactly one string argument. */
static int ut_config_plugin_instance (threshold_t *th, oconfig_item_t *ci)
{
  if ((ci->values_num != 1)
      || (ci->values[0].type != OCONFIG_TYPE_STRING))
  {
    WARNING ("threshold values: The `Instance' option needs exactly one "
        "string argument.");
    return (-1);
  }

  sstrncpy (th->plugin_instance, ci->values[0].value.string,
      sizeof (th->plugin_instance));

  return (0);
} /* int ut_config_plugin_instance */

/* Handles one `Plugin' block. Starts from a copy of `th_orig', sets the
 * plugin name and walks the children in order: `Type' blocks are handed to
 * ut_config_type, `Instance' sets the plugin instance used by the `Type'
 * blocks that follow it. Stops at the first child that fails. Returns 0 on
 * success, non-zero otherwise. */
static int ut_config_plugin (const threshold_t *th_orig, oconfig_item_t *ci)
{
  int i;
  threshold_t th;
  int status = 0;

  if ((ci->values_num != 1)
      || (ci->values[0].type != OCONFIG_TYPE_STRING))
  {
    WARNING ("threshold values: The `Plugin' block needs exactly one string "
        "argument.");
    return (-1);
  }

  if (ci->children_num < 1)
  {
    WARNING ("threshold values: The `Plugin' block needs at least one nested "
        "block.");
    return (-1);
  }

  memcpy (&th, th_orig, sizeof (th));
  sstrncpy (th.plugin, ci->values[0].value.string, sizeof (th.plugin));

  for (i = 0; i < ci->children_num; i++)
  {
    oconfig_item_t *option = ci->children + i;
    status = 0;

    if (strcasecmp ("Type", option->key) == 0)
      status = ut_config_type (&th, option);
    else if (strcasecmp ("Instance", option->key) == 0)
      status = ut_config_plugin_instance (&th, option);
    else
    {
      WARNING ("threshold values: Option `%s' not allowed inside a `Plugin' "
          "block.", option->key);
      status = -1;
    }

    if (status != 0)
      break;
  }

  return (status);
} /* int ut_config_plugin */

/* Handles one `Host' block. Starts from a copy of `th_orig', sets the host
 * name and hands every nested `Type' or `Plugin' block to the matching
 * handler. Stops at the first child that fails. Returns 0 on success,
 * non-zero otherwise. */
static int ut_config_host (const threshold_t *th_orig, oconfig_item_t *ci)
{
  int i;
  threshold_t th;
  int status = 0;

  if ((ci->values_num != 1)
      || (ci->values[0].type != OCONFIG_TYPE_STRING))
  {
    WARNING ("threshold values: The `Host' block needs exactly one string "
        "argument.");
    return (-1);
  }

  if (ci->children_num < 1)
  {
    WARNING ("threshold values: The `Host' block needs at least one nested "
        "block.");
    return (-1);
  }

  memcpy (&th, th_orig, sizeof (th));
  sstrncpy (th.host, ci->values[0].value.string, sizeof (th.host));

  for (i = 0; i < ci->children_num; i++)
  {
    oconfig_item_t *option = ci->children + i;
    status = 0;

    if (strcasecmp ("Type", option->key) == 0)
      status = ut_config_type (&th, option);
    else if (strcasecmp ("Plugin", option->key) == 0)
      status = ut_config_plugin (&th, option);
    else
    {
      WARNING ("threshold values: Option `%s' not allowed inside a `Host' "
          "block.", option->key);
      status = -1;
    }

    if (status != 0)
      break;
  }

  return (status);
} /* int ut_config_host */

/*
 * int ut_config (PUBLIC)
 *
 * Entry point for the `Threshold' configuration block. Creates the threshold
 * tree on first use, then hands every nested `Type', `Plugin' and `Host'
 * block to the matching handler, starting from an all-zero template whose
 * four limits are NAN. Stops at the first child that fails.
 * Returns zero on success and non-zero if the block has arguments, the tree
 * cannot be created or a child is rejected. Thresholds registered before the
 * failing child stay registered.
 */
int ut_config (const oconfig_item_t *ci)
{
  int i;
  int status = 0;

  threshold_t th;

  if (ci->values_num != 0)
  {
    ERROR ("threshold values: The `Threshold' block may not have any "
        "arguments.");
    return (-1);
  }

  if (threshold_tree == NULL)
  {
    /* Cast to the comparator's function pointer type; converting a function
     * pointer through `void *' is not defined by ISO C.
     * NOTE(review): assumes c_avl_create takes
     * `int (*) (const void *, const void *)' - confirm in utils_avltree.h. */
    threshold_tree = c_avl_create (
        (int (*) (const void *, const void *)) strcmp);
    if (threshold_tree == NULL)
    {
      ERROR ("ut_config: c_avl_create failed.");
      return (-1);
    }
  }

  memset (&th, '\0', sizeof (th));
  th.warning_min = NAN;
  th.warning_max = NAN;
  th.failure_min = NAN;
  th.failure_max = NAN;

  for (i = 0; i < ci->children_num; i++)
  {
    oconfig_item_t *option = ci->children + i;
    status = 0;

    if (strcasecmp ("Type", option->key) == 0)
      status = ut_config_type (&th, option);
    else if (strcasecmp ("Plugin", option->key) == 0)
      status = ut_config_plugin (&th, option);
    else if (strcasecmp ("Host", option->key) == 0)
      status = ut_config_host (&th, option);
    else
    {
      WARNING ("threshold values: Option `%s' not allowed here.", option->key);
      status = -1;
    }

    if (status != 0)
      break;
  }

  return (status);
} /* int ut_config */
/*
 * End of the functions used to configure threshold values.
*/ /* }}} */ static threshold_t *threshold_search (const value_list_t *vl) { threshold_t *th; if ((th = threshold_get (vl->host, vl->plugin, vl->plugin_instance, vl->type, vl->type_instance)) != NULL) return (th); else if ((th = threshold_get (vl->host, vl->plugin, vl->plugin_instance, vl->type, NULL)) != NULL) return (th); else if ((th = threshold_get (vl->host, vl->plugin, NULL, vl->type, vl->type_instance)) != NULL) return (th); else if ((th = threshold_get (vl->host, vl->plugin, NULL, vl->type, NULL)) != NULL) return (th); else if ((th = threshold_get (vl->host, "", NULL, vl->type, vl->type_instance)) != NULL) return (th); else if ((th = threshold_get (vl->host, "", NULL, vl->type, NULL)) != NULL) return (th); else if ((th = threshold_get ("", vl->plugin, vl->plugin_instance, vl->type, vl->type_instance)) != NULL) return (th); else if ((th = threshold_get ("", vl->plugin, vl->plugin_instance, vl->type, NULL)) != NULL) return (th); else if ((th = threshold_get ("", vl->plugin, NULL, vl->type, vl->type_instance)) != NULL) return (th); else if ((th = threshold_get ("", vl->plugin, NULL, vl->type, NULL)) != NULL) return (th); else if ((th = threshold_get ("", "", NULL, vl->type, vl->type_instance)) != NULL) return (th); else if ((th = threshold_get ("", "", NULL, vl->type, NULL)) != NULL) return (th); return (NULL); } /* threshold_t *threshold_search */ /* * int ut_report_state * * Checks if the `state' differs from the old state and creates a notification * if appropriate. * Does not fail. */ static int ut_report_state (const data_set_t *ds, const value_list_t *vl, const threshold_t *th, const gauge_t *values, int ds_index, int state) { /* {{{ */ int state_old; notification_t n; char *buf; size_t bufsize; int status; state_old = uc_get_state (ds, vl); /* If the state didn't change, only report if `persistent' is specified and * the state is not `okay'. 
*/ if (state == state_old) { if ((th->flags & UT_FLAG_PERSIST) == 0) return (0); else if (state == STATE_OKAY) return (0); } if (state != state_old) uc_set_state (ds, vl, state); NOTIFICATION_INIT_VL (&n, vl, ds); buf = n.message; bufsize = sizeof (n.message); if (state == STATE_OKAY) n.severity = NOTIF_OKAY; else if (state == STATE_WARNING) n.severity = NOTIF_WARNING; else n.severity = NOTIF_FAILURE; n.time = vl->time; status = ssnprintf (buf, bufsize, "Host %s, plugin %s", vl->host, vl->plugin); buf += status; bufsize -= status; if (vl->plugin_instance[0] != '\0') { status = ssnprintf (buf, bufsize, " (instance %s)", vl->plugin_instance); buf += status; bufsize -= status; } status = ssnprintf (buf, bufsize, " type %s", vl->type); buf += status; bufsize -= status; if (vl->type_instance[0] != '\0') { status = ssnprintf (buf, bufsize, " (instance %s)", vl->type_instance); buf += status; bufsize -= status; } plugin_notification_meta_add_string (&n, "DataSource", ds->ds[ds_index].name); plugin_notification_meta_add_double (&n, "CurrentValue", values[ds_index]); plugin_notification_meta_add_double (&n, "WarningMin", th->warning_min); plugin_notification_meta_add_double (&n, "WarningMax", th->warning_max); plugin_notification_meta_add_double (&n, "FailureMin", th->failure_min); plugin_notification_meta_add_double (&n, "FailureMax", th->failure_max); /* Send an okay notification */ if (state == STATE_OKAY) { status = ssnprintf (buf, bufsize, ": All data sources are within range again."); buf += status; bufsize -= status; } else { double min; double max; min = (state == STATE_ERROR) ? th->failure_min : th->warning_min; max = (state == STATE_ERROR) ? th->failure_max : th->warning_max; if (th->flags & UT_FLAG_INVERT) { if (!isnan (min) && !isnan (max)) { status = ssnprintf (buf, bufsize, ": Data source \"%s\" is currently " "%f. That is within the %s region of %f and %f.", ds->ds[ds_index].name, values[ds_index], (state == STATE_ERROR) ? 
"failure" : "warning", min, max); } else { status = ssnprintf (buf, bufsize, ": Data source \"%s\" is currently " "%f. That is %s the %s threshold of %f.", ds->ds[ds_index].name, values[ds_index], isnan (min) ? "below" : "above", (state == STATE_ERROR) ? "failure" : "warning", isnan (min) ? max : min); } } else /* is not inverted */ { status = ssnprintf (buf, bufsize, ": Data source \"%s\" is currently " "%f. That is %s the %s threshold of %f.", ds->ds[ds_index].name, values[ds_index], (values[ds_index] < min) ? "below" : "above", (state == STATE_ERROR) ? "failure" : "warning", (values[ds_index] < min) ? min : max); } buf += status; bufsize -= status; } plugin_dispatch_notification (&n); plugin_notification_meta_free (n.meta); return (0); } /* }}} int ut_report_state */ /* * int ut_check_one_data_source * * Checks one data source against the given threshold configuration. If the * `DataSource' option is set in the threshold, and the name does NOT match, * `okay' is returned. If the threshold does match, its failure and warning * min and max values are checked and `failure' or `warning' is returned if * appropriate. * Does not fail. 
*/ static int ut_check_one_data_source (const data_set_t *ds, const value_list_t __attribute__((unused)) *vl, const threshold_t *th, const gauge_t *values, int ds_index) { /* {{{ */ const char *ds_name; int is_warning = 0; int is_failure = 0; /* check if this threshold applies to this data source */ ds_name = ds->ds[ds_index].name; if ((th->data_source[0] != 0) && (strcmp (ds_name, th->data_source) != 0)) return (STATE_OKAY); if ((th->flags & UT_FLAG_INVERT) != 0) { is_warning--; is_failure--; } if ((!isnan (th->failure_min) && (th->failure_min > values[ds_index])) || (!isnan (th->failure_max) && (th->failure_max < values[ds_index]))) is_failure++; if (is_failure != 0) return (STATE_ERROR); if ((!isnan (th->warning_min) && (th->warning_min > values[ds_index])) || (!isnan (th->warning_max) && (th->warning_max < values[ds_index]))) is_warning++; if (is_warning != 0) return (STATE_WARNING); return (STATE_OKAY); } /* }}} int ut_check_one_data_source */ /* * int ut_check_one_threshold * * Checks all data sources of a value list against the given threshold, using * the ut_check_one_data_source function above. Returns the worst status, * which is `okay' if nothing has failed. * Returns less than zero if the data set doesn't have any data sources. */ static int ut_check_one_threshold (const data_set_t *ds, const value_list_t *vl, const threshold_t *th, const gauge_t *values, int *ret_ds_index) { /* {{{ */ int ret = -1; int ds_index = -1; int i; for (i = 0; i < ds->ds_num; i++) { int status; status = ut_check_one_data_source (ds, vl, th, values, i); if (ret < status) { ret = status; ds_index = i; } } /* for (ds->ds_num) */ if (ret_ds_index != NULL) *ret_ds_index = ds_index; return (ret); } /* }}} int ut_check_one_threshold */ /* * int ut_check_threshold (PUBLIC) * * Gets a list of matching thresholds and searches for the worst status by one * of the thresholds. Then reports that status using the ut_report_state * function above. 
* Returns zero on success and if no threshold has been configured. Returns * less than zero on failure. */ int ut_check_threshold (const data_set_t *ds, const value_list_t *vl) { /* {{{ */ threshold_t *th; gauge_t *values; int status; int worst_state = -1; threshold_t *worst_th = NULL; int worst_ds_index = -1; if (threshold_tree == NULL) return (0); /* Is this lock really necessary? So far, thresholds are only inserted at * startup. -octo */ pthread_mutex_lock (&threshold_lock); th = threshold_search (vl); pthread_mutex_unlock (&threshold_lock); if (th == NULL) return (0); DEBUG ("ut_check_threshold: Found matching threshold(s)"); values = uc_get_rate (ds, vl); if (values == NULL) return (0); while (th != NULL) { int ds_index = -1; status = ut_check_one_threshold (ds, vl, th, values, &ds_index); if (status < 0) { ERROR ("ut_check_threshold: ut_check_one_threshold failed."); sfree (values); return (-1); } if (worst_state < status) { worst_state = status; worst_th = th; worst_ds_index = ds_index; } th = th->next; } /* while (th) */ status = ut_report_state (ds, vl, worst_th, values, worst_ds_index, worst_state); if (status != 0) { ERROR ("ut_check_threshold: ut_report_state failed."); sfree (values); return (-1); } sfree (values); return (0); } /* }}} int ut_check_threshold */ /* * int ut_check_interesting (PUBLIC) * * Given an identification returns * 0: No threshold is defined. * 1: A threshold has been found. The flag `persist' is off. * 2: A threshold has been found. The flag `persist' is on. * (That is, it is expected that many notifications are sent until the * problem disappears.) */ int ut_check_interesting (const char *name) { /* {{{ */ char *name_copy = NULL; char *host = NULL; char *plugin = NULL; char *plugin_instance = NULL; char *type = NULL; char *type_instance = NULL; int status; data_set_t ds; value_list_t vl; threshold_t *th; /* If there is no tree nothing is interesting. 
*/ if (threshold_tree == NULL) return (0); name_copy = strdup (name); if (name_copy == NULL) { ERROR ("ut_check_interesting: strdup failed."); return (-1); } status = parse_identifier (name_copy, &host, &plugin, &plugin_instance, &type, &type_instance); if (status != 0) { ERROR ("ut_check_interesting: parse_identifier failed."); sfree (name_copy); return (-1); } memset (&ds, '\0', sizeof (ds)); memset (&vl, '\0', sizeof (vl)); sstrncpy (vl.host, host, sizeof (vl.host)); sstrncpy (vl.plugin, plugin, sizeof (vl.plugin)); if (plugin_instance != NULL) sstrncpy (vl.plugin_instance, plugin_instance, sizeof (vl.plugin_instance)); sstrncpy (ds.type, type, sizeof (ds.type)); sstrncpy (vl.type, type, sizeof (vl.type)); if (type_instance != NULL) sstrncpy (vl.type_instance, type_instance, sizeof (vl.type_instance)); sfree (name_copy); host = plugin = plugin_instance = type = type_instance = NULL; th = threshold_search (&vl); if (th == NULL) return (0); if ((th->flags & UT_FLAG_PERSIST) == 0) return (1); return (2); } /* }}} int ut_check_interesting */ /* vim: set sw=2 ts=8 sts=2 tw=78 fdm=marker : */