Source code for accre.nagios

"""
Utilities for interacting with the nagios server
"""
import subprocess

from accre.exceptions import ACCREError
from accre.config import get_config


# Configuration returned by accre.config.get_config(), loaded once when this
# module is imported. The functions below read the 'server', 'object-cache'
# and 'status-file' entries of its 'nagios' section.
CONFIG = get_config()


class NagiosConnectError(ACCREError):
    """
    Raised when a command that talks to the Nagios server (an NSCA push
    or a remote file retrieval) exits with a nonzero status.
    """
def send_nsca_notification(
    host_name, svc_description, return_code=0, plugin_output='', timeout=60
):
    """
    Send a nagios NSCA push notification to the configured nagios server.
    See nagios NSCA passive check documentation for further details on
    the formatting and meaning of the check parameters.

    :param str host_name: the short name of the host associated with the
        service in the service definition
    :param str svc_description: the description of the service as
        specified in the service definition
    :param int return_code: the return code of the check (0-3)
    :param str plugin_output: text output of the service check, may be
        truncated if it is over approximately 5000 bytes in utf-8 format
    :param int timeout: Time to wait for NSCA command to complete
    :raises NagiosConnectError: if send_nsca exits with a nonzero code
    :raises subprocess.TimeoutExpired: if send_nsca does not complete
        within ``timeout`` seconds; the child process is killed before
        the exception propagates
    """
    command = ['/usr/sbin/send_nsca', '-H', CONFIG['nagios']['server']]
    cmd_in = f'{host_name}\t{svc_description}\t{return_code}\t{plugin_output}'

    # Cap the payload at 5100 bytes. A byte slice can split a multibyte
    # UTF-8 character, so round-trip with errors='ignore' to drop any
    # partial trailing character instead of sending invalid UTF-8.
    payload = cmd_in.encode('utf-8')[:5100]
    payload = payload.decode('utf-8', errors='ignore').encode('utf-8')

    # subprocess.run kills and reaps the child if the timeout expires;
    # Popen.communicate(timeout=...) alone would leave it running.
    proc = subprocess.run(
        command,
        input=payload,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        timeout=timeout,
    )
    if proc.returncode != 0:
        msg = (
            'NSCA push notification failed with exit code {0}: {1}, {2}.'
            .format(proc.returncode, proc.stdout, proc.stderr)
        )
        raise NagiosConnectError(msg)
def retrieve_object_cache(timeout=60):
    """
    Reads the Nagios object cache on the configured server and
    returns a parser object containing the resulting parsed object
    cache data.

    :param int timeout: Seconds to wait on object cache retrieval
        before failing
    :returns: Nagios object cache data
    :rtype: ObjectCache
    :raises NagiosConnectError: if the remote command exits with a
        nonzero code
    :raises subprocess.TimeoutExpired: if the retrieval does not complete
        within ``timeout`` seconds; the ssh child process is killed
        before the exception propagates
    """
    command = [
        'ssh', CONFIG['nagios']['server'],
        'cat', CONFIG['nagios']['object-cache']
    ]
    # subprocess.run kills and reaps the child if the timeout expires;
    # Popen.communicate(timeout=...) alone would leave ssh running.
    proc = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        timeout=timeout,
    )
    if proc.returncode != 0:
        msg = (
            'Nagios cache retrieval failed with exit code {0}: {1}.'
            .format(proc.returncode, proc.stderr)
        )
        raise NagiosConnectError(msg)
    oc = ObjectCache()
    oc.parse(proc.stdout.decode('utf-8'))
    return oc
class ObjectCache:
    """
    Parses and stores Nagios object cache file into a dictionary
    of object types, i.e. command, hostgroup, host, service, etc...
    for each object type a list of configured entities is the value.
    In each list each object is a dictionary of keys and values specific
    to that object.

    Parser state (``cur_type`` / ``cur_object``) is kept on the instance,
    so an object definition may span several lines of input.
    """

    def __init__(self):
        # Maps object type -> list of parsed objects (dicts of str -> str)
        self.result = {}
        # Type named by the most recent 'define' statement, if any
        self.cur_type = None
        # Object currently being filled in; None while outside of braces
        self.cur_object = None

    def managed_hosts(self, hostgroups=None):
        """
        Return a list of currently managed hosts in the parsed
        object cache. If no configuration has been parsed, the
        result will be empty.

        :param list(str) hostgroups: If not None, restrict managed
            host names returned to those in one or more of the given
            hostgroups
        :returns: List of managed host names from the object cache
        :rtype: list(string)
        """
        if 'host' not in self.result:
            return []
        if hostgroups is None:
            return [h['host_name'] for h in self.result['host']]
        if 'hostgroup' not in self.result:
            return []

        # hostgroup name -> member host names; groups without a 'members'
        # key are skipped, and a later duplicate group replaces an earlier
        # one.
        members_by_group = {
            g['hostgroup_name']: g['members'].split(',')
            for g in self.result['hostgroup']
            if 'members' in g
        }

        # Collect the members of every requested group into one set so
        # each host needs a single O(1) membership test rather than a
        # scan of every member list. Unknown group names are ignored.
        wanted = set()
        for group in hostgroups:
            wanted.update(members_by_group.get(group, ()))

        # Preserve the order (and any duplicates) of the host list.
        return [
            h['host_name'] for h in self.result['host']
            if h['host_name'] in wanted
        ]

    def parse(self, cache):
        """
        Parse the object cache file and store all objects in the result
        attribute. If multiple files are parsed, results will be
        aggregated and duplicate objects may result in the list of objects
        multiple times.

        :param str cache: Nagios object cache file text to parse
        :returns: The object cache result dictionary
        :rtype: dict(str, list(dict(str, str)))
        """
        for line in cache.splitlines():
            # drop empty or comment lines
            line = self._strip_comment(line).strip()
            if not line:
                continue

            # It appears that we can assume one object key/value
            # pair per line. Define statements and closing braces
            # seem to appear on their own lines, key/value pairs for
            # objects seem to be on their own line. If we hit a key assume
            # that everything until the end of line is the value
            define_statement = False
            for field in line.split():
                # we're outside of an object definition
                if self.cur_object is None:
                    if field == 'define':
                        define_statement = True
                    elif define_statement:
                        # the token right after 'define' names the type
                        self.cur_type = field
                        define_statement = False
                    elif field == '{':
                        self.cur_object = {}
                    # any other token outside of an object is ignored
                    continue

                # we're inside an object definition
                if field == '}':
                    self._store_current_object()
                    self.cur_object = None
                    self.cur_type = None
                    continue

                # first token is the key, the remainder of the line
                # is the value
                self.cur_object[field] = line.partition(field)[2].strip()
                break
        return self.result

    def _store_current_object(self):
        """
        Place the current object in the correct place
        in the dictionary
        """
        self.result.setdefault(self.cur_type, []).append(self.cur_object)

    def _strip_comment(self, line):
        """
        Get rid of everything past the first #

        This is not smart about quotes and could truncate
        command values in principle
        """
        return line.partition('#')[0]
def retrieve_current_status(timeout=60):
    """
    Reads the Nagios status file on the configured server and
    returns a parser object containing the resulting parsed status
    data.

    :param int timeout: Seconds to wait on status file retrieval
        before failing
    :returns: Nagios current status data
    :rtype: NagiosStatus
    :raises NagiosConnectError: if the remote command exits with a
        nonzero code
    :raises subprocess.TimeoutExpired: if the retrieval does not complete
        within ``timeout`` seconds; the ssh child process is killed
        before the exception propagates
    """
    command = [
        'ssh', CONFIG['nagios']['server'],
        'cat', CONFIG['nagios']['status-file']
    ]
    # subprocess.run kills and reaps the child if the timeout expires;
    # Popen.communicate(timeout=...) alone would leave ssh running.
    proc = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        timeout=timeout,
    )
    if proc.returncode != 0:
        msg = (
            'Nagios status retrieval failed with exit code {0}: {1}.'
            .format(proc.returncode, proc.stderr)
        )
        raise NagiosConnectError(msg)
    ns = NagiosStatus()
    ns.parse(proc.stdout.decode('utf-8'))
    return ns
class NagiosStatus:
    """
    Parses and stores Nagios status file into a dictionary
    of object types, i.e. hostdowntime, servicestatus, etc...
    for each object type a list of configured entities is the value.
    In each list each object is a dictionary of keys and values specific
    to that object.

    Parser state (``cur_type`` / ``cur_object``) is kept on the instance,
    so an object definition may span several lines of input.
    """

    def __init__(self):
        # Maps object type -> list of parsed objects (dicts of str -> str)
        self.result = {}
        # Most recent token seen outside of braces, used as the object type
        self.cur_type = None
        # Object currently being filled in; None while outside of braces
        self.cur_object = None

    def unhandled_hosts_down(self):
        """
        Return a list of hosts that are in a hard down state and
        have not been acknowledged.

        :returns: List of hosts in an unacknowledged hard down state
        :rtype: list(str)
        """
        # All values are stored as strings by parse(), hence the string
        # comparisons.
        return [
            host['host_name']
            for host in self.result.get('hoststatus', [])
            if host['current_state'] == '1'
            and host['last_hard_state'] == '1'
            and host['problem_has_been_acknowledged'] == '0'
        ]

    def service_status_time(self, host, serv_desc):
        """
        Returns a tuple containing the hard status code,
        0 (OK) - 3 (UNKNOWN) for a service and status for the given host
        and service description, and the timestamp of the last hard status
        change for the service on the host.

        Raises a ValueError if the service on the host is not found,
        including when no service status data has been parsed at all.

        :param str host: Name of the host to report
        :param str serv_desc: Service description identifier
        :returns: tuple of the hard status code and a timestamp
            of the last hard status change
        :rtype: tuple(int, int)
        :raises ValueError: if no matching service check is found
        """
        # Only service entries are needed here; a missing section is
        # treated as "no checks found" rather than raising KeyError.
        for svc in self.result.get('servicestatus', []):
            if (
                svc['host_name'] == host
                and svc['service_description'] == serv_desc
            ):
                # the first matching entry wins
                return (
                    int(svc['last_hard_state']),
                    int(svc['last_hard_state_change']),
                )
        raise ValueError(
            'No nagios service checks found for host {} with desc "{}"'
            .format(host, serv_desc)
        )

    def host_aggregate_status_time(self, host):
        """
        Returns a tuple containing the highest hard status code,
        0 (OK) - 3 (UNKNOWN) for all services and host status for
        the given host, and the timestamp of the last hard status
        change for any service on the host or the host itself.

        Raises a ValueError if no checks are found for the host.

        :param str host: Name of the host to report
        :returns: tuple of the highest hard status code and a timestamp
            of the last hard status change
        :rtype: tuple(int, int)
        :raises ValueError: if no host or service check is found
        """
        states = []
        times = []
        # A section missing from the parsed data simply contributes no
        # entries instead of raising KeyError.
        for section in ('servicestatus', 'hoststatus'):
            for entry in self.result.get(section, []):
                if entry['host_name'] == host:
                    states.append(int(entry['last_hard_state']))
                    times.append(int(entry['last_hard_state_change']))
        if not states:
            raise ValueError(
                'No nagios host or service checks found for {}'
                .format(host)
            )
        return max(states), max(times)

    def parse(self, status):
        """
        Parse the nagios status file and store all objects in the result
        attribute. If multiple files are parsed, results will be
        aggregated and duplicate objects may result in the list of objects
        multiple times.

        :param str status: Nagios status file text to parse
        :returns: The Nagios status result dictionary
        :rtype: dict(str, list(dict(str, str)))
        """
        for line in status.splitlines():
            # drop empty or comment lines
            line = self._strip_comment(line).strip()
            if not line:
                continue

            # It appears that we can assume one object key/value
            # pair per line. Define statements and closing braces
            # seem to appear on their own lines, key/value pairs for
            # objects seem to be on their own line. If we hit a key assume
            # that everything until the end of line is the value
            for field in line.split():
                # we're outside of an object definition
                if self.cur_object is None:
                    if field != '{':
                        self.cur_type = field
                    else:
                        self.cur_object = {}
                    continue

                # we're inside an object definition
                if field == '}':
                    self._store_current_object()
                    self.cur_object = None
                    self.cur_type = None
                    continue

                # Split the whole line at the first '=' only, so values
                # may themselves contain '=' characters
                key, _, value = line.partition('=')
                self.cur_object[key] = value
                break
        return self.result

    def _store_current_object(self):
        """
        Place the current object in the correct place
        in the dictionary
        """
        self.result.setdefault(self.cur_type, []).append(self.cur_object)

    def _strip_comment(self, line):
        """
        Get rid of everything past the first #

        This is not smart about quotes and could truncate
        command values in principle
        """
        return line.partition('#')[0]