"""
Utilities for interacting with the nagios server
"""
import subprocess
from accre.exceptions import ACCREError
from accre.config import get_config
# Package configuration, loaded once when this module is imported. The
# functions below read the 'server', 'object-cache' and 'status-file'
# entries of its 'nagios' section.
CONFIG = get_config()
class NagiosConnectError(ACCREError):
    """
    Raised when a command issued against the Nagios server (NSCA push or
    remote file retrieval) exits with a non-zero status.
    """
def send_nsca_notification(
    host_name,
    svc_description,
    return_code=0,
    plugin_output='',
    timeout=60
):
    """
    Send a nagios NSCA push notification to the configured nagios server.

    See nagios NSCA passive check documentation for further details on
    the formatting and meaning of the check parameters.

    :param str host_name: the short name of the host associated with
        the service in the service definition
    :param str svc_description: the description of the service as specified
        in the service definition
    :param int return_code: the return code of the check (0-3)
    :param str plugin_output: text output of the service check, may be
        truncated if it is over approximately 5000 bytes in utf-8 format
    :param int timeout: Time to wait for NSCA command to complete
    :raises NagiosConnectError: if ``send_nsca`` exits with a non-zero
        return code
    :raises subprocess.TimeoutExpired: if the command does not complete
        within ``timeout`` seconds; the child process is killed before the
        exception propagates
    """
    command = ['/usr/sbin/send_nsca', '-H', CONFIG['nagios']['server']]
    cmd_in = f'{host_name}\t{svc_description}\t{return_code}\t{plugin_output}'

    # Cap the message at 5100 bytes. Slicing bytes can cut a multi-byte
    # UTF-8 character in half, so round-trip through decode/encode to drop
    # any partial trailing character instead of sending invalid UTF-8.
    payload = cmd_in.encode('utf-8')[:5100]
    payload = payload.decode('utf-8', errors='ignore').encode('utf-8')

    # subprocess.run kills and reaps the child if the timeout expires,
    # whereas a bare Popen.communicate(timeout=...) leaves it running.
    proc = subprocess.run(
        command,
        input=payload,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        timeout=timeout
    )
    if proc.returncode != 0:
        msg = (
            'NSCA push notification failed with exit code {0}: {1}, {2}.'
            .format(proc.returncode, proc.stdout, proc.stderr)
        )
        raise NagiosConnectError(msg)
def retrieve_object_cache(timeout=60):
    """
    Reads the Nagios object cache on the configured server and returns
    a parser object containing the resulting parsed object cache data.

    The file is fetched by running ``cat`` on the configured server over
    ``ssh`` and decoding its output as UTF-8.

    :param int timeout: Seconds to wait on object cache retrieval before
        failing
    :returns: Nagios object cache data
    :rtype: ObjectCache
    :raises NagiosConnectError: if the ssh command exits with a non-zero
        return code
    :raises subprocess.TimeoutExpired: if the command does not complete
        within ``timeout`` seconds; the child process is killed before the
        exception propagates
    """
    command = [
        'ssh',
        CONFIG['nagios']['server'],
        'cat',
        CONFIG['nagios']['object-cache']
    ]
    # subprocess.run kills and reaps the child if the timeout expires,
    # whereas a bare Popen.communicate(timeout=...) leaves it running.
    proc = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        timeout=timeout
    )
    if proc.returncode != 0:
        msg = (
            'Nagios cache retrieval failed with exit code {0}: {1}.'
            .format(proc.returncode, proc.stderr)
        )
        raise NagiosConnectError(msg)
    oc = ObjectCache()
    oc.parse(proc.stdout.decode('utf-8'))
    return oc
class ObjectCache:
    """
    Parses and stores Nagios object cache file into a dictionary of object
    types, i.e. command, hostgroup, host, service, etc... for each object type
    a list of configured entities is the value. In each list each object
    is a dictionary of keys and values specific to that object.
    """

    def __init__(self):
        # Parsed objects: object type -> list of {key: value} dictionaries
        self.result = {}
        # Type named by the most recent ``define`` statement, if any
        self.cur_type = None
        # Object currently being filled in; None while outside a definition
        self.cur_object = None

    def managed_hosts(self, hostgroups=None):
        """
        Return a list of currently managed hosts in the parsed object cache.
        If no configuration has been parsed, the result will be empty.

        :param list(str) hostgroups: If not None, restrict managed host names
            returned to those in one or more of the given hostgroups
        :returns: List of managed host names from the object cache
        :rtype: list(string)
        """
        if 'host' not in self.result:
            return []
        if hostgroups is None:
            return [h['host_name'] for h in self.result['host']]
        if 'hostgroup' not in self.result:
            return []

        # Hostgroups without a 'members' key contribute no hosts
        members_by_group = {
            g['hostgroup_name']: g['members'].split(',')
            for g in self.result['hostgroup'] if 'members' in g
        }
        # Collect the members of every requested group once so each host
        # needs only a single set lookup; unknown group names are ignored.
        selected = set()
        for group in hostgroups:
            selected.update(members_by_group.get(group, ()))

        return [
            h['host_name'] for h in self.result['host']
            if h['host_name'] in selected
        ]

    def parse(self, cache):
        """
        Parse the object cache file and store all objects in the result
        attribute. If multiple files are parsed, results will be aggregated
        and duplicate objects may result in the list of objects multiple
        times.

        :param str cache: Nagios object cache file text to parse
        :returns: The object cache result dictionary
        :rtype: dict(str, list(dict(str, str)))
        """
        for raw_line in cache.splitlines():
            # drop empty or comment lines
            line = self._strip_comment(raw_line).strip()
            if not line:
                continue
            # It appears that we can assume one object key/value
            # pair per line. Define statements and closing braces
            # seem to appear on their own lines, key/value pairs for
            # objects seem to be on their own line. If we hit a key assume
            # that everything until the end of line is the value
            define_statement = False
            for field in line.split():
                if self.cur_object is None:
                    # we're outside of an object definition
                    if field == 'define':
                        define_statement = True
                    elif define_statement:
                        # the word following 'define' names the object type
                        self.cur_type = field
                        define_statement = False
                    elif field == '{':
                        self.cur_object = {}
                    continue
                # we're inside an object definition
                if field == '}':
                    self._store_current_object()
                    self.cur_object = None
                    self.cur_type = None
                    continue
                # the field is the key; the rest of the line is its value
                self.cur_object[field] = line.partition(field)[2].strip()
                break
        return self.result

    def _store_current_object(self):
        """
        Place the current object in the correct place in the dictionary
        """
        self.result.setdefault(self.cur_type, []).append(self.cur_object)

    def _strip_comment(self, line):
        """
        Get rid of everything from the first # onwards.

        This is not smart about quotes and could truncate command values
        in principle

        :param str line: a single line of object cache text
        :returns: the part of the line before the first #
        :rtype: str
        """
        return line.partition('#')[0]
def retrieve_current_status(timeout=60):
    """
    Reads the Nagios status file on the configured server and returns
    a parser object containing the resulting parsed status data.

    The file is fetched by running ``cat`` on the configured server over
    ``ssh`` and decoding its output as UTF-8.

    :param int timeout: Seconds to wait on status file retrieval before
        failing
    :returns: Nagios current status data
    :rtype: NagiosStatus
    :raises NagiosConnectError: if the ssh command exits with a non-zero
        return code
    :raises subprocess.TimeoutExpired: if the command does not complete
        within ``timeout`` seconds; the child process is killed before the
        exception propagates
    """
    command = [
        'ssh',
        CONFIG['nagios']['server'],
        'cat',
        CONFIG['nagios']['status-file']
    ]
    # subprocess.run kills and reaps the child if the timeout expires,
    # whereas a bare Popen.communicate(timeout=...) leaves it running.
    proc = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        timeout=timeout
    )
    if proc.returncode != 0:
        msg = (
            'Nagios status retrieval failed with exit code {0}: {1}.'
            .format(proc.returncode, proc.stderr)
        )
        raise NagiosConnectError(msg)
    ns = NagiosStatus()
    ns.parse(proc.stdout.decode('utf-8'))
    return ns
class NagiosStatus:
    """
    Parses and stores Nagios status file into a dictionary of object
    types, i.e. hostdowntime, servicestatus, etc... for each object type
    a list of configured entities is the value. In each list each object
    is a dictionary of keys and values specific to that object.
    """

    def __init__(self):
        # Parsed objects: object type -> list of {key: value} dictionaries
        self.result = {}
        # Type name seen before the opening brace of the current object
        self.cur_type = None
        # Object currently being filled in; None while outside a definition
        self.cur_object = None

    def unhandled_hosts_down(self):
        """
        Return a list of hosts that are in a hard down state and have
        not been acknowledged.

        A host is reported when its ``current_state`` and
        ``last_hard_state`` are both ``'1'`` and its
        ``problem_has_been_acknowledged`` is ``'0'``.

        :returns: List of hosts in an unacknowledged hard down state
        :rtype: list(str)
        """
        return [
            host['host_name']
            for host in self.result.get('hoststatus', [])
            if host['current_state'] == '1'
            and host['last_hard_state'] == '1'
            and host['problem_has_been_acknowledged'] == '0'
        ]

    def service_status_time(self, host, serv_desc):
        """
        Returns a tuple containing the hard status code,
        0 (OK) - 3 (UNKNOWN) for a service and status
        for the given host and service description,
        and the timestamp of the last hard
        status change for the service on the host.
        Raises a ValueError if the service on the host
        is not found.

        :param str host: Name of the host to report
        :param str serv_desc: Service description identifier
        :returns: tuple of the hard status code and
            a timestamp of the last hard status change
        :rtype: tuple(int, int)
        :raises ValueError: if no matching service status entry exists,
            including when no service status data has been parsed
        """
        # Only service entries are needed here; a missing 'servicestatus'
        # section is treated the same as "service not found".
        for svc in self.result.get('servicestatus', []):
            if (
                svc['host_name'] == host
                and svc['service_description'] == serv_desc
            ):
                # the first matching entry wins
                return (
                    int(svc['last_hard_state']),
                    int(svc['last_hard_state_change'])
                )
        raise ValueError(
            'No nagios service checks found for host {} with desc "{}"'
            .format(host, serv_desc)
        )

    def host_aggregate_status_time(self, host):
        """
        Returns a tuple containing the highest hard status code,
        0 (OK) - 3 (UNKNOWN) for all services and host status
        for the given host, and the timestamp of the last hard
        status change for any service on the host or the host
        itself. Raises a ValueError if no checks are found for
        the host.

        :param str host: Name of the host to report
        :returns: tuple of the highest hard status code and
            a timestamp of the last hard status change
        :rtype: tuple(int, int)
        :raises ValueError: if neither host nor service status entries
            exist for the host
        """
        # Either section may be absent from the parsed data; treat a
        # missing section as contributing no checks.
        services = self.result.get('servicestatus', [])
        hosts = self.result.get('hoststatus', [])
        checks = [
            x for x in [*services, *hosts] if x['host_name'] == host
        ]
        if not checks:
            raise ValueError(
                'No nagios host or service checks found for {}'
                .format(host)
            )
        states = [int(x['last_hard_state']) for x in checks]
        times = [int(x['last_hard_state_change']) for x in checks]
        return max(states), max(times)

    def parse(self, status):
        """
        Parse the nagios status file and store all objects in the result
        attribute. If multiple files are parsed, results will be aggregated
        and duplicate objects may result in the list of objects multiple
        times.

        :param str status: Nagios status file text to parse
        :returns: The Nagios status result dictionary
        :rtype: dict(str, list(dict(str, str)))
        """
        for raw_line in status.splitlines():
            # drop empty or comment lines
            line = self._strip_comment(raw_line).strip()
            if not line:
                continue
            # It appears that we can assume one object key/value
            # pair per line. Type names and closing braces
            # seem to appear on their own lines, key/value pairs for
            # objects seem to be on their own line. If we hit a key assume
            # that everything until the end of line is the value
            for field in line.split():
                if self.cur_object is None:
                    # we're outside of an object definition: any word
                    # before the opening brace is taken as the type name
                    if field == '{':
                        self.cur_object = {}
                    else:
                        self.cur_type = field
                    continue
                # we're inside an object definition
                if field == '}':
                    self._store_current_object()
                    self.cur_object = None
                    self.cur_type = None
                    continue
                # Split the whole line on the first '=' so that values
                # containing spaces or further '=' characters stay intact
                key, _, value = line.partition('=')
                self.cur_object[key] = value
                break
        return self.result

    def _store_current_object(self):
        """
        Place the current object in the correct place in the dictionary
        """
        self.result.setdefault(self.cur_type, []).append(self.cur_object)

    def _strip_comment(self, line):
        """
        Get rid of everything from the first # onwards.

        This is not smart about quotes and could truncate command values
        in principle

        :param str line: a single line of status file text
        :returns: the part of the line before the first #
        :rtype: str
        """
        return line.partition('#')[0]