|
|
@@ -39,7 +39,7 @@ class ComponentNonexistentError(Exception):
|
|
|
|
self.component_id = component_id
|
|
|
|
self.component_id = component_id
|
|
|
|
|
|
|
|
|
|
|
|
def __str__(self):
|
|
|
|
def __str__(self):
|
|
|
|
return repr('Component with id [%d] does not exist.' % (self.component_id,))
|
|
|
|
return repr(f'Component with id [{self.component_id}] does not exist.')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class MetricNonexistentError(Exception):
|
|
|
|
class MetricNonexistentError(Exception):
|
|
|
@@ -49,7 +49,7 @@ class MetricNonexistentError(Exception):
|
|
|
|
self.metric_id = metric_id
|
|
|
|
self.metric_id = metric_id
|
|
|
|
|
|
|
|
|
|
|
|
def __str__(self):
|
|
|
|
def __str__(self):
|
|
|
|
return repr('Metric with id [%d] does not exist.' % (self.metric_id,))
|
|
|
|
return repr(f'Metric with id [{self.metric_id}] does not exist.')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_current_status(endpoint_url, component_id, headers):
|
|
|
|
def get_current_status(endpoint_url, component_id, headers):
|
|
|
@@ -57,7 +57,7 @@ def get_current_status(endpoint_url, component_id, headers):
|
|
|
|
not exist or doesn't respond with the expected data.
|
|
|
|
not exist or doesn't respond with the expected data.
|
|
|
|
:return component status.
|
|
|
|
:return component status.
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
get_status_request = requests.get('%s/components/%s' % (endpoint_url, component_id), headers=headers)
|
|
|
|
get_status_request = requests.get(f'{endpoint_url}/components/{component_id}', headers=headers)
|
|
|
|
|
|
|
|
|
|
|
|
if get_status_request.ok:
|
|
|
|
if get_status_request.ok:
|
|
|
|
# The component exists.
|
|
|
|
# The component exists.
|
|
|
@@ -69,7 +69,7 @@ def get_current_status(endpoint_url, component_id, headers):
|
|
|
|
def normalize_url(url):
    """Return ``url`` with a default ``http://`` scheme when it has none.

    The check is case-insensitive and requires a complete ``http://`` or
    ``https://`` prefix, so a host name that merely begins with "http"
    (for example ``httpbin.org``) is still treated as scheme-less and gets
    the default scheme prepended.

    :param url: URL string, with or without a scheme.
    :return: ``url`` unchanged when it already starts with ``http://`` or
        ``https://``; otherwise ``url`` prefixed with ``http://``.
    """
    # str.startswith accepts a tuple, so both schemes are tested in one call.
    if url.lower().startswith(('http://', 'https://')):
        return url
    return f'http://{url}'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -120,7 +120,7 @@ class Configuration(object):
|
|
|
|
os.environ.get('CACHET_PUBLIC_INCIDENTS') or self.data['cachet']['public_incidents'])
|
|
|
|
os.environ.get('CACHET_PUBLIC_INCIDENTS') or self.data['cachet']['public_incidents'])
|
|
|
|
|
|
|
|
|
|
|
|
self.logger.info('Monitoring URL: %s %s' % (self.endpoint_method, self.endpoint_url))
|
|
|
|
self.logger.info('Monitoring URL: %s %s' % (self.endpoint_method, self.endpoint_url))
|
|
|
|
self.expectations = [Expectaction.create(expectation) for expectation in self.data['endpoint']['expectation']]
|
|
|
|
self.expectations = [Expectation.create(expectation) for expectation in self.data['endpoint']['expectation']]
|
|
|
|
for expectation in self.expectations:
|
|
|
|
for expectation in self.expectations:
|
|
|
|
self.logger.info('Registered expectation: %s' % (expectation,))
|
|
|
|
self.logger.info('Registered expectation: %s' % (expectation,))
|
|
|
|
|
|
|
|
|
|
|
@@ -157,16 +157,15 @@ class Configuration(object):
|
|
|
|
configuration_errors.append('%s.%s' % (key, sub_key))
|
|
|
|
configuration_errors.append('%s.%s' % (key, sub_key))
|
|
|
|
|
|
|
|
|
|
|
|
if ('endpoint' in self.data and 'expectation' in
|
|
|
|
if ('endpoint' in self.data and 'expectation' in
|
|
|
|
self.data['endpoint']):
|
|
|
|
self.data['endpoint']):
|
|
|
|
if (not isinstance(self.data['endpoint']['expectation'], list) or
|
|
|
|
if (not isinstance(self.data['endpoint']['expectation'], list) or
|
|
|
|
(isinstance(self.data['endpoint']['expectation'], list) and
|
|
|
|
(isinstance(self.data['endpoint']['expectation'], list) and
|
|
|
|
len(self.data['endpoint']['expectation']) == 0)):
|
|
|
|
len(self.data['endpoint']['expectation']) == 0)):
|
|
|
|
configuration_errors.append('endpoint.expectation')
|
|
|
|
configuration_errors.append('endpoint.expectation')
|
|
|
|
|
|
|
|
|
|
|
|
if len(configuration_errors) > 0:
|
|
|
|
if len(configuration_errors) > 0:
|
|
|
|
raise ConfigurationValidationError(
|
|
|
|
raise ConfigurationValidationError(
|
|
|
|
'Config file [%s] failed validation. Missing keys: %s' % (self.config_file,
|
|
|
|
f"Config file [{self.config_file}] failed validation. Missing keys: {', '.join(configuration_errors)}")
|
|
|
|
', '.join(configuration_errors)))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def evaluate(self):
|
|
|
|
def evaluate(self):
|
|
|
|
"""Sends the request to the URL set in the configuration and executes
|
|
|
|
"""Sends the request to the URL set in the configuration and executes
|
|
|
@@ -175,9 +174,10 @@ class Configuration(object):
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
try:
|
|
|
|
try:
|
|
|
|
if self.endpoint_header is not None:
|
|
|
|
if self.endpoint_header is not None:
|
|
|
|
self.request = requests.request(self.endpoint_method, self.endpoint_url, timeout=self.endpoint_timeout, headers=self.endpoint_header)
|
|
|
|
self.request = requests.request(self.endpoint_method, self.endpoint_url, timeout=self.endpoint_timeout,
|
|
|
|
|
|
|
|
headers=self.endpoint_header)
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
self.request = requests.request(self.endpoint_method, self.endpoint_url, timeout=self.endpoint_timeout)
|
|
|
|
self.request = requests.request(self.endpoint_method, self.endpoint_url, timeout=self.endpoint_timeout)
|
|
|
|
self.current_timestamp = int(time.time())
|
|
|
|
self.current_timestamp = int(time.time())
|
|
|
|
except requests.ConnectionError:
|
|
|
|
except requests.ConnectionError:
|
|
|
|
self.message = 'The URL is unreachable: %s %s' % (self.endpoint_method, self.endpoint_url)
|
|
|
|
self.message = 'The URL is unreachable: %s %s' % (self.endpoint_method, self.endpoint_url)
|
|
|
@@ -208,7 +208,7 @@ class Configuration(object):
|
|
|
|
self.logger.info(self.message)
|
|
|
|
self.logger.info(self.message)
|
|
|
|
|
|
|
|
|
|
|
|
def print_out(self):
|
|
|
|
def print_out(self):
|
|
|
|
self.logger.info('Current configuration:\n%s' % (self.__repr__()))
|
|
|
|
self.logger.info(f'Current configuration:\n{self.__repr__()}')
|
|
|
|
|
|
|
|
|
|
|
|
def __repr__(self):
|
|
|
|
def __repr__(self):
|
|
|
|
temporary_data = copy.deepcopy(self.data)
|
|
|
|
temporary_data = copy.deepcopy(self.data)
|
|
|
@@ -224,7 +224,7 @@ class Configuration(object):
|
|
|
|
|
|
|
|
|
|
|
|
if self.status != 1:
|
|
|
|
if self.status != 1:
|
|
|
|
self.current_fails = self.current_fails + 1
|
|
|
|
self.current_fails = self.current_fails + 1
|
|
|
|
self.logger.info('Failure #%s with threshold set to %s' % (self.current_fails, self.allowed_fails))
|
|
|
|
self.logger.warning(f'Failure #{self.current_fails} with threshold set to {self.allowed_fails}')
|
|
|
|
if self.current_fails <= self.allowed_fails:
|
|
|
|
if self.current_fails <= self.allowed_fails:
|
|
|
|
self.trigger_update = False
|
|
|
|
self.trigger_update = False
|
|
|
|
return
|
|
|
|
return
|
|
|
@@ -276,8 +276,7 @@ class Configuration(object):
|
|
|
|
# Successful metrics upload
|
|
|
|
# Successful metrics upload
|
|
|
|
self.logger.info('Metric uploaded: %.6f %s' % (value, self.latency_unit))
|
|
|
|
self.logger.info('Metric uploaded: %.6f %s' % (value, self.latency_unit))
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
self.logger.warning('Metric upload failed with status [%d]' %
|
|
|
|
self.logger.warning(f'Metric upload failed with status [{metrics_request.status_code}]')
|
|
|
|
(metrics_request.status_code,))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def push_incident(self):
|
|
|
|
def push_incident(self):
|
|
|
|
"""If the component status has changed, we create a new incident (if this is the first time it becomes unstable)
|
|
|
|
"""If the component status has changed, we create a new incident (if this is the first time it becomes unstable)
|
|
|
@@ -291,36 +290,33 @@ class Configuration(object):
|
|
|
|
'component_status': self.status,
|
|
|
|
'component_status': self.status,
|
|
|
|
'notify': True}
|
|
|
|
'notify': True}
|
|
|
|
|
|
|
|
|
|
|
|
incident_request = requests.put('%s/incidents/%d' % (self.api_url, self.incident_id), params=params,
|
|
|
|
incident_request = requests.put(f'{self.api_url}/incidents/{self.incident_id}', params=params,
|
|
|
|
headers=self.headers)
|
|
|
|
headers=self.headers)
|
|
|
|
if incident_request.ok:
|
|
|
|
if incident_request.ok:
|
|
|
|
# Successful metrics upload
|
|
|
|
# Successful metrics upload
|
|
|
|
self.logger.info(
|
|
|
|
self.logger.info(
|
|
|
|
'Incident updated, API healthy again: component status [%d], message: "%s"' % (
|
|
|
|
f'Incident updated, API healthy again: component status [{self.status}], message: "{self.message}"')
|
|
|
|
self.status, self.message))
|
|
|
|
|
|
|
|
del self.incident_id
|
|
|
|
del self.incident_id
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
self.logger.warning('Incident update failed with status [%d], message: "%s"' % (
|
|
|
|
self.logger.warning(
|
|
|
|
incident_request.status_code, self.message))
|
|
|
|
f'Incident update failed with status [{incident_request.status_code}], message: "{self.message}"')
|
|
|
|
elif not hasattr(self, 'incident_id') and self.status != st.COMPONENT_STATUS_OPERATIONAL:
|
|
|
|
elif not hasattr(self, 'incident_id') and self.status != st.COMPONENT_STATUS_OPERATIONAL:
|
|
|
|
# This is the first time the incident is being created.
|
|
|
|
# This is the first time the incident is being created.
|
|
|
|
params = {'name': 'URL unavailable', 'message': self.message, 'status': 1, 'visible': self.public_incidents,
|
|
|
|
params = {'name': 'URL unavailable', 'message': self.message, 'status': 1, 'visible': self.public_incidents,
|
|
|
|
'component_id': self.component_id, 'component_status': self.status, 'notify': True}
|
|
|
|
'component_id': self.component_id, 'component_status': self.status, 'notify': True}
|
|
|
|
incident_request = requests.post('%s/incidents' % (self.api_url,), params=params, headers=self.headers)
|
|
|
|
incident_request = requests.post(f'{self.api_url}/incidents', params=params, headers=self.headers)
|
|
|
|
if incident_request.ok:
|
|
|
|
if incident_request.ok:
|
|
|
|
# Successful incident upload.
|
|
|
|
# Successful incident upload.
|
|
|
|
self.incident_id = incident_request.json()['data']['id']
|
|
|
|
self.incident_id = incident_request.json()['data']['id']
|
|
|
|
self.logger.info(
|
|
|
|
self.logger.info(
|
|
|
|
'Incident uploaded, API unhealthy: component status [%d], message: "%s"' % (
|
|
|
|
f'Incident uploaded, API unhealthy: component status [{self.status}], message: "{self.message}"')
|
|
|
|
self.status, self.message))
|
|
|
|
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
self.logger.warning(
|
|
|
|
self.logger.warning(
|
|
|
|
'Incident upload failed with status [%d], message: "%s"' % (
|
|
|
|
f'Incident upload failed with status [{incident_request.status_code}], message: "{self.message}"')
|
|
|
|
incident_request.status_code, self.message))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Expectaction(object):
|
|
|
|
class Expectation(object):
|
|
|
|
"""Base class for URL result expectations. Any new excpectation should extend
|
|
|
|
"""Base class for URL result expectations. Any new expectation should extend
|
|
|
|
this class and the name added to create() method.
|
|
|
|
this class and the name added to create() method.
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
@@ -329,6 +325,7 @@ class Expectaction(object):
|
|
|
|
"""Creates a list of expectations based on the configuration types
|
|
|
|
"""Creates a list of expectations based on the configuration types
|
|
|
|
list.
|
|
|
|
list.
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
|
|
|
|
# If a new expectation is created, this is where we need to add it.
|
|
|
|
expectations = {
|
|
|
|
expectations = {
|
|
|
|
'HTTP_STATUS': HttpStatus,
|
|
|
|
'HTTP_STATUS': HttpStatus,
|
|
|
|
'LATENCY': Latency,
|
|
|
|
'LATENCY': Latency,
|
|
|
@@ -336,6 +333,9 @@ class Expectaction(object):
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return expectations.get(configuration['type'])(configuration)
|
|
|
|
return expectations.get(configuration['type'])(configuration)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, configuration):
|
|
|
|
|
|
|
|
self.incident_status = self.parse_incident_status(configuration)
|
|
|
|
|
|
|
|
|
|
|
|
@abc.abstractmethod
|
|
|
|
@abc.abstractmethod
|
|
|
|
def get_status(self, response):
|
|
|
|
def get_status(self, response):
|
|
|
|
"""Returns the status of the API, following cachet's component status
|
|
|
|
"""Returns the status of the API, following cachet's component status
|
|
|
@@ -346,43 +346,58 @@ class Expectaction(object):
|
|
|
|
def get_message(self, response):
|
|
|
|
def get_message(self, response):
|
|
|
|
"""Gets the error message."""
|
|
|
|
"""Gets the error message."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@abc.abstractmethod
|
|
|
|
|
|
|
|
def get_default_incident(self):
|
|
|
|
|
|
|
|
"""Returns the default status when this incident happens."""
|
|
|
|
|
|
|
|
|
|
|
|
class HttpStatus(Expectaction):
|
|
|
|
def parse_incident_status(self, configuration):
|
|
|
|
|
|
|
|
return st.INCIDENT_MAP.get(configuration.get('incident', None), self.get_default_incident())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class HttpStatus(Expectation):
|
|
|
|
def __init__(self, configuration):
|
|
|
|
def __init__(self, configuration):
|
|
|
|
self.status_range = HttpStatus.parse_range(configuration['status_range'])
|
|
|
|
self.status_range = HttpStatus.parse_range(configuration['status_range'])
|
|
|
|
|
|
|
|
super(HttpStatus, self).__init__(configuration)
|
|
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
@staticmethod
|
|
|
|
def parse_range(range_string):
|
|
|
|
def parse_range(range_string):
|
|
|
|
statuses = range_string.split("-")
|
|
|
|
statuses = range_string.split("-")
|
|
|
|
if len(statuses) == 1:
|
|
|
|
if len(statuses) == 1:
|
|
|
|
# When there was no range given, we should treat the first number as a single status check.
|
|
|
|
# When there was no range given, we should treat the first number as a single status check.
|
|
|
|
return (int(statuses[0]), int(statuses[0]) + 1)
|
|
|
|
return int(statuses[0]), int(statuses[0]) + 1
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
# We shouldn't look into more than one value, as this is a range value.
|
|
|
|
# We shouldn't look into more than one value, as this is a range value.
|
|
|
|
return (int(statuses[0]), int(statuses[1]))
|
|
|
|
return int(statuses[0]), int(statuses[1])
|
|
|
|
|
|
|
|
|
|
|
|
def get_status(self, response):
|
|
|
|
def get_status(self, response):
|
|
|
|
if response.status_code >= self.status_range[0] and response.status_code < self.status_range[1]:
|
|
|
|
if self.status_range[0] <= response.status_code < self.status_range[1]:
|
|
|
|
return st.COMPONENT_STATUS_OPERATIONAL
|
|
|
|
return st.COMPONENT_STATUS_OPERATIONAL
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
return st.COMPONENT_STATUS_PARTIAL_OUTAGE
|
|
|
|
return self.incident_status
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_default_incident(self):
|
|
|
|
|
|
|
|
return st.COMPONENT_STATUS_PARTIAL_OUTAGE
|
|
|
|
|
|
|
|
|
|
|
|
def get_message(self, response):
|
|
|
|
def get_message(self, response):
|
|
|
|
return 'Unexpected HTTP status (%s)' % (response.status_code,)
|
|
|
|
return f'Unexpected HTTP status ({response.status_code})'
|
|
|
|
|
|
|
|
|
|
|
|
def __str__(self):
|
|
|
|
def __str__(self):
|
|
|
|
return repr('HTTP status range: %s' % (self.status_range,))
|
|
|
|
return repr(f'HTTP status range: {self.status_range}')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Latency(Expectaction):
|
|
|
|
class Latency(Expectation):
|
|
|
|
def __init__(self, configuration):
|
|
|
|
def __init__(self, configuration):
|
|
|
|
self.threshold = configuration['threshold']
|
|
|
|
self.threshold = configuration['threshold']
|
|
|
|
|
|
|
|
super(Latency, self).__init__(configuration)
|
|
|
|
|
|
|
|
|
|
|
|
def get_status(self, response):
|
|
|
|
def get_status(self, response):
|
|
|
|
if response.elapsed.total_seconds() <= self.threshold:
|
|
|
|
if response.elapsed.total_seconds() <= self.threshold:
|
|
|
|
return st.COMPONENT_STATUS_OPERATIONAL
|
|
|
|
return st.COMPONENT_STATUS_OPERATIONAL
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
return st.COMPONENT_STATUS_PERFORMANCE_ISSUES
|
|
|
|
return self.incident_status
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_default_incident(self):
|
|
|
|
|
|
|
|
return st.COMPONENT_STATUS_PERFORMANCE_ISSUES
|
|
|
|
|
|
|
|
|
|
|
|
def get_message(self, response):
|
|
|
|
def get_message(self, response):
|
|
|
|
return 'Latency above threshold: %.4f seconds' % (response.elapsed.total_seconds(),)
|
|
|
|
return 'Latency above threshold: %.4f seconds' % (response.elapsed.total_seconds(),)
|
|
|
@@ -391,19 +406,23 @@ class Latency(Expectaction):
|
|
|
|
return repr('Latency threshold: %.4f seconds' % (self.threshold,))
|
|
|
|
return repr('Latency threshold: %.4f seconds' % (self.threshold,))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Regex(Expectaction):
|
|
|
|
class Regex(Expectation):
|
|
|
|
def __init__(self, configuration):
|
|
|
|
def __init__(self, configuration):
|
|
|
|
self.regex_string = configuration['regex']
|
|
|
|
self.regex_string = configuration['regex']
|
|
|
|
self.regex = re.compile(configuration['regex'], re.UNICODE + re.DOTALL)
|
|
|
|
self.regex = re.compile(configuration['regex'], re.UNICODE + re.DOTALL)
|
|
|
|
|
|
|
|
super(Regex, self).__init__(configuration)
|
|
|
|
|
|
|
|
|
|
|
|
def get_status(self, response):
|
|
|
|
def get_status(self, response):
|
|
|
|
if self.regex.match(response.text):
|
|
|
|
if self.regex.match(response.text):
|
|
|
|
return st.COMPONENT_STATUS_OPERATIONAL
|
|
|
|
return st.COMPONENT_STATUS_OPERATIONAL
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
return st.COMPONENT_STATUS_PARTIAL_OUTAGE
|
|
|
|
return self.incident_status
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_default_incident(self):
|
|
|
|
|
|
|
|
return st.COMPONENT_STATUS_PARTIAL_OUTAGE
|
|
|
|
|
|
|
|
|
|
|
|
def get_message(self, response):
|
|
|
|
def get_message(self, response):
|
|
|
|
return 'Regex did not match anything in the body'
|
|
|
|
return 'Regex did not match anything in the body'
|
|
|
|
|
|
|
|
|
|
|
|
def __str__(self):
|
|
|
|
def __str__(self):
|
|
|
|
return repr('Regex: %s' % (self.regex_string,))
|
|
|
|
return repr(f'Regex: {self.regex_string}')
|
|
|
|