mirror of
https://github.com/mtan93/cachet-url-monitor.git
synced 2026-03-08 05:31:58 +00:00
* #10 - Creating auxiliary client class to generate configuration class based on cachet's component list. * Updating the python version in codacy to reduce false positives * Moving some of the cachet operations to the client class to clean up the configuration class and making better constants * Refactoring status to have proper classes and adding more tests. Refactoring the requests tests to use requests-mock. * Removing unused imports from test_scheduler * Adding more tests and the ability to run the client from command line * Updating README and client arg parsing * Fixing broken unit tests
This commit is contained in:
@@ -9,8 +9,10 @@ import time
|
||||
import requests
|
||||
from yaml import dump
|
||||
|
||||
import cachet_url_monitor.latency_unit as latency_unit
|
||||
import cachet_url_monitor.status as st
|
||||
from cachet_url_monitor.client import CachetClient, normalize_url
|
||||
from cachet_url_monitor.exceptions import MetricNonexistentError
|
||||
from cachet_url_monitor.status import ComponentStatus
|
||||
|
||||
# This is the mandatory fields that must be in the configuration file in this
|
||||
# same exact structure.
|
||||
@@ -27,58 +29,17 @@ class ConfigurationValidationError(Exception):
|
||||
return repr(self.value)
|
||||
|
||||
|
||||
class ComponentNonexistentError(Exception):
|
||||
"""Exception raised when the component does not exist."""
|
||||
|
||||
def __init__(self, component_id):
|
||||
self.component_id = component_id
|
||||
|
||||
def __str__(self):
|
||||
return repr(f'Component with id [{self.component_id}] does not exist.')
|
||||
|
||||
|
||||
class MetricNonexistentError(Exception):
|
||||
"""Exception raised when the component does not exist."""
|
||||
|
||||
def __init__(self, metric_id):
|
||||
self.metric_id = metric_id
|
||||
|
||||
def __str__(self):
|
||||
return repr(f'Metric with id [{self.metric_id}] does not exist.')
|
||||
|
||||
|
||||
def get_current_status(endpoint_url, component_id, headers):
|
||||
"""Retrieves the current status of the component that is being monitored. It will fail if the component does
|
||||
not exist or doesn't respond with the expected data.
|
||||
:return component status.
|
||||
"""
|
||||
get_status_request = requests.get(f'{endpoint_url}/components/{component_id}', headers=headers)
|
||||
|
||||
if get_status_request.ok:
|
||||
# The component exists.
|
||||
return int(get_status_request.json()['data']['status'])
|
||||
else:
|
||||
raise ComponentNonexistentError(component_id)
|
||||
|
||||
|
||||
def normalize_url(url):
|
||||
"""If passed url doesn't include schema return it with default one - http."""
|
||||
if not url.lower().startswith('http'):
|
||||
return f'http://{url}'
|
||||
return url
|
||||
|
||||
|
||||
class Configuration(object):
|
||||
"""Represents a configuration file, but it also includes the functionality
|
||||
of assessing the API and pushing the results to cachet.
|
||||
"""
|
||||
|
||||
def __init__(self, config_file, endpoint_index):
|
||||
self.endpoint_index = endpoint_index
|
||||
self.data = config_file
|
||||
def __init__(self, config, endpoint_index: int):
|
||||
self.endpoint_index: int = endpoint_index
|
||||
self.data = config
|
||||
self.endpoint = self.data['endpoints'][endpoint_index]
|
||||
self.current_fails = 0
|
||||
self.trigger_update = True
|
||||
self.current_fails: int = 0
|
||||
self.trigger_update: bool = True
|
||||
|
||||
if 'name' not in self.endpoint:
|
||||
# We have to make this mandatory, otherwise the logs are confusing when there are multiple URLs.
|
||||
@@ -93,11 +54,11 @@ class Configuration(object):
|
||||
self.validate()
|
||||
|
||||
# We store the main information from the configuration file, so we don't keep reading from the data dictionary.
|
||||
self.headers = {'X-Cachet-Token': os.environ.get('CACHET_TOKEN') or self.data['cachet']['token']}
|
||||
self.token = os.environ.get('CACHET_TOKEN') or self.data['cachet']['token']
|
||||
self.headers = {'X-Cachet-Token': self.token}
|
||||
|
||||
self.endpoint_method = self.endpoint['method']
|
||||
self.endpoint_url = self.endpoint['url']
|
||||
self.endpoint_url = normalize_url(self.endpoint_url)
|
||||
self.endpoint_url = normalize_url(self.endpoint['url'])
|
||||
self.endpoint_timeout = self.endpoint.get('timeout') or 1
|
||||
self.endpoint_header = self.endpoint.get('header') or None
|
||||
self.allowed_fails = self.endpoint.get('allowed_fails') or 0
|
||||
@@ -106,6 +67,8 @@ class Configuration(object):
|
||||
self.component_id = self.endpoint['component_id']
|
||||
self.metric_id = self.endpoint.get('metric_id')
|
||||
|
||||
self.client = CachetClient(self.api_url, self.token)
|
||||
|
||||
if self.metric_id is not None:
|
||||
self.default_metric_value = self.get_default_metric_value(self.metric_id)
|
||||
|
||||
@@ -113,8 +76,9 @@ class Configuration(object):
|
||||
self.latency_unit = self.data['cachet'].get('latency_unit') or 's'
|
||||
|
||||
# We need the current status so we monitor the status changes. This is necessary for creating incidents.
|
||||
self.status = get_current_status(self.api_url, self.component_id, self.headers)
|
||||
self.status = self.client.get_component_status(self.component_id)
|
||||
self.previous_status = self.status
|
||||
self.logger.info(f'Component current status: {self.status}')
|
||||
|
||||
# Get remaining settings
|
||||
self.public_incidents = int(self.endpoint['public_incidents'])
|
||||
@@ -178,27 +142,27 @@ class Configuration(object):
|
||||
except requests.ConnectionError:
|
||||
self.message = 'The URL is unreachable: %s %s' % (self.endpoint_method, self.endpoint_url)
|
||||
self.logger.warning(self.message)
|
||||
self.status = st.COMPONENT_STATUS_PARTIAL_OUTAGE
|
||||
self.status = st.ComponentStatus.PARTIAL_OUTAGE
|
||||
return
|
||||
except requests.HTTPError:
|
||||
self.message = 'Unexpected HTTP response'
|
||||
self.logger.exception(self.message)
|
||||
self.status = st.COMPONENT_STATUS_PARTIAL_OUTAGE
|
||||
self.status = st.ComponentStatus.PARTIAL_OUTAGE
|
||||
return
|
||||
except requests.Timeout:
|
||||
except (requests.Timeout, requests.ConnectTimeout):
|
||||
self.message = 'Request timed out'
|
||||
self.logger.warning(self.message)
|
||||
self.status = st.COMPONENT_STATUS_PERFORMANCE_ISSUES
|
||||
self.status = st.ComponentStatus.PERFORMANCE_ISSUES
|
||||
return
|
||||
|
||||
# We initially assume the API is healthy.
|
||||
self.status = st.COMPONENT_STATUS_OPERATIONAL
|
||||
self.status: ComponentStatus = st.ComponentStatus.OPERATIONAL
|
||||
self.message = ''
|
||||
for expectation in self.expectations:
|
||||
status = expectation.get_status(self.request)
|
||||
status: ComponentStatus = expectation.get_status(self.request)
|
||||
|
||||
# The greater the status is, the worse the state of the API is.
|
||||
if status > self.status:
|
||||
if status.value > self.status.value:
|
||||
self.status = status
|
||||
self.message = expectation.get_message(self.request)
|
||||
self.logger.info(self.message)
|
||||
@@ -220,7 +184,7 @@ class Configuration(object):
|
||||
and only for non-operational ones above the configured threshold (allowed_fails).
|
||||
"""
|
||||
|
||||
if self.status != 1:
|
||||
if self.status != st.ComponentStatus.OPERATIONAL:
|
||||
self.current_fails = self.current_fails + 1
|
||||
self.logger.warning(f'Failure #{self.current_fails} with threshold set to {self.allowed_fails}')
|
||||
if self.current_fails <= self.allowed_fails:
|
||||
@@ -234,27 +198,30 @@ class Configuration(object):
|
||||
status based on the previous call to evaluate().
|
||||
"""
|
||||
if self.previous_status == self.status:
|
||||
# We don't want to keep spamming if there's no change in status.
|
||||
self.logger.info(f'No changes to component status.')
|
||||
self.trigger_update = False
|
||||
return
|
||||
|
||||
self.previous_status = self.status
|
||||
|
||||
if not self.trigger_update:
|
||||
return
|
||||
|
||||
self.api_component_status = get_current_status(self.api_url, self.component_id, self.headers)
|
||||
api_component_status = self.client.get_component_status(self.component_id)
|
||||
|
||||
if self.status == self.api_component_status:
|
||||
if self.status == api_component_status:
|
||||
return
|
||||
self.status = api_component_status
|
||||
|
||||
params = {'id': self.component_id, 'status': self.status}
|
||||
component_request = requests.put('%s/components/%d' % (self.api_url, self.component_id), params=params,
|
||||
headers=self.headers)
|
||||
component_request = self.client.push_status(self.component_id, self.status)
|
||||
if component_request.ok:
|
||||
# Successful update
|
||||
self.logger.info('Component update: status [%d]' % (self.status,))
|
||||
self.logger.info(f'Component update: status [{self.status}]')
|
||||
else:
|
||||
# Failed to update the API status
|
||||
self.logger.warning('Component update failed with status [%d]: API'
|
||||
' status: [%d]' % (component_request.status_code, self.status))
|
||||
self.logger.warning(f'Component update failed with HTTP status: {component_request.status_code}. API'
|
||||
f' status: {self.status}')
|
||||
|
||||
def push_metrics(self):
|
||||
"""Pushes the total amount of seconds the request took to get a response from the URL.
|
||||
@@ -263,16 +230,11 @@ class Configuration(object):
|
||||
"""
|
||||
if 'metric_id' in self.data['cachet'] and hasattr(self, 'request'):
|
||||
# We convert the elapsed time from the request, in seconds, to the configured unit.
|
||||
value = self.default_metric_value if self.status != 1 else latency_unit.convert_to_unit(self.latency_unit,
|
||||
self.request.elapsed.total_seconds())
|
||||
params = {'id': self.metric_id, 'value': value,
|
||||
'timestamp': self.current_timestamp}
|
||||
metrics_request = requests.post('%s/metrics/%d/points' % (self.api_url, self.metric_id), params=params,
|
||||
headers=self.headers)
|
||||
|
||||
metrics_request = self.client.push_metrics(self.metric_id, self.latency_unit,
|
||||
self.request.elapsed.total_seconds(), self.current_timestamp)
|
||||
if metrics_request.ok:
|
||||
# Successful metrics upload
|
||||
self.logger.info('Metric uploaded: %.6f %s' % (value, self.latency_unit))
|
||||
self.logger.info('Metric uploaded: %.6f %s' % (self.request.elapsed.total_seconds(), self.latency_unit))
|
||||
else:
|
||||
self.logger.warning(f'Metric upload failed with status [{metrics_request.status_code}]')
|
||||
|
||||
@@ -282,14 +244,10 @@ class Configuration(object):
|
||||
"""
|
||||
if not self.trigger_update:
|
||||
return
|
||||
if hasattr(self, 'incident_id') and self.status == st.COMPONENT_STATUS_OPERATIONAL:
|
||||
# If the incident already exists, it means it was unhealthy but now it's healthy again.
|
||||
params = {'status': 4, 'visible': self.public_incidents, 'component_id': self.component_id,
|
||||
'component_status': self.status,
|
||||
'notify': True}
|
||||
if hasattr(self, 'incident_id') and self.status == st.ComponentStatus.OPERATIONAL:
|
||||
incident_request = self.client.push_incident(self.status, self.public_incidents, self.component_id,
|
||||
previous_incident_id=self.incident_id)
|
||||
|
||||
incident_request = requests.put(f'{self.api_url}/incidents/{self.incident_id}', params=params,
|
||||
headers=self.headers)
|
||||
if incident_request.ok:
|
||||
# Successful metrics upload
|
||||
self.logger.info(
|
||||
@@ -298,11 +256,9 @@ class Configuration(object):
|
||||
else:
|
||||
self.logger.warning(
|
||||
f'Incident update failed with status [{incident_request.status_code}], message: "{self.message}"')
|
||||
elif not hasattr(self, 'incident_id') and self.status != st.COMPONENT_STATUS_OPERATIONAL:
|
||||
# This is the first time the incident is being created.
|
||||
params = {'name': 'URL unavailable', 'message': self.message, 'status': 1, 'visible': self.public_incidents,
|
||||
'component_id': self.component_id, 'component_status': self.status, 'notify': True}
|
||||
incident_request = requests.post(f'{self.api_url}/incidents', params=params, headers=self.headers)
|
||||
elif not hasattr(self, 'incident_id') and self.status != st.ComponentStatus.OPERATIONAL:
|
||||
incident_request = self.client.push_incident(self.status, self.public_incidents, self.component_id,
|
||||
message=self.message)
|
||||
if incident_request.ok:
|
||||
# Successful incident upload.
|
||||
self.incident_id = incident_request.json()['data']['id']
|
||||
@@ -338,20 +294,20 @@ class Expectation(object):
|
||||
self.incident_status = self.parse_incident_status(configuration)
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_status(self, response):
|
||||
def get_status(self, response) -> ComponentStatus:
|
||||
"""Returns the status of the API, following cachet's component status
|
||||
documentation: https://docs.cachethq.io/docs/component-statuses
|
||||
"""
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_message(self, response):
|
||||
def get_message(self, response) -> str:
|
||||
"""Gets the error message."""
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_default_incident(self):
|
||||
"""Returns the default status when this incident happens."""
|
||||
|
||||
def parse_incident_status(self, configuration):
|
||||
def parse_incident_status(self, configuration) -> ComponentStatus:
|
||||
return st.INCIDENT_MAP.get(configuration.get('incident', None), self.get_default_incident())
|
||||
|
||||
|
||||
@@ -374,14 +330,14 @@ class HttpStatus(Expectation):
|
||||
# We shouldn't look into more than one value, as this is a range value.
|
||||
return int(statuses[0]), int(statuses[1])
|
||||
|
||||
def get_status(self, response):
|
||||
def get_status(self, response) -> ComponentStatus:
|
||||
if self.status_range[0] <= response.status_code < self.status_range[1]:
|
||||
return st.COMPONENT_STATUS_OPERATIONAL
|
||||
return st.ComponentStatus.OPERATIONAL
|
||||
else:
|
||||
return self.incident_status
|
||||
|
||||
def get_default_incident(self):
|
||||
return st.COMPONENT_STATUS_PARTIAL_OUTAGE
|
||||
return st.ComponentStatus.PARTIAL_OUTAGE
|
||||
|
||||
def get_message(self, response):
|
||||
return f'Unexpected HTTP status ({response.status_code})'
|
||||
@@ -395,14 +351,14 @@ class Latency(Expectation):
|
||||
self.threshold = configuration['threshold']
|
||||
super(Latency, self).__init__(configuration)
|
||||
|
||||
def get_status(self, response):
|
||||
def get_status(self, response) -> ComponentStatus:
|
||||
if response.elapsed.total_seconds() <= self.threshold:
|
||||
return st.COMPONENT_STATUS_OPERATIONAL
|
||||
return st.ComponentStatus.OPERATIONAL
|
||||
else:
|
||||
return self.incident_status
|
||||
|
||||
def get_default_incident(self):
|
||||
return st.COMPONENT_STATUS_PERFORMANCE_ISSUES
|
||||
return st.ComponentStatus.PERFORMANCE_ISSUES
|
||||
|
||||
def get_message(self, response):
|
||||
return 'Latency above threshold: %.4f seconds' % (response.elapsed.total_seconds(),)
|
||||
@@ -417,14 +373,14 @@ class Regex(Expectation):
|
||||
self.regex = re.compile(configuration['regex'], re.UNICODE + re.DOTALL)
|
||||
super(Regex, self).__init__(configuration)
|
||||
|
||||
def get_status(self, response):
|
||||
def get_status(self, response) -> ComponentStatus:
|
||||
if self.regex.match(response.text):
|
||||
return st.COMPONENT_STATUS_OPERATIONAL
|
||||
return st.ComponentStatus.OPERATIONAL
|
||||
else:
|
||||
return self.incident_status
|
||||
|
||||
def get_default_incident(self):
|
||||
return st.COMPONENT_STATUS_PARTIAL_OUTAGE
|
||||
return st.ComponentStatus.PARTIAL_OUTAGE
|
||||
|
||||
def get_message(self, response):
|
||||
return 'Regex did not match anything in the body'
|
||||
|
||||
Reference in New Issue
Block a user